robotjs 结合 tesseract.js 实现“指哪打哪”案例

2024-04-29 分类：前端之旅 阅读(672) 评论(0)

示例代码:

import robot from 'robotjs';
import tesseract from 'tesseract.js'
import jimp from 'jimp';
/**
 * Capture the screen and return the screenshot as a base64-encoded PNG.
 *
 * Despite its name, this is the screenshot entry point: it grabs the screen
 * with robotjs, swaps the red and blue bytes of every pixel in place, wraps
 * the buffer in a Jimp image, writes a copy to `screenshot.png`, and resolves
 * with the base64 string produced by Jimp.
 *
 * @returns {Promise<string>} Resolves with the base64 PNG; rejects with the
 *   error reported by `getBase64` if the encoding fails.
 */
export const swapRedAndBlueChannel = () => {
  console.log('开始截图...');
  const screenshot = robot.screen.capture();

  // Swap bytes 0 and 2 of every 4-byte pixel in place.
  // Presumably needed because robotjs returns BGRA while Jimp expects RGBA —
  // TODO confirm against the robotjs bitmap documentation.
  const swapChannels = (bmp) => {
    const byteCount = bmp.width * bmp.height * 4;
    for (let i = 0; i < byteCount; i += 4) {
      [bmp.image[i], bmp.image[i + 2]] = [bmp.image[i + 2], bmp.image[i]];
    }
  };

  console.log('截图色彩转换...');
  swapChannels(screenshot);

  const screenJimp = new jimp({
    data: screenshot.image,
    width: screenshot.width,
    height: screenshot.height,
  });

  // Copy of the screenshot on disk; the write is not awaited here.
  screenJimp.write('screenshot.png');

  return new Promise((resolve, reject) => {
    console.log('截图转为BASE64...');
    screenJimp.getBase64(jimp.MIME_PNG, (err, base64) => {
      if (err) {
        // Reject instead of throwing: a throw inside this callback would not
        // settle the promise and the caller would never see the failure.
        reject(err);
        return;
      }
      console.log('返回BASE64的截图...');
      resolve(base64);
    });
  });
};
/**
 * Run OCR on an image and return the first recognized word that contains
 * the target text.
 *
 * @param {*} base - Image to recognize (a base64 image); passed unchanged to
 *   `tesseract.recognize`.
 * @param {string} [targetText='你好'] - Text to look for inside each word.
 * @param {string} [language='eng+chi_sim'] - Recognition language(s):
 *   `chi_sim` (Chinese), `eng` (English), `eng+chi_sim` (both).
 * @returns {Promise<{baseline: object, choices: object, text: string}|null>}
 *   The matching word's `baseline` and `choices` plus the searched text, or
 *   `null` when no word contains `targetText`.
 */
export const findDeskText = async (base, targetText = '你好', language = 'eng+chi_sim') => {
  console.log('开始识别文本');
  console.log('加载语言...', language);

  // Page segmentation mode (Tesseract accepts 0-13). A larger value is not
  // "better"; choose the mode that matches the layout, e.g. 3 = fully
  // automatic page segmentation (the default), 6 = single uniform block of
  // text, 7 = single text line.
  const psm = 3;
  // OCR engine mode: 0 = legacy engine only, 1 = LSTM neural net only,
  // 2 = legacy + LSTM, 3 = default based on what is available.
  const oem = 1;
  // NOTE(review): tesseract.js documents psm as a `setParameters` value and
  // oem as a `createWorker` argument — confirm that passing them in this
  // options object has any effect for the installed version.
  const { data: { words } } = await tesseract.recognize(base, language, { psm, oem });
  console.log('文本匹配完成...');

  for (const word of words) {
    const { baseline = {}, text = '', choices = {} } = word;
    // The destructuring default only covers `undefined`; a `null` (or other
    // non-string) text must not be treated as a match.
    const candidate = typeof text === 'string' ? text.trim() : '';
    if (candidate.includes(targetText)) {
      console.log(`找到目标`, baseline);
      // NOTE(review): this log labels `choices` as the target coordinates;
      // verify which word field actually carries the position.
      console.log(`目标坐标`, choices);
      return {
        baseline,
        choices,
        text: targetText,
      };
    }
    console.log(``);
    console.log(`匹配到文本`, candidate);
    console.log(``);
  }
  return null;
};

import robot from 'robotjs';

import tesseract from 'tesseract.js'

import jimp from 'jimp';

/**
 * Capture the screen and return the screenshot as a base64-encoded PNG.
 *
 * Despite its name, this is the screenshot entry point: it grabs the screen
 * with robotjs, swaps the red and blue bytes of every pixel in place, wraps
 * the buffer in a Jimp image, writes a copy to `screenshot.png`, and resolves
 * with the base64 string produced by Jimp.
 *
 * @returns {Promise<string>} Resolves with the base64 PNG; rejects with the
 *   error reported by `getBase64` if the encoding fails.
 */
export const swapRedAndBlueChannel = () => {
  console.log('开始截图...');
  const screenshot = robot.screen.capture();

  // Swap bytes 0 and 2 of every 4-byte pixel in place.
  // Presumably needed because robotjs returns BGRA while Jimp expects RGBA —
  // TODO confirm against the robotjs bitmap documentation.
  const swapChannels = (bmp) => {
    const byteCount = bmp.width * bmp.height * 4;
    for (let i = 0; i < byteCount; i += 4) {
      [bmp.image[i], bmp.image[i + 2]] = [bmp.image[i + 2], bmp.image[i]];
    }
  };

  console.log('截图色彩转换...');
  swapChannels(screenshot);

  const screenJimp = new jimp({
    data: screenshot.image,
    width: screenshot.width,
    height: screenshot.height,
  });

  // Copy of the screenshot on disk; the write is not awaited here.
  screenJimp.write('screenshot.png');

  return new Promise((resolve, reject) => {
    console.log('截图转为BASE64...');
    screenJimp.getBase64(jimp.MIME_PNG, (err, base64) => {
      if (err) {
        // Reject instead of throwing: a throw inside this callback would not
        // settle the promise and the caller would never see the failure.
        reject(err);
        return;
      }
      console.log('返回BASE64的截图...');
      resolve(base64);
    });
  });
};

/**
 * Run OCR on an image and return the first recognized word that contains
 * the target text.
 *
 * @param {*} base - Image to recognize (a base64 image); passed unchanged to
 *   `tesseract.recognize`.
 * @param {string} [targetText='你好'] - Text to look for inside each word.
 * @param {string} [language='eng+chi_sim'] - Recognition language(s):
 *   `chi_sim` (Chinese), `eng` (English), `eng+chi_sim` (both).
 * @returns {Promise<{baseline: object, choices: object, text: string}|null>}
 *   The matching word's `baseline` and `choices` plus the searched text, or
 *   `null` when no word contains `targetText`.
 */
export const findDeskText = async (base, targetText = '你好', language = 'eng+chi_sim') => {
  console.log('开始识别文本');
  console.log('加载语言...', language);

  // Page segmentation mode (Tesseract accepts 0-13). A larger value is not
  // "better"; choose the mode that matches the layout, e.g. 3 = fully
  // automatic page segmentation (the default), 6 = single uniform block of
  // text, 7 = single text line.
  const psm = 3;
  // OCR engine mode: 0 = legacy engine only, 1 = LSTM neural net only,
  // 2 = legacy + LSTM, 3 = default based on what is available.
  const oem = 1;
  // NOTE(review): tesseract.js documents psm as a `setParameters` value and
  // oem as a `createWorker` argument — confirm that passing them in this
  // options object has any effect for the installed version.
  const { data: { words } } = await tesseract.recognize(base, language, { psm, oem });
  console.log('文本匹配完成...');

  for (const word of words) {
    const { baseline = {}, text = '', choices = {} } = word;
    // The destructuring default only covers `undefined`; a `null` (or other
    // non-string) text must not be treated as a match.
    const candidate = typeof text === 'string' ? text.trim() : '';
    if (candidate.includes(targetText)) {
      console.log(`找到目标`, baseline);
      // NOTE(review): this log labels `choices` as the target coordinates;
      // verify which word field actually carries the position.
      console.log(`目标坐标`, choices);
      return {
        baseline,
        choices,
        text: targetText,
      };
    }
    console.log(``);
    console.log(`匹配到文本`, candidate);
    console.log(``);
  }
  return null;
};

先用 tesseract.js 识别屏幕文字并找到目标坐标，再用 robotjs 把鼠标移动过去并点击！

雅荷心语博客 -心之所向便是光

联系我们 | 关于我们

一	二	三	四	五	六	日
« 六
				1	2	3
4	5	6	7	8	9	10
11	12	13	14	15	16	17
18	19	20	21	22	23	24
25	26	27	28	29	30	31

相关推荐

雅荷心语博客 -心之所向便是光