188 lines
5.0 KiB
TypeScript
188 lines
5.0 KiB
TypeScript
|
|
/**
 * Baidu OCR Provider
 * Pros: high accuracy, fast recognition, low latency from within mainland China.
 * Cons: paid service (a free quota is available), requires a network connection.
 * Official documentation: https://cloud.baidu.com/doc/OCR/index.html
 */
|
||
|
|
|
||
|
|
import { BaseOCRProvider, IImageSource, OCRRecognitionResult, OCRProviderConfig } from './base.provider';
|
||
|
|
import fs from 'fs';
|
||
|
|
|
||
|
|
/**
 * JSON payload returned by the Baidu OCR recognition endpoint.
 *
 * A payload carries either recognised lines (`words_result`) or an error
 * (`error_code` / `error_msg`), so every field is optional and callers must
 * check `error_code` before reading the lines.
 */
interface BaiduOCRResponse {
  /** Recognised text lines; absent when the request failed. */
  words_result?: {
    /** Text content of one recognised line. */
    words: string;
    /** Per-line confidence statistics, when the response includes them. */
    probability?: {
      average: number;
      variance: number;
      min: number;
    };
  }[];
  /** Error code; only present on failed requests. */
  error_code?: number;
  /** Human-readable description accompanying `error_code`. */
  error_msg?: string;
}
|
||
|
|
|
||
|
|
/**
 * JSON payload returned by the Baidu OAuth token endpoint.
 *
 * NOTE(review): the `error` / `error_description` fields follow the usual
 * OAuth 2.0 error shape — confirm against a real failed token exchange.
 */
interface BaiduTokenResponse {
  /** Bearer token to pass as `access_token` on OCR requests. */
  access_token: string;
  /** Token lifetime in seconds, counted from the moment of issue. */
  expires_in: number;
  /** Short error identifier, present only when the exchange failed. */
  error?: string;
  /** Human-readable error detail, present only when the exchange failed. */
  error_description?: string;
}
|
||
|
|
|
||
|
|
/**
 * OCR provider backed by the Baidu `general_basic` text-recognition REST endpoint.
 *
 * Credentials come from the constructor config or, failing that, from the
 * BAIDU_OCR_API_KEY / BAIDU_OCR_SECRET_KEY environment variables. The OAuth
 * access token is cached on the instance and refreshed five minutes before
 * it expires.
 */
export class BaiduProvider extends BaseOCRProvider {
  private readonly apiKey: string;
  private readonly secretKey: string;
  private accessToken: string | null = null;
  /** Epoch milliseconds after which the cached token must be refreshed. */
  private tokenExpireTime = 0;
  private readonly apiUrl = 'https://aip.baidubce.com/rest/2.0/ocr/v1/general_basic';
  private readonly tokenUrl = 'https://aip.baidubce.com/oauth/2.0/token';

  /**
   * @param config Provider configuration; `apiKey` / `secretKey` take
   *   precedence over the corresponding environment variables.
   */
  constructor(config: OCRProviderConfig & { apiKey?: string; secretKey?: string } = {}) {
    super(config);
    this.apiKey = config.apiKey || process.env.BAIDU_OCR_API_KEY || '';
    this.secretKey = config.secretKey || process.env.BAIDU_OCR_SECRET_KEY || '';
  }

  /** Display name of this provider. */
  getName(): string {
    return 'Baidu OCR';
  }

  /** This provider calls a remote service. */
  getType(): 'local' | 'cloud' {
    return 'cloud';
  }

  /**
   * Reports whether both credentials are configured.
   * This only checks for non-empty values; it does not contact Baidu.
   */
  async isAvailable(): Promise<boolean> {
    return !!(this.apiKey && this.secretKey);
  }

  /**
   * Returns a valid access token, reusing the cached one while it is fresh.
   *
   * @param timeout Maximum time in milliseconds to wait for the token endpoint.
   * @throws Error when the endpoint does not return an `access_token`,
   *   returns an unparseable body, or does not answer within `timeout`.
   */
  private async getAccessToken(timeout: number = this.config.timeout || 10000): Promise<string> {
    if (this.accessToken && Date.now() < this.tokenExpireTime) {
      return this.accessToken;
    }

    // URLSearchParams percent-encodes the credentials, so characters such as
    // '&' or '+' in a key cannot corrupt the query string.
    const query = new URLSearchParams({
      grant_type: 'client_credentials',
      client_id: this.apiKey,
      client_secret: this.secretKey,
    });

    const { body: data } = await this.fetchJson<
      Partial<BaiduTokenResponse> & { error?: string; error_description?: string }
    >(`${this.tokenUrl}?${query.toString()}`, {}, timeout);

    if (!data.access_token) {
      const detail = data.error_description || data.error;
      throw new Error(
        detail ? `获取百度 OCR access_token 失败: ${detail}` : '获取百度 OCR access_token 失败'
      );
    }

    // A missing or non-numeric lifetime is treated as 0, which leaves the
    // expiry in the past and simply disables caching instead of storing NaN.
    const lifetimeSeconds = typeof data.expires_in === 'number' ? data.expires_in : 0;

    this.accessToken = data.access_token;
    this.tokenExpireTime = Date.now() + (lifetimeSeconds - 300) * 1000; // refresh 5 minutes early

    return this.accessToken;
  }

  /**
   * Runs OCR on the given image.
   *
   * @param source Image to recognise: base64 string, buffer, or file path.
   * @param options Per-call overrides; only `timeout` is read here.
   * @returns Recognised text (lines joined with '\n'), a confidence in 0..1,
   *   the elapsed time in milliseconds, and provider metadata.
   * @throws Error when credentials are missing, the image source is invalid,
   *   a request times out, or Baidu reports an error.
   */
  async recognize(
    source: IImageSource,
    options?: OCRProviderConfig
  ): Promise<OCRRecognitionResult> {
    const startTime = Date.now();

    if (!(await this.isAvailable())) {
      throw new Error('百度 OCR 未配置。请设置 BAIDU_OCR_API_KEY 和 BAIDU_OCR_SECRET_KEY 环境变量');
    }

    const timeout = options?.timeout || this.config.timeout || 10000;
    const imageBase64 = await this.getImageBase64(source);
    const token = await this.getAccessToken(timeout);

    const { ok, status, body: data } = await this.fetchJson<Partial<BaiduOCRResponse>>(
      `${this.apiUrl}?access_token=${encodeURIComponent(token)}`,
      {
        method: 'POST',
        headers: { 'Content-Type': 'application/x-www-form-urlencoded' },
        body: `image=${encodeURIComponent(imageBase64)}`,
      },
      timeout
    );
    const duration = Date.now() - startTime;

    if (data.error_code) {
      // NOTE(review): 110 / 111 are Baidu's "token invalid" / "token expired"
      // codes per the public error table — confirm. Dropping the cached token
      // lets the next call fetch a fresh one instead of failing repeatedly.
      if (data.error_code === 110 || data.error_code === 111) {
        this.accessToken = null;
        this.tokenExpireTime = 0;
      }
      throw new Error(`百度 OCR 错误: ${data.error_msg} (${data.error_code})`);
    }
    if (!ok) {
      throw new Error(`百度 OCR 请求失败 (HTTP ${status})`);
    }

    const words = data.words_result ?? [];
    const text = words.map((w) => w.words).join('\n');

    // Fallback confidence used when the response carries no probability data.
    // NOTE(review): Baidu appears to return `probability` only when the
    // request sets `probability=true`, which this request does not — confirm.
    let confidence = 0.9;
    if (words[0]?.probability) {
      const sum = words.reduce((acc, w) => acc + (w.probability?.average ?? 0), 0);
      const mean = sum / words.length;
      // Accept either a 0..1 fraction or a 0..100 percentage and clamp to 0..1.
      confidence = Math.min(1, Math.max(0, mean > 1 ? mean / 100 : mean));
    }

    return {
      text: text.trim(),
      confidence,
      duration,
      extra: {
        provider: 'baidu',
        wordCount: words.length,
      },
    };
  }

  /** Static usage hints for callers preparing images for this provider. */
  getRecommendations() {
    return {
      maxImageSize: 4 * 1024 * 1024, // 4 MB limit
      supportedFormats: ['jpg', 'jpeg', 'png', 'bmp'],
      notes: '百度 OCR 标准版 QPS 限制为 2,每日免费额度 1000 次。适合高精度需求场景。',
    };
  }

  /**
   * Converts an image source to a bare base64 string.
   * Precedence is `base64`, then `buffer`, then `path`.
   *
   * @throws Error when none of the three fields is set.
   */
  private async getImageBase64(source: IImageSource): Promise<string> {
    if (source.base64) {
      // Strip a leading data-URL prefix such as "data:image/png;base64,".
      return source.base64.replace(/^data:image\/\w+;base64,/, '');
    }

    if (source.buffer) {
      return source.buffer.toString('base64');
    }

    if (source.path) {
      // Path resolution is delegated to the base class.
      const fullPath = this.resolveImagePath(source.path);
      // Async read so a large image does not block the event loop.
      const buffer = await fs.promises.readFile(fullPath);
      return buffer.toString('base64');
    }

    throw new Error('无效的图片来源');
  }

  /**
   * Performs an HTTP request and parses the response body as a JSON object.
   * The timeout covers both the request and reading the body; on failure the
   * in-flight request is aborted.
   *
   * @returns The HTTP status, the `ok` flag and the parsed body. Non-2xx
   *   responses are returned rather than thrown, so callers can read
   *   API-level error fields from the body.
   * @throws Error when the body is not a JSON object or the timeout elapses.
   */
  private async fetchJson<T extends object>(
    url: string,
    init: NonNullable<Parameters<typeof fetch>[1]>,
    timeout: number
  ): Promise<{ ok: boolean; status: number; body: T }> {
    const controller = new AbortController();

    const exchange = async (): Promise<{ ok: boolean; status: number; body: T }> => {
      const response = await fetch(url, { ...init, signal: controller.signal });
      const raw = await response.text();

      let parsed: unknown;
      try {
        parsed = JSON.parse(raw);
      } catch {
        throw new Error(`百度 OCR 返回了无法解析的响应 (HTTP ${response.status})`);
      }
      if (typeof parsed !== 'object' || parsed === null) {
        throw new Error(`百度 OCR 返回了无法解析的响应 (HTTP ${response.status})`);
      }

      return { ok: response.ok, status: response.status, body: parsed as T };
    };

    try {
      return await this.withTimeout(exchange(), timeout);
    } catch (error) {
      // Cancel the request if it is still running (e.g. after a timeout).
      controller.abort();
      throw error;
    }
  }

  /**
   * Rejects with `Error('timeout')` if `promise` does not settle within
   * `timeout` milliseconds; otherwise settles like `promise`.
   */
  private async withTimeout<T>(promise: Promise<T>, timeout: number): Promise<T> {
    let timer: ReturnType<typeof setTimeout> | undefined;
    const expiry = new Promise<never>((_, reject) => {
      timer = setTimeout(() => reject(new Error('timeout')), timeout);
    });

    try {
      return await Promise.race([promise, expiry]);
    } finally {
      // Always clear the timer so it neither fires later nor keeps the
      // process alive after the race has been decided.
      clearTimeout(timer);
    }
  }
}
|
||
|
|
|
||
|
|
// Export a singleton instance
|
||
|
|
// Constructed with no explicit config, so the credentials fall back to the
// BAIDU_OCR_API_KEY / BAIDU_OCR_SECRET_KEY environment variables.
export const baiduProvider = new BaiduProvider();
|