## 主要变更

### OCR 架构
- 新增多提供商 OCR 系统 (Tesseract.js, Baidu OCR, RapidOCR)
- 添加 Provider 基类接口和工厂模式
- 支持 provider 自动选择和降级处理
- 新增 RapidOCR Python HTTP 服务 (端口 8080)

### 路径修复
- 修复 Windows 平台路径解析问题
- 统一路径处理工具 (lib/path.ts)
- 修复 uploads 目录定位问题

### 设置页面重构
- 三个标签页:API 配置、OCR 配置、AI 配置
- API 服务器地址配置
- OCR 服务商配置(Tesseract.js, RapidOCR, 百度 OCR)
- AI 服务商配置(智谱 GLM, MiniMax, DeepSeek, Kimi, OpenAI, Anthropic)

### 端口配置
- 前端端口: 13056
- 后端端口: 13057

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
188 lines
5.0 KiB
TypeScript
188 lines
5.0 KiB
TypeScript
/**
|
||
* Baidu OCR Provider
|
||
* 特点:准确率高、速度快、国内访问快
|
||
* 缺点:需要付费 (有免费额度)、需要网络连接
|
||
* 官方文档: https://cloud.baidu.com/doc/OCR/index.html
|
||
*/
|
||
|
||
import { BaseOCRProvider, IImageSource, OCRRecognitionResult, OCRProviderConfig } from './base.provider';
|
||
import fs from 'fs';
|
||
|
||
/**
 * JSON body this provider reads back from the Baidu OCR endpoint.
 *
 * On failure the error fields are populated; `recognize` checks `error_code`
 * before touching `words_result`.
 */
interface BaiduOCRResponse {
  /** Truthy when the request failed. */
  error_code?: number;
  /** Human-readable description that accompanies `error_code`. */
  error_msg?: string;
  /** Recognised entries; their `words` are joined with newlines to build the text. */
  words_result: {
    /** Text of this entry. */
    words: string;
    /** Per-entry confidence statistics; only `average` is consumed by this file. */
    probability?: { average: number; variance: number; min: number };
  }[];
}
|
||
|
||
/**
 * JSON body this provider reads back from the Baidu OAuth token endpoint.
 */
interface BaiduTokenResponse {
  /** Token lifetime; the provider treats it as seconds when computing the cache expiry. */
  expires_in: number;
  /** Bearer value appended to OCR requests as the `access_token` query parameter. */
  access_token: string;
}
|
||
|
||
/**
 * OCR provider that sends images to Baidu's cloud `general_basic` endpoint.
 *
 * Credentials come from the constructor config or, failing that, from the
 * `BAIDU_OCR_API_KEY` / `BAIDU_OCR_SECRET_KEY` environment variables. The
 * OAuth access token is cached on the instance and refreshed five minutes
 * before the expiry reported by Baidu.
 */
export class BaiduProvider extends BaseOCRProvider {
  private readonly apiKey: string;
  private readonly secretKey: string;
  /** Cached OAuth token; `null` until the first successful token request. */
  private accessToken: string | null = null;
  /** Epoch milliseconds after which the cached token must be refreshed. */
  private tokenExpireTime = 0;
  private readonly apiUrl = 'https://aip.baidubce.com/rest/2.0/ocr/v1/general_basic';

  /**
   * @param config Provider config, optionally carrying `apiKey` / `secretKey`.
   *   Missing or empty credentials fall back to the environment variables,
   *   and finally to an empty string (which makes `isAvailable()` false).
   */
  constructor(config: OCRProviderConfig & { apiKey?: string; secretKey?: string } = {}) {
    super(config);
    this.apiKey = config.apiKey || process.env.BAIDU_OCR_API_KEY || '';
    this.secretKey = config.secretKey || process.env.BAIDU_OCR_SECRET_KEY || '';
  }

  /** Display name of this provider. */
  getName(): string {
    return 'Baidu OCR';
  }

  /** This provider always talks to a remote service. */
  getType(): 'local' | 'cloud' {
    return 'cloud';
  }

  /**
   * Reports whether both credentials are present.
   * This only checks configuration; it performs no network request.
   */
  async isAvailable(): Promise<boolean> {
    return !!(this.apiKey && this.secretKey);
  }

  /**
   * Returns a valid access token, requesting a new one when the cache is
   * empty or stale.
   *
   * @param timeout Optional limit in milliseconds for the token request.
   *   When omitted the request is awaited without a limit.
   * @throws Error when the response carries no `access_token`, or with
   *   message `timeout` when the limit elapses first.
   */
  private async getAccessToken(timeout?: number): Promise<string> {
    if (this.accessToken && Date.now() < this.tokenExpireTime) {
      return this.accessToken;
    }

    // Credentials are percent-encoded so reserved characters cannot corrupt the query string.
    const url =
      'https://aip.baidubce.com/oauth/2.0/token?grant_type=client_credentials' +
      `&client_id=${encodeURIComponent(this.apiKey)}` +
      `&client_secret=${encodeURIComponent(this.secretKey)}`;

    const request = fetch(url);
    const response = await (timeout ? this.withTimeout(request, timeout) : request);

    let data: (Partial<BaiduTokenResponse> & { error?: string; error_description?: string }) | null;
    try {
      data = (await response.json()) as typeof data;
    } catch {
      // Body was not JSON (e.g. a gateway error page).
      data = null;
    }

    if (!data || !data.access_token) {
      // Surface the server-side reason when one is provided.
      const detail = data?.error_description || data?.error || `HTTP ${response.status}`;
      throw new Error(`获取百度 OCR access_token 失败: ${detail}`);
    }

    // A missing or non-numeric expiry means "do not cache" instead of producing NaN.
    const lifetimeSeconds =
      typeof data.expires_in === 'number' && Number.isFinite(data.expires_in) ? data.expires_in : 0;

    this.accessToken = data.access_token;
    this.tokenExpireTime = Date.now() + (lifetimeSeconds - 300) * 1000; // refresh 5 minutes early

    return this.accessToken;
  }

  /**
   * Runs OCR on a single image.
   *
   * @param source Image given as base64 text, a buffer, or a file path.
   * @param options Per-call overrides; only `timeout` is read here.
   * @returns Recognised text (lines joined with `\n`, trimmed), a confidence
   *   value in [0, 1], the elapsed time in milliseconds, and provider extras.
   * @throws Error when credentials are missing, the image source is invalid,
   *   a request times out, the response is not JSON, or Baidu reports an
   *   `error_code`.
   */
  async recognize(
    source: IImageSource,
    options?: OCRProviderConfig
  ): Promise<OCRRecognitionResult> {
    const startTime = Date.now();

    if (!(await this.isAvailable())) {
      throw new Error('百度 OCR 未配置。请设置 BAIDU_OCR_API_KEY 和 BAIDU_OCR_SECRET_KEY 环境变量');
    }

    // The same limit applies to the token request and to the OCR request.
    const timeout = options?.timeout || this.config.timeout || 10000;

    const imageBase64 = await this.getImageBase64(source);
    const token = await this.getAccessToken(timeout);

    // `probability=true` asks Baidu to return per-line confidence; without it
    // the response has no `probability` field and the default is always used.
    const body = `image=${encodeURIComponent(imageBase64)}&probability=true`;

    const response = await this.withTimeout(
      fetch(`${this.apiUrl}?access_token=${encodeURIComponent(token)}`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/x-www-form-urlencoded' },
        body,
      }),
      timeout
    );

    let data: BaiduOCRResponse | null;
    try {
      data = (await response.json()) as BaiduOCRResponse | null;
    } catch {
      data = null;
    }
    if (!data || typeof data !== 'object') {
      throw new Error(`百度 OCR 响应解析失败 (HTTP ${response.status})`);
    }

    const duration = Date.now() - startTime;

    if (data.error_code) {
      throw new Error(`百度 OCR 错误: ${data.error_msg} (${data.error_code})`);
    }

    const words = data.words_result || [];
    const text = words.map((w) => w.words).join('\n');

    return {
      text: text.trim(),
      confidence: this.averageConfidence(words),
      duration,
      extra: {
        provider: 'baidu',
        wordCount: words.length,
      },
    };
  }

  /** Static usage hints for callers choosing or preparing input for this provider. */
  getRecommendations() {
    return {
      maxImageSize: 4 * 1024 * 1024, // 4MB limit
      supportedFormats: ['jpg', 'jpeg', 'png', 'bmp'],
      notes: '百度 OCR 标准版 QPS 限制为 2,每日免费额度 1000 次。适合高精度需求场景。',
    };
  }

  /**
   * Averages the per-line `probability.average` values into one score in [0, 1].
   *
   * Only lines that actually carry a score contribute. When none do, the
   * fixed default of 0.9 is returned.
   *
   * NOTE(review): Baidu's documentation shows `average` as a 0–1 fraction, so
   * it is used as-is. Values above 1 are still treated as percentages to stay
   * compatible with the earlier unconditional `/ 100` scaling — confirm
   * against live responses.
   */
  private averageConfidence(lines: BaiduOCRResponse['words_result']): number {
    const scores: number[] = [];
    for (const line of lines) {
      const average = line.probability?.average;
      if (typeof average === 'number' && Number.isFinite(average)) {
        scores.push(average > 1 ? average / 100 : average);
      }
    }

    if (scores.length === 0) {
      return 0.9; // default confidence
    }

    const mean = scores.reduce((acc, score) => acc + score, 0) / scores.length;
    return Math.min(1, Math.max(0, mean));
  }

  /**
   * Converts any supported image source into raw base64 text (no data-URL prefix).
   *
   * Precedence: `base64`, then `buffer`, then `path`.
   *
   * @throws Error when none of the three fields is set.
   */
  private async getImageBase64(source: IImageSource): Promise<string> {
    if (source.base64) {
      // Strip a leading data-URL header, including subtypes such as `svg+xml` or `x-icon`.
      return source.base64.replace(/^data:image\/[\w.+-]+;base64,/, '');
    }

    if (source.buffer) {
      return source.buffer.toString('base64');
    }

    if (source.path) {
      // Path resolution is delegated to the base class.
      const fullPath = this.resolveImagePath(source.path);
      // Asynchronous read so a large file does not block the event loop.
      const buffer = await fs.promises.readFile(fullPath);
      return buffer.toString('base64');
    }

    throw new Error('无效的图片来源');
  }

  /**
   * Rejects with `Error('timeout')` if `promise` has not settled within `timeout` ms.
   *
   * The timer is cleared once either side settles so it does not linger
   * after a fast response. The underlying operation is not cancelled.
   */
  private async withTimeout<T>(promise: Promise<T>, timeout: number): Promise<T> {
    let timer: ReturnType<typeof setTimeout> | undefined;
    const expiry = new Promise<never>((_, reject) => {
      timer = setTimeout(() => reject(new Error('timeout')), timeout);
    });

    try {
      return await Promise.race([promise, expiry]);
    } finally {
      clearTimeout(timer);
    }
  }
}
|
||
|
||
// 导出单例实例
|
||
// Shared module-level instance. It is built with the default (empty) config,
// so its credentials are read from the BAIDU_OCR_API_KEY / BAIDU_OCR_SECRET_KEY
// environment variables at the moment this module is loaded.
export const baiduProvider = new BaiduProvider();
|