feat: 实现多 OCR 提供商架构和完整设置页面
## 主要变更

### OCR 架构
- 新增多提供商 OCR 系统 (Tesseract.js, Baidu OCR, RapidOCR)
- 添加 Provider 基类接口和工厂模式
- 支持 provider 自动选择和降级处理
- 新增 RapidOCR Python HTTP 服务 (端口 8080)

### 路径修复
- 修复 Windows 平台路径解析问题
- 统一路径处理工具 (lib/path.ts)
- 修复 uploads 目录定位问题

### 设置页面重构
- 三个标签页:API 配置、OCR 配置、AI 配置
- API 服务器地址配置
- OCR 服务商配置(Tesseract.js, RapidOCR, 百度 OCR)
- AI 服务商配置(智谱 GLM, MiniMax, DeepSeek, Kimi, OpenAI, Anthropic)

### 端口配置
- 前端端口: 13056
- 后端端口: 13057

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,187 @@
|
||||
/**
 * Baidu OCR Provider
 * Pros: high accuracy, fast recognition, low latency from within mainland China
 * Cons: paid service (a free quota is available), requires network connectivity
 * Official docs: https://cloud.baidu.com/doc/OCR/index.html
 */
|
||||
|
||||
import { BaseOCRProvider, IImageSource, OCRRecognitionResult, OCRProviderConfig } from './base.provider';
|
||||
import fs from 'fs';
|
||||
|
||||
/**
 * JSON body returned by Baidu's text-recognition endpoint.
 *
 * A successful call carries `words_result`; a failed call carries
 * `error_code` / `error_msg` instead, so every field is optional and callers
 * must check `error_code` before reading the recognised lines.
 */
interface BaiduOCRResponse {
  /** Request identifier assigned by Baidu, useful when reporting problems. */
  log_id?: number;
  /** Number of entries in `words_result`. */
  words_result_num?: number;
  /** One entry per recognised line of text; absent on error responses. */
  words_result?: Array<{
    /** The recognised text of this line. */
    words: string;
    /** Per-line confidence statistics; only returned when the request asks for them. */
    probability?: {
      average: number;
      variance: number;
      min: number;
    };
  }>;
  /** Present (and non-zero) when the request failed. */
  error_code?: number;
  /** Human-readable description accompanying `error_code`. */
  error_msg?: string;
}
|
||||
|
||||
/**
 * Subset of the Baidu OAuth token endpoint response that this module reads.
 */
interface BaiduTokenResponse {
  /** Bearer token appended to OCR requests as the `access_token` query parameter. */
  access_token: string;

  /** Token lifetime in seconds, counted from the moment the token was issued. */
  expires_in: number;
}
|
||||
|
||||
/**
 * OCR provider backed by Baidu's cloud `general_basic` text-recognition endpoint.
 *
 * Credentials are taken from the constructor config or, failing that, from the
 * `BAIDU_OCR_API_KEY` / `BAIDU_OCR_SECRET_KEY` environment variables. The OAuth
 * access token is fetched lazily and cached on the instance until shortly
 * before it expires.
 */
export class BaiduProvider extends BaseOCRProvider {
  private readonly apiKey: string;
  private readonly secretKey: string;
  /** Cached OAuth token, or `null` when none has been fetched (or it was rejected). */
  private accessToken: string | null = null;
  /** Epoch milliseconds after which the cached token must be refreshed. */
  private tokenExpireTime = 0;
  private readonly apiUrl = 'https://aip.baidubce.com/rest/2.0/ocr/v1/general_basic';

  /**
   * @param config Provider configuration; `apiKey` / `secretKey` override the
   *   corresponding environment variables when supplied.
   */
  constructor(config: OCRProviderConfig & { apiKey?: string; secretKey?: string } = {}) {
    super(config);
    // `||` (not `??`) on purpose: an empty string must fall through to the next source.
    this.apiKey = config.apiKey || process.env.BAIDU_OCR_API_KEY || '';
    this.secretKey = config.secretKey || process.env.BAIDU_OCR_SECRET_KEY || '';
  }

  /** Display name of this provider. */
  getName(): string {
    return 'Baidu OCR';
  }

  /** Baidu OCR is a remote service, so this provider is always `'cloud'`. */
  getType(): 'local' | 'cloud' {
    return 'cloud';
  }

  /**
   * Reports whether both credentials are configured.
   * No network request is made, so the credentials are not validated here.
   */
  async isAvailable(): Promise<boolean> {
    return Boolean(this.apiKey && this.secretKey);
  }

  /**
   * Returns a valid access token, fetching a new one when the cache is empty
   * or stale.
   *
   * @param timeout Maximum time in milliseconds to wait for the token endpoint;
   *   defaults to the provider's configured timeout, then 10 seconds.
   * @throws Error when the endpoint does not answer in time or does not return a token.
   */
  private async getAccessToken(timeout?: number): Promise<string> {
    if (this.accessToken && Date.now() < this.tokenExpireTime) {
      return this.accessToken;
    }

    // URLSearchParams percent-encodes the credentials so reserved characters
    // in a key cannot corrupt the query string.
    const query = new URLSearchParams({
      grant_type: 'client_credentials',
      client_id: this.apiKey,
      client_secret: this.secretKey,
    });

    const response = await this.withTimeout(
      fetch(`https://aip.baidubce.com/oauth/2.0/token?${query.toString()}`),
      timeout || this.config.timeout || 10000
    );

    let data: Partial<BaiduTokenResponse> | null;
    try {
      data = (await response.json()) as Partial<BaiduTokenResponse> | null;
    } catch {
      // A gateway error page or an empty body is not JSON; report the HTTP status instead.
      throw new Error(`获取百度 OCR access_token 失败 (HTTP ${response.status})`);
    }

    if (!data || !data.access_token) {
      throw new Error('获取百度 OCR access_token 失败');
    }

    // A missing or malformed lifetime is treated as "already expired" so the
    // next call refetches instead of caching with a NaN deadline.
    const lifetimeSeconds =
      typeof data.expires_in === 'number' && Number.isFinite(data.expires_in) ? data.expires_in : 0;

    this.accessToken = data.access_token;
    // Refresh five minutes early so a token never expires mid-request.
    this.tokenExpireTime = Date.now() + Math.max(0, lifetimeSeconds - 300) * 1000;

    return data.access_token;
  }

  /**
   * Runs text recognition on an image.
   *
   * @param source Image given as base64 text, a buffer, or a file path.
   * @param options Per-call overrides; only `timeout` (milliseconds) is read here.
   * @returns Recognised text (one line per detected text line), a confidence in
   *   the range 0–1, and the elapsed time in milliseconds.
   * @throws Error when credentials are missing, the image source is invalid,
   *   a request times out, or Baidu reports an error.
   */
  async recognize(
    source: IImageSource,
    options?: OCRProviderConfig
  ): Promise<OCRRecognitionResult> {
    const startTime = Date.now();

    if (!(await this.isAvailable())) {
      throw new Error('百度 OCR 未配置。请设置 BAIDU_OCR_API_KEY 和 BAIDU_OCR_SECRET_KEY 环境变量');
    }

    const timeout = options?.timeout || this.config.timeout || 10000;
    const imageBase64 = await this.getImageBase64(source);
    const token = await this.getAccessToken(timeout);

    // `probability=true` asks Baidu to include per-line confidence; without it
    // the response never carries `probability` and confidence is always the default.
    const form = new URLSearchParams({ image: imageBase64, probability: 'true' });

    const response = await this.withTimeout(
      fetch(`${this.apiUrl}?access_token=${encodeURIComponent(token)}`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/x-www-form-urlencoded' },
        body: form.toString(),
      }),
      timeout
    );

    let data: BaiduOCRResponse;
    try {
      data = (await response.json()) as BaiduOCRResponse;
    } catch {
      throw new Error(`百度 OCR 错误: 响应不是有效的 JSON (HTTP ${response.status})`);
    }
    const duration = Date.now() - startTime;

    if (data.error_code) {
      // 110 / 111: Baidu rejected the token as invalid or expired. Drop the
      // cached copy so the next call fetches a fresh one instead of failing again.
      if (data.error_code === 110 || data.error_code === 111) {
        this.accessToken = null;
        this.tokenExpireTime = 0;
      }
      throw new Error(`百度 OCR 错误: ${data.error_msg} (${data.error_code})`);
    }
    if (!response.ok) {
      throw new Error(`百度 OCR 错误: HTTP ${response.status}`);
    }

    const lines = data.words_result || [];
    const text = lines.map((line) => line.words).join('\n');

    // Average only the lines that actually carry a score; fall back to a fixed
    // default when Baidu returned none.
    let confidence = 0.9;
    const scores = lines
      .map((line) => line.probability?.average)
      .filter((score): score is number => typeof score === 'number' && Number.isFinite(score));
    if (scores.length > 0) {
      const mean = scores.reduce((sum, score) => sum + score, 0) / scores.length;
      // Baidu documents `average` on a 0–1 scale. Values above 1 are still
      // interpreted as percentages in case an endpoint reports that scale.
      confidence = Math.min(1, Math.max(0, mean > 1 ? mean / 100 : mean));
    }

    return {
      text: text.trim(),
      confidence,
      duration,
      extra: {
        provider: 'baidu',
        wordCount: lines.length,
      },
    };
  }

  /** Static usage hints for this provider: size limit, accepted formats and quota notes. */
  getRecommendations() {
    return {
      maxImageSize: 4 * 1024 * 1024, // 4 MB limit
      supportedFormats: ['jpg', 'jpeg', 'png', 'bmp'],
      notes: '百度 OCR 标准版 QPS 限制为 2,每日免费额度 1000 次。适合高精度需求场景。',
    };
  }

  /**
   * Converts any supported image source into a bare base64 string.
   * Sources are tried in the order `base64`, `buffer`, `path`.
   *
   * @throws Error when the source provides none of the three.
   */
  private async getImageBase64(source: IImageSource): Promise<string> {
    if (source.base64) {
      // Strip a leading data-URL prefix; the API expects the raw base64 payload.
      return source.base64.replace(/^data:image\/[\w.+-]+;base64,/, '');
    }

    if (source.buffer) {
      return source.buffer.toString('base64');
    }

    if (source.path) {
      // Path resolution is delegated to the base class.
      const fullPath = this.resolveImagePath(source.path);
      // Asynchronous read so a large image does not block the event loop.
      const contents = await fs.promises.readFile(fullPath);
      return contents.toString('base64');
    }

    throw new Error('无效的图片来源');
  }

  /**
   * Rejects with `Error('timeout')` if `promise` has not settled within
   * `timeout` milliseconds; otherwise settles exactly as `promise` does.
   * The underlying operation is not cancelled when the deadline fires.
   */
  private async withTimeout<T>(promise: Promise<T>, timeout: number): Promise<T> {
    let timer: ReturnType<typeof setTimeout> | undefined;
    const deadline = new Promise<never>((_, reject) => {
      timer = setTimeout(() => reject(new Error('timeout')), timeout);
    });

    try {
      return await Promise.race([promise, deadline]);
    } finally {
      // Always release the timer so it cannot keep the process alive after
      // the request has already settled.
      clearTimeout(timer);
    }
  }
}
|
||||
|
||||
// 导出单例实例
|
||||
// Shared default instance. It is constructed without a config object, so the
// API key and secret key come from BAIDU_OCR_API_KEY / BAIDU_OCR_SECRET_KEY
// (or are empty strings when those variables are unset).
export const baiduProvider = new BaiduProvider();
|
||||
Reference in New Issue
Block a user