feat: 实现多 OCR 提供商架构和完整设置页面
## 主要变更 ### OCR 架构 - 新增多提供商 OCR 系统 (Tesseract.js, Baidu OCR, RapidOCR) - 添加 Provider 基类接口和工厂模式 - 支持 provider 自动选择和降级处理 - 新增 RapidOCR Python HTTP 服务 (端口 8080) ### 路径修复 - 修复 Windows 平台路径解析问题 - 统一路径处理工具 (lib/path.ts) - 修复 uploads 目录定位问题 ### 设置页面重构 - 三个标签页:API 配置、OCR 配置、AI 配置 - API 服务器地址配置 - OCR 服务商配置(Tesseract.js, RapidOCR, 百度 OCR) - AI 服务商配置(智谱 GLM, MiniMax, DeepSeek, Kimi, OpenAI, Anthropic) ### 端口配置 - 前端端口: 13056 - 后端端口: 13057 Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -6,11 +6,16 @@ import express from 'express';
|
||||
import cors from 'cors';
|
||||
import dotenv from 'dotenv';
|
||||
import path from 'path';
|
||||
import { fileURLToPath } from 'url';
|
||||
import authRoutes from './routes/auth.routes';
|
||||
import documentRoutes from './routes/document.routes';
|
||||
import todoRoutes from './routes/todo.routes';
|
||||
import imageRoutes from './routes/image.routes';
|
||||
|
||||
// 获取当前文件的目录
|
||||
const __filename = fileURLToPath(import.meta.url);
|
||||
const __dirname = path.dirname(__filename);
|
||||
|
||||
// Load environment variables
|
||||
dotenv.config();
|
||||
|
||||
@@ -24,8 +29,8 @@ app.use(cors({
|
||||
app.use(express.json());
|
||||
app.use(express.urlencoded({ extended: true }));
|
||||
|
||||
// Static files for uploads
|
||||
app.use('/uploads', express.static(path.join(process.cwd(), 'uploads')));
|
||||
// Static files for uploads (使用绝对路径指向 backend/uploads)
|
||||
app.use('/uploads', express.static(path.join(__dirname, '..', 'uploads')));
|
||||
|
||||
// Health check
|
||||
app.get('/api/health', (_req, res) => {
|
||||
|
||||
84
backend/src/lib/path.ts
Normal file
84
backend/src/lib/path.ts
Normal file
@@ -0,0 +1,84 @@
|
||||
/**
|
||||
* 路径解析工具
|
||||
* 解决开发环境下路径解析问题
|
||||
*/
|
||||
|
||||
import path from 'path';
|
||||
import fs from 'fs';
|
||||
import { fileURLToPath } from 'url';
|
||||
|
||||
/**
 * Locate the project root directory.
 *
 * Starts from the directory that contains this module (derived from
 * `import.meta.url`, falling back to `process.cwd()` if that lookup throws)
 * and walks up the directory tree, returning the first directory that
 * contains a `package.json`.
 *
 * @returns The nearest ancestor directory (within the search limit) that
 *   holds a `package.json`, or `process.cwd()` when none is found.
 */
export function getProjectRoot(): string {
  // Upper bound on how many directory levels are inspected.
  const MAX_LEVELS = 10;

  let startDir: string;
  try {
    // fileURLToPath already returns a native path (drive letter and
    // backslashes on Windows), so no manual "/C:/..." fix-up is required.
    startDir = path.dirname(fileURLToPath(import.meta.url));
  } catch {
    startDir = process.cwd();
  }

  let searchDir = startDir;
  for (let level = 0; level < MAX_LEVELS; level++) {
    if (fs.existsSync(path.join(searchDir, 'package.json'))) {
      return searchDir;
    }

    const parentDir = path.dirname(searchDir);
    if (parentDir === searchDir) {
      // Reached the filesystem root; there is nothing further to inspect.
      break;
    }
    searchDir = parentDir;
  }

  // No package.json found: fall back to the working directory.
  return process.cwd();
}
|
||||
|
||||
/**
 * Absolute path of the uploads directory.
 *
 * @returns The `uploads` directory directly under `getProjectRoot()`.
 */
export function getUploadsDir(): string {
  return path.join(getProjectRoot(), 'uploads');
}
|
||||
|
||||
/**
 * Resolve an image path to an absolute filesystem path.
 *
 * Resolution rules, in order:
 * 1. A path starting with `/uploads/` (the form stored in the database) is
 *    resolved inside `getUploadsDir()`. This is checked first because on
 *    POSIX systems such a path is itself absolute and would otherwise be
 *    returned untouched.
 * 2. A path that is already absolute is returned unchanged. On Windows only
 *    drive-letter paths (`C:\...` or `C:/...`) are treated as absolute.
 * 3. Any other path is resolved against `getProjectRoot()`.
 *
 * @param imagePath Database-style path (e.g. `/uploads/xxx.png`), an
 *   absolute path, or a path relative to the project root.
 * @returns The resolved filesystem path.
 */
export function resolveImagePath(imagePath: string): string {
  const UPLOADS_PREFIX = '/uploads/';

  if (imagePath.startsWith(UPLOADS_PREFIX)) {
    return path.join(getUploadsDir(), imagePath.slice(UPLOADS_PREFIX.length));
  }

  const alreadyAbsolute =
    process.platform === 'win32'
      ? /^[a-zA-Z]:[\\/]/.test(imagePath) // accept both separator styles
      : path.isAbsolute(imagePath);
  if (alreadyAbsolute) {
    return imagePath;
  }

  return path.join(getProjectRoot(), imagePath);
}
|
||||
|
||||
/**
 * Build the path string that is stored in the database for a file.
 *
 * @param filename File name to reference.
 * @returns The file name prefixed with `/uploads/`.
 */
export function generateDbPath(filename: string): string {
  return '/uploads/' + filename;
}
|
||||
@@ -5,9 +5,16 @@
|
||||
import multer from 'multer';
|
||||
import path from 'path';
|
||||
import fs from 'fs';
|
||||
import { fileURLToPath } from 'url';
|
||||
|
||||
// 获取当前文件的目录
|
||||
const __filename = fileURLToPath(import.meta.url);
|
||||
const __dirname = path.dirname(__filename);
|
||||
|
||||
// uploads 目录放在 backend 根目录下
|
||||
const uploadDir = path.join(__dirname, '..', '..', 'uploads');
|
||||
|
||||
// Ensure upload directory exists
|
||||
const uploadDir = path.join(process.cwd(), 'uploads');
|
||||
if (!fs.existsSync(uploadDir)) {
|
||||
fs.mkdirSync(uploadDir, { recursive: true });
|
||||
}
|
||||
@@ -51,3 +58,6 @@ export const upload = multer({
|
||||
fileSize: 10 * 1024 * 1024, // 10MB limit
|
||||
},
|
||||
});
|
||||
|
||||
// 导出上传目录路径供其他模块使用
|
||||
export { uploadDir };
|
||||
|
||||
@@ -7,7 +7,8 @@ import { Router } from 'express';
|
||||
import { ImageController } from '../controllers/image.controller';
|
||||
import { authenticate } from '../middleware/auth.middleware';
|
||||
import { upload } from '../middleware/upload.middleware';
|
||||
import { triggerOCRProcessing } from '../services/ocr-processor.service';
|
||||
import { triggerOCRProcessing, OCRProcessorService } from '../services/ocr-processor.service';
|
||||
import { OCRProviderType } from '../services/ocr-providers';
|
||||
|
||||
const router = Router();
|
||||
|
||||
@@ -32,6 +33,27 @@ router.get('/', authenticate, ImageController.getUserImages);
|
||||
*/
|
||||
router.get('/pending', authenticate, ImageController.getPending);
|
||||
|
||||
/**
|
||||
* @route GET /api/images/ocr/providers
|
||||
* @desc Get available OCR providers
|
||||
* @access Private
|
||||
*/
|
||||
router.get('/ocr/providers', authenticate, async (_req, res) => {
|
||||
try {
|
||||
const providers = await OCRProcessorService.getAvailableProviders();
|
||||
res.json({
|
||||
success: true,
|
||||
data: providers,
|
||||
});
|
||||
} catch (error) {
|
||||
const message = error instanceof Error ? error.message : '获取 OCR 提供商失败';
|
||||
res.status(500).json({
|
||||
success: false,
|
||||
error: message,
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
/**
|
||||
* @route GET /api/images/:id
|
||||
* @desc Get image by ID
|
||||
@@ -43,14 +65,16 @@ router.get('/:id', authenticate, ImageController.getById);
|
||||
* @route POST /api/images/:id/reprocess
|
||||
* @desc Re-trigger OCR processing
|
||||
* @access Private
|
||||
* @body { provider?: 'tesseract' | 'baidu' | 'rapidocr' | 'auto' }
|
||||
*/
|
||||
router.post('/:id/reprocess', authenticate, async (req, res) => {
|
||||
try {
|
||||
const userId = req.user!.user_id;
|
||||
const { id } = req.params;
|
||||
const userId = typeof req.user!.user_id === 'string' ? req.user!.user_id : String(req.user!.user_id);
|
||||
const id = typeof req.params.id === 'string' ? req.params.id : req.params.id[0];
|
||||
const { provider } = req.body;
|
||||
|
||||
// 触发 OCR 处理
|
||||
triggerOCRProcessing(id, userId);
|
||||
triggerOCRProcessing(id, userId, { provider: provider as OCRProviderType });
|
||||
|
||||
res.json({
|
||||
success: true,
|
||||
|
||||
@@ -1,20 +1,36 @@
|
||||
/**
|
||||
* OCR Processor Service
|
||||
* 处理图片 OCR 识别的异步服务
|
||||
* 支持多种 OCR Provider: Tesseract.js, Baidu OCR, RapidOCR
|
||||
*/
|
||||
|
||||
import { prisma } from '../lib/prisma';
|
||||
import { ImageService } from './image.service';
|
||||
import fs from 'fs';
|
||||
import path from 'path';
|
||||
import {
|
||||
OCRProviderFactory,
|
||||
OCRProviderType,
|
||||
IImageSource,
|
||||
} from './ocr-providers';
|
||||
|
||||
export interface OCRProcessOptions {
|
||||
/** 指定 OCR Provider */
|
||||
provider?: OCRProviderType;
|
||||
/** 置信度阈值 */
|
||||
confidenceThreshold?: number;
|
||||
}
|
||||
|
||||
export class OCRProcessorService {
|
||||
/**
|
||||
* 处理图片的 OCR 识别
|
||||
* 注意:当前是模拟实现,返回占位符文本
|
||||
* 实际使用时需要集成 Tesseract.js 或其他 OCR 服务
|
||||
* @param imageId 图片 ID
|
||||
* @param userId 用户 ID
|
||||
* @param options 处理选项
|
||||
*/
|
||||
static async processImage(imageId: string, userId: string): Promise<void> {
|
||||
static async processImage(
|
||||
imageId: string,
|
||||
userId: string,
|
||||
options: OCRProcessOptions = {}
|
||||
): Promise<void> {
|
||||
try {
|
||||
// 更新状态为处理中
|
||||
await prisma.image.update({
|
||||
@@ -28,12 +44,14 @@ export class OCRProcessorService {
|
||||
throw new Error('Image not found');
|
||||
}
|
||||
|
||||
// TODO: 集成真实的 OCR 服务
|
||||
// 当前使用模拟实现
|
||||
const ocrResult = await this.performOCRSimulated(image);
|
||||
// 执行 OCR 识别
|
||||
const ocrResult = await this.performOCR(image, options);
|
||||
|
||||
// 根据置信度决定状态
|
||||
const status = ocrResult.confidence >= 0.3 ? 'completed' : 'failed';
|
||||
// Resolve the confidence threshold: an explicit finite option wins, then the
// OCR_CONFIDENCE_THRESHOLD environment variable, then 0.3. An explicit 0 is
// honoured, and a non-numeric environment value is ignored so the threshold
// can never become NaN.
const envThreshold = Number.parseFloat(process.env.OCR_CONFIDENCE_THRESHOLD ?? '');
const requestedThreshold = options.confidenceThreshold;
const threshold =
  typeof requestedThreshold === 'number' && Number.isFinite(requestedThreshold)
    ? requestedThreshold
    : Number.isFinite(envThreshold)
      ? envThreshold
      : 0.3;
|
||||
const status = ocrResult.confidence >= threshold ? 'completed' : 'failed';
|
||||
|
||||
await prisma.image.update({
|
||||
where: { id: imageId },
|
||||
@@ -57,109 +75,106 @@ export class OCRProcessorService {
|
||||
}
|
||||
|
||||
/**
|
||||
* 模拟 OCR 处理
|
||||
* 实际实现应该调用 Tesseract.js 或其他 OCR API
|
||||
* 执行 OCR 识别
|
||||
* @param image 图片信息
|
||||
* @param options 处理选项
|
||||
*/
|
||||
private static async performOCRSimulated(image: any): Promise<{
|
||||
text: string;
|
||||
confidence: number;
|
||||
}> {
|
||||
// 模拟处理延迟
|
||||
await new Promise(resolve => setTimeout(resolve, 2000));
|
||||
private static async performOCR(
|
||||
image: any,
|
||||
options: OCRProcessOptions = {}
|
||||
): Promise<{ text: string; confidence: number }> {
|
||||
// 获取 OCR Provider
|
||||
const providerType = options.provider || (process.env.OCR_PROVIDER as OCRProviderType) || 'auto';
|
||||
const provider = OCRProviderFactory.create(providerType);
|
||||
|
||||
// TODO: 实际 OCR 集成选项:
|
||||
// 1. Tesseract.js (本地)
|
||||
// import Tesseract from 'tesseract.js';
|
||||
// const { data: { text, confidence } } = await Tesseract.recognize(imagePath, 'chi_sim+eng');
|
||||
//
|
||||
// 2. PaddleOCR (需要 Python 服务)
|
||||
// const response = await fetch('http://localhost:5000/ocr', {
|
||||
// method: 'POST',
|
||||
// body: JSON.stringify({ image_path: imagePath }),
|
||||
// });
|
||||
//
|
||||
// 3. 云端 OCR API (百度/腾讯/阿里)
|
||||
// 检查 provider 是否可用
|
||||
const available = await provider.isAvailable();
|
||||
if (!available) {
|
||||
throw new Error(
|
||||
`OCR Provider "${provider.getName()}" 不可用。` +
|
||||
`请检查配置或安装相应的依赖。`
|
||||
);
|
||||
}
|
||||
|
||||
// 准备图片来源
|
||||
const source: IImageSource = {
|
||||
path: image.file_path,
|
||||
};
|
||||
|
||||
// 执行识别
|
||||
const result = await provider.recognize(source);
|
||||
|
||||
console.log(
|
||||
`[OCR] Provider: ${provider.getName()}, ` +
|
||||
`Confidence: ${(result.confidence * 100).toFixed(1)}%, ` +
|
||||
`Duration: ${result.duration}ms, ` +
|
||||
`Text length: ${result.text.length}`
|
||||
);
|
||||
|
||||
// 模拟返回结果
|
||||
return {
|
||||
text: '[模拟 OCR 结果] 图片文字识别功能尚未集成。请在设置页面配置 OCR 服务后重试。',
|
||||
confidence: 0.5,
|
||||
text: result.text,
|
||||
confidence: result.confidence,
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* 使用 Tesseract.js 进行 OCR 识别(需要安装依赖)
|
||||
* 获取所有可用的 OCR Providers
|
||||
*/
|
||||
private static async performOCRWithTesseract(imagePath: string): Promise<{
|
||||
text: string;
|
||||
confidence: number;
|
||||
}> {
|
||||
// 动态导入 Tesseract(如果已安装)
|
||||
try {
|
||||
const Tesseract = await import('tesseract.js');
|
||||
|
||||
// 检查文件是否存在
|
||||
const fullPath = path.join(process.cwd(), imagePath.replace('/uploads/', 'uploads/'));
|
||||
if (!fs.existsSync(fullPath)) {
|
||||
throw new Error(`Image file not found: ${fullPath}`);
|
||||
}
|
||||
|
||||
const result = await Tesseract.recognize(fullPath, 'chi_sim+eng', {
|
||||
logger: (m: any) => console.log(m),
|
||||
});
|
||||
|
||||
return {
|
||||
text: result.data.text,
|
||||
confidence: result.data.confidence / 100, // Tesseract 返回 0-100,需要转换为 0-1
|
||||
};
|
||||
} catch (error) {
|
||||
// 如果 Tesseract 未安装,返回模拟结果
|
||||
console.warn('Tesseract.js not installed, using simulated OCR:', error);
|
||||
return this.performOCRSimulated(null);
|
||||
}
|
||||
static async getAvailableProviders(): Promise<
|
||||
Array<{ type: string; name: string; available: boolean; typeDesc: string }>
|
||||
> {
|
||||
return OCRProviderFactory.getAvailableProviders();
|
||||
}
|
||||
|
||||
/**
|
||||
* 调用外部 OCR API(示例)
|
||||
* 测试指定的 OCR Provider
|
||||
*/
|
||||
private static async performOCRWithAPI(imagePath: string): Promise<{
|
||||
text: string;
|
||||
confidence: number;
|
||||
static async testProvider(
|
||||
providerType: OCRProviderType,
|
||||
imagePath: string
|
||||
): Promise<{
|
||||
success: boolean;
|
||||
result?: { text: string; confidence: number; duration: number };
|
||||
error?: string;
|
||||
}> {
|
||||
// 示例:调用百度 OCR API
|
||||
// const apiKey = process.env.BAIDU_OCR_API_KEY;
|
||||
// const secretKey = process.env.BAIDU_OCR_SECRET_KEY;
|
||||
//
|
||||
// // 获取 access token
|
||||
// const tokenResponse = await fetch(`https://aip.baidubce.com/oauth/2.0/token?grant_type=client_credentials&client_id=${apiKey}&client_secret=${secretKey}`);
|
||||
// const { access_token } = await tokenResponse.json();
|
||||
//
|
||||
// // 读取图片并转为 base64
|
||||
// const imageBuffer = fs.readFileSync(imagePath);
|
||||
// const imageBase64 = imageBuffer.toString('base64');
|
||||
//
|
||||
// // 调用 OCR API
|
||||
// const ocrResponse = await fetch(`https://aip.baidubce.com/rest/2.0/ocr/v1/general_basic?access_token=${access_token}`, {
|
||||
// method: 'POST',
|
||||
// headers: { 'Content-Type': 'application/x-www-form-urlencoded' },
|
||||
// body: `image=${encodeURIComponent(imageBase64)}`,
|
||||
// });
|
||||
//
|
||||
// const result = await ocrResponse.json();
|
||||
//
|
||||
// return {
|
||||
// text: result.words_result?.map((w: any) => w.words).join('\n') || '',
|
||||
// confidence: (result.words_result?.[0]?.probability?.average || 0.5) / 100,
|
||||
// };
|
||||
try {
|
||||
const provider = OCRProviderFactory.create(providerType);
|
||||
const available = await provider.isAvailable();
|
||||
|
||||
throw new Error('OCR API not configured');
|
||||
if (!available) {
|
||||
return {
|
||||
success: false,
|
||||
error: `Provider "${provider.getName()}" 不可用`,
|
||||
};
|
||||
}
|
||||
|
||||
const result = await provider.recognize({ path: imagePath });
|
||||
|
||||
return {
|
||||
success: true,
|
||||
result: {
|
||||
text: result.text,
|
||||
confidence: result.confidence,
|
||||
duration: result.duration || 0,
|
||||
},
|
||||
};
|
||||
} catch (error) {
|
||||
return {
|
||||
success: false,
|
||||
error: error instanceof Error ? error.message : String(error),
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 导出异步处理函数(用于在后台触发 OCR)
|
||||
export const triggerOCRProcessing = async (imageId: string, userId: string) => {
|
||||
export const triggerOCRProcessing = async (
|
||||
imageId: string,
|
||||
userId: string,
|
||||
options?: OCRProcessOptions
|
||||
) => {
|
||||
// 不等待完成,在后台处理
|
||||
OCRProcessorService.processImage(imageId, userId).catch(error => {
|
||||
OCRProcessorService.processImage(imageId, userId, options).catch((error) => {
|
||||
console.error('OCR processing failed:', error);
|
||||
});
|
||||
};
|
||||
|
||||
187
backend/src/services/ocr-providers/baidu.provider.ts
Normal file
187
backend/src/services/ocr-providers/baidu.provider.ts
Normal file
@@ -0,0 +1,187 @@
|
||||
/**
|
||||
* Baidu OCR Provider
|
||||
* 特点:准确率高、速度快、国内访问快
|
||||
* 缺点:需要付费 (有免费额度)、需要网络连接
|
||||
* 官方文档: https://cloud.baidu.com/doc/OCR/index.html
|
||||
*/
|
||||
|
||||
import { BaseOCRProvider, IImageSource, OCRRecognitionResult, OCRProviderConfig } from './base.provider';
|
||||
import fs from 'fs';
|
||||
|
||||
/** JSON body of a Baidu OCR recognition response, as consumed by this provider. */
interface BaiduOCRResponse {
  /** Recognised entries: the text plus optional probability statistics. */
  words_result: {
    words: string;
    probability?: { average: number; variance: number; min: number };
  }[];
  /** Set when the API reports an error. */
  error_code?: number;
  /** Message accompanying `error_code`. */
  error_msg?: string;
}
|
||||
|
||||
/** JSON body of the Baidu OAuth token response, as consumed by this provider. */
interface BaiduTokenResponse {
  /** Token appended to OCR requests as the `access_token` query parameter. */
  access_token: string;
  /** Token lifetime; the provider multiplies it by 1000 to get milliseconds. */
  expires_in: number;
}
|
||||
|
||||
/**
 * OCR provider backed by the Baidu cloud OCR HTTP API.
 *
 * Credentials come from the constructor config or, failing that, from the
 * BAIDU_OCR_API_KEY / BAIDU_OCR_SECRET_KEY environment variables. The OAuth
 * access token is cached on the instance until shortly before it expires.
 */
export class BaiduProvider extends BaseOCRProvider {
  private apiKey: string;
  private secretKey: string;
  private accessToken: string | null = null;
  private tokenExpireTime: number = 0;
  private apiUrl = 'https://aip.baidubce.com/rest/2.0/ocr/v1/general_basic';

  /**
   * @param config Base provider options plus optional `apiKey` / `secretKey`
   *   overrides for the environment variables.
   */
  constructor(config: OCRProviderConfig & { apiKey?: string; secretKey?: string } = {}) {
    super(config);
    this.apiKey = config.apiKey || process.env.BAIDU_OCR_API_KEY || '';
    this.secretKey = config.secretKey || process.env.BAIDU_OCR_SECRET_KEY || '';
  }

  getName(): string {
    return 'Baidu OCR';
  }

  getType(): 'local' | 'cloud' {
    return 'cloud';
  }

  /**
   * Whether both credentials are configured. No network request is made, so
   * this does not prove the credentials are valid.
   */
  async isAvailable(): Promise<boolean> {
    return !!(this.apiKey && this.secretKey);
  }

  /**
   * Return a cached access token, or request a new one from the OAuth
   * endpoint when none is cached or the cached one has expired.
   *
   * @throws Error when the response carries no `access_token`, or with the
   *   message `timeout` when the request exceeds the configured timeout.
   */
  private async getAccessToken(): Promise<string> {
    if (this.accessToken && Date.now() < this.tokenExpireTime) {
      return this.accessToken;
    }

    // URLSearchParams percent-encodes the credentials so characters such as
    // "&" or "+" cannot corrupt the query string.
    const query = new URLSearchParams({
      grant_type: 'client_credentials',
      client_id: this.apiKey,
      client_secret: this.secretKey,
    });

    const response = await this.withTimeout(
      fetch(`https://aip.baidubce.com/oauth/2.0/token?${query.toString()}`),
      this.config.timeout || 10000
    );
    const data = (await response.json()) as BaiduTokenResponse;

    if (!data.access_token) {
      throw new Error('获取百度 OCR access_token 失败');
    }

    this.accessToken = data.access_token;
    // Treat the token as expired five minutes before its reported lifetime ends.
    this.tokenExpireTime = Date.now() + (data.expires_in - 300) * 1000;

    return this.accessToken;
  }

  /**
   * Run OCR on one image.
   *
   * @param source Image given as base64, Buffer or path (checked in that order).
   * @param options Per-call overrides; only `timeout` is read here.
   * @returns Recognised text (entries joined by newlines), a confidence
   *   value, and the elapsed time in milliseconds.
   * @throws Error when credentials are missing, the API reports an error
   *   code, the image source is invalid, or a request times out.
   */
  async recognize(
    source: IImageSource,
    options?: OCRProviderConfig
  ): Promise<OCRRecognitionResult> {
    const startTime = Date.now();

    const available = await this.isAvailable();
    if (!available) {
      throw new Error('百度 OCR 未配置。请设置 BAIDU_OCR_API_KEY 和 BAIDU_OCR_SECRET_KEY 环境变量');
    }

    const imageBase64 = await this.getImageBase64(source);
    const token = await this.getAccessToken();

    const response = await this.withTimeout(
      fetch(`${this.apiUrl}?access_token=${token}`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/x-www-form-urlencoded' },
        body: `image=${encodeURIComponent(imageBase64)}`,
      }),
      options?.timeout || this.config.timeout || 10000
    );

    const data = (await response.json()) as BaiduOCRResponse;
    const duration = Date.now() - startTime;

    if (data.error_code) {
      throw new Error(`百度 OCR 错误: ${data.error_msg} (${data.error_code})`);
    }

    const words = data.words_result || [];
    const text = words.map((w) => w.words).join('\n');

    // Falls back to 0.9 when the response carries no probability data.
    // NOTE(review): the division assumes `probability.average` is on a 0-100
    // scale, and this request does not visibly ask the API for probabilities;
    // confirm both against the Baidu API documentation.
    let confidence = 0.9;
    if (words.length > 0 && words[0].probability) {
      const sum = words.reduce((acc, w) => acc + (w.probability?.average || 0), 0);
      confidence = (sum / words.length) / 100;
    }

    return {
      text: text.trim(),
      confidence,
      duration,
      extra: {
        provider: 'baidu',
        wordCount: words.length,
      },
    };
  }

  getRecommendations() {
    return {
      maxImageSize: 4 * 1024 * 1024, // 4MB limit
      supportedFormats: ['jpg', 'jpeg', 'png', 'bmp'],
      notes: '百度 OCR 标准版 QPS 限制为 2,每日免费额度 1000 次。适合高精度需求场景。',
    };
  }

  /**
   * Produce the raw base64 payload for an image source.
   *
   * Uses `base64` (with any `data:image/...;base64,` prefix removed), then
   * `buffer`, then `path` (resolved through the base class and read from disk).
   *
   * @throws Error when none of those fields is set.
   */
  private async getImageBase64(source: IImageSource): Promise<string> {
    if (source.base64) {
      return source.base64.replace(/^data:image\/\w+;base64,/, '');
    }

    if (source.buffer) {
      return source.buffer.toString('base64');
    }

    if (source.path) {
      const fullPath = this.resolveImagePath(source.path);
      // Asynchronous read so a large image does not block the event loop.
      const buffer = await fs.promises.readFile(fullPath);
      return buffer.toString('base64');
    }

    throw new Error('无效的图片来源');
  }

  /**
   * Reject with `Error('timeout')` if `promise` has not settled within
   * `timeout` milliseconds. The timer is cleared once the race settles so it
   * does not linger after a fast response. The underlying operation itself is
   * not cancelled.
   */
  private async withTimeout<T>(promise: Promise<T>, timeout: number): Promise<T> {
    let timer: ReturnType<typeof setTimeout> | undefined;
    const expiry = new Promise<never>((_, reject) => {
      timer = setTimeout(() => reject(new Error('timeout')), timeout);
    });

    try {
      return await Promise.race([promise, expiry]);
    } finally {
      clearTimeout(timer);
    }
  }
}
|
||||
|
||||
// 导出单例实例
|
||||
export const baiduProvider = new BaiduProvider();
|
||||
127
backend/src/services/ocr-providers/base.provider.ts
Normal file
127
backend/src/services/ocr-providers/base.provider.ts
Normal file
@@ -0,0 +1,127 @@
|
||||
/**
|
||||
* OCR Provider Base Interface
|
||||
* OCR 提供商基础接口
|
||||
*/
|
||||
|
||||
import { resolveImagePath } from '../../lib/path';
|
||||
|
||||
/** Outcome of a single OCR recognition call. */
export interface OCRRecognitionResult {
  /** Recognised text. */
  text: string;
  /** Confidence in the range 0-1. */
  confidence: number;
  /** Processing time in milliseconds. */
  duration?: number;
  /** Provider-specific additional data. */
  extra?: Record<string, unknown>;
}
|
||||
|
||||
/** Options accepted by OCR providers, both at construction and per call. */
export interface OCRProviderConfig {
  /** Timeout in milliseconds. */
  timeout?: number;
  /** Language code(s), e.g. `chi_sim`, `eng`. */
  language?: string;
  /** Additional provider-specific settings. */
  extras?: Record<string, unknown>;
}
|
||||
|
||||
/** The ways an image can be handed to a provider; all fields are optional. */
export interface IImageSource {
  /** Local path of the image. */
  path?: string;
  /** Image bytes. */
  buffer?: Buffer;
  /** Base64-encoded image. */
  base64?: string;
  /** URL of the image. */
  url?: string;
}
|
||||
|
||||
/**
 * Abstract base class for OCR providers.
 *
 * Concrete providers implement `getName`, `getType`, `isAvailable` and
 * `recognize`; batch recognition, default recommendations and image path
 * resolution are supplied here.
 */
export abstract class BaseOCRProvider {
  protected config: OCRProviderConfig;

  /**
   * @param config Overrides applied on top of the defaults
   *   (timeout 30000 ms, language `chi_sim+eng`).
   */
  constructor(config: OCRProviderConfig = {}) {
    const defaults: OCRProviderConfig = {
      timeout: 30000,
      language: 'chi_sim+eng',
    };
    this.config = { ...defaults, ...config };
  }

  /** Name of the provider. */
  abstract getName(): string;

  /** Kind of provider: `local` or `cloud`. */
  abstract getType(): 'local' | 'cloud';

  /** Whether the provider can currently be used. */
  abstract isAvailable(): Promise<boolean> | boolean;

  /**
   * Run OCR on one image.
   *
   * @param source Image source (path / Buffer / base64 / URL).
   * @param options Optional per-call configuration.
   */
  abstract recognize(
    source: IImageSource,
    options?: OCRProviderConfig
  ): Promise<OCRRecognitionResult>;

  /**
   * Recognise several images one after another.
   *
   * A failure on one image does not abort the batch: that image's slot holds
   * an empty result (confidence 0, duration 0) with the error message under
   * `extra.error`.
   *
   * @returns One result per source, in input order.
   */
  async batchRecognize(
    sources: IImageSource[],
    options?: OCRProviderConfig
  ): Promise<OCRRecognitionResult[]> {
    const outcomes: OCRRecognitionResult[] = [];

    for (const source of sources) {
      let outcome: OCRRecognitionResult;
      try {
        outcome = await this.recognize(source, options);
      } catch (error) {
        const message = error instanceof Error ? error.message : String(error);
        outcome = {
          text: '',
          confidence: 0,
          duration: 0,
          extra: { error: message },
        };
      }
      outcomes.push(outcome);
    }

    return outcomes;
  }

  /**
   * Default usage recommendations; providers may override them.
   */
  getRecommendations(): {
    maxImageSize?: number;
    supportedFormats?: string[];
    notes?: string;
  } {
    const tenMegabytes = 10 * 1024 * 1024;
    return {
      maxImageSize: tenMegabytes,
      supportedFormats: ['jpg', 'jpeg', 'png', 'webp', 'bmp', 'gif'],
      notes: '建议图片分辨率不低于 300dpi',
    };
  }

  /**
   * Resolve an image path by delegating to the shared `resolveImagePath`
   * helper imported by this module.
   */
  protected resolveImagePath(imagePath: string): string {
    return resolveImagePath(imagePath);
  }
}
|
||||
100
backend/src/services/ocr-providers/index.ts
Normal file
100
backend/src/services/ocr-providers/index.ts
Normal file
@@ -0,0 +1,100 @@
|
||||
/**
|
||||
* OCR Providers Module
|
||||
* 导出所有 OCR 提供商
|
||||
*/
|
||||
|
||||
export { BaseOCRProvider } from './base.provider';
// Interfaces are re-exported as types so that per-file transpilation
// (isolatedModules-style tooling) can erase them instead of emitting a
// runtime re-export for names that do not exist at runtime.
export type { IImageSource, OCRRecognitionResult, OCRProviderConfig } from './base.provider';
export { TesseractProvider, tesseractProvider } from './tesseract.provider';
export { BaiduProvider, baiduProvider } from './baidu.provider';
export { RapidOCRProvider, rapidocrProvider } from './rapidocr.provider';

// Import the classes from their defining modules rather than from this
// module itself.
import { TesseractProvider } from './tesseract.provider';
import { BaiduProvider } from './baidu.provider';
import { RapidOCRProvider } from './rapidocr.provider';
|
||||
|
||||
/**
|
||||
* OCR Provider 类型
|
||||
*/
|
||||
export type OCRProviderType = 'tesseract' | 'baidu' | 'rapidocr' | 'auto';
|
||||
|
||||
/**
 * Factory for OCR provider instances.
 *
 * `create` maps a provider type to a newly constructed provider; the special
 * type `'auto'` defers to an environment-based selection.
 */
export class OCRProviderFactory {
  private static providers = {
    tesseract: TesseractProvider,
    baidu: BaiduProvider,
    rapidocr: RapidOCRProvider,
  };

  /**
   * True when `type` is one of the registry's own keys. Inherited property
   * names such as "constructor" or "toString" are rejected, which matters
   * because the type may originate from a request body or an environment
   * variable.
   */
  private static isRegistered(type: string): type is Exclude<OCRProviderType, 'auto'> {
    return Object.prototype.hasOwnProperty.call(OCRProviderFactory.providers, type);
  }

  /**
   * Create a provider instance.
   *
   * @param type Provider type, or `'auto'` to select one from the environment.
   * @param config Passed to the provider constructor (ignored for `'auto'`).
   * @throws Error when `type` is not a registered provider.
   */
  static create(
    type: OCRProviderType,
    config?: any
  ): TesseractProvider | BaiduProvider | RapidOCRProvider {
    if (type === 'auto') {
      return this.autoSelect();
    }

    if (!this.isRegistered(type)) {
      throw new Error(`未知的 OCR provider: ${type}`);
    }

    const ProviderClass = this.providers[type];
    return new ProviderClass(config);
  }

  /**
   * Pick a provider from the environment.
   *
   * Order: an explicit, registered OCR_PROVIDER value; then RapidOCR when
   * RAPIDOCR_API_URL is set; then Baidu when both Baidu credentials are set;
   * otherwise Tesseract.js as the fallback.
   */
  private static autoSelect(): TesseractProvider | BaiduProvider | RapidOCRProvider {
    const envProvider = process.env.OCR_PROVIDER;

    if (envProvider && envProvider !== 'auto' && this.isRegistered(envProvider)) {
      const ProviderClass = this.providers[envProvider];
      return new ProviderClass();
    }

    if (process.env.RAPIDOCR_API_URL) {
      return new RapidOCRProvider();
    }

    if (process.env.BAIDU_OCR_API_KEY && process.env.BAIDU_OCR_SECRET_KEY) {
      return new BaiduProvider();
    }

    return new TesseractProvider();
  }

  /**
   * Report every known provider together with its current availability.
   *
   * A new instance of each provider is created and its `isAvailable()`
   * checks are awaited concurrently.
   */
  static async getAvailableProviders(): Promise<
    Array<{ type: string; name: string; available: boolean; typeDesc: string }>
  > {
    const providers = [
      { type: 'rapidocr', name: 'RapidOCR', instance: new RapidOCRProvider(), typeDesc: '本地快速准确' },
      { type: 'baidu', name: 'Baidu OCR', instance: new BaiduProvider(), typeDesc: '云端准确' },
      { type: 'tesseract', name: 'Tesseract.js', instance: new TesseractProvider(), typeDesc: '本地轻量' },
    ];

    const results = await Promise.all(
      providers.map(async (p) => ({
        type: p.type,
        name: p.name,
        available: await p.instance.isAvailable(),
        typeDesc: p.typeDesc,
      }))
    );

    return results;
  }
}
|
||||
167
backend/src/services/ocr-providers/rapidocr.provider.ts
Normal file
167
backend/src/services/ocr-providers/rapidocr.provider.ts
Normal file
@@ -0,0 +1,167 @@
|
||||
/**
|
||||
* RapidOCR Provider
|
||||
* 特点:速度快、准确率高、免费、本地运行
|
||||
* 基于 PaddleOCR,中文识别效果优秀
|
||||
*
|
||||
* 部署方式:
|
||||
* 1. 安装 Python 服务: pip install rapidocr-onnxruntime
|
||||
* 2. 启动服务 (参考 rapidocr_server 项目)
|
||||
* 3. 或使用 Docker: docker run -p 8080:8080 cshgg/rapidocr
|
||||
*
|
||||
* GitHub: https://github.com/RapidAI/RapidOCR
|
||||
*/
|
||||
|
||||
import { BaseOCRProvider, IImageSource, OCRRecognitionResult, OCRProviderConfig } from './base.provider';
|
||||
import fs from 'fs';
|
||||
|
||||
/** JSON body returned by the RapidOCR service, as consumed by this provider. */
interface RapidOCRResponse {
  /** Status code; the provider compares it against 200. */
  code: number;
  /** Message accompanying `code`. */
  msg: string;
  /** Recognised entries: text, score and box coordinates. */
  data: {
    text: string;
    score: number;
    box: number[][];
  }[];
}
|
||||
|
||||
/** JSON body posted to the RapidOCR service. */
interface RapidOCRRequest {
  /** Base64-encoded images. */
  images: string[];
  /** Optional processing switches forwarded to the service. */
  options?: {
    use_dilation?: boolean;
    use_cls?: boolean;
    use_tensorrt?: boolean;
  };
}
|
||||
|
||||
export class RapidOCRProvider extends BaseOCRProvider {
|
||||
private apiUrl: string;
|
||||
|
||||
constructor(config: OCRProviderConfig & { apiUrl?: string } = {}) {
|
||||
super(config);
|
||||
this.apiUrl = config.apiUrl || process.env.RAPIDOCR_API_URL || 'http://localhost:8080';
|
||||
}
|
||||
|
||||
getName(): string {
|
||||
return 'RapidOCR';
|
||||
}
|
||||
|
||||
getType(): 'local' | 'cloud' {
|
||||
return 'local';
|
||||
}
|
||||
|
||||
/**
|
||||
* 检查 RapidOCR 服务是否可用
|
||||
*/
|
||||
async isAvailable(): Promise<boolean> {
|
||||
try {
|
||||
const response = await fetch(`${this.apiUrl}/health`, {
|
||||
signal: AbortSignal.timeout(2000),
|
||||
});
|
||||
return response.ok;
|
||||
} catch {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 执行 OCR 识别
|
||||
*/
|
||||
async recognize(
|
||||
source: IImageSource,
|
||||
options?: OCRProviderConfig
|
||||
): Promise<OCRRecognitionResult> {
|
||||
const startTime = Date.now();
|
||||
|
||||
// 获取图片 Base64
|
||||
const imageBase64 = await this.getImageBase64(source);
|
||||
|
||||
// 调用 RapidOCR API
|
||||
const response = await this.withTimeout(
|
||||
fetch(`${this.apiUrl}/ocr`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({
|
||||
images: [imageBase64],
|
||||
options: {
|
||||
use_dilation: true, // 使用膨胀增强识别
|
||||
use_cls: true, // 使用文字方向分类
|
||||
},
|
||||
} as RapidOCRRequest),
|
||||
}),
|
||||
options?.timeout || this.config.timeout || 15000
|
||||
);
|
||||
|
||||
const data = (await response.json()) as RapidOCRResponse;
|
||||
const duration = Date.now() - startTime;
|
||||
|
||||
// 检查错误(支持两种错误格式)
|
||||
if (data.code !== 200 && 'error' in data) {
|
||||
throw new Error(`RapidOCR 错误: ${(data as any).error || data.msg} (${data.code})`);
|
||||
}
|
||||
|
||||
// 提取文本和置信度(确保 data.data 存在)
|
||||
const ocrResults = Array.isArray(data.data) ? data.data : [];
|
||||
const text = ocrResults.map((r) => r.text).join('\n');
|
||||
|
||||
// 计算平均置信度
|
||||
const confidence = ocrResults.length > 0
|
||||
? ocrResults.reduce((acc, r) => acc + (r.score || 0), 0) / ocrResults.length
|
||||
: 0;
|
||||
|
||||
return {
|
||||
text: text.trim(),
|
||||
confidence,
|
||||
duration,
|
||||
extra: {
|
||||
provider: 'rapidocr',
|
||||
textCount: ocrResults.length,
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
getRecommendations() {
|
||||
return {
|
||||
maxImageSize: 10 * 1024 * 1024,
|
||||
supportedFormats: ['jpg', 'jpeg', 'png', 'webp', 'bmp'],
|
||||
notes: 'RapidOCR 是基于 PaddleOCR 的本地服务,速度快且准确率高。需要先启动 RapidOCR 服务。',
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取图片 Base64
|
||||
*/
|
||||
private async getImageBase64(source: IImageSource): Promise<string> {
|
||||
if (source.base64) {
|
||||
// 移除 data URL 前缀
|
||||
return source.base64.replace(/^data:image\/\w+;base64,/, '');
|
||||
}
|
||||
|
||||
if (source.buffer) {
|
||||
return source.buffer.toString('base64');
|
||||
}
|
||||
|
||||
if (source.path) {
|
||||
// 使用基类的路径解析方法
|
||||
const fullPath = this.resolveImagePath(source.path);
|
||||
const buffer = fs.readFileSync(fullPath);
|
||||
return buffer.toString('base64');
|
||||
}
|
||||
|
||||
throw new Error('无效的图片来源');
|
||||
}
|
||||
|
||||
/**
|
||||
* 超时包装
|
||||
*/
|
||||
/**
 * Race a promise against a deadline.
 *
 * @param promise - The work to wait for.
 * @param timeout - Deadline in milliseconds.
 * @returns The value `promise` resolves with, if it settles first.
 * @throws Error with message `'timeout'` when the deadline elapses first;
 *   a rejection of `promise` itself is passed through unchanged.
 *
 * Note: the underlying work is not cancelled on timeout, only abandoned.
 */
private async withTimeout<T>(promise: Promise<T>, timeout: number): Promise<T> {
  let timer: ReturnType<typeof setTimeout> | undefined;

  const deadline = new Promise<never>((_, reject) => {
    timer = setTimeout(() => reject(new Error('timeout')), timeout);
  });

  try {
    return await Promise.race([promise, deadline]);
  } finally {
    // Always release the timer; otherwise each call keeps a pending timer
    // (and the event loop) alive for the full timeout after the work is done.
    clearTimeout(timer);
  }
}
|
||||
}
|
||||
|
||||
// 导出单例实例
|
||||
/** Shared module-level RapidOCRProvider instance, constructed with no arguments. */
export const rapidocrProvider = new RapidOCRProvider();
|
||||
166
backend/src/services/ocr-providers/tesseract.provider.ts
Normal file
166
backend/src/services/ocr-providers/tesseract.provider.ts
Normal file
@@ -0,0 +1,166 @@
|
||||
/**
|
||||
* Tesseract.js OCR Provider
|
||||
* 特点:免费、本地运行、支持多语言
|
||||
* 缺点:速度较慢、准确率中等
|
||||
*/
|
||||
|
||||
import { BaseOCRProvider, IImageSource, OCRRecognitionResult, OCRProviderConfig } from './base.provider';
|
||||
import fs from 'fs';
|
||||
|
||||
/**
 * The slice of the tesseract.js module that this provider relies on.
 * Only `recognize` is described; nothing else from the library is used here.
 */
interface TesseractModule {
  /**
   * Run recognition on one image.
   *
   * @param input - The image, passed either as a string or as a Buffer.
   * @param languages - Language code(s) as a single string, e.g. `chi_sim+eng`.
   * @param settings - Optional settings; `logger` receives progress messages.
   * @returns A promise for the recognized text and its confidence value.
   */
  recognize: (
    input: string | Buffer,
    languages: string,
    settings?: { logger?: (message: any) => void }
  ) => Promise<{
    data: {
      text: string;
      confidence: number;
    };
  }>;
}
|
||||
|
||||
/**
 * OCR provider backed by the optional `tesseract.js` package.
 *
 * The library is loaded lazily with a dynamic import, so the backend still
 * starts when the package is not installed; `isAvailable()` reports whether
 * the import succeeded.
 */
export class TesseractProvider extends BaseOCRProvider {
  /** Cached tesseract.js module; stays `null` until an import succeeds. */
  private tesseract: TesseractModule | null = null;

  /** Set once `isAvailable()` has probed for the module, whatever the outcome. */
  private initialized = false;

  /**
   * @param config - Provider configuration; defaults to an empty object.
   */
  constructor(config: OCRProviderConfig = {}) {
    super(config);
  }

  /** Display name of this provider. */
  getName(): string {
    return 'Tesseract.js';
  }

  /** This provider runs locally rather than calling a cloud API. */
  getType(): 'local' | 'cloud' {
    return 'local';
  }

  /**
   * Check whether tesseract.js can be imported.
   *
   * The probe result is remembered: after the first call, this returns
   * whether a module is currently cached without importing again.
   *
   * @returns `true` when the module is loaded, `false` otherwise.
   */
  async isAvailable(): Promise<boolean> {
    if (this.initialized) {
      return this.tesseract !== null;
    }

    try {
      await this.loadTesseract();
      return true;
    } catch {
      // Import failure means the optional dependency is missing.
      return false;
    } finally {
      this.initialized = true;
    }
  }

  /**
   * Run OCR on one image.
   *
   * @param source - Image given as a buffer, a file path, or a Base64 string.
   * @param options - Per-call overrides for `language` and `timeout`.
   * @returns Cleaned text, confidence scaled to 0–1, elapsed milliseconds,
   *   and the provider name and language under `extra`.
   * @throws Error('OCR 识别超时') when recognition exceeds the timeout;
   *   image-loading, import, and recognition errors are rethrown unchanged.
   */
  async recognize(
    source: IImageSource,
    options?: OCRProviderConfig
  ): Promise<OCRRecognitionResult> {
    const startTime = Date.now();

    // `||` is deliberate: an empty language or a zero timeout falls back too.
    const language = options?.language || this.config.language || 'chi_sim+eng';
    const timeoutMs = options?.timeout || this.config.timeout || 30000;

    // Resolve the image first so a bad source fails before the library loads.
    const imageData = await this.getImageData(source);

    try {
      // Reuse the module cached by isAvailable() instead of importing again.
      const tesseract = await this.loadTesseract();

      const result = await this.withTimeout(
        tesseract.recognize(imageData, language, {
          logger: () => {
            // Progress reporting is intentionally disabled.
          },
        }),
        timeoutMs
      );

      const duration = Date.now() - startTime;

      return {
        text: this.cleanText(result.data.text),
        confidence: result.data.confidence / 100, // Tesseract reports 0-100
        duration,
        extra: {
          provider: 'tesseract.js',
          language,
        },
      };
    } catch (error) {
      if (error instanceof Error && error.message === 'timeout') {
        throw new Error('OCR 识别超时');
      }
      throw error;
    }
  }

  /**
   * Usage hints: the base provider's recommendations with a
   * Tesseract-specific note.
   */
  getRecommendations() {
    return {
      ...super.getRecommendations(),
      notes: '首次运行会下载语言包 (约 20MB),后续运行会更快。适合少量图片处理。',
    };
  }

  /**
   * Import tesseract.js once and cache it on the instance.
   *
   * @returns The loaded module.
   * @throws Whatever the dynamic import throws when the package is missing.
   */
  private async loadTesseract(): Promise<TesseractModule> {
    if (this.tesseract) {
      return this.tesseract;
    }

    // eslint-disable-next-line @typescript-eslint/no-explicit-any -- export shape depends on module interop
    const imported: any = await import('tesseract.js');
    // Prefer the default export when present, else the namespace object.
    const resolved: TesseractModule = imported.default || imported;
    this.tesseract = resolved;
    return resolved;
  }

  /**
   * Turn an image source into something `recognize` accepts.
   *
   * Fields are tried in order: `buffer`, then `path`, then `base64`.
   *
   * @returns The buffer itself, the resolved file path, or a data URL.
   * @throws Error when the resolved path does not exist or no field is set.
   */
  private async getImageData(source: IImageSource): Promise<string | Buffer> {
    if (source.buffer) {
      return source.buffer;
    }

    if (source.path) {
      // Path resolution is delegated to the inherited helper.
      const fullPath = this.resolveImagePath(source.path);

      if (!fs.existsSync(fullPath)) {
        throw new Error(`图片文件不存在: ${fullPath}`);
      }

      return fullPath;
    }

    if (source.base64) {
      // A bare Base64 payload is wrapped in a data URL; PNG is assumed
      // because the source carries no MIME information.
      return source.base64.startsWith('data:')
        ? source.base64
        : `data:image/png;base64,${source.base64}`;
    }

    throw new Error('无效的图片来源');
  }

  /**
   * Race a promise against a deadline.
   *
   * @param promise - The work to wait for.
   * @param timeout - Deadline in milliseconds.
   * @throws Error with message `'timeout'` when the deadline elapses first.
   */
  private async withTimeout<T>(promise: Promise<T>, timeout: number): Promise<T> {
    let timer: ReturnType<typeof setTimeout> | undefined;

    const deadline = new Promise<never>((_, reject) => {
      timer = setTimeout(() => reject(new Error('timeout')), timeout);
    });

    try {
      return await Promise.race([promise, deadline]);
    } finally {
      // Release the timer so a finished call does not keep it pending
      // for the rest of the timeout window.
      clearTimeout(timer);
    }
  }

  /**
   * Normalize whitespace in recognized text while keeping line structure.
   *
   * Runs of spaces/tabs become one space and runs of blank lines become a
   * single blank line. Newlines are excluded from the first step; collapsing
   * `\s+` there would flatten the text to one line and make the blank-line
   * step unreachable.
   */
  private cleanText(text: string): string {
    return text
      .replace(/\r\n?/g, '\n') // normalize line endings
      .replace(/[^\S\n]+/g, ' ') // collapse horizontal whitespace only
      .replace(/\n\s*\n/g, '\n\n') // collapse consecutive blank lines
      .trim();
  }
}
|
||||
|
||||
// 导出单例实例
|
||||
/** Shared module-level TesseractProvider instance, constructed with no arguments. */
export const tesseractProvider = new TesseractProvider();
|
||||
15
backend/src/types/tesseract.d.ts
vendored
Normal file
15
backend/src/types/tesseract.d.ts
vendored
Normal file
@@ -0,0 +1,15 @@
|
||||
// Type declarations for optional tesseract.js dependency
|
||||
declare module 'tesseract.js' {
  /** Result object that `recognize` resolves with. */
  export interface TesseractResult {
    data: {
      /** The recognized text. */
      text: string;
      /** Confidence value; the Tesseract provider treats it as 0-100. */
      confidence: number;
    };
  }

  /**
   * Recognize the text in one image.
   *
   * @param input - The image, as a string or a Buffer.
   * @param languages - Language code(s) in a single string.
   * @param settings - Optional settings; `logger` receives progress messages.
   */
  export function recognize(
    input: string | Buffer,
    languages: string,
    settings?: { logger?: (message: any) => void }
  ): Promise<TesseractResult>;
}
|
||||
Reference in New Issue
Block a user