feat: 实现多 OCR 提供商架构和完整设置页面

## 主要变更

### OCR 架构
- 新增多提供商 OCR 系统 (Tesseract.js, Baidu OCR, RapidOCR)
- 添加 Provider 基类接口和工厂模式
- 支持 provider 自动选择和降级处理
- 新增 RapidOCR Python HTTP 服务 (端口 8080)

### 路径修复
- 修复 Windows 平台路径解析问题
- 统一路径处理工具 (lib/path.ts)
- 修复 uploads 目录定位问题

### 设置页面重构
- 三个标签页：API 配置、OCR 配置、AI 配置
- API 服务器地址配置
- OCR 服务商配置（Tesseract.js, RapidOCR, 百度 OCR）
- AI 服务商配置（智谱 GLM, MiniMax, DeepSeek, Kimi, OpenAI, Anthropic）

### 端口配置
- 前端端口: 13056
- 后端端口: 13057

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-26 14:00:22 +08:00
parent 813df6c738
commit f8472987f0
23 changed files with 2321 additions and 275 deletions
@@ -6,11 +6,16 @@ import express from 'express';
 import cors from 'cors';
 import dotenv from 'dotenv';
 import path from 'path';
+import { fileURLToPath } from 'url';
 import authRoutes from './routes/auth.routes';
 import documentRoutes from './routes/document.routes';
 import todoRoutes from './routes/todo.routes';
 import imageRoutes from './routes/image.routes';

+// 获取当前文件的目录
+const __filename = fileURLToPath(import.meta.url);
+const __dirname = path.dirname(__filename);
+
 // Load environment variables
 dotenv.config();

@@ -24,8 +29,8 @@ app.use(cors({
 app.use(express.json());
 app.use(express.urlencoded({ extended: true }));

-// Static files for uploads
-app.use('/uploads', express.static(path.join(process.cwd(), 'uploads')));
+// Static files for uploads (使用绝对路径指向 backend/uploads)
+app.use('/uploads', express.static(path.join(__dirname, '..', 'uploads')));

 // Health check
 app.get('/api/health', (_req, res) => {
@@ -0,0 +1,84 @@
+/**
+ * 路径解析工具
+ * 解决开发环境下路径解析问题
+ */
+
+import path from 'path';
+import fs from 'fs';
+import { fileURLToPath } from 'url';
+
+/**
+ * Locate the project root directory.
+ *
+ * Starts from the directory containing this module (falling back to
+ * `process.cwd()` when the module URL cannot be converted to a path) and
+ * checks that directory plus up to nine ancestors for a `package.json`.
+ *
+ * @returns The first directory on that walk that contains `package.json`,
+ *          or `process.cwd()` when none is found within ten levels.
+ */
+export function getProjectRoot(): string {
+  let startDir: string;
+  try {
+    // ESM has no __dirname; derive the directory from the module URL.
+    startDir = path.dirname(fileURLToPath(import.meta.url));
+  } catch {
+    startDir = process.cwd();
+  }
+
+  // Strip the leading slash from Windows paths shaped like "/C:/...".
+  const hasSlashBeforeDrive =
+    startDir.startsWith('/') && /^[a-zA-Z]:/.test(startDir.slice(1));
+  if (process.platform === 'win32' && hasSlashBeforeDrive) {
+    startDir = startDir.substring(1);
+  }
+
+  const maxLevels = 10;
+  let candidate = startDir;
+  let level = 0;
+  while (level < maxLevels) {
+    if (fs.existsSync(path.join(candidate, 'package.json'))) {
+      return candidate;
+    }
+    candidate = path.dirname(candidate);
+    level += 1;
+  }
+
+  // Nothing found on the walk: fall back to the working directory.
+  return process.cwd();
+}
+
+/**
+ * Absolute path of the uploads directory, i.e. `<project root>/uploads`.
+ *
+ * @returns The project root joined with `uploads`.
+ */
+export function getUploadsDir(): string {
+  return path.join(getProjectRoot(), 'uploads');
+}
+
+/**
+ * Resolve a stored image path to an absolute filesystem path.
+ *
+ * Resolution order:
+ * 1. Database-style paths (`/uploads/xxx.png`) map into the uploads directory.
+ * 2. Paths that are genuinely absolute on the host OS are returned unchanged.
+ * 3. Anything else is resolved relative to the project root.
+ *
+ * @param imagePath Path as stored in the database or supplied by a caller.
+ * @returns Absolute path on the local filesystem.
+ */
+export function resolveImagePath(imagePath: string): string {
+  // The database prefix must be checked first: `path.isAbsolute('/uploads/x.png')`
+  // is true on POSIX (and on Windows), so testing absoluteness first would hand
+  // the database path back untouched and it would never reach the uploads dir.
+  const dbPrefix = '/uploads/';
+  if (imagePath.startsWith(dbPrefix)) {
+    return path.join(getUploadsDir(), imagePath.slice(dbPrefix.length));
+  }
+
+  // On Windows only drive-letter paths (either separator: `C:\` or `C:/`) and
+  // UNC paths (`\\server\share`) are treated as real absolute locations; a bare
+  // leading slash is not.
+  const isHostAbsolute = process.platform === 'win32'
+    ? /^(?:[a-zA-Z]:[\\/]|\\\\)/.test(imagePath)
+    : path.isAbsolute(imagePath);
+  if (isHostAbsolute) {
+    return imagePath;
+  }
+
+  // Remaining relative paths are anchored at the project root.
+  return path.join(getProjectRoot(), imagePath);
+}
+
+/**
+ * Build the path that is stored in the database for an uploaded file.
+ *
+ * @param filename Name of the file inside the uploads directory.
+ * @returns The filename prefixed with `/uploads/`.
+ */
+export function generateDbPath(filename: string): string {
+  return '/uploads/' + filename;
+}
@@ -5,9 +5,16 @@
 import multer from 'multer';
 import path from 'path';
 import fs from 'fs';
+import { fileURLToPath } from 'url';
+
+// 获取当前文件的目录
+const __filename = fileURLToPath(import.meta.url);
+const __dirname = path.dirname(__filename);
+
+// uploads 目录放在 backend 根目录下
+const uploadDir = path.join(__dirname, '..', '..', 'uploads');

 // Ensure upload directory exists
-const uploadDir = path.join(process.cwd(), 'uploads');
 if (!fs.existsSync(uploadDir)) {
  fs.mkdirSync(uploadDir, { recursive: true });
 }
@@ -51,3 +58,6 @@ export const upload = multer({
    fileSize: 10 * 1024 * 1024, // 10MB limit
  },
 });
+
+// 导出上传目录路径供其他模块使用
+export { uploadDir };
@@ -7,7 +7,8 @@ import { Router } from 'express';
 import { ImageController } from '../controllers/image.controller';
 import { authenticate } from '../middleware/auth.middleware';
 import { upload } from '../middleware/upload.middleware';
-import { triggerOCRProcessing } from '../services/ocr-processor.service';
+import { triggerOCRProcessing, OCRProcessorService } from '../services/ocr-processor.service';
+import { OCRProviderType } from '../services/ocr-providers';

 const router = Router();

@@ -32,6 +33,27 @@ router.get('/', authenticate, ImageController.getUserImages);
 */
 router.get('/pending', authenticate, ImageController.getPending);

+/**
+ * @route   GET /api/images/ocr/providers
+ * @desc    Get available OCR providers
+ * @access  Private
+ */
+router.get('/ocr/providers', authenticate, async (_req, res) => {
+  // Respond with whatever OCRProcessorService reports; any failure becomes a 500.
+  try {
+    res.json({
+      success: true,
+      data: await OCRProcessorService.getAvailableProviders(),
+    });
+  } catch (err) {
+    let reason = '获取 OCR 提供商失败';
+    if (err instanceof Error) {
+      reason = err.message;
+    }
+    res.status(500).json({ success: false, error: reason });
+  }
+});
+
 /**
 * @route   GET /api/images/:id
 * @desc    Get image by ID
@@ -43,14 +65,16 @@ router.get('/:id', authenticate, ImageController.getById);
 * @route   POST /api/images/:id/reprocess
 * @desc    Re-trigger OCR processing
 * @access  Private
+ * @body    { provider?: 'tesseract' | 'baidu' | 'rapidocr' | 'auto' }
 */
 router.post('/:id/reprocess', authenticate, async (req, res) => {
  try {
-    const userId = req.user!.user_id;
-    const { id } = req.params;
+    const userId = typeof req.user!.user_id === 'string' ? req.user!.user_id : String(req.user!.user_id);
+    const id = typeof req.params.id === 'string' ? req.params.id : req.params.id[0];
+    const { provider } = req.body;

    // 触发 OCR 处理
-    triggerOCRProcessing(id, userId);
+    triggerOCRProcessing(id, userId, { provider: provider as OCRProviderType });

    res.json({
      success: true,
@@ -1,20 +1,36 @@
 /**
 * OCR Processor Service
 * 处理图片 OCR 识别的异步服务
+ * 支持多种 OCR Provider: Tesseract.js, Baidu OCR, RapidOCR
 */

 import { prisma } from '../lib/prisma';
 import { ImageService } from './image.service';
-import fs from 'fs';
-import path from 'path';
+import {
+  OCRProviderFactory,
+  OCRProviderType,
+  IImageSource,
+} from './ocr-providers';
+
+export interface OCRProcessOptions {
+  /** 指定 OCR Provider */
+  provider?: OCRProviderType;
+  /** 置信度阈值 */
+  confidenceThreshold?: number;
+}

 export class OCRProcessorService {
  /**
   * 处理图片的 OCR 识别
-   * 注意：当前是模拟实现，返回占位符文本
-   * 实际使用时需要集成 Tesseract.js 或其他 OCR 服务
+   * @param imageId 图片 ID
+   * @param userId 用户 ID
+   * @param options 处理选项
   */
-  static async processImage(imageId: string, userId: string): Promise<void> {
+  static async processImage(
+    imageId: string,
+    userId: string,
+    options: OCRProcessOptions = {}
+  ): Promise<void> {
    try {
      // 更新状态为处理中
      await prisma.image.update({
@@ -28,12 +44,14 @@ export class OCRProcessorService {
        throw new Error('Image not found');
      }

-      // TODO: 集成真实的 OCR 服务
-      // 当前使用模拟实现
-      const ocrResult = await this.performOCRSimulated(image);
+      // 执行 OCR 识别
+      const ocrResult = await this.performOCR(image, options);

      // 根据置信度决定状态
-      const status = ocrResult.confidence >= 0.3 ? 'completed' : 'failed';
+      const threshold = options.confidenceThreshold
+        ? parseFloat(options.confidenceThreshold.toString())
+        : parseFloat(process.env.OCR_CONFIDENCE_THRESHOLD || '0.3');
+      const status = ocrResult.confidence >= threshold ? 'completed' : 'failed';

      await prisma.image.update({
        where: { id: imageId },
@@ -57,109 +75,106 @@ export class OCRProcessorService {
  }

  /**
-   * 模拟 OCR 处理
-   * 实际实现应该调用 Tesseract.js 或其他 OCR API
+   * 执行 OCR 识别
+   * @param image 图片信息
+   * @param options 处理选项
   */
-  private static async performOCRSimulated(image: any): Promise<{
-    text: string;
-    confidence: number;
-  }> {
-    // 模拟处理延迟
-    await new Promise(resolve => setTimeout(resolve, 2000));
+  private static async performOCR(
+    image: any,
+    options: OCRProcessOptions = {}
+  ): Promise<{ text: string; confidence: number }> {
+    // 获取 OCR Provider
+    const providerType = options.provider || (process.env.OCR_PROVIDER as OCRProviderType) || 'auto';
+    const provider = OCRProviderFactory.create(providerType);

-    // TODO: 实际 OCR 集成选项：
-    // 1. Tesseract.js (本地)
-    //    import Tesseract from 'tesseract.js';
-    //    const { data: { text, confidence } } = await Tesseract.recognize(imagePath, 'chi_sim+eng');
-    //
-    // 2. PaddleOCR (需要 Python 服务)
-    //    const response = await fetch('http://localhost:5000/ocr', {
-    //      method: 'POST',
-    //      body: JSON.stringify({ image_path: imagePath }),
-    //    });
-    //
-    // 3. 云端 OCR API (百度/腾讯/阿里)
+    // 检查 provider 是否可用
+    const available = await provider.isAvailable();
+    if (!available) {
+      throw new Error(
+        `OCR Provider "${provider.getName()}" 不可用。` +
+        `请检查配置或安装相应的依赖。`
+      );
+    }
+
+    // 准备图片来源
+    const source: IImageSource = {
+      path: image.file_path,
+    };
+
+    // 执行识别
+    const result = await provider.recognize(source);
+
+    console.log(
+      `[OCR] Provider: ${provider.getName()}, ` +
+      `Confidence: ${(result.confidence * 100).toFixed(1)}%, ` +
+      `Duration: ${result.duration}ms, ` +
+      `Text length: ${result.text.length}`
+    );

-    // 模拟返回结果
    return {
-      text: '[模拟 OCR 结果] 图片文字识别功能尚未集成。请在设置页面配置 OCR 服务后重试。',
-      confidence: 0.5,
+      text: result.text,
+      confidence: result.confidence,
    };
  }

  /**
-   * 使用 Tesseract.js 进行 OCR 识别（需要安装依赖）
+   * 获取所有可用的 OCR Providers
   */
-  private static async performOCRWithTesseract(imagePath: string): Promise<{
-    text: string;
-    confidence: number;
-  }> {
-    // 动态导入 Tesseract（如果已安装）
-    try {
-      const Tesseract = await import('tesseract.js');
-
-      // 检查文件是否存在
-      const fullPath = path.join(process.cwd(), imagePath.replace('/uploads/', 'uploads/'));
-      if (!fs.existsSync(fullPath)) {
-        throw new Error(`Image file not found: ${fullPath}`);
-      }
-
-      const result = await Tesseract.recognize(fullPath, 'chi_sim+eng', {
-        logger: (m: any) => console.log(m),
-      });
-
-      return {
-        text: result.data.text,
-        confidence: result.data.confidence / 100, // Tesseract 返回 0-100，需要转换为 0-1
-      };
-    } catch (error) {
-      // 如果 Tesseract 未安装，返回模拟结果
-      console.warn('Tesseract.js not installed, using simulated OCR:', error);
-      return this.performOCRSimulated(null);
-    }
+  static async getAvailableProviders(): Promise<
+    Array<{ type: string; name: string; available: boolean; typeDesc: string }>
+  > {
+    return OCRProviderFactory.getAvailableProviders();
  }

  /**
-   * 调用外部 OCR API（示例）
+   * 测试指定的 OCR Provider
   */
-  private static async performOCRWithAPI(imagePath: string): Promise<{
-    text: string;
-    confidence: number;
+  static async testProvider(
+    providerType: OCRProviderType,
+    imagePath: string
+  ): Promise<{
+    success: boolean;
+    result?: { text: string; confidence: number; duration: number };
+    error?: string;
  }> {
-    // 示例：调用百度 OCR API
-    // const apiKey = process.env.BAIDU_OCR_API_KEY;
-    // const secretKey = process.env.BAIDU_OCR_SECRET_KEY;
-    //
-    // // 获取 access token
-    // const tokenResponse = await fetch(`https://aip.baidubce.com/oauth/2.0/token?grant_type=client_credentials&client_id=${apiKey}&client_secret=${secretKey}`);
-    // const { access_token } = await tokenResponse.json();
-    //
-    // // 读取图片并转为 base64
-    // const imageBuffer = fs.readFileSync(imagePath);
-    // const imageBase64 = imageBuffer.toString('base64');
-    //
-    // // 调用 OCR API
-    // const ocrResponse = await fetch(`https://aip.baidubce.com/rest/2.0/ocr/v1/general_basic?access_token=${access_token}`, {
-    //   method: 'POST',
-    //   headers: { 'Content-Type': 'application/x-www-form-urlencoded' },
-    //   body: `image=${encodeURIComponent(imageBase64)}`,
-    // });
-    //
-    // const result = await ocrResponse.json();
-    //
-    // return {
-    //   text: result.words_result?.map((w: any) => w.words).join('\n') || '',
-    //   confidence: (result.words_result?.[0]?.probability?.average || 0.5) / 100,
-    // };
+    try {
+      const provider = OCRProviderFactory.create(providerType);
+      const available = await provider.isAvailable();

-    throw new Error('OCR API not configured');
+      if (!available) {
+        return {
+          success: false,
+          error: `Provider "${provider.getName()}" 不可用`,
+        };
+      }
+
+      const result = await provider.recognize({ path: imagePath });
+
+      return {
+        success: true,
+        result: {
+          text: result.text,
+          confidence: result.confidence,
+          duration: result.duration || 0,
+        },
+      };
+    } catch (error) {
+      return {
+        success: false,
+        error: error instanceof Error ? error.message : String(error),
+      };
+    }
  }
 }

 // 导出异步处理函数（用于在后台触发 OCR）
-export const triggerOCRProcessing = async (imageId: string, userId: string) => {
+export const triggerOCRProcessing = async (
+  imageId: string,
+  userId: string,
+  options?: OCRProcessOptions
+) => {
  // 不等待完成，在后台处理
-  OCRProcessorService.processImage(imageId, userId).catch(error => {
+  OCRProcessorService.processImage(imageId, userId, options).catch((error) => {
    console.error('OCR processing failed:', error);
  });
 };
@@ -0,0 +1,187 @@
+/**
+ * Baidu OCR Provider
+ * 特点：准确率高、速度快、国内访问快
+ * 缺点：需要付费 (有免费额度)、需要网络连接
+ * 官方文档: https://cloud.baidu.com/doc/OCR/index.html
+ */
+
+import { BaseOCRProvider, IImageSource, OCRRecognitionResult, OCRProviderConfig } from './base.provider';
+import fs from 'fs';
+
+// Shape of the OCR endpoint response as read by BaiduProvider.recognize.
+interface BaiduOCRResponse {
+  // Recognized entries; their `words` values are joined with newlines.
+  words_result: Array<{
+    words: string;
+    // Optional statistics; only `average` is read by the provider.
+    probability?: {
+      average: number;
+      variance: number;
+      min: number;
+    };
+  }>;
+  // A truthy error_code makes recognize() throw, quoting error_msg.
+  error_code?: number;
+  error_msg?: string;
+}
+
+// Shape of the OAuth token endpoint response as read by getAccessToken.
+interface BaiduTokenResponse {
+  access_token: string;
+  // Used to compute the cached token's expiry (multiplied by 1000 there).
+  expires_in: number;
+}
+
+/**
+ * OCR provider backed by the Baidu cloud OCR HTTP API.
+ *
+ * Credentials come from the constructor config or, failing that, from the
+ * BAIDU_OCR_API_KEY / BAIDU_OCR_SECRET_KEY environment variables.
+ */
+export class BaiduProvider extends BaseOCRProvider {
+  private readonly apiKey: string;
+  private readonly secretKey: string;
+  private accessToken: string | null = null;
+  private tokenExpireTime: number = 0;
+  private readonly apiUrl = 'https://aip.baidubce.com/rest/2.0/ocr/v1/general_basic';
+  private readonly tokenUrl = 'https://aip.baidubce.com/oauth/2.0/token';
+
+  constructor(config: OCRProviderConfig & { apiKey?: string; secretKey?: string } = {}) {
+    super(config);
+    this.apiKey = config.apiKey || process.env.BAIDU_OCR_API_KEY || '';
+    this.secretKey = config.secretKey || process.env.BAIDU_OCR_SECRET_KEY || '';
+  }
+
+  getName(): string {
+    return 'Baidu OCR';
+  }
+
+  getType(): 'local' | 'cloud' {
+    return 'cloud';
+  }
+
+  /**
+   * The provider counts as available when both credentials are non-empty.
+   * No network request is made here.
+   */
+  async isAvailable(): Promise<boolean> {
+    return !!(this.apiKey && this.secretKey);
+  }
+
+  /**
+   * Return a cached access token, or fetch a new one.
+   *
+   * @param timeout Milliseconds to wait for the token endpoint.
+   * @throws Error when the endpoint answers with a non-2xx status or without a token.
+   */
+  private async getAccessToken(
+    timeout: number = this.config.timeout || 10000
+  ): Promise<string> {
+    if (this.accessToken && Date.now() < this.tokenExpireTime) {
+      return this.accessToken;
+    }
+
+    // URLSearchParams percent-encodes the credentials; raw interpolation would
+    // corrupt the query string if a key contained '&', '+', '#', etc.
+    const query = new URLSearchParams({
+      grant_type: 'client_credentials',
+      client_id: this.apiKey,
+      client_secret: this.secretKey,
+    });
+
+    const response = await this.withTimeout(
+      fetch(`${this.tokenUrl}?${query.toString()}`),
+      timeout
+    );
+    if (!response.ok) {
+      throw new Error(`获取百度 OCR access_token 失败 (HTTP ${response.status})`);
+    }
+
+    const data = (await response.json()) as Partial<BaiduTokenResponse>;
+    if (!data.access_token) {
+      throw new Error('获取百度 OCR access_token 失败');
+    }
+
+    this.accessToken = data.access_token;
+    // Expire the cache 5 minutes early; guard against a missing/short lifetime
+    // so the expiry never becomes NaN or lands in the past by arithmetic.
+    const lifetimeSeconds = typeof data.expires_in === 'number' ? data.expires_in : 0;
+    this.tokenExpireTime = Date.now() + Math.max(lifetimeSeconds - 300, 0) * 1000;
+
+    return this.accessToken;
+  }
+
+  /**
+   * Run OCR on one image.
+   *
+   * @param source  Image given as base64, buffer or path (checked in that order).
+   * @param options Per-call overrides; only `timeout` is read here.
+   * @returns Recognized text (lines joined with "\n"), a 0-1 confidence and the elapsed time.
+   * @throws Error when credentials are missing, the request times out, or the API reports an error.
+   */
+  async recognize(
+    source: IImageSource,
+    options?: OCRProviderConfig
+  ): Promise<OCRRecognitionResult> {
+    const startTime = Date.now();
+
+    const available = await this.isAvailable();
+    if (!available) {
+      throw new Error('百度 OCR 未配置。请设置 BAIDU_OCR_API_KEY 和 BAIDU_OCR_SECRET_KEY 环境变量');
+    }
+
+    const timeout = options?.timeout || this.config.timeout || 10000;
+    const imageBase64 = await this.getImageBase64(source);
+    const token = await this.getAccessToken(timeout);
+
+    // `probability=true` asks the API to include per-line confidence; without
+    // it the `probability` field parsed below is never present.
+    // NOTE(review): parameter name taken from the Baidu OCR docs — confirm.
+    const response = await this.withTimeout(
+      fetch(`${this.apiUrl}?access_token=${encodeURIComponent(token)}`, {
+        method: 'POST',
+        headers: { 'Content-Type': 'application/x-www-form-urlencoded' },
+        body: `image=${encodeURIComponent(imageBase64)}&probability=true`,
+      }),
+      timeout
+    );
+
+    let data: BaiduOCRResponse;
+    try {
+      data = (await response.json()) as BaiduOCRResponse;
+    } catch {
+      throw new Error(`百度 OCR 返回了无法解析的响应 (HTTP ${response.status})`);
+    }
+    const duration = Date.now() - startTime;
+
+    // Prefer the API's own error description; fall back to the HTTP status.
+    if (data.error_code) {
+      throw new Error(`百度 OCR 错误: ${data.error_msg} (${data.error_code})`);
+    }
+    if (!response.ok) {
+      throw new Error(`百度 OCR 请求失败 (HTTP ${response.status})`);
+    }
+
+    const words = data.words_result || [];
+    const text = words.map((w) => w.words).join('\n');
+
+    // Default used when the response carries no probability data.
+    let confidence = 0.9;
+    const averages = words
+      .map((w) => w.probability?.average)
+      .filter((value): value is number => typeof value === 'number');
+    if (averages.length > 0) {
+      const mean = averages.reduce((acc, value) => acc + value, 0) / averages.length;
+      // The result contract is 0-1. Accept either a 0-1 or a 0-100 scale from
+      // the API instead of unconditionally dividing by 100.
+      confidence = mean > 1 ? mean / 100 : mean;
+    }
+
+    return {
+      text: text.trim(),
+      confidence,
+      duration,
+      extra: {
+        provider: 'baidu',
+        wordCount: words.length,
+      },
+    };
+  }
+
+  getRecommendations() {
+    return {
+      maxImageSize: 4 * 1024 * 1024, // 4MB limit
+      supportedFormats: ['jpg', 'jpeg', 'png', 'bmp'],
+      notes: '百度 OCR 标准版 QPS 限制为 2，每日免费额度 1000 次。适合高精度需求场景。',
+    };
+  }
+
+  /**
+   * Produce the raw base64 payload for an image source.
+   *
+   * @throws Error when the source has none of base64 / buffer / path.
+   */
+  private async getImageBase64(source: IImageSource): Promise<string> {
+    if (source.base64) {
+      // Strip a data-URL prefix if present.
+      return source.base64.replace(/^data:image\/\w+;base64,/, '');
+    }
+
+    if (source.buffer) {
+      return source.buffer.toString('base64');
+    }
+
+    if (source.path) {
+      const fullPath = this.resolveImagePath(source.path);
+      // Async read so a large image does not block the event loop.
+      const buffer = await fs.promises.readFile(fullPath);
+      return buffer.toString('base64');
+    }
+
+    throw new Error('无效的图片来源');
+  }
+
+  /**
+   * Reject with Error('timeout') if `promise` has not settled within `timeout` ms.
+   * The timer is cleared once the race settles so it cannot linger afterwards.
+   */
+  private async withTimeout<T>(promise: Promise<T>, timeout: number): Promise<T> {
+    let timer: ReturnType<typeof setTimeout> | undefined;
+    const deadline = new Promise<never>((_, reject) => {
+      timer = setTimeout(() => reject(new Error('timeout')), timeout);
+    });
+    try {
+      return await Promise.race([promise, deadline]);
+    } finally {
+      clearTimeout(timer);
+    }
+  }
+}
+
+// Shared module-level instance, constructed with no explicit config.
+export const baiduProvider = new BaiduProvider();
@@ -0,0 +1,127 @@
+/**
+ * OCR Provider Base Interface
+ * OCR 提供商基础接口
+ */
+
+import { resolveImagePath } from '../../lib/path';
+
+/** Result returned by an OCR provider for a single image. */
+export interface OCRRecognitionResult {
+  /** Recognized text content */
+  text: string;
+  /** Confidence (0-1) */
+  confidence: number;
+  /** Processing time (milliseconds) */
+  duration?: number;
+  /** Additional information */
+  extra?: Record<string, unknown>;
+}
+
+/** Options accepted by provider constructors and by `recognize`. */
+export interface OCRProviderConfig {
+  /** Timeout (milliseconds) */
+  timeout?: number;
+  /** Language code (chi_sim, eng, etc.) */
+  language?: string;
+  /** Additional configuration */
+  extras?: Record<string, unknown>;
+}
+
+/** Describes where an image comes from; every field is optional. */
+export interface IImageSource {
+  /** Local path of the image */
+  path?: string;
+  /** Image Buffer */
+  buffer?: Buffer;
+  /** Image Base64 */
+  base64?: string;
+  /** Image URL */
+  url?: string;
+}
+
+/**
+ * Abstract base class for OCR providers.
+ * Concrete providers supply the name, type, availability check and the
+ * recognition routine; batching, recommendations and path resolution are
+ * shared here.
+ */
+export abstract class BaseOCRProvider {
+  protected config: OCRProviderConfig;
+
+  /**
+   * @param config Overrides merged on top of the defaults
+   *               (30000 ms timeout, 'chi_sim+eng' language).
+   */
+  constructor(config: OCRProviderConfig = {}) {
+    const defaults: OCRProviderConfig = {
+      timeout: 30000,
+      language: 'chi_sim+eng',
+    };
+    this.config = { ...defaults, ...config };
+  }
+
+  /** Display name of the provider. */
+  abstract getName(): string;
+
+  /** Whether the provider is 'local' or 'cloud'. */
+  abstract getType(): 'local' | 'cloud';
+
+  /** Whether the provider can currently be used. */
+  abstract isAvailable(): Promise<boolean> | boolean;
+
+  /**
+   * Recognize the text in one image.
+   * @param source  Image source (path / Buffer / Base64 / URL)
+   * @param options Optional per-call configuration
+   */
+  abstract recognize(
+    source: IImageSource,
+    options?: OCRProviderConfig
+  ): Promise<OCRRecognitionResult>;
+
+  /**
+   * Recognize several images one after another.
+   * A failing image does not abort the batch: its slot holds an empty result
+   * with confidence 0 and the error message under `extra.error`.
+   */
+  async batchRecognize(
+    sources: IImageSource[],
+    options?: OCRProviderConfig
+  ): Promise<OCRRecognitionResult[]> {
+    const outcomes: OCRRecognitionResult[] = [];
+
+    for (const item of sources) {
+      let outcome: OCRRecognitionResult;
+      try {
+        outcome = await this.recognize(item, options);
+      } catch (err) {
+        const reason = err instanceof Error ? err.message : String(err);
+        outcome = {
+          text: '',
+          confidence: 0,
+          duration: 0,
+          extra: { error: reason },
+        };
+      }
+      outcomes.push(outcome);
+    }
+
+    return outcomes;
+  }
+
+  /**
+   * Default usage recommendations; providers may override.
+   */
+  getRecommendations(): {
+    maxImageSize?: number;
+    supportedFormats?: string[];
+    notes?: string;
+  } {
+    const tenMegabytes = 10 * 1024 * 1024;
+    const formats = ['jpg', 'jpeg', 'png', 'webp', 'bmp', 'gif'];
+    return {
+      maxImageSize: tenMegabytes,
+      supportedFormats: formats,
+      notes: '建议图片分辨率不低于 300dpi',
+    };
+  }
+
+  /**
+   * Turn a database-style image path into an absolute path by delegating to
+   * the shared path helper.
+   */
+  protected resolveImagePath(imagePath: string): string {
+    const absolutePath = resolveImagePath(imagePath);
+    return absolutePath;
+  }
+}
@@ -0,0 +1,100 @@
+/**
+ * OCR Providers Module
+ * 导出所有 OCR 提供商
+ */
+
+export { BaseOCRProvider } from './base.provider';
+// Interfaces have no runtime value. Re-export them with `export type` so that
+// per-file transpilers (and `isolatedModules`) do not emit a runtime export
+// for a binding that does not exist.
+export type { IImageSource, OCRRecognitionResult, OCRProviderConfig } from './base.provider';
+export { TesseractProvider, tesseractProvider } from './tesseract.provider';
+export { BaiduProvider, baiduProvider } from './baidu.provider';
+export { RapidOCRProvider, rapidocrProvider } from './rapidocr.provider';
+
+// Import the concrete classes from their own modules rather than from
+// './index', which would make this module import itself.
+import { TesseractProvider } from './tesseract.provider';
+import { BaiduProvider } from './baidu.provider';
+import { RapidOCRProvider } from './rapidocr.provider';
+
+/**
+ * OCR provider type.
+ * 'auto' asks the factory to pick a provider itself.
+ */
+export type OCRProviderType = 'tesseract' | 'baidu' | 'rapidocr' | 'auto';
+
+/**
+ * OCR provider factory.
+ * Maps a provider type to a freshly constructed provider instance.
+ */
+export class OCRProviderFactory {
+  private static providers = {
+    tesseract: TesseractProvider,
+    baidu: BaiduProvider,
+    rapidocr: RapidOCRProvider,
+  };
+
+  /**
+   * True only for keys declared directly on the provider table.
+   * A plain `providers[value]` lookup would also succeed for inherited keys
+   * such as 'constructor' or 'toString', which matters because the type can
+   * originate from an unvalidated request body.
+   */
+  private static isConcreteType(
+    value: unknown
+  ): value is Exclude<OCRProviderType, 'auto'> {
+    return (
+      typeof value === 'string' &&
+      Object.prototype.hasOwnProperty.call(OCRProviderFactory.providers, value)
+    );
+  }
+
+  /**
+   * Create a provider instance.
+   *
+   * @param type   Provider type, or 'auto' to let the factory choose.
+   * @param config Optional configuration forwarded to the provider constructor.
+   * @throws Error when `type` is not a known provider.
+   */
+  static create(
+    type: OCRProviderType,
+    config?: any
+  ): TesseractProvider | BaiduProvider | RapidOCRProvider {
+    if (type === 'auto') {
+      return this.autoSelect(config);
+    }
+
+    if (!this.isConcreteType(type)) {
+      throw new Error(`未知的 OCR provider: ${String(type)}`);
+    }
+
+    const ProviderClass = this.providers[type];
+    return new ProviderClass(config);
+  }
+
+  /**
+   * Pick a provider automatically.
+   *
+   * Priority:
+   * 1. OCR_PROVIDER, when it names a concrete provider;
+   * 2. RapidOCR, when RAPIDOCR_API_URL is set;
+   * 3. Baidu OCR, when both BAIDU_OCR_API_KEY and BAIDU_OCR_SECRET_KEY are set;
+   * 4. Tesseract.js as the fallback.
+   *
+   * Only environment variables are inspected; no availability probe is made.
+   *
+   * @param config Optional configuration forwarded to the chosen provider.
+   */
+  private static autoSelect(
+    config?: any
+  ): TesseractProvider | BaiduProvider | RapidOCRProvider {
+    const envProvider = process.env.OCR_PROVIDER;
+
+    if (this.isConcreteType(envProvider)) {
+      const ProviderClass = this.providers[envProvider];
+      return new ProviderClass(config);
+    }
+
+    if (process.env.RAPIDOCR_API_URL) {
+      return new RapidOCRProvider(config);
+    }
+
+    if (process.env.BAIDU_OCR_API_KEY && process.env.BAIDU_OCR_SECRET_KEY) {
+      return new BaiduProvider(config);
+    }
+
+    return new TesseractProvider(config);
+  }
+
+  /**
+   * List every known provider together with the result of its
+   * `isAvailable()` check. The checks run concurrently.
+   */
+  static async getAvailableProviders(): Promise<
+    Array<{ type: string; name: string; available: boolean; typeDesc: string }>
+  > {
+    const providers = [
+      { type: 'rapidocr', name: 'RapidOCR', instance: new RapidOCRProvider(), typeDesc: '本地快速准确' },
+      { type: 'baidu', name: 'Baidu OCR', instance: new BaiduProvider(), typeDesc: '云端准确' },
+      { type: 'tesseract', name: 'Tesseract.js', instance: new TesseractProvider(), typeDesc: '本地轻量' },
+    ];
+
+    return Promise.all(
+      providers.map(async (p) => ({
+        type: p.type,
+        name: p.name,
+        available: await p.instance.isAvailable(),
+        typeDesc: p.typeDesc,
+      }))
+    );
+  }
+}
@@ -0,0 +1,167 @@
+/**
+ * RapidOCR Provider
+ * 特点：速度快、准确率高、免费、本地运行
+ * 基于 PaddleOCR，中文识别效果优秀
+ *
+ * 部署方式：
+ * 1. 安装 Python 服务: pip install rapidocr-onnxruntime
+ * 2. 启动服务 (参考 rapidocr_server 项目)
+ * 3. 或使用 Docker: docker run -p 8080:8080 cshgg/rapidocr
+ *
+ * GitHub: https://github.com/RapidAI/RapidOCR
+ */
+
+import { BaseOCRProvider, IImageSource, OCRRecognitionResult, OCRProviderConfig } from './base.provider';
+import fs from 'fs';
+
+// Shape of the /ocr response as read by RapidOCRProvider.recognize.
+interface RapidOCRResponse {
+  // Compared against 200 when checking for errors.
+  code: number;
+  // Used as the error text when no `error` field is present.
+  msg: string;
+  // Recognized entries: `text` values are joined, `score` values are averaged.
+  data: Array<{
+    text: string;
+    score: number;
+    // Not read by this provider.
+    box: number[][];
+  }>;
+}
+
+// JSON body posted to the /ocr endpoint.
+interface RapidOCRRequest {
+  // Base64-encoded images; recognize() sends exactly one.
+  images: string[];
+  // Flags forwarded to the service; recognize() sets use_dilation and use_cls.
+  options?: {
+    use_dilation?: boolean;
+    use_cls?: boolean;
+    use_tensorrt?: boolean;
+  };
+}
+
+/**
+ * OCR provider that talks to a RapidOCR HTTP service.
+ *
+ * The service base URL comes from the constructor config, then the
+ * RAPIDOCR_API_URL environment variable, then 'http://localhost:8080'.
+ */
+export class RapidOCRProvider extends BaseOCRProvider {
+  private readonly apiUrl: string;
+
+  constructor(config: OCRProviderConfig & { apiUrl?: string } = {}) {
+    super(config);
+    this.apiUrl = config.apiUrl || process.env.RAPIDOCR_API_URL || 'http://localhost:8080';
+  }
+
+  getName(): string {
+    return 'RapidOCR';
+  }
+
+  getType(): 'local' | 'cloud' {
+    return 'local';
+  }
+
+  /**
+   * Probe `<apiUrl>/health` with a 2 second limit.
+   * Any network error or non-2xx status counts as unavailable.
+   */
+  async isAvailable(): Promise<boolean> {
+    try {
+      const response = await fetch(`${this.apiUrl}/health`, {
+        signal: AbortSignal.timeout(2000),
+      });
+      return response.ok;
+    } catch {
+      return false;
+    }
+  }
+
+  /**
+   * Run OCR on one image via `<apiUrl>/ocr`.
+   *
+   * @param source  Image given as base64, buffer or path (checked in that order).
+   * @param options Per-call overrides; only `timeout` is read here.
+   * @returns Recognized text (entries joined with "\n"), the mean entry score
+   *          as confidence (0 when nothing was recognized) and the elapsed time.
+   * @throws Error on timeout, an unparseable response, or a reported failure.
+   */
+  async recognize(
+    source: IImageSource,
+    options?: OCRProviderConfig
+  ): Promise<OCRRecognitionResult> {
+    const startTime = Date.now();
+
+    const imageBase64 = await this.getImageBase64(source);
+
+    const payload: RapidOCRRequest = {
+      images: [imageBase64],
+      options: {
+        use_dilation: true, // dilation to improve recognition
+        use_cls: true,      // text-direction classification
+      },
+    };
+
+    const response = await this.withTimeout(
+      fetch(`${this.apiUrl}/ocr`, {
+        method: 'POST',
+        headers: { 'Content-Type': 'application/json' },
+        body: JSON.stringify(payload),
+      }),
+      options?.timeout || this.config.timeout || 15000
+    );
+
+    let data: Partial<RapidOCRResponse> & { error?: unknown };
+    try {
+      data = (await response.json()) as Partial<RapidOCRResponse> & { error?: unknown };
+    } catch {
+      throw new Error(`RapidOCR 返回了无法解析的响应 (HTTP ${response.status})`);
+    }
+    // Guard before the `in` check below, which throws on non-objects.
+    if (typeof data !== 'object' || data === null) {
+      throw new Error(`RapidOCR 返回了无法解析的响应 (HTTP ${response.status})`);
+    }
+    const duration = Date.now() - startTime;
+
+    const hasResults = Array.isArray(data.data);
+    // Format 1: a non-200 code accompanied by an `error` field.
+    const explicitError = data.code !== 200 && 'error' in data;
+    // Format 2: no result array together with a non-200 `code` or a failed
+    // HTTP status (e.g. `{ code: 500, msg: '...' }`). Previously this was
+    // silently reported as "no text found". A response that does carry a
+    // result array is always accepted, whatever its `code`.
+    const implicitError =
+      !hasResults &&
+      ((typeof data.code === 'number' && data.code !== 200) || !response.ok);
+    if (explicitError || implicitError) {
+      const detail = data.error || data.msg || `HTTP ${response.status}`;
+      throw new Error(`RapidOCR 错误: ${detail} (${data.code ?? response.status})`);
+    }
+
+    const ocrResults = hasResults ? (data.data as RapidOCRResponse['data']) : [];
+    const text = ocrResults.map((r) => r.text).join('\n');
+
+    // Mean score across entries; missing scores count as 0.
+    const confidence = ocrResults.length > 0
+      ? ocrResults.reduce((acc, r) => acc + (r.score || 0), 0) / ocrResults.length
+      : 0;
+
+    return {
+      text: text.trim(),
+      confidence,
+      duration,
+      extra: {
+        provider: 'rapidocr',
+        textCount: ocrResults.length,
+      },
+    };
+  }
+
+  getRecommendations() {
+    return {
+      maxImageSize: 10 * 1024 * 1024,
+      supportedFormats: ['jpg', 'jpeg', 'png', 'webp', 'bmp'],
+      notes: 'RapidOCR 是基于 PaddleOCR 的本地服务，速度快且准确率高。需要先启动 RapidOCR 服务。',
+    };
+  }
+
+  /**
+   * Produce the raw base64 payload for an image source.
+   *
+   * @throws Error when the source has none of base64 / buffer / path.
+   */
+  private async getImageBase64(source: IImageSource): Promise<string> {
+    if (source.base64) {
+      // Strip a data-URL prefix if present.
+      return source.base64.replace(/^data:image\/\w+;base64,/, '');
+    }
+
+    if (source.buffer) {
+      return source.buffer.toString('base64');
+    }
+
+    if (source.path) {
+      const fullPath = this.resolveImagePath(source.path);
+      // Async read so a large image does not block the event loop.
+      const buffer = await fs.promises.readFile(fullPath);
+      return buffer.toString('base64');
+    }
+
+    throw new Error('无效的图片来源');
+  }
+
+  /**
+   * Reject with Error('timeout') if `promise` has not settled within `timeout` ms.
+   * The timer is cleared once the race settles so it cannot linger afterwards.
+   */
+  private async withTimeout<T>(promise: Promise<T>, timeout: number): Promise<T> {
+    let timer: ReturnType<typeof setTimeout> | undefined;
+    const deadline = new Promise<never>((_, reject) => {
+      timer = setTimeout(() => reject(new Error('timeout')), timeout);
+    });
+    try {
+      return await Promise.race([promise, deadline]);
+    } finally {
+      clearTimeout(timer);
+    }
+  }
+}
+
+// Shared module-level instance, constructed with no explicit config.
+export const rapidocrProvider = new RapidOCRProvider();
@@ -0,0 +1,166 @@
+/**
+ * Tesseract.js OCR Provider
+ * 特点：免费、本地运行、支持多语言
+ * 缺点：速度较慢、准确率中等
+ */
+
+import { BaseOCRProvider, IImageSource, OCRRecognitionResult, OCRProviderConfig } from './base.provider';
+import fs from 'fs';
+
+// Minimal shape of the tesseract.js module surface that this provider calls.
+interface TesseractModule {
+  recognize: (
+    image: string | Buffer,
+    lang: string,
+    options?: { logger?: (m: any) => void }
+  ) => Promise<{ data: { text: string; confidence: number } }>;
+}
+
+/**
+ * OCR provider backed by the optional `tesseract.js` dependency.
+ * The module is imported lazily, once, and reused by later calls.
+ */
+export class TesseractProvider extends BaseOCRProvider {
+  private tesseract: TesseractModule | null = null;
+  private initialized = false;
+
+  constructor(config: OCRProviderConfig = {}) {
+    super(config);
+  }
+
+  getName(): string {
+    return 'Tesseract.js';
+  }
+
+  getType(): 'local' | 'cloud' {
+    return 'local';
+  }
+
+  /**
+   * The provider is available when `tesseract.js` can be imported.
+   */
+  async isAvailable(): Promise<boolean> {
+    return (await this.loadModule()) !== null;
+  }
+
+  /**
+   * Import `tesseract.js` on first use and cache the outcome, including a
+   * failed import, so availability checks and recognition share one module.
+   *
+   * @returns The module, or null when it cannot be imported.
+   */
+  private async loadModule(): Promise<TesseractModule | null> {
+    if (!this.initialized) {
+      try {
+        const imported = await import('tesseract.js');
+        // Use the default export when the loader provides one, otherwise
+        // the module namespace itself.
+        this.tesseract = ((imported as any).default || imported) as TesseractModule;
+      } catch {
+        this.tesseract = null;
+      }
+      this.initialized = true;
+    }
+    return this.tesseract;
+  }
+
+  /**
+   * Run OCR on one image.
+   *
+   * @param source  Image given as buffer, path or base64 (checked in that order).
+   * @param options Per-call overrides for `language` and `timeout`.
+   * @returns Cleaned text, confidence scaled to 0-1 and the elapsed time.
+   * @throws Error when the image is missing, tesseract.js cannot be loaded,
+   *         or recognition exceeds the timeout.
+   */
+  async recognize(
+    source: IImageSource,
+    options?: OCRProviderConfig
+  ): Promise<OCRRecognitionResult> {
+    const startTime = Date.now();
+
+    const language = options?.language || this.config.language || 'chi_sim+eng';
+    const imageData = await this.getImageData(source);
+
+    const Tesseract = await this.loadModule();
+    if (!Tesseract) {
+      throw new Error('Tesseract.js 未安装，无法执行 OCR 识别');
+    }
+
+    try {
+      const result = await this.withTimeout(
+        Tesseract.recognize(imageData, language, {
+          // Progress events are intentionally ignored.
+          logger: () => undefined,
+        }),
+        options?.timeout || this.config.timeout || 30000
+      );
+
+      const duration = Date.now() - startTime;
+
+      return {
+        text: this.cleanText(result.data.text),
+        confidence: result.data.confidence / 100, // Tesseract reports 0-100
+        duration,
+        extra: {
+          provider: 'tesseract.js',
+          language,
+        },
+      };
+    } catch (error) {
+      // Translate the internal timeout marker into a user-facing message.
+      if (error instanceof Error && error.message === 'timeout') {
+        throw new Error('OCR 识别超时');
+      }
+      throw error;
+    }
+  }
+
+  getRecommendations() {
+    return {
+      ...super.getRecommendations(),
+      notes: '首次运行会下载语言包 (约 20MB)，后续运行会更快。适合少量图片处理。',
+    };
+  }
+
+  /**
+   * Convert an image source into something tesseract.js accepts:
+   * a Buffer, an absolute file path, or a base64 data URL.
+   *
+   * @throws Error when a path does not exist or the source is empty.
+   */
+  private async getImageData(source: IImageSource): Promise<string | Buffer> {
+    if (source.buffer) {
+      return source.buffer;
+    }
+
+    if (source.path) {
+      const fullPath = this.resolveImagePath(source.path);
+
+      if (!fs.existsSync(fullPath)) {
+        throw new Error(`图片文件不存在: ${fullPath}`);
+      }
+
+      return fullPath;
+    }
+
+    if (source.base64) {
+      // Bare base64 is wrapped in a data URL; PNG is assumed for the MIME type.
+      return source.base64.startsWith('data:')
+        ? source.base64
+        : `data:image/png;base64,${source.base64}`;
+    }
+
+    throw new Error('无效的图片来源');
+  }
+
+  /**
+   * Reject with Error('timeout') if `promise` has not settled within `timeout` ms.
+   * The timer is cleared once the race settles so it cannot linger afterwards.
+   */
+  private async withTimeout<T>(promise: Promise<T>, timeout: number): Promise<T> {
+    let timer: ReturnType<typeof setTimeout> | undefined;
+    const deadline = new Promise<never>((_, reject) => {
+      timer = setTimeout(() => reject(new Error('timeout')), timeout);
+    });
+    try {
+      return await Promise.race([promise, deadline]);
+    } finally {
+      clearTimeout(timer);
+    }
+  }
+
+  /**
+   * Normalize whitespace in recognized text while keeping its line structure.
+   *
+   * Collapsing `\s+` would also swallow every newline, flattening the text to
+   * one line and leaving the blank-line rule with nothing to match, so
+   * horizontal whitespace and line breaks are handled separately.
+   */
+  private cleanText(text: string): string {
+    return text
+      .replace(/\r\n?/g, '\n')        // normalize line endings
+      .replace(/[^\S\n]+/g, ' ')      // collapse runs of spaces/tabs, keep newlines
+      .replace(/ ?\n ?/g, '\n')       // drop spaces hugging a line break
+      .replace(/\n{3,}/g, '\n\n')     // at most one blank line in a row
+      .trim();
+  }
+}
+
+// Shared module-level instance, constructed with no explicit config.
+export const tesseractProvider = new TesseractProvider();
@@ -0,0 +1,15 @@
+// Type declarations for optional tesseract.js dependency
+// Only the `recognize` entry point and the result fields read by the
+// provider (text, confidence) are declared here.
+declare module 'tesseract.js' {
+  // Result of a recognize() call, reduced to the fields that are consumed.
+  export interface TesseractResult {
+    data: {
+      text: string;
+      confidence: number;
+    };
+  }
+
+  // Recognize text in an image given as a path/data URL string or a Buffer.
+  export function recognize(
+    image: string | Buffer,
+    lang: string,
+    options?: { logger?: (m: any) => void }
+  ): Promise<TesseractResult>;
+}