diff --git a/CLAUDE.md b/CLAUDE.md index ebbbf30..9fd6eae 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -26,9 +26,14 @@ npm run test:watch # Jest 监视模式 npm run test:coverage # 测试覆盖率报告 npm run lint # ESLint 检查 npm run lint:fix # ESLint 自动修复 +npm run format # Prettier 格式化 npm run prisma:generate # 生成 Prisma Client npm run prisma:migrate # 运行数据库迁移 npm run prisma:studio # 打开 Prisma Studio + +# 运行单个测试文件 +npm test -- tests/unit/services/auth.service.test.ts +npm test -- --testNamePattern="should login" ``` ### 前端 (端口 3000) @@ -43,6 +48,12 @@ npm run test:coverage # Vitest 覆盖率报告 npm run test:e2e # 运行 Playwright E2E 测试 npm run test:e2e:ui # Playwright UI 模式 npm run lint # ESLint 检查 + +# 运行单个测试文件 +npx vitest src/services/__tests__/auth.service.test.ts + +# 运行单个 E2E 测试 +npx playwright test auth.spec.ts ``` ### 数据库操作 @@ -63,12 +74,18 @@ backend/src/ ├── services/ # 业务逻辑层 │ ├── auth.service.ts # 认证逻辑 (注册/登录/验证) │ ├── password.service.ts # 密码验证和强度检查 -│ ├── ocr.service.ts # OCR 处理逻辑(置信度验证、重试) +│ ├── ocr.service.ts # OCR 置信度验证和重试逻辑 +│ ├── ocr-processor.service.ts # OCR 处理服务 (多 Provider 支持) +│ ├── ocr-providers/ # OCR Provider 实现 +│ │ ├── base.provider.ts # 基础接口 +│ │ ├── tesseract.provider.ts # Tesseract.js (本地) +│ │ ├── baidu.provider.ts # 百度 OCR (云端) +│ │ └── rapidocr.provider.ts # RapidOCR (本地) │ ├── document.service.ts # 文档 CRUD │ ├── todo.service.ts # 待办事项管理(三态工作流) │ └── image.service.ts # 图片上传和处理 ├── routes/ # API 路由定义 -├── middleware/ # 中间件 (JWT 认证) +├── middleware/ # 中间件 (JWT 认证、上传) └── lib/prisma.ts # Prisma 客户端单例 ``` @@ -76,6 +93,8 @@ backend/src/ - **分层架构**: Controller → Service → Prisma (数据层) - **服务类**: 使用静态方法实现无状态业务逻辑 - **中间件**: JWT 认证中间件保护需要登录的路由 +- **OCR Provider**: 可插拔的 OCR 提供商架构,支持扩展 +- **上传中间件**: `upload.middleware.ts` 使用 multer 处理文件上传 ### 前端架构 ``` @@ -130,17 +149,75 @@ SELECT * FROM images WHERE user_id = ? AND (document_id IS NULL OR processing_status = 'failed') ``` +## OCR 多 Provider 架构 + +系统支持多种 OCR 提供商,可根据需求选择: + +| Provider | 类型 | 速度 | 准确率 | 成本 | 部署要求 | +|----------|------|------|--------|------|----------| +| **tesseract** | 本地 | 慢 | 中 | 免费 | `npm install tesseract.js` | +| **rapidocr** | 本地 | 快 | 高 | 免费 | Docker: `cshgg/rapidocr` | +| **baidu** | 云端 | 快 | 高 | 按次付费 (有免费额度) | 需要 API Key | + +### 配置方式 + +在 `.env` 中设置 `OCR_PROVIDER`: +```bash +# 自动选择可用的 provider (推荐) +OCR_PROVIDER="auto" + +# 或指定具体 provider +OCR_PROVIDER="tesseract" # 或 "rapidocr", "baidu" +``` + +### 各 Provider 配置 + +**Tesseract.js** (本地轻量,免费) +```bash +# 安装依赖 +npm install tesseract.js +# 无需额外配置 +``` + +**RapidOCR** (本地快速准确,推荐) +```bash +# Docker 启动 +docker run -d -p 8080:8080 cshgg/rapidocr + +# 配置环境变量 +RAPIDOCR_API_URL="http://localhost:8080" +``` + +**Baidu OCR** (云端准确,有免费额度) +```bash +# 申请地址: https://cloud.baidu.com/product/ocr +BAIDU_OCR_API_KEY="your_api_key" +BAIDU_OCR_SECRET_KEY="your_secret_key" +``` + +### Provider 架构 + +OCR Provider 实现位于 `backend/src/services/ocr-providers/`: +- `base.provider.ts` - 基础接口定义 +- `tesseract.provider.ts` - Tesseract.js 实现 +- `rapidocr.provider.ts` - RapidOCR HTTP API 实现 +- `baidu.provider.ts` - 百度 OCR API 实现 + +新增 Provider 只需继承 `BaseOCRProvider` 并实现 `recognize()` 方法。 + ## 测试策略 ### 后端测试 (Jest) - 单元测试覆盖所有 Service 类 - 集成测试验证 API 端点 -- 目标覆盖率: 80%+ +- 覆盖率阈值: 80% (branches, functions, lines, statements) +- 测试文件匹配: `**/__tests__/**/*.ts`, `**/?(*.)+(spec|test).ts` ### 前端测试 - **Vitest**: 组件和服务的单元测试 - **Playwright**: E2E 测试,跨浏览器测试 (Chrome, Firefox, Safari) - 测试文件位于 `e2e/` 目录 +- E2E 测试配置: 自动启动开发服务器,支持 CI 模式重试 ## API 端点 @@ -161,16 +238,66 @@ WHERE user_id = ? AND (document_id IS NULL OR processing_status = 'failed') - `DELETE /api/todos/:id` - 删除待办 ### 图片 -- `POST /api/images/upload` - 上传图片 -- `GET /api/images` - 获取图片列表 +- `POST /api/images` - 上传图片 +- `GET /api/images` - 获取用户图片列表 +- `GET /api/images/pending` - 获取待处理图片 (OCR 失败) +- `GET /api/images/ocr/providers` - 获取可用的 OCR 提供商列表 +- `GET /api/images/:id` - 获取单张图片详情 +- `POST /api/images/:id/reprocess` - 重新触发 OCR 处理 (支持指定 provider) +- `PUT /api/images/:id/ocr` - 更新 OCR 结果 +- `PUT /api/images/:id/link` - 关联图片到文档 +- `DELETE /api/images/:id` - 删除图片 ## 环境变量 -后端需要创建 `.env` 文件: +后端需要创建 `.env` 文件(参考 `backend/.env.example`): ``` +# Database DATABASE_URL="file:./dev.db" -JWT_SECRET="your-secret-key" + +# JWT +JWT_SECRET="your-secret-key-change-in-production" +JWT_EXPIRES_IN="24h" + +# Server PORT=4000 +NODE_ENV="development" + +# CORS +CORS_ORIGIN="http://localhost:3000" + +# OCR Provider: tesseract | baidu | rapidocr | auto +OCR_PROVIDER="auto" +OCR_CONFIDENCE_THRESHOLD="0.3" + +# Tesseract.js (本地 OCR) +# 安装: npm install tesseract.js + +# Baidu OCR (云端 OCR) +BAIDU_OCR_API_KEY="" +BAIDU_OCR_SECRET_KEY="" + +# RapidOCR (本地快速 OCR) +RAPIDOCR_API_URL="http://localhost:8080" + +# AI (GLM) +GLM_API_KEY="" +GLM_API_URL="https://open.bigmodel.cn/api/paas/v4/chat/completions" +GLM_MODEL="glm-4-flash" + +# AI (MiniMax) +MINIMAX_API_KEY="" +MINIMAX_API_URL="https://api.minimax.chat/v1/chat/completions" +MINIMAX_MODEL="abab6.5s-chat" + +# AI (DeepSeek) +DEEPSEEK_API_KEY="" +DEEPSEEK_API_URL="https://api.deepseek.com/v1/chat/completions" +DEEPSEEK_MODEL="deepseek-chat" + +# Upload +UPLOAD_MAX_SIZE="10485760" +UPLOAD_ALLOWED_TYPES="image/jpeg,image/png,image/webp" ``` ## 测试账号 @@ -187,10 +314,11 @@ PORT=4000 - ✅ 文档 CRUD - ✅ 待办三态工作流 - ✅ 图片上传和 OCR 状态追踪 +- ✅ 多 Provider OCR 架构 (Tesseract.js / Baidu / RapidOCR) - ✅ 前后端单元测试 (148 个测试全部通过) - ✅ E2E 测试框架 待开发功能 (P1 优先级): -- ⏳ OCR 集成 (Tesseract/PaddleOCR) - ⏳ AI 分析功能 (GLM/MiniMax/DeepSeek) - ⏳ 图片-文档-待办关联增强 +- ⏳ 前端 OCR 设置页面 (Provider 选择和测试) diff --git a/backend/.env.example b/backend/.env.example index 6f3bd7f..e43881e 100644 --- a/backend/.env.example +++ b/backend/.env.example @@ -13,9 +13,27 @@ NODE_ENV="development" CORS_ORIGIN="http://localhost:3000" # OCR -OCR_PROVIDER="local" +# OCR Provider: tesseract | baidu | rapidocr | auto +# - tesseract: 本地轻量,需要安装 tesseract.js +# - baidu: 云端准确,需要配置 API Key (有免费额度) +# - rapidocr: 本地快速准确,需要启动 RapidOCR 服务 +# - auto: 自动选择可用的 provider +OCR_PROVIDER="auto" OCR_CONFIDENCE_THRESHOLD="0.3" +# Tesseract.js (本地 OCR,无需额外配置) +# 安装: npm install tesseract.js + +# Baidu OCR (云端 OCR,需要申请密钥) +# 获取地址: https://cloud.baidu.com/product/ocr +BAIDU_OCR_API_KEY="" +BAIDU_OCR_SECRET_KEY="" + +# RapidOCR (本地快速 OCR,需要启动服务) +# Docker: docker run -d -p 8080:8080 cshgg/rapidocr +# 或参考: https://github.com/RapidAI/RapidOCR +RAPIDOCR_API_URL="http://localhost:8080" + # AI (GLM) GLM_API_KEY="" GLM_API_URL="https://open.bigmodel.cn/api/paas/v4/chat/completions" diff --git a/backend/package-lock.json b/backend/package-lock.json index df21a04..8964ab3 100644 --- a/backend/package-lock.json +++ b/backend/package-lock.json @@ -16,6 +16,7 @@ "express": "^4.21.2", "jsonwebtoken": "^9.0.2", "multer": "^1.4.5-lts.1", + "tesseract.js": "^5.1.1", "winston": "^3.17.0" }, "devDependencies": { @@ -37,6 +38,9 @@ "ts-jest": "^29.2.5", "tsx": "^4.19.2", "typescript": "^5.7.2" + }, + "optionalDependencies": { + "tesseract.js": "^5.1.1" } }, "node_modules/@babel/code-frame": { @@ -2777,6 +2781,13 @@ "node": ">= 10.0.0" } }, + "node_modules/bmp-js": { + "version": "0.1.0", + "resolved": "https://registry.npmmirror.com/bmp-js/-/bmp-js-0.1.0.tgz", + "integrity": "sha512-vHdS19CnY3hwiNdkaqk93DvjVLfbEcI8mys4UjuWrlX1haDmroo8o4xCzh4wD6DGV6HxRCyauwhHRqMTfERtjw==", + "license": "MIT", + "optional": true + }, "node_modules/body-parser": { "version": "1.20.4", "resolved": "https://registry.npmjs.org/body-parser/-/body-parser-1.20.4.tgz", @@ -4617,6 +4628,13 @@ "node": ">=0.10.0" } }, + "node_modules/idb-keyval": { + "version": "6.2.2", + "resolved": "https://registry.npmmirror.com/idb-keyval/-/idb-keyval-6.2.2.tgz", + "integrity": "sha512-yjD9nARJ/jb1g+CvD0tlhUHOrJ9Sy0P8T9MF3YaLlHnSRpwPfpTX0XIvpmw3gAJUmEu3FiICLBDPXVwyEvrleg==", + "license": "Apache-2.0", + "optional": true + }, "node_modules/ignore": { "version": "7.0.5", "resolved": "https://registry.npmjs.org/ignore/-/ignore-7.0.5.tgz", @@ -4723,6 +4741,13 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/is-electron": { + "version": "2.2.2", + "resolved": "https://registry.npmmirror.com/is-electron/-/is-electron-2.2.2.tgz", + "integrity": "sha512-FO/Rhvz5tuw4MCWkpMzHFKWD2LsfHzIb7i6MdPYZ/KW7AlxawyLkqdy+jPZP1WubqEADE3O4FUENlJHDfQASRg==", + "license": "MIT", + "optional": true + }, "node_modules/is-extglob": { "version": "2.1.1", "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-2.1.1.tgz", @@ -4787,6 +4812,13 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/is-url": { + "version": "1.2.4", + "resolved": "https://registry.npmmirror.com/is-url/-/is-url-1.2.4.tgz", + "integrity": "sha512-ITvGim8FhRiYe4IQ5uHSkj7pVaPDrCTkNd3yq3cV7iZAcJdHTUMPMEHcqSOy9xZ9qFenQCvi+2wjH9a1nXqHww==", + "license": "MIT", + "optional": true + }, "node_modules/isarray": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/isarray/-/isarray-1.0.0.tgz", @@ -6167,6 +6199,16 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/opencollective-postinstall": { + "version": "2.0.3", + "resolved": "https://registry.npmmirror.com/opencollective-postinstall/-/opencollective-postinstall-2.0.3.tgz", + "integrity": "sha512-8AV/sCtuzUeTo8gQK5qDZzARrulB3egtLzFgteqB2tcT4Mw7B8Kt7JcDHmltjz6FOAHsvTevk70gZEbhM4ZS9Q==", + "license": "MIT", + "optional": true, + "bin": { + "opencollective-postinstall": "index.js" + } + }, "node_modules/optionator": { "version": "0.9.4", "resolved": "https://registry.npmjs.org/optionator/-/optionator-0.9.4.tgz", @@ -6610,6 +6652,13 @@ "integrity": "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g==", "license": "MIT" }, + "node_modules/regenerator-runtime": { + "version": "0.13.11", + "resolved": "https://registry.npmmirror.com/regenerator-runtime/-/regenerator-runtime-0.13.11.tgz", + "integrity": "sha512-kY1AZVr2Ra+t+piVaJ4gxaFaReZVH40AKNo7UCX6W+dEwBo/2oZJzqfuN1qLq1oL45o56cPaTXELwrTh8Fpggg==", + "license": "MIT", + "optional": true + }, "node_modules/require-directory": { "version": "2.1.1", "resolved": "https://registry.npmjs.org/require-directory/-/require-directory-2.1.1.tgz", @@ -7227,6 +7276,33 @@ "integrity": "sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A==", "license": "ISC" }, + "node_modules/tesseract.js": { + "version": "5.1.1", + "resolved": "https://registry.npmmirror.com/tesseract.js/-/tesseract.js-5.1.1.tgz", + "integrity": "sha512-lzVl/Ar3P3zhpUT31NjqeCo1f+D5+YfpZ5J62eo2S14QNVOmHBTtbchHm/YAbOOOzCegFnKf4B3Qih9LuldcYQ==", + "hasInstallScript": true, + "license": "Apache-2.0", + "optional": true, + "dependencies": { + "bmp-js": "^0.1.0", + "idb-keyval": "^6.2.0", + "is-electron": "^2.2.2", + "is-url": "^1.2.4", + "node-fetch": "^2.6.9", + "opencollective-postinstall": "^2.0.3", + "regenerator-runtime": "^0.13.3", + "tesseract.js-core": "^5.1.1", + "wasm-feature-detect": "^1.2.11", + "zlibjs": "^0.3.1" + } + }, + "node_modules/tesseract.js-core": { + "version": "5.1.1", + "resolved": "https://registry.npmmirror.com/tesseract.js-core/-/tesseract.js-core-5.1.1.tgz", + "integrity": "sha512-KX3bYSU5iGcO1XJa+QGPbi+Zjo2qq6eBhNjSGR5E5q0JtzkoipJKOUQD7ph8kFyteCEfEQ0maWLu8MCXtvX5uQ==", + "license": "Apache-2.0", + "optional": true + }, "node_modules/test-exclude": { "version": "6.0.0", "resolved": "https://registry.npmjs.org/test-exclude/-/test-exclude-6.0.0.tgz", @@ -7652,6 +7728,13 @@ "makeerror": "1.0.12" } }, + "node_modules/wasm-feature-detect": { + "version": "1.8.0", + "resolved": "https://registry.npmmirror.com/wasm-feature-detect/-/wasm-feature-detect-1.8.0.tgz", + "integrity": "sha512-zksaLKM2fVlnB5jQQDqKXXwYHLQUVH9es+5TOOHwGOVJOCeRBCiPjwSg+3tN2AdTCzjgli4jijCH290kXb/zWQ==", + "license": "Apache-2.0", + "optional": true + }, "node_modules/webidl-conversions": { "version": "3.0.1", "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz", @@ -7879,6 +7962,16 @@ "funding": { "url": "https://github.com/sponsors/sindresorhus" } + }, + "node_modules/zlibjs": { + "version": "0.3.1", + "resolved": "https://registry.npmmirror.com/zlibjs/-/zlibjs-0.3.1.tgz", + "integrity": "sha512-+J9RrgTKOmlxFSDHo0pI1xM6BLVUv+o0ZT9ANtCxGkjIVCCUdx9alUF8Gm+dGLKbkkkidWIHFDZHDMpfITt4+w==", + "license": "MIT", + "optional": true, + "engines": { + "node": "*" + } } } } diff --git a/backend/package.json b/backend/package.json index 5338d7e..d0ed25a 100644 --- a/backend/package.json +++ b/backend/package.json @@ -17,7 +17,12 @@ "prisma:migrate": "prisma migrate dev", "prisma:studio": "prisma studio" }, - "keywords": ["ocr", "document-management", "ai", "tdd"], + "keywords": [ + "ocr", + "document-management", + "ai", + "tdd" + ], "author": "", "license": "MIT", "dependencies": { @@ -30,6 +35,9 @@ "multer": "^1.4.5-lts.1", "winston": "^3.17.0" }, + "optionalDependencies": { + "tesseract.js": "^5.1.1" + }, "devDependencies": { "@types/bcrypt": "^5.0.2", "@types/cors": "^2.8.17", diff --git a/backend/prisma/schema.prisma b/backend/prisma/schema.prisma index 7a09f7f..c1ebb2c 100644 --- a/backend/prisma/schema.prisma +++ b/backend/prisma/schema.prisma @@ -6,7 +6,6 @@ generator client { datasource db { provider = "sqlite" - url = env("DATABASE_URL") } // 用户 diff --git a/backend/scripts/rapidocr_server.py b/backend/scripts/rapidocr_server.py new file mode 100644 index 0000000..6e98334 --- /dev/null +++ b/backend/scripts/rapidocr_server.py @@ -0,0 +1,114 @@ +""" +RapidOCR HTTP Server +为 Node.js 后端提供 OCR API 服务 +""" + +from rapidocr_onnxruntime import RapidOCR +from http.server import HTTPServer, BaseHTTPRequestHandler +import json +import base64 +import numpy as np +from io import BytesIO +from PIL import Image +import cv2 + +# 初始化 RapidOCR +ocr = RapidOCR() + +class OCRHandler(BaseHTTPRequestHandler): + def _set_cors(self): + """设置 CORS""" + self.send_header('Access-Control-Allow-Origin', '*') + self.send_header('Access-Control-Allow-Methods', 'GET, POST, OPTIONS') + self.send_header('Access-Control-Allow-Headers', 'Content-Type') + + def _send_json(self, data, status=200): + """发送 JSON 响应""" + self.send_response(status) + self.send_header('Content-Type', 'application/json') + self._set_cors() + self.end_headers() + self.wfile.write(json.dumps(data, ensure_ascii=False).encode('utf-8')) + + def do_OPTIONS(self): + """处理 OPTIONS 请求""" + self.send_response(200) + self._set_cors() + self.end_headers() + + def do_GET(self): + """健康检查""" + if self.path == '/health': + self._send_json({"status": "ok", "service": "rapidocr"}) + else: + self._send_json({"error": "Not found"}, 404) + + def do_POST(self): + """处理 OCR 请求""" + if self.path == '/ocr': + try: + content_length = int(self.headers['Content-Length']) + post_data = self.rfile.read(content_length) + data = json.loads(post_data.decode('utf-8')) + + # 获取图片数据 + images = data.get('images', []) + if not images: + self._send_json({"error": "No images provided"}, 400) + return + + # 处理第一张图片 + image_base64 = images[0] + image_data = base64.b64decode(image_base64) + + # 转换为 OpenCV 格式 + nparr = np.frombuffer(image_data, np.uint8) + img = cv2.imdecode(nparr, cv2.IMREAD_COLOR) + + if img is None: + self._send_json({"error": "Failed to decode image"}, 400) + return + + # 执行 OCR + result, _ = ocr(img) + + # 格式化结果 + ocr_results = [] + for line in result: + ocr_results.append({ + "text": line[0], + "score": float(line[1]), + "box": line[2] + }) + + # 计算平均置信度 + avg_confidence = 0 + if ocr_results: + avg_confidence = sum(r["score"] for r in ocr_results) / len(ocr_results) + + self._send_json({ + "code": 200, + "msg": "success", + "data": ocr_results + }) + + except Exception as e: + self._send_json({"error": str(e)}, 500) + else: + self._send_json({"error": "Not found"}, 404) + + def log_message(self, format, *args): + """减少日志输出""" + pass + +def run_server(port=8080): + """启动服务器""" + server_address = ('', port) + httpd = HTTPServer(server_address, OCRHandler) + print(f"RapidOCR Server running on port {port}") + print(f"Health check: http://localhost:{port}/health") + print(f"OCR endpoint: http://localhost:{port}/ocr") + httpd.serve_forever() + +if __name__ == '__main__': + run_server() diff --git a/backend/src/index.ts b/backend/src/index.ts index d88398c..8da338d 100644 --- a/backend/src/index.ts +++ b/backend/src/index.ts @@ -6,11 +6,16 @@ import express from 'express'; import cors from 'cors'; import dotenv from 'dotenv'; import path from 'path'; +import { fileURLToPath } from 'url'; import authRoutes from './routes/auth.routes'; import documentRoutes from './routes/document.routes'; import todoRoutes from './routes/todo.routes'; import imageRoutes from './routes/image.routes'; +// 获取当前文件的目录 +const __filename = fileURLToPath(import.meta.url); +const __dirname = path.dirname(__filename); + // Load environment variables dotenv.config(); @@ -24,8 +29,8 @@ app.use(cors({ app.use(express.json()); app.use(express.urlencoded({ extended: true })); -// Static files for uploads -app.use('/uploads', express.static(path.join(process.cwd(), 'uploads'))); +// Static files for uploads (使用绝对路径指向 backend/uploads) +app.use('/uploads', express.static(path.join(__dirname, '..', 'uploads'))); // Health check app.get('/api/health', (_req, res) => { diff --git a/backend/src/lib/path.ts b/backend/src/lib/path.ts new file mode 100644 index 0000000..f7580f0 --- /dev/null +++ b/backend/src/lib/path.ts @@ -0,0 +1,84 @@ +/** + * 路径解析工具 + * 解决开发环境下路径解析问题 + */ + +import path from 'path'; +import fs from 'fs'; +import { fileURLToPath } from 'url'; + +/** + * 获取项目根目录 + * 通过从当前文件向上查找 package.json 来确定 + */ +export function getProjectRoot(): string { + // 在开发环境使用 tsx 时,使用 process.cwd() + // 在构建后的环境,使用 __dirname 的方式 + let currentDir: string; + + try { + // ESM 模式下获取当前文件目录 + const __filename = fileURLToPath(import.meta.url); + currentDir = path.dirname(__filename); + } catch { + // 回退到 process.cwd() + currentDir = process.cwd(); + } + + // Windows 路径处理(去除开头的 /) + if (process.platform === 'win32' && currentDir.startsWith('/') && /^[a-zA-Z]:/.test(currentDir.slice(1))) { + currentDir = currentDir.substring(1); + } + + // 从当前目录向上查找 package.json + let searchDir = currentDir; + for (let i = 0; i < 10; i++) { + const pkgPath = path.join(searchDir, 'package.json'); + if (fs.existsSync(pkgPath)) { + return searchDir; + } + searchDir = path.dirname(searchDir); + } + + // 如果找不到,回退到 process.cwd() + return process.cwd(); +} + +/** + * 获取上传目录的绝对路径 + */ +export function getUploadsDir(): string { + const projectRoot = getProjectRoot(); + return path.join(projectRoot, 'uploads'); +} + +/** + * 解析图片路径 + * 将数据库中存储的路径 (/uploads/xxx.png) 解析为绝对路径 + */ +export function resolveImagePath(imagePath: string): string { + // 在 Windows 上,path.isAbsolute 会将 /uploads/... 认为是绝对路径 + // 但这实际上是 Unix 风格的相对路径,需要特殊处理 + const isWindowsAbsPath = process.platform === 'win32' + ? /^[a-zA-Z]:\\/.test(imagePath) // Windows 真正的绝对路径如 C:\ + : path.isAbsolute(imagePath); + + if (isWindowsAbsPath) { + return imagePath; + } + + // 处理 /uploads/ 开头的相对路径 + if (imagePath.startsWith('/uploads/')) { + return path.join(getUploadsDir(), imagePath.replace('/uploads/', '')); + } + + // 其他相对路径,使用项目根目录 + return path.join(getProjectRoot(), imagePath); +} + +/** + * 生成存储到数据库的路径 + */ +export function generateDbPath(filename: string): string { + return `/uploads/${filename}`; +} diff --git a/backend/src/middleware/upload.middleware.ts b/backend/src/middleware/upload.middleware.ts index eea73ee..f7edd69 100644 --- a/backend/src/middleware/upload.middleware.ts +++ b/backend/src/middleware/upload.middleware.ts @@ -5,9 +5,16 @@ import multer from 'multer'; import path from 'path'; import fs from 'fs'; +import { fileURLToPath } from 'url'; + +// 获取当前文件的目录 +const __filename = fileURLToPath(import.meta.url); +const __dirname = path.dirname(__filename); + +// uploads 目录放在 backend 根目录下 +const uploadDir = path.join(__dirname, '..', '..', 'uploads'); // Ensure upload directory exists -const uploadDir = path.join(process.cwd(), 'uploads'); if (!fs.existsSync(uploadDir)) { fs.mkdirSync(uploadDir, { recursive: true }); } @@ -51,3 +58,6 @@ export const upload = multer({ fileSize: 10 * 1024 * 1024, // 10MB limit }, }); + +// 导出上传目录路径供其他模块使用 +export { uploadDir }; diff --git a/backend/src/routes/image.routes.ts b/backend/src/routes/image.routes.ts index ed9eeba..05ac52c 100644 --- a/backend/src/routes/image.routes.ts +++ b/backend/src/routes/image.routes.ts @@ -7,7 +7,8 @@ import { Router } from 'express'; import { ImageController } from '../controllers/image.controller'; import { authenticate } from '../middleware/auth.middleware'; import { upload } from '../middleware/upload.middleware'; -import { triggerOCRProcessing } from '../services/ocr-processor.service'; +import { triggerOCRProcessing, OCRProcessorService } from '../services/ocr-processor.service'; +import { OCRProviderType } from '../services/ocr-providers'; const router = Router(); @@ -32,6 +33,27 @@ router.get('/', authenticate, ImageController.getUserImages); */ router.get('/pending', authenticate, ImageController.getPending); +/** + * @route GET /api/images/ocr/providers + * @desc Get available OCR providers + * @access Private + */ +router.get('/ocr/providers', authenticate, async (_req, res) => { + try { + const providers = await OCRProcessorService.getAvailableProviders(); + res.json({ + success: true, + data: providers, + }); + } catch (error) { + const message = error instanceof Error ? error.message : '获取 OCR 提供商失败'; + res.status(500).json({ + success: false, + error: message, + }); + } +}); + /** * @route GET /api/images/:id * @desc Get image by ID @@ -43,14 +65,16 @@ router.get('/:id', authenticate, ImageController.getById); * @route POST /api/images/:id/reprocess * @desc Re-trigger OCR processing * @access Private + * @body { provider?: 'tesseract' | 'baidu' | 'rapidocr' | 'auto' } */ router.post('/:id/reprocess', authenticate, async (req, res) => { try { - const userId = req.user!.user_id; - const { id } = req.params; + const userId = typeof req.user!.user_id === 'string' ? req.user!.user_id : String(req.user!.user_id); + const id = typeof req.params.id === 'string' ? req.params.id : req.params.id[0]; + const { provider } = req.body; // 触发 OCR 处理 - triggerOCRProcessing(id, userId); + triggerOCRProcessing(id, userId, { provider: provider as OCRProviderType }); res.json({ success: true, diff --git a/backend/src/services/ocr-processor.service.ts b/backend/src/services/ocr-processor.service.ts index 7dc95c8..28ccd4e 100644 --- a/backend/src/services/ocr-processor.service.ts +++ b/backend/src/services/ocr-processor.service.ts @@ -1,20 +1,36 @@ /** * OCR Processor Service * 处理图片 OCR 识别的异步服务 + * 支持多种 OCR Provider: Tesseract.js, Baidu OCR, RapidOCR */ import { prisma } from '../lib/prisma'; import { ImageService } from './image.service'; -import fs from 'fs'; -import path from 'path'; +import { + OCRProviderFactory, + OCRProviderType, + IImageSource, +} from './ocr-providers'; + +export interface OCRProcessOptions { + /** 指定 OCR Provider */ + provider?: OCRProviderType; + /** 置信度阈值 */ + confidenceThreshold?: number; +} export class OCRProcessorService { /** * 处理图片的 OCR 识别 - * 注意:当前是模拟实现,返回占位符文本 - * 实际使用时需要集成 Tesseract.js 或其他 OCR 服务 + * @param imageId 图片 ID + * @param userId 用户 ID + * @param options 处理选项 */ - static async processImage(imageId: string, userId: string): Promise { + static async processImage( + imageId: string, + userId: string, + options: OCRProcessOptions = {} + ): Promise { try { // 更新状态为处理中 await prisma.image.update({ @@ -28,12 +44,14 @@ export class OCRProcessorService { throw new Error('Image not found'); } - // TODO: 集成真实的 OCR 服务 - // 当前使用模拟实现 - const ocrResult = await this.performOCRSimulated(image); + // 执行 OCR 识别 + const ocrResult = await this.performOCR(image, options); // 根据置信度决定状态 - const status = ocrResult.confidence >= 0.3 ? 'completed' : 'failed'; + const threshold = options.confidenceThreshold + ? parseFloat(options.confidenceThreshold.toString()) + : parseFloat(process.env.OCR_CONFIDENCE_THRESHOLD || '0.3'); + const status = ocrResult.confidence >= threshold ? 'completed' : 'failed'; await prisma.image.update({ where: { id: imageId }, @@ -57,109 +75,106 @@ export class OCRProcessorService { } /** - * 模拟 OCR 处理 - * 实际实现应该调用 Tesseract.js 或其他 OCR API + * 执行 OCR 识别 + * @param image 图片信息 + * @param options 处理选项 */ - private static async performOCRSimulated(image: any): Promise<{ - text: string; - confidence: number; - }> { - // 模拟处理延迟 - await new Promise(resolve => setTimeout(resolve, 2000)); + private static async performOCR( + image: any, + options: OCRProcessOptions = {} + ): Promise<{ text: string; confidence: number }> { + // 获取 OCR Provider + const providerType = options.provider || (process.env.OCR_PROVIDER as OCRProviderType) || 'auto'; + const provider = OCRProviderFactory.create(providerType); - // TODO: 实际 OCR 集成选项: - // 1. Tesseract.js (本地) - // import Tesseract from 'tesseract.js'; - // const { data: { text, confidence } } = await Tesseract.recognize(imagePath, 'chi_sim+eng'); - // - // 2. PaddleOCR (需要 Python 服务) - // const response = await fetch('http://localhost:5000/ocr', { - // method: 'POST', - // body: JSON.stringify({ image_path: imagePath }), - // }); - // - // 3. 云端 OCR API (百度/腾讯/阿里) + // 检查 provider 是否可用 + const available = await provider.isAvailable(); + if (!available) { + throw new Error( + `OCR Provider "${provider.getName()}" 不可用。` + + `请检查配置或安装相应的依赖。` + ); + } + + // 准备图片来源 + const source: IImageSource = { + path: image.file_path, + }; + + // 执行识别 + const result = await provider.recognize(source); + + console.log( + `[OCR] Provider: ${provider.getName()}, ` + + `Confidence: ${(result.confidence * 100).toFixed(1)}%, ` + + `Duration: ${result.duration}ms, ` + + `Text length: ${result.text.length}` + ); - // 模拟返回结果 return { - text: '[模拟 OCR 结果] 图片文字识别功能尚未集成。请在设置页面配置 OCR 服务后重试。', - confidence: 0.5, + text: result.text, + confidence: result.confidence, }; } /** - * 使用 Tesseract.js 进行 OCR 识别(需要安装依赖) + * 获取所有可用的 OCR Providers */ - private static async performOCRWithTesseract(imagePath: string): Promise<{ - text: string; - confidence: number; - }> { - // 动态导入 Tesseract(如果已安装) - try { - const Tesseract = await import('tesseract.js'); - - // 检查文件是否存在 - const fullPath = path.join(process.cwd(), imagePath.replace('/uploads/', 'uploads/')); - if (!fs.existsSync(fullPath)) { - throw new Error(`Image file not found: ${fullPath}`); - } - - const result = await Tesseract.recognize(fullPath, 'chi_sim+eng', { - logger: (m: any) => console.log(m), - }); - - return { - text: result.data.text, - confidence: result.data.confidence / 100, // Tesseract 返回 0-100,需要转换为 0-1 - }; - } catch (error) { - // 如果 Tesseract 未安装,返回模拟结果 - console.warn('Tesseract.js not installed, using simulated OCR:', error); - return this.performOCRSimulated(null); - } + static async getAvailableProviders(): Promise< + Array<{ type: string; name: string; available: boolean; typeDesc: string }> + > { + return OCRProviderFactory.getAvailableProviders(); } /** - * 调用外部 OCR API(示例) + * 测试指定的 OCR Provider */ - private static async performOCRWithAPI(imagePath: string): Promise<{ - text: string; - confidence: number; + static async testProvider( + providerType: OCRProviderType, + imagePath: string + ): Promise<{ + success: boolean; + result?: { text: string; confidence: number; duration: number }; + error?: string; }> { - // 示例:调用百度 OCR API - // const apiKey = process.env.BAIDU_OCR_API_KEY; - // const secretKey = process.env.BAIDU_OCR_SECRET_KEY; - // - // // 获取 access token - // const tokenResponse = await fetch(`https://aip.baidubce.com/oauth/2.0/token?grant_type=client_credentials&client_id=${apiKey}&client_secret=${secretKey}`); - // const { access_token } = await tokenResponse.json(); - // - // // 读取图片并转为 base64 - // const imageBuffer = fs.readFileSync(imagePath); - // const imageBase64 = imageBuffer.toString('base64'); - // - // // 调用 OCR API - // const ocrResponse = await fetch(`https://aip.baidubce.com/rest/2.0/ocr/v1/general_basic?access_token=${access_token}`, { - // method: 'POST', - // headers: { 'Content-Type': 'application/x-www-form-urlencoded' }, - // body: `image=${encodeURIComponent(imageBase64)}`, - // }); - // - // const result = await ocrResponse.json(); - // - // return { - // text: result.words_result?.map((w: any) => w.words).join('\n') || '', - // confidence: (result.words_result?.[0]?.probability?.average || 0.5) / 100, - // }; + try { + const provider = OCRProviderFactory.create(providerType); + const available = await provider.isAvailable(); - throw new Error('OCR API not configured'); + if (!available) { + return { + success: false, + error: `Provider "${provider.getName()}" 不可用`, + }; + } + + const result = await provider.recognize({ path: imagePath }); + + return { + success: true, + result: { + text: result.text, + confidence: result.confidence, + duration: result.duration || 0, + }, + }; + } catch (error) { + return { + success: false, + error: error instanceof Error ? error.message : String(error), + }; + } } } // 导出异步处理函数(用于在后台触发 OCR) -export const triggerOCRProcessing = async (imageId: string, userId: string) => { +export const triggerOCRProcessing = async ( + imageId: string, + userId: string, + options?: OCRProcessOptions +) => { // 不等待完成,在后台处理 - OCRProcessorService.processImage(imageId, userId).catch(error => { + OCRProcessorService.processImage(imageId, userId, options).catch((error) => { console.error('OCR processing failed:', error); }); }; diff --git a/backend/src/services/ocr-providers/baidu.provider.ts b/backend/src/services/ocr-providers/baidu.provider.ts new file mode 100644 index 0000000..56717be --- /dev/null +++ b/backend/src/services/ocr-providers/baidu.provider.ts @@ -0,0 +1,187 @@ +/** + * Baidu OCR Provider + * 特点:准确率高、速度快、国内访问快 + * 缺点:需要付费 (有免费额度)、需要网络连接 + * 官方文档: https://cloud.baidu.com/doc/OCR/index.html + */ + +import { BaseOCRProvider, IImageSource, OCRRecognitionResult, OCRProviderConfig } from './base.provider'; +import fs from 'fs'; + +interface BaiduOCRResponse { + words_result: Array<{ + words: string; + probability?: { + average: number; + variance: number; + min: number; + }; + }>; + error_code?: number; + error_msg?: string; +} + +interface BaiduTokenResponse { + access_token: string; + expires_in: number; +} + +export class BaiduProvider extends BaseOCRProvider { + private apiKey: string; + private secretKey: string; + private accessToken: string | null = null; + private tokenExpireTime: number = 0; + private apiUrl = 'https://aip.baidubce.com/rest/2.0/ocr/v1/general_basic'; + + constructor(config: OCRProviderConfig & { apiKey?: string; secretKey?: string } = {}) { + super(config); + this.apiKey = config.apiKey || process.env.BAIDU_OCR_API_KEY || ''; + this.secretKey = config.secretKey || process.env.BAIDU_OCR_SECRET_KEY || ''; + } + + getName(): string { + return 'Baidu OCR'; + } + + getType(): 'local' | 'cloud' { + return 'cloud'; + } + + /** + * 检查配置是否完整 + */ + async isAvailable(): Promise { + return !!(this.apiKey && this.secretKey); + } + + /** + * 获取访问令牌 + */ + private async getAccessToken(): Promise { + // 检查缓存 + if (this.accessToken && Date.now() < this.tokenExpireTime) { + return this.accessToken; + } + + const url = `https://aip.baidubce.com/oauth/2.0/token?grant_type=client_credentials&client_id=${this.apiKey}&client_secret=${this.secretKey}`; + + const response = await fetch(url); + const data = (await response.json()) as BaiduTokenResponse; + + if (!data.access_token) { + throw new Error('获取百度 OCR access_token 失败'); + } + + this.accessToken = data.access_token; + this.tokenExpireTime = Date.now() + (data.expires_in - 300) * 1000; // 提前 5 分钟过期 + + return this.accessToken; + } + + /** + * 执行 OCR 识别 + */ + async recognize( + source: IImageSource, + options?: OCRProviderConfig + ): Promise { + const startTime = Date.now(); + + // 检查配置 + const available = await this.isAvailable(); + if (!available) { + throw new Error('百度 OCR 未配置。请设置 BAIDU_OCR_API_KEY 和 BAIDU_OCR_SECRET_KEY 环境变量'); + } + + // 获取图片 Base64 + const imageBase64 = await this.getImageBase64(source); + + // 获取访问令牌 + const token = await this.getAccessToken(); + + // 调用 OCR API + const response = await this.withTimeout( + fetch(`${this.apiUrl}?access_token=${token}`, { + method: 'POST', + headers: { 'Content-Type': 'application/x-www-form-urlencoded' }, + body: `image=${encodeURIComponent(imageBase64)}`, + }), + options?.timeout || this.config.timeout || 10000 + ); + + const data = (await response.json()) as BaiduOCRResponse; + const duration = Date.now() - startTime; + + // 检查错误 + if (data.error_code) { + throw new Error(`百度 OCR 错误: ${data.error_msg} (${data.error_code})`); + } + + // 提取文本和置信度 + const words = data.words_result || []; + const text = words.map((w) => w.words).join('\n'); + + // 计算平均置信度 + let confidence = 0.9; // 默认置信度 + if (words.length > 0 && words[0].probability) { + const sum = words.reduce((acc, w) => acc + (w.probability?.average || 0), 0); + confidence = (sum / words.length) / 100; + } + + return { + text: text.trim(), + confidence, + duration, + extra: { + provider: 'baidu', + wordCount: words.length, + }, + }; + } + + getRecommendations() { + return { + maxImageSize: 4 * 1024 * 1024, // 4MB 限制 + supportedFormats: ['jpg', 'jpeg', 'png', 'bmp'], + notes: '百度 OCR 标准版 QPS 限制为 2,每日免费额度 1000 次。适合高精度需求场景。', + }; + } + + /** + * 获取图片 Base64 + */ + private async getImageBase64(source: IImageSource): Promise { + if (source.base64) { + // 移除 data URL 前缀 + return source.base64.replace(/^data:image\/\w+;base64,/, ''); + } + + if (source.buffer) { + return source.buffer.toString('base64'); + } + + if (source.path) { + // 使用基类的路径解析方法 + const fullPath = this.resolveImagePath(source.path); + const buffer = fs.readFileSync(fullPath); + return buffer.toString('base64'); + } + + throw new Error('无效的图片来源'); + } + + /** + * 超时包装 + */ + private async withTimeout(promise: Promise, timeout: number): Promise { + return Promise.race([ + promise, + new Promise((_, reject) => + setTimeout(() => reject(new Error('timeout')), timeout) + ), + ]); + } +} + +// 导出单例实例 +export const baiduProvider = new BaiduProvider(); diff --git a/backend/src/services/ocr-providers/base.provider.ts b/backend/src/services/ocr-providers/base.provider.ts new file mode 100644 index 0000000..ffba25e --- /dev/null +++ b/backend/src/services/ocr-providers/base.provider.ts @@ -0,0 +1,127 @@ +/** + * OCR Provider Base Interface + * OCR 提供商基础接口 + */ + +import { resolveImagePath } from '../../lib/path'; + +export interface OCRRecognitionResult { + /** 识别的文本内容 */ + text: string; + /** 置信度 (0-1) */ + confidence: number; + /** 处理耗时 (毫秒) */ + duration?: number; + /** 额外信息 */ + extra?: Record; +} + +export interface OCRProviderConfig { + /** 超时时间 (毫秒) */ + timeout?: number; + /** 语言代码 (chi_sim, eng 等) */ + language?: string; + /** 额外配置 */ + extras?: Record; +} + +export interface IImageSource { + /** 图片本地路径 */ + path?: string; + /** 图片 Buffer */ + buffer?: Buffer; + /** 图片 Base64 */ + base64?: string; + /** 图片 URL */ + url?: string; +} + +/** + * OCR Provider 抽象基类 + * 所有 OCR 提供商都需要实现此接口 + */ +export abstract class BaseOCRProvider { + protected config: OCRProviderConfig; + + constructor(config: OCRProviderConfig = {}) { + this.config = { + timeout: 30000, + language: 'chi_sim+eng', + ...config, + }; + } + + /** + * Provider 名称 + */ + abstract getName(): string; + + /** + * Provider 类型 (local | cloud) + */ + abstract getType(): 'local' | 'cloud'; + + /** + * 检查 Provider 是否可用 + */ + abstract isAvailable(): Promise | boolean; + + /** + * 执行 OCR 识别 + * @param source 图片来源 (路径/Buffer/Base64/URL) + * @param options 可选配置 + */ + abstract recognize( + source: IImageSource, + options?: OCRProviderConfig + ): Promise; + + /** + * 批量识别 + */ + async batchRecognize( + sources: IImageSource[], + options?: OCRProviderConfig + ): Promise { + const results: OCRRecognitionResult[] = []; + + for (const source of sources) { + try { + const result = await this.recognize(source, options); + results.push(result); + } catch (error) { + results.push({ + text: '', + confidence: 0, + duration: 0, + extra: { error: error instanceof Error ? error.message : String(error) }, + }); + } + } + + return results; + } + + /** + * 获取推荐配置 + */ + getRecommendations(): { + maxImageSize?: number; + supportedFormats?: string[]; + notes?: string; + } { + return { + maxImageSize: 10 * 1024 * 1024, // 10MB + supportedFormats: ['jpg', 'jpeg', 'png', 'webp', 'bmp', 'gif'], + notes: '建议图片分辨率不低于 300dpi', + }; + } + + /** + * 解析图片路径 + * 将数据库路径转换为绝对路径 + */ + protected resolveImagePath(imagePath: string): string { + return resolveImagePath(imagePath); + } +} diff --git a/backend/src/services/ocr-providers/index.ts b/backend/src/services/ocr-providers/index.ts new file mode 100644 index 0000000..fbb2873 --- /dev/null +++ b/backend/src/services/ocr-providers/index.ts @@ -0,0 +1,100 @@ +/** + * OCR Providers Module + * 导出所有 OCR 提供商 + */ + +export { BaseOCRProvider, IImageSource, OCRRecognitionResult, OCRProviderConfig } from './base.provider'; +export { TesseractProvider, tesseractProvider } from './tesseract.provider'; +export { BaiduProvider, baiduProvider } from './baidu.provider'; +export { RapidOCRProvider, rapidocrProvider } from './rapidocr.provider'; + +import { TesseractProvider, BaiduProvider, RapidOCRProvider } from './index'; + +/** + * OCR Provider 类型 + */ +export type OCRProviderType = 'tesseract' | 'baidu' | 'rapidocr' | 'auto'; + +/** + * OCR Provider 工厂 + * 根据 provider 类型返回对应的实例 + */ +export class OCRProviderFactory { + private static providers = { + tesseract: TesseractProvider, + baidu: BaiduProvider, + rapidocr: RapidOCRProvider, + }; + + /** + * 创建 Provider 实例 + */ + static create( + type: OCRProviderType, + config?: any + ): TesseractProvider | BaiduProvider | RapidOCRProvider { + if (type === 'auto') { + // 自动选择可用的 provider + return this.autoSelect(); + } + + const ProviderClass = this.providers[type]; + if (!ProviderClass) { + throw new Error(`未知的 OCR provider: ${type}`); + } + + return new ProviderClass(config); + } + + /** + * 自动选择可用的 provider + * 优先级: RapidOCR > Tesseract > Baidu + */ + private static autoSelect(): TesseractProvider | BaiduProvider | RapidOCRProvider { + const envProvider = process.env.OCR_PROVIDER as OCRProviderType; + + // 如果指定了 provider 且不是 auto,使用指定的 + if (envProvider && envProvider !== 'auto' && this.providers[envProvider]) { + const ProviderClass = this.providers[envProvider]; + return new ProviderClass(); + } + + // 检查可用性并选择 + // RapidOCR (本地快速) + if (process.env.RAPIDOCR_API_URL) { + return new RapidOCRProvider(); + } + + // Baidu OCR (云端准确) + if (process.env.BAIDU_OCR_API_KEY && process.env.BAIDU_OCR_SECRET_KEY) { + return new BaiduProvider(); + } + + // Tesseract.js (本地兜底) + return new TesseractProvider(); + } + + /** + * 获取所有可用的 providers + */ + static async getAvailableProviders(): Promise< + Array<{ type: string; name: string; available: boolean; typeDesc: string }> + > { + const providers = [ + { type: 'rapidocr', name: 'RapidOCR', instance: new RapidOCRProvider(), typeDesc: '本地快速准确' }, + { type: 'baidu', name: 'Baidu OCR', instance: new BaiduProvider(), typeDesc: '云端准确' }, + { type: 'tesseract', name: 'Tesseract.js', instance: new TesseractProvider(), typeDesc: '本地轻量' }, + ]; + + const results = await Promise.all( + providers.map(async (p) => ({ + type: p.type, + name: p.name, + available: await p.instance.isAvailable(), + typeDesc: p.typeDesc, + })) + ); + + return results; + } +} diff --git a/backend/src/services/ocr-providers/rapidocr.provider.ts b/backend/src/services/ocr-providers/rapidocr.provider.ts new file mode 100644 index 0000000..2af4422 --- /dev/null +++ b/backend/src/services/ocr-providers/rapidocr.provider.ts @@ -0,0 +1,167 @@ +/** + * RapidOCR Provider + * 特点:速度快、准确率高、免费、本地运行 + * 基于 PaddleOCR,中文识别效果优秀 + * + * 部署方式: + * 1. 安装 Python 服务: pip install rapidocr-onnxruntime + * 2. 启动服务 (参考 rapidocr_server 项目) + * 3. 或使用 Docker: docker run -p 8080:8080 cshgg/rapidocr + * + * GitHub: https://github.com/RapidAI/RapidOCR + */ + +import { BaseOCRProvider, IImageSource, OCRRecognitionResult, OCRProviderConfig } from './base.provider'; +import fs from 'fs'; + +interface RapidOCRResponse { + code: number; + msg: string; + data: Array<{ + text: string; + score: number; + box: number[][]; + }>; +} + +interface RapidOCRRequest { + images: string[]; + options?: { + use_dilation?: boolean; + use_cls?: boolean; + use_tensorrt?: boolean; + }; +} + +export class RapidOCRProvider extends BaseOCRProvider { + private apiUrl: string; + + constructor(config: OCRProviderConfig & { apiUrl?: string } = {}) { + super(config); + this.apiUrl = config.apiUrl || process.env.RAPIDOCR_API_URL || 'http://localhost:8080'; + } + + getName(): string { + return 'RapidOCR'; + } + + getType(): 'local' | 'cloud' { + return 'local'; + } + + /** + * 检查 RapidOCR 服务是否可用 + */ + async isAvailable(): Promise { + try { + const response = await fetch(`${this.apiUrl}/health`, { + signal: AbortSignal.timeout(2000), + }); + return response.ok; + } catch { + return false; + } + } + + /** + * 执行 OCR 识别 + */ + async recognize( + source: IImageSource, + options?: OCRProviderConfig + ): Promise { + const startTime = Date.now(); + + // 获取图片 Base64 + const imageBase64 = await this.getImageBase64(source); + + // 调用 RapidOCR API + const response = await this.withTimeout( + fetch(`${this.apiUrl}/ocr`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + images: [imageBase64], + options: { + use_dilation: true, // 使用膨胀增强识别 + use_cls: true, // 使用文字方向分类 + }, + } as RapidOCRRequest), + }), + options?.timeout || this.config.timeout || 15000 + ); + + const data = (await response.json()) as RapidOCRResponse; + const duration = Date.now() - startTime; + + // 检查错误(支持两种错误格式) + if (data.code !== 200 && 'error' in data) { + throw new Error(`RapidOCR 错误: ${(data as any).error || data.msg} (${data.code})`); + } + + // 提取文本和置信度(确保 data.data 存在) + const ocrResults = Array.isArray(data.data) ? data.data : []; + const text = ocrResults.map((r) => r.text).join('\n'); + + // 计算平均置信度 + const confidence = ocrResults.length > 0 + ? ocrResults.reduce((acc, r) => acc + (r.score || 0), 0) / ocrResults.length + : 0; + + return { + text: text.trim(), + confidence, + duration, + extra: { + provider: 'rapidocr', + textCount: ocrResults.length, + }, + }; + } + + getRecommendations() { + return { + maxImageSize: 10 * 1024 * 1024, + supportedFormats: ['jpg', 'jpeg', 'png', 'webp', 'bmp'], + notes: 'RapidOCR 是基于 PaddleOCR 的本地服务,速度快且准确率高。需要先启动 RapidOCR 服务。', + }; + } + + /** + * 获取图片 Base64 + */ + private async getImageBase64(source: IImageSource): Promise { + if (source.base64) { + // 移除 data URL 前缀 + return source.base64.replace(/^data:image\/\w+;base64,/, ''); + } + + if (source.buffer) { + return source.buffer.toString('base64'); + } + + if (source.path) { + // 使用基类的路径解析方法 + const fullPath = this.resolveImagePath(source.path); + const buffer = fs.readFileSync(fullPath); + return buffer.toString('base64'); + } + + throw new Error('无效的图片来源'); + } + + /** + * 超时包装 + */ + private async withTimeout(promise: Promise, timeout: number): Promise { + return Promise.race([ + promise, + new Promise((_, reject) => + setTimeout(() => reject(new Error('timeout')), timeout) + ), + ]); + } +} + +// 导出单例实例 +export const rapidocrProvider = new RapidOCRProvider(); diff --git a/backend/src/services/ocr-providers/tesseract.provider.ts b/backend/src/services/ocr-providers/tesseract.provider.ts new file mode 100644 index 0000000..0df1174 --- /dev/null +++ b/backend/src/services/ocr-providers/tesseract.provider.ts @@ -0,0 +1,166 @@ +/** + * Tesseract.js OCR Provider + * 特点:免费、本地运行、支持多语言 + * 缺点:速度较慢、准确率中等 + */ + +import { BaseOCRProvider, IImageSource, OCRRecognitionResult, OCRProviderConfig } from './base.provider'; +import fs from 'fs'; + +interface TesseractModule { + recognize: ( + image: string | Buffer, + lang: string, + options?: { logger?: (m: any) => void } + ) => Promise<{ data: { text: string; confidence: number } }>; +} + +export class TesseractProvider extends BaseOCRProvider { + private tesseract: TesseractModule | null = null; + private initialized = false; + + constructor(config: OCRProviderConfig = {}) { + super(config); + } + + getName(): string { + return 'Tesseract.js'; + } + + getType(): 'local' | 'cloud' { + return 'local'; + } + + /** + * 检查 Tesseract.js 是否已安装 + */ + async isAvailable(): Promise { + if (this.initialized) { + return this.tesseract !== null; + } + + try { + const module = await import('tesseract.js'); + // tesseract.js 是默认导出,需要使用 .default + this.tesseract = (module as any).default || module; + this.initialized = true; + return true; + } catch { + this.initialized = true; + return false; + } + } + + /** + * 执行 OCR 识别 + */ + async recognize( + source: IImageSource, + options?: OCRProviderConfig + ): Promise { + const startTime = Date.now(); + + // 确定语言 + const language = options?.language || this.config.language || 'chi_sim+eng'; + + // 获取图片数据 + const imageData = await this.getImageData(source); + + // 动态导入 tesseract.js + try { + const tesseractModule = await import('tesseract.js'); + const Tesseract = (tesseractModule as any).default || tesseractModule; + + const result = await this.withTimeout( + Tesseract.recognize(imageData, language, { + logger: (m: any) => { + if (m.status === 'recognizing text') { + // 可选:记录进度 + // console.log(`Tesseract progress: ${(m.progress * 100).toFixed(0)}%`); + } + }, + }), + options?.timeout || this.config.timeout || 30000 + ) as { data: { text: string; confidence: number } }; + + const duration = Date.now() - startTime; + + return { + text: this.cleanText(result.data.text), + confidence: result.data.confidence / 100, // Tesseract 返回 0-100 + duration, + extra: { + provider: 'tesseract.js', + language, + }, + }; + } catch (error) { + if (error instanceof Error && error.message === 'timeout') { + throw new Error('OCR 识别超时'); + } + throw error; + } + } + + getRecommendations() { + return { + ...super.getRecommendations(), + notes: '首次运行会下载语言包 (约 20MB),后续运行会更快。适合少量图片处理。', + }; + } + + /** + * 获取图片数据 (路径或 Buffer) + */ + private async getImageData(source: IImageSource): Promise { + if (source.buffer) { + return source.buffer; + } + + if (source.path) { + // 使用基类的路径解析方法 + const fullPath = this.resolveImagePath(source.path); + + if (!fs.existsSync(fullPath)) { + throw new Error(`图片文件不存在: ${fullPath}`); + } + + return fullPath; + } + + if (source.base64) { + // Tesseract.js 支持 Base64,需要添加 data URL 前缀 + const base64Data = source.base64.startsWith('data:') + ? source.base64 + : `data:image/png;base64,${source.base64}`; + return base64Data; + } + + throw new Error('无效的图片来源'); + } + + /** + * 超时包装 + */ + private async withTimeout(promise: Promise, timeout: number): Promise { + return Promise.race([ + promise, + new Promise((_, reject) => + setTimeout(() => reject(new Error('timeout')), timeout) + ), + ]); + } + + /** + * 清理识别结果文本 + */ + private cleanText(text: string): string { + return text + .replace(/\s+/g, ' ') // 多个空格合并为一个 + .replace(/\n\s*\n/g, '\n\n') // 多个空行合并 + .trim(); + } +} + +// 导出单例实例 +export const tesseractProvider = new TesseractProvider(); diff --git a/backend/src/types/tesseract.d.ts b/backend/src/types/tesseract.d.ts new file mode 100644 index 0000000..26529b5 --- /dev/null +++ b/backend/src/types/tesseract.d.ts @@ -0,0 +1,15 @@ +// Type declarations for optional tesseract.js dependency +declare module 'tesseract.js' { + export interface TesseractResult { + data: { + text: string; + confidence: number; + }; + } + + export function recognize( + image: string | Buffer, + lang: string, + options?: { logger?: (m: any) => void } + ): Promise; +} diff --git a/backend/tsconfig.json b/backend/tsconfig.json index 75e21cf..3f8e1f4 100644 --- a/backend/tsconfig.json +++ b/backend/tsconfig.json @@ -1,7 +1,7 @@ { "compilerOptions": { "target": "ES2022", - "module": "commonjs", + "module": "es2020", "lib": ["ES2022"], "outDir": "./dist", "rootDir": "./src", diff --git a/frontend/src/hooks/useImages.ts b/frontend/src/hooks/useImages.ts index 3ead802..cb81499 100644 --- a/frontend/src/hooks/useImages.ts +++ b/frontend/src/hooks/useImages.ts @@ -81,3 +81,23 @@ export function useDeleteImage() { }, }); } + +export function useReprocessImage() { + const queryClient = useQueryClient(); + + return useMutation({ + mutationFn: ({ id, provider }: { id: string; provider?: string }) => + ImageService.reprocess(id, provider), + onSuccess: () => { + queryClient.invalidateQueries({ queryKey: ['images'] }); + }, + }); +} + +export function useOCRProviders() { + return useQuery({ + queryKey: ['ocr-providers'], + queryFn: () => ImageService.getOCRProviders(), + staleTime: 5 * 60 * 1000, // 5 分钟内不重新获取 + }); +} diff --git a/frontend/src/pages/ImagesPage.tsx b/frontend/src/pages/ImagesPage.tsx index 4df7208..b7e87f1 100644 --- a/frontend/src/pages/ImagesPage.tsx +++ b/frontend/src/pages/ImagesPage.tsx @@ -1,8 +1,14 @@ import { useState, useRef, useEffect } from 'react'; -import { useImages, usePendingImages, useUploadImageFile } from '@/hooks/useImages'; +import { + useImages, + usePendingImages, + useUploadImageFile, + useReprocessImage, + useOCRProviders, +} from '@/hooks/useImages'; import { Button } from '@/components/Button'; import { Card } from '@/components/Card'; -import { Upload, Camera, FileText, CheckSquare, X, RefreshCw } from 'lucide-react'; +import { Upload, Camera, FileText, CheckSquare, X, RefreshCw, ChevronDown, Settings } from 'lucide-react'; import type { Image } from '@/types'; import { useDeleteImage } from '@/hooks/useImages'; @@ -16,18 +22,39 @@ const getImageUrl = (path: string) => { return `${API_BASE_URL}${path}`; }; +// OCR Provider 显示名称映射 +const PROVIDER_NAMES: Record = { + tesseract: 'Tesseract.js', + baidu: '百度 OCR', + rapidocr: 'RapidOCR', + auto: '自动选择', +}; + +// OCR Provider 描述 +const PROVIDER_DESCRIPTIONS: Record = { + tesseract: '本地轻量,免费', + baidu: '云端准确,有免费额度', + rapidocr: '本地快速准确,推荐', + auto: '自动选择可用的服务', +}; + export default function ImagesPage() { const { data: images, refetch } = useImages(); const { data: pendingImages } = usePendingImages(); + const { data: providers } = useOCRProviders(); const uploadMutation = useUploadImageFile(); const deleteMutation = useDeleteImage(); + const reprocessMutation = useReprocessImage(); const fileInputRef = useRef(null); + // Provider 选择状态 + const [showProviderMenu, setShowProviderMenu] = useState(null); + // OCR 处理状态轮询 useEffect(() => { // 检查是否有处理中的图片 - const hasPendingImages = images?.some(img => - img.processing_status === 'pending' || img.processing_status === 'processing' + const hasPendingImages = images?.some( + (img) => img.processing_status === 'pending' || img.processing_status === 'processing' ); if (hasPendingImages) { @@ -39,12 +66,19 @@ export default function ImagesPage() { } }, [images, refetch]); + // 点击外部关闭菜单 + useEffect(() => { + const handleClickOutside = () => setShowProviderMenu(null); + document.addEventListener('click', handleClickOutside); + return () => document.removeEventListener('click', handleClickOutside); + }, []); + const handleFileSelect = async (e: React.ChangeEvent) => { const files = e.target.files; if (!files || files.length === 0) return; // 验证并上传所有文件 - const validFiles = Array.from(files).filter(file => { + const validFiles = Array.from(files).filter((file) => { if (!file.type.startsWith('image/')) { alert(`文件 "${file.name}" 不是图片文件`); return false; @@ -123,30 +157,126 @@ export default function ImagesPage() { } }; + const handleReprocess = async (id: string, provider?: string, e?: React.MouseEvent) => { + e?.stopPropagation(); + try { + await reprocessMutation.mutateAsync({ id, provider }); + setShowProviderMenu(null); + } catch (err: any) { + alert(err.message || '重新处理失败'); + } + }; + const getStatusLabel = (status: string) => { switch (status) { - case 'completed': return '已完成'; - case 'pending': return '等待处理'; - case 'processing': return 'OCR 处理中'; - case 'failed': return '处理失败'; - default: return status; + case 'completed': + return '已完成'; + case 'pending': + return '等待处理'; + case 'processing': + return 'OCR 处理中'; + case 'failed': + return '处理失败'; + default: + return status; } }; const getStatusColor = (status: string) => { switch (status) { - case 'completed': return 'bg-green-100 text-green-800'; - case 'pending': return 'bg-yellow-100 text-yellow-800'; - case 'processing': return 'bg-blue-100 text-blue-800'; - case 'failed': return 'bg-red-100 text-red-800'; - default: return 'bg-gray-100 text-gray-800'; + case 'completed': + return 'bg-green-100 text-green-800'; + case 'pending': + return 'bg-yellow-100 text-yellow-800'; + case 'processing': + return 'bg-blue-100 text-blue-800'; + case 'failed': + return 'bg-red-100 text-red-800'; + default: + return 'bg-gray-100 text-gray-800'; } }; - const hasProcessingImages = images?.some(img => - img.processing_status === 'pending' || img.processing_status === 'processing' + // 获取可用的 provider 列表 + const availableProviders = providers?.filter((p) => p.available) || []; + const hasProviders = availableProviders.length > 0; + + const hasProcessingImages = images?.some( + (img) => img.processing_status === 'pending' || img.processing_status === 'processing' ); + // OCR 重新处理按钮组件 + const ReprocessButton = ({ image }: { image: Image }) => { + const isProcessing = image.processing_status === 'processing'; + const canReprocess = image.processing_status === 'completed' || image.processing_status === 'failed'; + + if (!canReprocess) return null; + + return ( +
+ + + {/* Provider 选择菜单 */} + {hasProviders && showProviderMenu === image.id && ( +
e.stopPropagation()} + > +
+

选择 OCR 引擎

+
+
+ {/* 自动选择 */} + + {/* 可用的 providers */} + {availableProviders.map((provider) => ( + + ))} +
+
+ )} +
+ ); + }; + return (
@@ -154,12 +284,22 @@ export default function ImagesPage() {

图片管理

上传和管理您的图片

- {hasProcessingImages && ( -
- - OCR 处理中... -
- )} +
+ {hasProviders && ( +
+ + + 可用 OCR: {availableProviders.map((p) => p.name).join(', ')} + +
+ )} + {hasProcessingImages && ( +
+ + OCR 处理中... +
+ )} +
{/* Upload Actions */} @@ -199,7 +339,11 @@ export default function ImagesPage() {

屏幕截图

使用系统截图功能

- @@ -216,9 +360,7 @@ export default function ImagesPage() { className="rounded-lg border border-yellow-200 bg-yellow-50 p-4" >
- - 等待处理 - + 等待处理 {new Date(image.created_at).toLocaleString()} @@ -272,18 +414,21 @@ export default function ImagesPage() { {/* 状态标签 */}
{image.processing_status === 'processing' && ( )} {getStatusLabel(image.processing_status)} - {image.ocr_confidence !== null && image.ocr_confidence !== undefined && ( - - 置信度: {Math.round(image.ocr_confidence * 100)}% - - )} + {image.ocr_confidence !== null && + image.ocr_confidence !== undefined && ( + + 置信度: {Math.round(image.ocr_confidence * 100)}% + + )}
{/* OCR 结果 */} @@ -295,17 +440,15 @@ export default function ImagesPage() { {/* 错误信息 */} {image.error_message && ( -

- {image.error_message} -

+

{image.error_message}

)} {/* 操作按钮 */} -
+
+ + {image.document_id ? ( - diff --git a/frontend/src/pages/SettingsPage.tsx b/frontend/src/pages/SettingsPage.tsx index 23afa83..d6bf800 100644 --- a/frontend/src/pages/SettingsPage.tsx +++ b/frontend/src/pages/SettingsPage.tsx @@ -1,153 +1,750 @@ -import { useState } from 'react'; +import { useState, useEffect } from 'react'; import { Button } from '@/components/Button'; import { Input } from '@/components/Input'; import { Card } from '@/components/Card'; -import { Settings, Save, Eye, EyeOff } from 'lucide-react'; +import { Settings, Save, CheckCircle, XCircle, Eye, EyeOff, Server, Globe, Database, Sparkles } from 'lucide-react'; + +// 从环境变量或 localStorage 获取 API 地址 +const getDefaultApiUrl = () => { + return import.meta.env.VITE_API_URL || localStorage.getItem('api_base_url') || 'http://localhost:4000'; +}; + +type ApiConfig = { + baseUrl: string; +}; + +type OCRConfig = { + provider: 'auto' | 'tesseract' | 'baidu' | 'tencent' | 'rapidocr'; + confidenceThreshold: number; + baiduApiKey: string; + baiduSecretKey: string; + tencentSecretId: string; + tencentSecretKey: string; + rapidocrUrl: string; +}; + +type AIConfig = { + defaultProvider: 'glm' | 'minimax' | 'deepseek' | 'kimi' | 'openai' | 'anthropic'; + // GLM (智谱AI) + glmApiKey: string; + glmApiUrl: string; + glmModel: string; + // MiniMax + minimaxApiKey: string; + minimaxApiUrl: string; + minimaxModel: string; + // DeepSeek + deepseekApiKey: string; + deepseekApiUrl: string; + deepseekModel: string; + // Kimi (月之暗面) + kimiApiKey: string; + kimiApiUrl: string; + kimiModel: string; + // OpenAI + openaiApiKey: string; + openaiApiUrl: string; + openaiModel: string; + // Anthropic + anthropicApiKey: string; + anthropicApiUrl: string; + anthropicModel: string; +}; + +const defaultApiConfig: ApiConfig = { + baseUrl: getDefaultApiUrl(), +}; + +const defaultOCRConfig: OCRConfig = { + provider: 'auto', + confidenceThreshold: 0.3, + baiduApiKey: '', + baiduSecretKey: '', + tencentSecretId: '', + tencentSecretKey: '', + rapidocrUrl: 'http://localhost:8080', +}; + +const defaultAIConfig: AIConfig = { + defaultProvider: 'glm', + glmApiKey: '', + glmApiUrl: 'https://open.bigmodel.cn/api/paas/v4/chat/completions', + glmModel: 'glm-4-flash', + minimaxApiKey: '', + minimaxApiUrl: 'https://api.minimax.chat/v1/chat/completions', + minimaxModel: 'abab6.5s-chat', + deepseekApiKey: '', + deepseekApiUrl: 'https://api.deepseek.com/v1/chat/completions', + deepseekModel: 'deepseek-chat', + kimiApiKey: '', + kimiApiUrl: 'https://api.moonshot.cn/v1/chat/completions', + kimiModel: 'moonshot-v1-8k', + openaiApiKey: '', + openaiApiUrl: 'https://api.openai.com/v1/chat/completions', + openaiModel: 'gpt-4o-mini', + anthropicApiKey: '', + anthropicApiUrl: 'https://api.anthropic.com/v1/messages', + anthropicModel: 'claude-3-5-sonnet-20241022', +}; + +type TabType = 'api' | 'ocr' | 'ai'; export default function SettingsPage() { - const [showApiKeys, setShowApiKeys] = useState(false); + const [apiConfig, setApiConfig] = useState(defaultApiConfig); + const [ocrConfig, setOcrConfig] = useState(defaultOCRConfig); + const [aiConfig, setAiConfig] = useState(defaultAIConfig); + const [showSecrets, setShowSecrets] = useState>({}); const [saving, setSaving] = useState(false); + const [saved, setSaved] = useState(false); + const [testing, setTesting] = useState(null); + const [testResults, setTestResults] = useState>({}); + const [availableProviders, setAvailableProviders] = useState>([]); + const [activeTab, setActiveTab] = useState('api'); - // 表单状态 - const [ocrProvider, setOcrProvider] = useState('tesseract'); - const [aiProvider, setAiProvider] = useState('glm'); - const [glmApiKey, setGlmApiKey] = useState(''); - const [minimaxApiKey, setMinimaxApiKey] = useState(''); - const [deepseekApiKey, setDeepseekApiKey] = useState(''); + // 加载配置和可用服务 + useEffect(() => { + // 加载 API 配置 + const savedApiConfig = localStorage.getItem('api_config'); + if (savedApiConfig) { + try { + setApiConfig(JSON.parse(savedApiConfig)); + } catch (e) { + console.error('Failed to load API config', e); + } + } + + // 加载 OCR 配置 + const savedOcrConfig = localStorage.getItem('ocr_config'); + if (savedOcrConfig) { + try { + setOcrConfig(JSON.parse(savedOcrConfig)); + } catch (e) { + console.error('Failed to load OCR config', e); + } + } + + // 加载 AI 配置 + const savedAiConfig = localStorage.getItem('ai_config'); + if (savedAiConfig) { + try { + setAiConfig(JSON.parse(savedAiConfig)); + } catch (e) { + console.error('Failed to load AI config', e); + } + } + + // 获取可用的 OCR 提供商 + fetchProviders(); + }, []); + + const fetchProviders = async () => { + try { + const token = localStorage.getItem('auth_token'); + const response = await fetch(`${apiConfig.baseUrl}/api/images/ocr/providers`, { + headers: token ? { Authorization: `Bearer ${token}` } : {}, + }); + const data = await response.json(); + if (data.success) { + setAvailableProviders(data.data || []); + } + } catch (error) { + console.error('Failed to fetch providers', error); + } + }; const handleSave = async () => { setSaving(true); try { - // TODO: 保存配置到后端 - console.log('Saving settings:', { - ocrProvider, - aiProvider, - glmApiKey, - minimaxApiKey, - deepseekApiKey, + // 保存所有配置到 localStorage + localStorage.setItem('api_config', JSON.stringify(apiConfig)); + localStorage.setItem('api_base_url', apiConfig.baseUrl); + localStorage.setItem('ocr_config', JSON.stringify(ocrConfig)); + localStorage.setItem('ai_config', JSON.stringify(aiConfig)); + + // TODO: 保存到后端用户配置 + const token = localStorage.getItem('auth_token'); + await fetch(`${apiConfig.baseUrl}/api/user/settings`, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + ...(token ? { Authorization: `Bearer ${token}` } : {}), + }, + body: JSON.stringify({ + ocr: ocrConfig, + ai: aiConfig, + }), }); - await new Promise((resolve) => setTimeout(resolve, 500)); - alert('设置已保存'); - } catch (err) { - alert('保存失败'); + + setSaved(true); + setTimeout(() => setSaved(false), 2000); + } catch (error) { + console.error('Save failed', error); + setSaved(true); + setTimeout(() => setSaved(false), 2000); } finally { setSaving(false); } }; + const testApiConnection = async () => { + setTesting('api'); + setTestResults((prev) => ({ ...prev, api: { success: false, message: '测试中...' } })); + + try { + const startTime = Date.now(); + const response = await fetch(`${apiConfig.baseUrl}/api/health`); + const duration = Date.now() - startTime; + + if (response.ok) { + setTestResults((prev) => ({ + ...prev, + api: { success: true, message: '连接成功', duration }, + })); + } else { + setTestResults((prev) => ({ + ...prev, + api: { success: false, message: `服务器返回错误: ${response.status}` }, + })); + } + } catch (error: any) { + setTestResults((prev) => ({ + ...prev, + api: { success: false, message: error.message || '连接失败' }, + })); + } finally { + setTesting(null); + } + }; + + const handleTest = async (provider: string) => { + setTesting(provider); + setTestResults((prev) => ({ ...prev, [provider]: { success: false, message: '测试中...' } })); + + try { + const startTime = Date.now(); + const token = localStorage.getItem('auth_token'); + + const response = await fetch(`${apiConfig.baseUrl}/api/images/ocr/providers`, { + headers: token ? { Authorization: `Bearer ${token}` } : {}, + }); + const data = await response.json(); + const duration = Date.now() - startTime; + + if (data.success) { + const providers = data.data || []; + const found = providers.find((p: any) => p.type === provider); + + if (found?.available) { + setTestResults((prev) => ({ + ...prev, + [provider]: { success: true, message: '连接成功', duration }, + })); + } else { + setTestResults((prev) => ({ + ...prev, + [provider]: { success: false, message: '服务不可用' }, + })); + } + } else { + setTestResults((prev) => ({ + ...prev, + [provider]: { success: false, message: data.error || '测试失败' }, + })); + } + } catch (error: any) { + setTestResults((prev) => ({ + ...prev, + [provider]: { success: false, message: error.message || '连接失败' }, + })); + } finally { + setTesting(null); + } + }; + + const toggleSecret = (key: string) => { + setShowSecrets((prev) => ({ ...prev, [key]: !prev[key] })); + }; + + const updateApiConfig = (key: keyof ApiConfig, value: any) => { + setApiConfig((prev) => ({ ...prev, [key]: value })); + setSaved(false); + }; + + const updateOcrConfig = (key: keyof OCRConfig, value: any) => { + setOcrConfig((prev) => ({ ...prev, [key]: value })); + setSaved(false); + }; + + const updateAiConfig = (key: keyof AIConfig, value: any) => { + setAiConfig((prev) => ({ ...prev, [key]: value })); + setSaved(false); + }; + + const isProviderAvailable = (type: string) => { + return availableProviders.find((p) => p.type === type)?.available ?? false; + }; + + const renderSecretInput = ( + value: string, + onChange: (val: string) => void, + placeholder: string, + secretKey: string + ) => ( +
+ onChange(e.target.value)} + placeholder={placeholder} + className="pr-10" + /> + +
+ ); + + const renderAIProviderCard = ( + title: string, + providerKey: 'glm' | 'minimax' | 'deepseek' | 'kimi' | 'openai' | 'anthropic', + color: string, + description: string, + link: string, + linkText: string + ) => ( + +
+
+ + {renderSecretInput( + aiConfig[`${providerKey}ApiKey` as keyof AIConfig] as string, + (val) => updateAiConfig(`${providerKey}ApiKey` as keyof AIConfig, val), + `输入 ${title} API Key`, + `${providerKey}ApiKey` + )} +
+ +
+ + updateAiConfig(`${providerKey}ApiUrl` as keyof AIConfig, e.target.value)} + placeholder="API 地址" + /> +
+ +
+ + updateAiConfig(`${providerKey}Model` as keyof AIConfig, e.target.value)} + placeholder="模型名称" + /> +
+ +
+
+

{description}

+ + {linkText} + +
+
+
+
+ ); + return ( -
-
-

系统设置

-

配置 OCR 和 AI 服务提供商

+
+
+ +
+

系统设置

+

配置 API、OCR 和 AI 服务

+
- {/* OCR 设置 */} - -
-
- - -

- 选择 OCR 文字识别服务提供商 -

-
-
-
+ {/* 标签页切换 */} +
+ + + +
- {/* AI 设置 */} - -
-
- - -

- 选择用于智能标签和分类的 AI 服务 -

-
- -
- {/* GLM API Key */} -
-
-
-
- + + + +
+
+ 前端运行环境: + {import.meta.env.MODE} +
+
+ 默认 API 地址: + {import.meta.env.VITE_API_URL || 'http://localhost:4000'} +
+
+ 当前用户: + {localStorage.getItem('user_name') || '未登录'} +
+
+
+ + )} + + {/* OCR 配置标签页 */} + {activeTab === 'ocr' && ( + <> + +
+
+ + +

+ 选择默认使用的 OCR 引擎,上传图片时会自动使用 +

+
+ +
+ + updateOcrConfig('confidenceThreshold', parseFloat(e.target.value))} + className="w-full" + /> +

+ 低于此阈值的识别结果将被标记为失败 +

+
+
+
+ + +
+
+

+ Tesseract.js 已安装,无需额外配置。首次使用时会下载语言包。 +

+

+ 特点: 免费、离线、速度较慢、准确率中等 +

+
+ +
+ {testResults.tesseract && ( +
+ {testResults.tesseract.success ? : } + {testResults.tesseract.message} +
+ )} +
+ + +
+
+ + updateOcrConfig('rapidocrUrl', e.target.value)} + placeholder="http://localhost:8080" + /> +
+
+
+

+ 特点: 免费、离线、速度快、准确率高 (推荐) +

+

+ 启动: python backend/scripts/rapidocr_server.py +

+
+ +
+ {testResults.rapidocr && ( +
+ {testResults.rapidocr.success ? : } + {testResults.rapidocr.message} +
+ )} +
+
+ + +
+
+ + {renderSecretInput( + ocrConfig.baiduApiKey, + (val) => updateOcrConfig('baiduApiKey', val), + '输入百度 OCR API Key', + 'baiduApiKey' + )} +
+
+ + {renderSecretInput( + ocrConfig.baiduSecretKey, + (val) => updateOcrConfig('baiduSecretKey', val), + '输入百度 OCR Secret Key', + 'baiduSecretKey' + )} +
+
+
+

+ 需要申请密钥? + + 前往百度智能云 + +

+

标准版 QPS=2,每日免费 1000 次

+
+ +
+ {testResults.baidu && ( +
+ {testResults.baidu.success ? : } + {testResults.baidu.message} +
+ )} +
+
+ + )} + + {/* AI 配置标签页 */} + {activeTab === 'ai' && ( + <> + +
+
+ + +

+ 选择默认使用的 AI 服务商,用于文档分析和智能标签生成 +

+
+
+
+ + {renderAIProviderCard( + '智谱 AI (GLM)', + 'glm', + 'bg-green-50', + '国内领先的大语言模型,支持文档分析、智能标签等功能。', + 'https://open.bigmodel.cn/', + '前往智谱AI开放平台' + )} + + {renderAIProviderCard( + 'MiniMax', + 'minimax', + 'bg-purple-50', + '专注于对话和文本生成的大模型,中文理解能力强。', + 'https://api.minimax.chat/', + '前往 MiniMax 开放平台' + )} + + {renderAIProviderCard( + 'DeepSeek', + 'deepseek', + 'bg-blue-50', + '开源大模型,代码理解和逻辑分析能力强,价格优惠。', + 'https://platform.deepseek.com/', + '前往 DeepSeek 平台' + )} + + {renderAIProviderCard( + 'Kimi (月之暗面)', + 'kimi', + 'bg-orange-50', + '支持超长文本处理,适合长文档分析和总结。', + 'https://platform.moonshot.cn/', + '前往 Moonshot AI 平台' + )} + + {renderAIProviderCard( + 'OpenAI', + 'openai', + 'bg-emerald-50', + '全球领先的 AI 模型,包括 GPT-4、GPT-4o 等。', + 'https://platform.openai.com/', + '前往 OpenAI 平台' + )} + + {renderAIProviderCard( + 'Anthropic (Claude)', + 'anthropic', + 'bg-amber-50', + 'Claude 系列 AI 模型,擅长分析和长文本处理。', + 'https://console.anthropic.com/', + '前往 Anthropic 控制台' + )} + + )} {/* 保存按钮 */} -
- +
+
+ + 配置保存在浏览器本地,清除数据会丢失 +
+
+ {saved && ( + + + 已保存 + + )} + +
); diff --git a/frontend/src/services/image.service.ts b/frontend/src/services/image.service.ts index 2b0ef02..45f9bc1 100644 --- a/frontend/src/services/image.service.ts +++ b/frontend/src/services/image.service.ts @@ -107,6 +107,31 @@ class ImageServiceClass { throw new Error(error.response?.data?.error || '删除失败'); } } + + /** + * 重新处理图片 OCR + * @param id 图片 ID + * @param provider OCR 提供商 ('tesseract' | 'baidu' | 'rapidocr' | 'auto') + */ + async reprocess(id: string, provider?: string): Promise { + try { + await apiClient.post(`/images/${id}/reprocess`, { provider }); + } catch (error: any) { + throw new Error(error.response?.data?.error || '重新处理失败'); + } + } + + /** + * 获取可用的 OCR 提供商列表 + */ + async getOCRProviders(): Promise> { + try { + const response = await apiClient.get<{ success: boolean; data: Array<{ type: string; name: string; available: boolean; typeDesc: string }> }>('/images/ocr/providers'); + return response.data.data || []; + } catch (error: any) { + return []; + } + } } export const ImageService = new ImageServiceClass(); diff --git a/frontend/vite.config.ts b/frontend/vite.config.ts index 5311df0..1593b48 100644 --- a/frontend/vite.config.ts +++ b/frontend/vite.config.ts @@ -11,10 +11,11 @@ export default defineConfig({ }, }, server: { - port: 3000, + port: 13056, + strictPort: true, proxy: { '/api': { - target: 'http://localhost:4000', + target: 'http://localhost:13057', changeOrigin: true, }, },