feat(ocr): 集成 PaddleOCR 服务并优化 OCR 系统

- 新增 PaddleOCR 本地高精度 OCR 服务支持,包括 Dockerfile、API 服务和 provider 实现
- 在 docker-compose 中集成 RapidOCR 和 PaddleOCR 服务,并配置健康检查
- 优化后端 API 路由前缀,移除 `/api` 以简化代理配置
- 更新 Nginx 配置以正确传递请求头和代理 WebSocket 连接
- 在前端设置页面添加 PaddleOCR 和 RapidOCR 的测试与配置选项
- 修复后端 Dockerfile 以支持 Python 原生模块构建
- 更新 OCR 设置指南,反映当前服务状态和部署方式
- 添加上传文件调试日志和权限设置
This commit is contained in:
congsh
2026-02-27 18:43:07 +08:00
parent 764c6a8c0c
commit 9a301cc434
17 changed files with 628 additions and 85 deletions
+2 -1
View File
@@ -3,7 +3,8 @@
# ========================================
FROM node:20-alpine AS deps
RUN apk add --no-cache libc6-compat
# Add Python and build tools for native modules (bcrypt, etc.)
RUN apk add --no-cache libc6-compat python3 make g++
WORKDIR /app
+11 -2
View File
@@ -4,6 +4,11 @@
mkdir -p /app/data
chown -R nodejs:nodejs /app/data
# Ensure uploads directory exists with proper permissions (after volume mount)
mkdir -p /app/uploads
chown -R nodejs:nodejs /app/uploads
chmod 755 /app/uploads
# Set database path to data directory
export DATABASE_URL="file:/app/data/prod.db"
@@ -13,10 +18,14 @@ npx prisma db push --skip-generate || echo "Database push failed, will try on st
# Fix database file permissions after creation
if [ -f /app/data/prod.db ]; then
chown nodejs:nodejs /app/data/prod.db
chmod 664 /app/data/prod.db
chown nodejs:nodejs /app/data/prod.db
chmod 664 /app/data/prod.db
fi
# Log uploads directory status
echo "Uploads directory status:"
ls -la /app/uploads || echo "Uploads directory does not exist"
# Start the application as nodejs user
echo "Starting application..."
exec su-exec nodejs npx tsx src/index.ts
@@ -20,6 +20,14 @@ export class ImageController {
const file = req.file;
const { document_id } = req.body;
console.log('[UPLOAD] File received:', {
originalname: file?.originalname,
filename: file?.filename,
path: file?.path,
size: file?.size,
mimetype: file?.mimetype,
});
if (!file) {
res.status(400).json({
success: false,
+6 -6
View File
@@ -29,16 +29,16 @@ app.use(express.urlencoded({ extended: true }));
app.use('/uploads', express.static(path.join(process.cwd(), 'uploads')));
// Health check
app.get('/api/health', (_req, res) => {
app.get('/health', (_req, res) => {
res.json({ success: true, message: 'API is running' });
});
// Routes
app.use('/api/auth', authRoutes);
app.use('/api/documents', documentRoutes);
app.use('/api/todos', todoRoutes);
app.use('/api/images', imageRoutes);
app.use('/api/user', userRoutes);
app.use('/auth', authRoutes);
app.use('/documents', documentRoutes);
app.use('/todos', todoRoutes);
app.use('/images', imageRoutes);
app.use('/user', userRoutes);
// 404 handler
app.use((_req, res) => {
+34
View File
@@ -54,6 +54,40 @@ router.get('/ocr/providers', authenticate, async (_req, res) => {
}
});
/**
 * @route POST /api/images/ocr/test
 * @desc Run a single OCR provider against an uploaded image and return its result
 * @access Private
 * @body { provider: 'tesseract' | 'baidu' | 'rapidocr' | 'paddleocr' }
 */
router.post('/ocr/test', authenticate, upload.single('file'), async (req, res) => {
  try {
    const requestedProvider = req.body.provider;
    const uploaded = req.file;

    // Without an uploaded image there is nothing to run the provider on.
    if (!uploaded) {
      res.status(400).json({ success: false, error: '请上传测试图片' });
      return;
    }

    // Delegate the actual test run to OCRProcessorService and relay its result unchanged.
    const outcome = await OCRProcessorService.testProvider(
      requestedProvider as OCRProviderType,
      uploaded.path
    );
    res.json(outcome);
  } catch (error) {
    // Surface the underlying error message when there is one; otherwise use the generic text.
    let message = 'OCR 测试失败';
    if (error instanceof Error) {
      message = error.message;
    }
    res.status(500).json({ success: false, error: message });
  }
});
/**
* @route GET /api/images/:id
* @desc Get image by ID
+13 -4
View File
@@ -8,15 +8,17 @@ export type { IImageSource, OCRRecognitionResult, OCRProviderConfig } from './ba
export { TesseractProvider, tesseractProvider } from './tesseract.provider';
export { BaiduProvider, baiduProvider } from './baidu.provider';
export { RapidOCRProvider, rapidocrProvider } from './rapidocr.provider';
export { PaddleOCRProvider, paddleocrProvider } from './paddleocr.provider';
import { TesseractProvider } from './tesseract.provider';
import { BaiduProvider } from './baidu.provider';
import { RapidOCRProvider } from './rapidocr.provider';
import { PaddleOCRProvider } from './paddleocr.provider';
/**
* OCR Provider 类型
*/
export type OCRProviderType = 'tesseract' | 'baidu' | 'rapidocr' | 'auto';
export type OCRProviderType = 'tesseract' | 'baidu' | 'rapidocr' | 'paddleocr' | 'auto';
/**
* OCR Provider 工厂
@@ -27,6 +29,7 @@ export class OCRProviderFactory {
tesseract: TesseractProvider,
baidu: BaiduProvider,
rapidocr: RapidOCRProvider,
paddleocr: PaddleOCRProvider,
};
/**
@@ -35,7 +38,7 @@ export class OCRProviderFactory {
static create(
type: OCRProviderType,
config?: any
): TesseractProvider | BaiduProvider | RapidOCRProvider {
): TesseractProvider | BaiduProvider | RapidOCRProvider | PaddleOCRProvider {
if (type === 'auto') {
// 自动选择可用的 provider
return this.autoSelect();
@@ -51,9 +54,9 @@ export class OCRProviderFactory {
/**
* 自动选择可用的 provider
* 优先级: RapidOCR > Tesseract > Baidu
* 优先级: PaddleOCR > RapidOCR > Tesseract > Baidu
*/
private static autoSelect(): TesseractProvider | BaiduProvider | RapidOCRProvider {
private static autoSelect(): TesseractProvider | BaiduProvider | RapidOCRProvider | PaddleOCRProvider {
const envProvider = process.env.OCR_PROVIDER as OCRProviderType;
// 如果指定了 provider 且不是 auto,使用指定的
@@ -63,6 +66,11 @@ export class OCRProviderFactory {
}
// 检查可用性并选择
// PaddleOCR (本地高精度)
if (process.env.PADDLEOCR_API_URL) {
return new PaddleOCRProvider();
}
// RapidOCR (本地快速)
if (process.env.RAPIDOCR_API_URL) {
return new RapidOCRProvider();
@@ -84,6 +92,7 @@ export class OCRProviderFactory {
Array<{ type: string; name: string; available: boolean; typeDesc: string }>
> {
const providers = [
{ type: 'paddleocr', name: 'PaddleOCR', instance: new PaddleOCRProvider(), typeDesc: '本地高精度' },
{ type: 'rapidocr', name: 'RapidOCR', instance: new RapidOCRProvider(), typeDesc: '本地快速准确' },
{ type: 'baidu', name: 'Baidu OCR', instance: new BaiduProvider(), typeDesc: '云端准确' },
{ type: 'tesseract', name: 'Tesseract.js', instance: new TesseractProvider(), typeDesc: '本地轻量' },
@@ -0,0 +1,157 @@
/**
* PaddleOCR Provider
* 特点:高精度、多语言支持、本地运行
* 基于 PaddlePaddle 深度学习框架
*
* 部署方式:
* 1. 使用 Docker: docker run -p 8866:8866 987846/paddleocr:latest
*
* GitHub: https://github.com/PaddlePaddle/PaddleOCR
* Docker Hub: https://hub.docker.com/r/paddlepaddle/paddleocr
*/
import { BaseOCRProvider, IImageSource, OCRRecognitionResult, OCRProviderConfig } from './base.provider';
import fs from 'fs';
/**
 * JSON body this provider expects back from `POST {apiUrl}/predict/ocr_system`.
 */
interface PaddleOCRResponse {
// Human-readable message; included in the thrown error when `status` signals failure.
msg: string;
// One inner array per submitted image; this provider sends a single image and reads `results[0]`.
results: Array<Array<{
// Not read by this provider. Presumably the corner points of the detected text region — TODO confirm.
boxes: number[][];
// Recognised text of one detected region; regions are joined with newlines.
rec_text: string;
// Per-region score; averaged across regions to produce the overall confidence.
rec_score: number;
}>>;
// Status code as a string; '000' and '200' are treated as success.
status: string;
}
/**
 * Request body sent to `POST {apiUrl}/predict/ocr_system`.
 */
interface PaddleOCRRequest {
// Base64-encoded image payloads (no `data:` URL prefix).
images: string[];
}
/**
 * OCR provider that delegates recognition to a PaddleOCR HTTP service.
 *
 * The service base URL is taken from, in order: `config.apiUrl`, the
 * `PADDLEOCR_API_URL` environment variable, then `http://localhost:8866`.
 */
export class PaddleOCRProvider extends BaseOCRProvider {
  private apiUrl: string;

  /**
   * @param config Provider configuration; `apiUrl` optionally overrides the service base URL.
   */
  constructor(config: OCRProviderConfig & { apiUrl?: string } = {}) {
    super(config);
    this.apiUrl = config.apiUrl || process.env.PADDLEOCR_API_URL || 'http://localhost:8866';
  }

  /** Display name of this provider. */
  getName(): string {
    return 'PaddleOCR';
  }

  /** This provider is classified as a local (non-cloud) service. */
  getType(): 'local' | 'cloud' {
    return 'local';
  }

  /**
   * Probe the PaddleOCR service root with a 3 second timeout.
   *
   * @returns `true` when the probe answers with HTTP 200, `false` for any other
   *          status, and `true` when the probe itself fails (see note below).
   */
  async isAvailable(): Promise<boolean> {
    try {
      const response = await fetch(`${this.apiUrl}/`, { signal: AbortSignal.timeout(3000) });
      return response.status === 200;
    } catch {
      // Deliberate best-effort kept from the original author: the service is
      // expected to run inside the Docker network, so a failed probe is still
      // reported as available.
      // NOTE(review): this reports an unreachable service as available while a
      // reachable non-200 service is reported as unavailable — confirm intent.
      return true;
    }
  }

  /**
   * Run OCR on one image.
   *
   * @param source  Image given as base64, buffer, or path.
   * @param options Per-call overrides; `timeout` (ms) takes precedence over the
   *                provider config, falling back to 30000.
   * @returns Recognised text (regions joined by newlines, trimmed), the mean
   *          region score as confidence, and the elapsed time in milliseconds.
   * @throws Error `'timeout'` when the request exceeds the timeout, an error on a
   *         non-2xx HTTP response, or an error when the service reports a
   *         status other than `'000'` / `'200'`.
   */
  async recognize(
    source: IImageSource,
    options?: OCRProviderConfig
  ): Promise<OCRRecognitionResult> {
    const startTime = Date.now();
    const imageBase64 = await this.getImageBase64(source);
    const timeoutMs = options?.timeout || this.config.timeout || 30000;

    // The controller lets us cancel the HTTP request if the timeout wins the
    // race, instead of leaving it running in the background.
    const controller = new AbortController();
    const requestBody: PaddleOCRRequest = { images: [imageBase64] };

    let response: Response;
    try {
      response = await this.withTimeout(
        fetch(`${this.apiUrl}/predict/ocr_system`, {
          method: 'POST',
          headers: { 'Content-Type': 'application/json' },
          body: JSON.stringify(requestBody),
          signal: controller.signal,
        }),
        timeoutMs
      );
    } catch (error) {
      // No-op when fetch already failed on its own; cancels it after a timeout.
      controller.abort();
      throw error;
    }

    // A proxy or crashed service may answer with a non-JSON error page; fail
    // with the HTTP status rather than an opaque JSON parse error.
    if (!response.ok) {
      throw new Error(`PaddleOCR 错误: HTTP ${response.status}`);
    }

    const data = (await response.json()) as PaddleOCRResponse;
    const duration = Date.now() - startTime;

    if (data.status !== '000' && data.status !== '200') {
      throw new Error(`PaddleOCR 错误: ${data.msg || data.status}`);
    }

    // Only one image was submitted, so only the first result list is relevant.
    // Guard against a success response that omits `results`.
    const ocrResults = Array.isArray(data.results) ? data.results[0] ?? [] : [];
    const text = ocrResults.map((r) => r.rec_text).join('\n');

    // Mean of the per-region scores; 0 when nothing was recognised.
    const confidence = ocrResults.length > 0
      ? ocrResults.reduce((acc, r) => acc + (r.rec_score || 0), 0) / ocrResults.length
      : 0;

    return {
      text: text.trim(),
      confidence,
      duration,
      extra: {
        provider: 'paddleocr',
        textCount: ocrResults.length,
      },
    };
  }

  /** Static usage hints: size limit, accepted formats, and a short note. */
  getRecommendations() {
    return {
      maxImageSize: 10 * 1024 * 1024,
      supportedFormats: ['jpg', 'jpeg', 'png', 'webp', 'bmp'],
      notes: 'PaddleOCR 是百度开源的 OCR 工具,支持多语言识别,准确率高。需要先启动 PaddleOCR 服务。',
    };
  }

  /**
   * Convert an image source to a bare base64 string.
   *
   * Sources are tried in the order `base64`, `buffer`, `path`.
   *
   * @throws Error when none of the three fields is set.
   */
  private async getImageBase64(source: IImageSource): Promise<string> {
    if (source.base64) {
      // Strip a leading data-URL prefix, if present.
      return source.base64.replace(/^data:image\/\w+;base64,/, '');
    }

    if (source.buffer) {
      return source.buffer.toString('base64');
    }

    if (source.path) {
      // Path resolution is delegated to the base class.
      const fullPath = this.resolveImagePath(source.path);
      // Read asynchronously so a large image does not block the event loop.
      const buffer = await fs.promises.readFile(fullPath);
      return buffer.toString('base64');
    }

    throw new Error('无效的图片来源');
  }

  /**
   * Reject with `Error('timeout')` if `promise` has not settled within `timeout` ms.
   *
   * The timer is always cleared once the race settles so it cannot linger
   * after a fast response.
   */
  private async withTimeout<T>(promise: Promise<T>, timeout: number): Promise<T> {
    let timer: ReturnType<typeof setTimeout> | undefined;
    const expiry = new Promise<never>((_, reject) => {
      timer = setTimeout(() => reject(new Error('timeout')), timeout);
    });

    try {
      return await Promise.race([promise, expiry]);
    } finally {
      clearTimeout(timer);
    }
  }
}
// 导出单例实例
export const paddleocrProvider = new PaddleOCRProvider();
@@ -54,9 +54,10 @@ export class RapidOCRProvider extends BaseOCRProvider {
*/
async isAvailable(): Promise<boolean> {
try {
const response = await fetch(`${this.apiUrl}/health`, {
const response = await fetch(`${this.apiUrl}/`, {
signal: AbortSignal.timeout(2000),
});
// RapidOCR 返回 {"message":"Welcome to RapidOCR Server!"}
return response.ok;
} catch {
return false;