feat(ocr): 集成 PaddleOCR 服务并优化 OCR 系统
- 新增 PaddleOCR 本地高精度 OCR 服务支持,包括 Dockerfile、API 服务和 provider 实现
- 在 docker-compose 中集成 RapidOCR 和 PaddleOCR 服务,并配置健康检查
- 优化后端 API 路由前缀,移除 `/api` 以简化代理配置
- 更新 Nginx 配置以正确传递请求头和代理 WebSocket 连接
- 在前端设置页面添加 PaddleOCR 和 RapidOCR 的测试与配置选项
- 修复后端 Dockerfile 以支持 Python 原生模块构建
- 更新 OCR 设置指南,反映当前服务状态和部署方式
- 添加上传文件调试日志和权限设置
This commit is contained in:
+2
-1
@@ -3,7 +3,8 @@
|
||||
# ========================================
|
||||
FROM node:20-alpine AS deps
|
||||
|
||||
RUN apk add --no-cache libc6-compat
|
||||
# Add Python and build tools for native modules (bcrypt, etc.)
|
||||
RUN apk add --no-cache libc6-compat python3 make g++
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
|
||||
@@ -4,6 +4,11 @@
|
||||
mkdir -p /app/data
|
||||
chown -R nodejs:nodejs /app/data
|
||||
|
||||
# Ensure uploads directory exists with proper permissions (after volume mount)
|
||||
mkdir -p /app/uploads
|
||||
chown -R nodejs:nodejs /app/uploads
|
||||
chmod 755 /app/uploads
|
||||
|
||||
# Set database path to data directory
|
||||
export DATABASE_URL="file:/app/data/prod.db"
|
||||
|
||||
@@ -13,10 +18,14 @@ npx prisma db push --skip-generate || echo "Database push failed, will try on st
|
||||
|
||||
# Fix database file permissions after creation
|
||||
if [ -f /app/data/prod.db ]; then
|
||||
chown nodejs:nodejs /app/data/prod.db
|
||||
chmod 664 /app/data/prod.db
|
||||
chown nodejs:nodejs /app/data/prod.db
|
||||
chmod 664 /app/data/prod.db
|
||||
fi
|
||||
|
||||
# Log uploads directory status
|
||||
echo "Uploads directory status:"
|
||||
ls -la /app/uploads || echo "Uploads directory does not exist"
|
||||
|
||||
# Start the application as nodejs user
|
||||
echo "Starting application..."
|
||||
exec su-exec nodejs npx tsx src/index.ts
|
||||
|
||||
@@ -20,6 +20,14 @@ export class ImageController {
|
||||
const file = req.file;
|
||||
const { document_id } = req.body;
|
||||
|
||||
console.log('[UPLOAD] File received:', {
|
||||
originalname: file?.originalname,
|
||||
filename: file?.filename,
|
||||
path: file?.path,
|
||||
size: file?.size,
|
||||
mimetype: file?.mimetype,
|
||||
});
|
||||
|
||||
if (!file) {
|
||||
res.status(400).json({
|
||||
success: false,
|
||||
|
||||
@@ -29,16 +29,16 @@ app.use(express.urlencoded({ extended: true }));
|
||||
app.use('/uploads', express.static(path.join(process.cwd(), 'uploads')));
|
||||
|
||||
// Health check
|
||||
app.get('/api/health', (_req, res) => {
|
||||
app.get('/health', (_req, res) => {
|
||||
res.json({ success: true, message: 'API is running' });
|
||||
});
|
||||
|
||||
// Routes
|
||||
app.use('/api/auth', authRoutes);
|
||||
app.use('/api/documents', documentRoutes);
|
||||
app.use('/api/todos', todoRoutes);
|
||||
app.use('/api/images', imageRoutes);
|
||||
app.use('/api/user', userRoutes);
|
||||
app.use('/auth', authRoutes);
|
||||
app.use('/documents', documentRoutes);
|
||||
app.use('/todos', todoRoutes);
|
||||
app.use('/images', imageRoutes);
|
||||
app.use('/user', userRoutes);
|
||||
|
||||
// 404 handler
|
||||
app.use((_req, res) => {
|
||||
|
||||
@@ -54,6 +54,40 @@ router.get('/ocr/providers', authenticate, async (_req, res) => {
|
||||
}
|
||||
});
|
||||
|
||||
/**
 * @route POST /api/images/ocr/test
 * @desc Run the selected OCR provider against an uploaded test image
 * @access Private
 * @body { provider: 'tesseract' | 'baidu' | 'rapidocr' | 'paddleocr' }
 *
 * Responds with whatever OCRProcessorService.testProvider resolves to,
 * 400 when no file was uploaded, and 500 with the error message otherwise.
 */
router.post('/ocr/test', authenticate, upload.single('file'), async (req, res) => {
  try {
    // Read the provider first so a missing body fails the same way as before.
    const providerType = req.body.provider as OCRProviderType;
    const uploaded = req.file;

    if (uploaded) {
      // Delegate the actual recognition to OCRProcessorService.
      const outcome = await OCRProcessorService.testProvider(providerType, uploaded.path);
      res.json(outcome);
    } else {
      res.status(400).json({ success: false, error: '请上传测试图片' });
    }
  } catch (err) {
    res.status(500).json({
      success: false,
      error: err instanceof Error ? err.message : 'OCR 测试失败',
    });
  }
});
|
||||
|
||||
/**
|
||||
* @route GET /api/images/:id
|
||||
* @desc Get image by ID
|
||||
|
||||
@@ -8,15 +8,17 @@ export type { IImageSource, OCRRecognitionResult, OCRProviderConfig } from './ba
|
||||
export { TesseractProvider, tesseractProvider } from './tesseract.provider';
|
||||
export { BaiduProvider, baiduProvider } from './baidu.provider';
|
||||
export { RapidOCRProvider, rapidocrProvider } from './rapidocr.provider';
|
||||
export { PaddleOCRProvider, paddleocrProvider } from './paddleocr.provider';
|
||||
|
||||
import { TesseractProvider } from './tesseract.provider';
|
||||
import { BaiduProvider } from './baidu.provider';
|
||||
import { RapidOCRProvider } from './rapidocr.provider';
|
||||
import { PaddleOCRProvider } from './paddleocr.provider';
|
||||
|
||||
/**
|
||||
* OCR Provider 类型
|
||||
*/
|
||||
export type OCRProviderType = 'tesseract' | 'baidu' | 'rapidocr' | 'auto';
|
||||
export type OCRProviderType = 'tesseract' | 'baidu' | 'rapidocr' | 'paddleocr' | 'auto';
|
||||
|
||||
/**
|
||||
* OCR Provider 工厂
|
||||
@@ -27,6 +29,7 @@ export class OCRProviderFactory {
|
||||
tesseract: TesseractProvider,
|
||||
baidu: BaiduProvider,
|
||||
rapidocr: RapidOCRProvider,
|
||||
paddleocr: PaddleOCRProvider,
|
||||
};
|
||||
|
||||
/**
|
||||
@@ -35,7 +38,7 @@ export class OCRProviderFactory {
|
||||
static create(
|
||||
type: OCRProviderType,
|
||||
config?: any
|
||||
): TesseractProvider | BaiduProvider | RapidOCRProvider {
|
||||
): TesseractProvider | BaiduProvider | RapidOCRProvider | PaddleOCRProvider {
|
||||
if (type === 'auto') {
|
||||
// 自动选择可用的 provider
|
||||
return this.autoSelect();
|
||||
@@ -51,9 +54,9 @@ export class OCRProviderFactory {
|
||||
|
||||
/**
|
||||
* 自动选择可用的 provider
|
||||
* 优先级: RapidOCR > Tesseract > Baidu
|
||||
* 优先级: PaddleOCR > RapidOCR > Tesseract > Baidu
|
||||
*/
|
||||
private static autoSelect(): TesseractProvider | BaiduProvider | RapidOCRProvider {
|
||||
private static autoSelect(): TesseractProvider | BaiduProvider | RapidOCRProvider | PaddleOCRProvider {
|
||||
const envProvider = process.env.OCR_PROVIDER as OCRProviderType;
|
||||
|
||||
// 如果指定了 provider 且不是 auto,使用指定的
|
||||
@@ -63,6 +66,11 @@ export class OCRProviderFactory {
|
||||
}
|
||||
|
||||
// 检查可用性并选择
|
||||
// PaddleOCR (本地高精度)
|
||||
if (process.env.PADDLEOCR_API_URL) {
|
||||
return new PaddleOCRProvider();
|
||||
}
|
||||
|
||||
// RapidOCR (本地快速)
|
||||
if (process.env.RAPIDOCR_API_URL) {
|
||||
return new RapidOCRProvider();
|
||||
@@ -84,6 +92,7 @@ export class OCRProviderFactory {
|
||||
Array<{ type: string; name: string; available: boolean; typeDesc: string }>
|
||||
> {
|
||||
const providers = [
|
||||
{ type: 'paddleocr', name: 'PaddleOCR', instance: new PaddleOCRProvider(), typeDesc: '本地高精度' },
|
||||
{ type: 'rapidocr', name: 'RapidOCR', instance: new RapidOCRProvider(), typeDesc: '本地快速准确' },
|
||||
{ type: 'baidu', name: 'Baidu OCR', instance: new BaiduProvider(), typeDesc: '云端准确' },
|
||||
{ type: 'tesseract', name: 'Tesseract.js', instance: new TesseractProvider(), typeDesc: '本地轻量' },
|
||||
|
||||
@@ -0,0 +1,157 @@
|
||||
/**
|
||||
* PaddleOCR Provider
|
||||
* 特点:高精度、多语言支持、本地运行
|
||||
* 基于 PaddlePaddle 深度学习框架
|
||||
*
|
||||
* 部署方式:
|
||||
* 1. 使用 Docker: docker run -p 8866:8866 987846/paddleocr:latest
|
||||
*
|
||||
* GitHub: https://github.com/PaddlePaddle/PaddleOCR
|
||||
* Docker Hub: https://hub.docker.com/r/paddlepaddle/paddleocr
|
||||
*/
|
||||
|
||||
import { BaseOCRProvider, IImageSource, OCRRecognitionResult, OCRProviderConfig } from './base.provider';
|
||||
import fs from 'fs';
|
||||
|
||||
/**
 * JSON payload returned by the PaddleOCR HTTP service.
 */
interface PaddleOCRResponse {
  /** Message from the service; used as the error text when the call fails. */
  msg: string;
  /**
   * Recognition results; only the first entry is read because a single image
   * is submitted per request. Typed as optional so that a payload without it
   * is handled as "no text" instead of crashing.
   */
  results?: Array<Array<{
    boxes: number[][];
    rec_text: string;
    rec_score: number;
  }>>;
  /** Status code as a string; '000' and '200' are treated as success. */
  status: string;
}

/**
 * JSON body sent to the PaddleOCR HTTP service.
 */
interface PaddleOCRRequest {
  /** Base64-encoded images (without a data-URL prefix). */
  images: string[];
}

/**
 * OCR provider that delegates recognition to a PaddleOCR HTTP service.
 *
 * The service URL is taken from `config.apiUrl`, then the
 * `PADDLEOCR_API_URL` environment variable, then `http://localhost:8866`.
 */
export class PaddleOCRProvider extends BaseOCRProvider {
  private readonly apiUrl: string;

  constructor(config: OCRProviderConfig & { apiUrl?: string } = {}) {
    super(config);
    this.apiUrl = config.apiUrl || process.env.PADDLEOCR_API_URL || 'http://localhost:8866';
  }

  getName(): string {
    return 'PaddleOCR';
  }

  getType(): 'local' | 'cloud' {
    return 'local';
  }

  /**
   * Probe the PaddleOCR service root with a 3 second timeout.
   *
   * @returns `true` when the probe answers with HTTP 200, `false` for any
   *   other status, and `true` when the probe itself fails.
   */
  async isAvailable(): Promise<boolean> {
    try {
      const response = await fetch(`${this.apiUrl}/`, { signal: AbortSignal.timeout(3000) });
      return response.status === 200;
    } catch {
      // Deliberately fail-open: the original author assumes the service is
      // reachable inside the Docker network even when the probe fails.
      // NOTE(review): this reports "available" for an unreachable service,
      // while a non-200 answer reports "unavailable" — confirm this asymmetry
      // is intended.
      return true;
    }
  }

  /**
   * Run OCR on one image.
   *
   * @param source Image given as base64, buffer or path (checked in that order).
   * @param options Per-call overrides; `timeout` (ms) takes precedence over the
   *   provider config, falling back to 30000.
   * @returns Recognized lines joined with '\n', the mean `rec_score`, and the
   *   elapsed time in milliseconds.
   * @throws Error('timeout') when the service does not answer in time.
   * @throws Error prefixed with 'PaddleOCR 错误' when the service reports a
   *   failure or returns an unusable response.
   */
  async recognize(
    source: IImageSource,
    options?: OCRProviderConfig
  ): Promise<OCRRecognitionResult> {
    const startTime = Date.now();

    const imageBase64 = await this.getImageBase64(source);
    const timeoutMs = options?.timeout || this.config.timeout || 30000;

    // The controller lets the timeout actually cancel the HTTP request
    // instead of merely abandoning it.
    const controller = new AbortController();
    const { ok, httpStatus, data } = await this.withTimeout(
      this.postImage(imageBase64, controller.signal),
      timeoutMs,
      () => controller.abort()
    );
    const duration = Date.now() - startTime;

    if (!ok || !data || (data.status !== '000' && data.status !== '200')) {
      throw new Error(`PaddleOCR 错误: ${data?.msg || data?.status || `HTTP ${httpStatus}`}`);
    }

    // A missing result list is treated as "nothing recognized".
    const ocrResults = data.results?.[0] ?? [];
    const text = ocrResults.map((r) => r.rec_text).join('\n');

    // Mean confidence over all recognized lines; 0 when nothing was found.
    const confidence = ocrResults.length > 0
      ? ocrResults.reduce((acc, r) => acc + (r.rec_score || 0), 0) / ocrResults.length
      : 0;

    return {
      text: text.trim(),
      confidence,
      duration,
      extra: {
        provider: 'paddleocr',
        textCount: ocrResults.length,
      },
    };
  }

  getRecommendations() {
    return {
      maxImageSize: 10 * 1024 * 1024,
      supportedFormats: ['jpg', 'jpeg', 'png', 'webp', 'bmp'],
      notes: 'PaddleOCR 是百度开源的 OCR 工具,支持多语言识别,准确率高。需要先启动 PaddleOCR 服务。',
    };
  }

  /**
   * POST one base64 image to the service and parse the JSON answer.
   * Reading the body happens here so the caller's timeout covers it too.
   */
  private async postImage(
    imageBase64: string,
    signal: AbortSignal
  ): Promise<{ ok: boolean; httpStatus: number; data: PaddleOCRResponse | null }> {
    const payload: PaddleOCRRequest = { images: [imageBase64] };
    const response = await fetch(`${this.apiUrl}/predict/ocr_system`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(payload),
      signal,
    });

    try {
      const data = (await response.json()) as PaddleOCRResponse | null;
      return { ok: response.ok, httpStatus: response.status, data };
    } catch {
      // Non-JSON body (e.g. a proxy error page): report the HTTP status
      // rather than an opaque JSON parse error.
      throw new Error(`PaddleOCR 错误: 无效的响应 (HTTP ${response.status})`);
    }
  }

  /**
   * Return the image as raw base64 (no data-URL prefix).
   *
   * @throws Error when the source has neither base64, buffer nor path.
   */
  private async getImageBase64(source: IImageSource): Promise<string> {
    if (source.base64) {
      // Strip a leading data-URL prefix if present.
      return source.base64.replace(/^data:image\/\w+;base64,/, '');
    }

    if (source.buffer) {
      return source.buffer.toString('base64');
    }

    if (source.path) {
      // Path resolution is delegated to the base class; the file is read
      // asynchronously so the event loop is not blocked.
      const fullPath = this.resolveImagePath(source.path);
      const buffer = await fs.promises.readFile(fullPath);
      return buffer.toString('base64');
    }

    throw new Error('无效的图片来源');
  }

  /**
   * Reject with Error('timeout') if `promise` has not settled after
   * `timeout` milliseconds.
   *
   * @param promise Work to wait for.
   * @param timeout Limit in milliseconds.
   * @param onTimeout Optional hook invoked when the limit is hit (used to
   *   abort the underlying request).
   */
  private async withTimeout<T>(
    promise: Promise<T>,
    timeout: number,
    onTimeout?: () => void
  ): Promise<T> {
    let timer: ReturnType<typeof setTimeout> | undefined;
    const expiry = new Promise<never>((_, reject) => {
      timer = setTimeout(() => {
        // Reject first so callers see 'timeout' rather than the abort error.
        reject(new Error('timeout'));
        onTimeout?.();
      }, timeout);
    });

    try {
      return await Promise.race([promise, expiry]);
    } finally {
      // Always release the timer so it cannot outlive the request.
      clearTimeout(timer);
    }
  }
}

// Shared singleton instance
export const paddleocrProvider = new PaddleOCRProvider();
|
||||
@@ -54,9 +54,10 @@ export class RapidOCRProvider extends BaseOCRProvider {
|
||||
*/
|
||||
async isAvailable(): Promise<boolean> {
|
||||
try {
|
||||
const response = await fetch(`${this.apiUrl}/health`, {
|
||||
const response = await fetch(`${this.apiUrl}/`, {
|
||||
signal: AbortSignal.timeout(2000),
|
||||
});
|
||||
// RapidOCR 返回 {"message":"Welcome to RapidOCR Server!"}
|
||||
return response.ok;
|
||||
} catch {
|
||||
return false;
|
||||
|
||||
Reference in New Issue
Block a user