feat(ocr): 集成 PaddleOCR 服务并优化 OCR 系统

- 新增 PaddleOCR 本地高精度 OCR 服务支持,包括 Dockerfile、API 服务和 provider 实现
- 在 docker-compose 中集成 RapidOCR 和 PaddleOCR 服务,并配置健康检查
- 优化后端 API 路由前缀,移除 `/api` 以简化代理配置
- 更新 Nginx 配置以正确传递请求头和代理 WebSocket 连接
- 在前端设置页面添加 PaddleOCR 和 RapidOCR 的测试与配置选项
- 修复后端 Dockerfile 以支持 Python 原生模块构建
- 更新 OCR 设置指南,反映当前服务状态和部署方式
- 添加上传文件调试日志和权限设置
This commit is contained in:
congsh
2026-02-27 18:43:07 +08:00
parent 764c6a8c0c
commit 9a301cc434
17 changed files with 628 additions and 85 deletions
+42
View File
@@ -0,0 +1,42 @@
# PaddleOCR Service Dockerfile
# Built from the plain Python base image to avoid CPU instruction-set compatibility problems
FROM python:3.10-slim
WORKDIR /app
# Install system dependencies (package names follow the newer Debian Trixie naming)
RUN apt-get update && apt-get install -y \
    libgomp1 \
    libglib2.0-0 \
    libsm6 \
    libxext6 \
    libxrender-dev \
    libgl1 \
    git \
    wget \
    && rm -rf /var/lib/apt/lists/*
# Copy requirements
COPY requirements.txt .
# Install Python dependencies
# PaddlePaddle installed through pip adapts to the CPU instruction set automatically
RUN pip install --no-cache-dir paddlepaddle==2.6.0 \
    && pip install --no-cache-dir -r requirements.txt
# Clone the PaddleOCR repository.
# The checkout is placed first on PYTHONPATH below, so it takes precedence over
# the pip-installed paddleocr package. Pin it to the 2.7 release branch so the
# code that actually gets imported matches the pinned paddleocr==2.7.0 and the
# image stays reproducible; --depth 1 avoids downloading the full history.
RUN git clone --depth 1 --branch release/2.7 \
    https://github.com/PaddlePaddle/PaddleOCR.git /PaddleOCR
# Set up the environment
ENV PYTHONPATH=/PaddleOCR:$PYTHONPATH
ENV HOME=/root
# Copy the API service code
COPY paddleocr_api.py /app/paddleocr_api.py
# Expose the service port
EXPOSE 8866
# Start the API service
CMD ["python", "/app/paddleocr_api.py"]
+166
View File
@@ -0,0 +1,166 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
PaddleOCR HTTP API Service

Flask application that exposes a single, shared PaddleOCR engine over HTTP.
"""
from flask import Flask, request, jsonify
from paddleocr import PaddleOCR
import base64
import io
from PIL import Image
import logging
# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Initialize PaddleOCR once at import time; the request handlers in this
# module all reuse this single engine instance.
ocr = PaddleOCR(
use_angle_cls=True,
lang='ch',
use_gpu=False,  # run inference on the CPU
show_log=False
)
# Flask application object that the route decorators below register on.
app = Flask(__name__)
@app.route('/', methods=['GET'])
def index():
    """Health check: report that the service is up and list its endpoints.

    Returns:
        A JSON response containing a status message, the version string and a
        mapping of every route served by this module to a short description.
    """
    return jsonify({
        "message": "PaddleOCR Server is running!",
        "version": "2.7.0",
        "endpoints": {
            "/": "GET - 健康检查",
            "/ocr/scan": "POST - OCR 识别",
            # Previously missing from the listing although the route exists.
            "/ocr/text": "POST - OCR 识别 (仅返回文本)"
        }
    })
def _load_request_image():
    """Decode the image carried by the current JSON request.

    The request body must be a JSON object whose ``image`` field is a
    Base64 string, optionally prefixed with a ``data:image/...;base64,``
    data-URL header.

    Returns:
        A ``(image, error)`` pair. On success ``image`` is a contiguous
        ``numpy.ndarray`` in BGR channel order and ``error`` is ``None``.
        On a client error ``image`` is ``None`` and ``error`` is a
        ``(response, 400)`` tuple that the view can return directly.
    """
    # Local import keeps this block self-contained; numpy is already a
    # dependency of the service (it is what the OCR engine consumes).
    import numpy as np

    def _bad_request(message):
        return None, (jsonify({"success": False, "error": message}), 400)

    # silent=True turns a missing/invalid JSON body into None instead of an
    # HTTP exception, so it is reported as a normal 400 below rather than
    # being swallowed by the caller's generic 500 handler.
    data = request.get_json(silent=True)
    if not isinstance(data, dict) or 'image' not in data:
        return _bad_request("Missing image data")

    image_data = data['image']
    if not isinstance(image_data, str):
        return _bad_request("Invalid image format")

    if image_data.startswith('data:image'):
        # partition() never raises, unlike split(',')[1] on a header that
        # has no comma; a malformed data URL simply yields an empty payload.
        image_data = image_data.partition(',')[2]

    try:
        image_bytes = base64.b64decode(image_data)
        image = Image.open(io.BytesIO(image_bytes))
        # Normalise palette / alpha / grayscale inputs to 3-channel RGB.
        rgb = np.asarray(image.convert('RGB'))
    except (ValueError, OSError) as exc:
        # binascii.Error (bad Base64) is a ValueError; unreadable or
        # truncated images surface from Pillow as OSError subclasses.
        logger.warning("Rejected undecodable image: %s", exc)
        return _bad_request("Invalid image format")

    # PaddleOCR.ocr() accepts ndarray/list/str/bytes but not a PIL Image, and
    # its pipeline works on OpenCV-style BGR arrays, so reverse the channels.
    return np.ascontiguousarray(rgb[:, :, ::-1]), None


@app.route('/ocr/scan', methods=['POST'])
def ocr_scan():
    """OCR endpoint returning every recognised line with confidence and box.

    Returns:
        200 with ``{"success": True, "data": {"texts": [...], "fullText": str}}``
        when text is found; 200 with ``success: False`` and
        ``"No text detected"`` when nothing is recognised; 400 for a missing
        or undecodable image; 500 for unexpected failures.
    """
    try:
        image, error = _load_request_image()
        if error is not None:
            return error

        # Run OCR
        result = ocr.ocr(image, cls=True)
        if not (result and result[0]):
            return jsonify({
                "success": False,
                "error": "No text detected"
            }), 200

        # Each entry is [box, (text, confidence)].
        texts = [
            {
                "text": line[1][0],
                "confidence": float(line[1][1]),
                "box": line[0]
            }
            for line in result[0]
        ]
        all_text = "\n".join(item["text"] for item in texts)
        return jsonify({
            "success": True,
            "data": {
                "texts": texts,
                "fullText": all_text
            }
        })
    except Exception as e:
        # Top-level boundary: log with traceback and report a 500.
        logger.exception("OCR Error: %s", e)
        return jsonify({
            "success": False,
            "error": str(e)
        }), 500


@app.route('/ocr/text', methods=['POST'])
def ocr_text():
    """Simplified OCR endpoint that returns only the recognised text.

    Returns:
        200 with ``{"success": True, "data": {"text": str, "lines": [str]}}``
        (empty values when nothing is recognised); 400 for a missing or
        undecodable image; 500 for unexpected failures.
    """
    try:
        image, error = _load_request_image()
        if error is not None:
            return error

        # Run OCR
        result = ocr.ocr(image, cls=True)

        # Extract the text of each line; no detections yields empty output.
        if result and result[0]:
            lines = [line[1][0] for line in result[0]]
        else:
            lines = []
        return jsonify({
            "success": True,
            "data": {
                "text": "\n".join(lines),
                "lines": lines
            }
        })
    except Exception as e:
        # Top-level boundary: log with traceback and report a 500.
        logger.exception("OCR Error: %s", e)
        return jsonify({
            "success": False,
            "error": str(e)
        }), 500
if __name__ == '__main__':
    # Script entry point: serve the Flask app on every interface, port 8866,
    # with Flask's debug mode switched off.
    server_options = {"host": '0.0.0.0', "port": 8866, "debug": False}
    logger.info("Starting PaddleOCR API server on port 8866...")
    app.run(**server_options)
+5
View File
@@ -0,0 +1,5 @@
# Python dependencies for the PaddleOCR HTTP API service.
# NOTE(review): paddlepaddle itself is not listed here; presumably it is
# installed separately by the image build — confirm before reusing this file.
paddleocr==2.7.0
protobuf>=3.20.2
flask==2.3.0
pillow==10.0.0
numpy<2.0.0