Files
SnapAndAnaly/backend/app/providers/ocr_paddle.py
T
congsh 5c028d7952 Initial commit: snapAna 截图智能整理工具
包含 FastAPI 后端、React 前端、队列/OCR/标签/待办等完整功能。

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-05-27 15:45:50 +08:00

44 lines
1.4 KiB
Python

"""PaddleOCR 本地 OCR(可选依赖)。"""
from __future__ import annotations
import asyncio
from pathlib import Path
from .base import OCRProvider
class PaddleOCRProvider(OCRProvider):
    """Local OCR provider backed by PaddleOCR (optional dependency).

    Requires ``pip install paddleocr paddlepaddle``. The package is imported
    lazily inside the recognition call, and the engine object is cached on
    the instance after it is first constructed.
    """

    name = "paddleocr"

    def __init__(self, lang: str = "ch") -> None:
        """Remember the language code; no engine is built at this point."""
        self._engine = None
        self.lang = lang

    async def recognize(self, image_path: Path) -> str:
        """Return the text recognized in *image_path*.

        The blocking OCR work is handed to a worker thread through
        ``asyncio.to_thread`` so the coroutine only awaits its result.
        """
        text = await asyncio.to_thread(self._sync_recognize, image_path)
        return text

    def _sync_recognize(self, image_path: Path) -> str:
        """Run PaddleOCR synchronously and join the recognized lines.

        Returns:
            The recognized text fragments joined with newlines and stripped
            of surrounding whitespace; an empty string when the engine
            returns nothing usable.

        Raises:
            RuntimeError: if the ``paddleocr`` package cannot be imported.
        """
        try:
            from paddleocr import PaddleOCR  # type: ignore
        except ImportError as exc:
            message = "未安装 PaddleOCR,请执行: pip install paddleocr paddlepaddle"
            raise RuntimeError(message) from exc

        # Build the engine on first use and keep it for later calls.
        engine = self._engine
        if engine is None:
            engine = PaddleOCR(use_angle_cls=True, lang=self.lang, show_log=False)
            self._engine = engine

        pages = engine.ocr(str(image_path), cls=True)

        # Only the first element of the result is consulted
        # (presumably the detections for the single input image).
        if not pages or not pages[0]:
            return ""

        fragments: list[str] = []
        for entry in pages[0]:
            # Skip empty entries and entries without a second element.
            if not entry or len(entry) < 2:
                continue
            payload = entry[1]
            if isinstance(payload, str):
                fragments.append(payload)
            elif isinstance(payload, (list, tuple)) and payload:
                # First item of the sequence is taken as the text
                # (presumably a (text, confidence) pair — confirm against
                # the installed PaddleOCR version).
                fragments.append(str(payload[0]))
        return "\n".join(fragments).strip()