5c028d7952
包含 FastAPI 后端、React 前端、队列/OCR/标签/待办等完整功能。 Co-authored-by: Cursor <cursoragent@cursor.com>
44 lines
1.4 KiB
Python
44 lines
1.4 KiB
Python
"""PaddleOCR 本地 OCR(可选依赖)。"""
|
|
from __future__ import annotations
|
|
|
|
import asyncio
|
|
from pathlib import Path
|
|
|
|
from .base import OCRProvider
|
|
|
|
|
|
class PaddleOCRProvider(OCRProvider):
    """Recognize text locally with PaddleOCR.

    Requires the optional packages: ``pip install paddleocr paddlepaddle``.
    The ``paddleocr`` import and the engine construction are deferred to the
    first recognition call, so creating a provider never imports PaddleOCR.
    """

    name = "paddleocr"

    def __init__(self, lang: str = "ch") -> None:
        """Remember the recognition language.

        Args:
            lang: Language code forwarded to ``PaddleOCR(lang=...)``.
        """
        self.lang = lang
        # Built lazily by ``_sync_recognize`` and reused by later calls.
        self._engine = None

    async def recognize(self, image_path: Path) -> str:
        """Run OCR on ``image_path`` in a worker thread.

        Args:
            image_path: Path of the file handed to the PaddleOCR engine.

        Returns:
            The recognized text, one detected line per output line, with
            leading and trailing whitespace stripped.

        Raises:
            RuntimeError: If the ``paddleocr`` package cannot be imported.
        """
        return await asyncio.to_thread(self._sync_recognize, image_path)

    def _sync_recognize(self, image_path: Path) -> str:
        """Blocking implementation behind :meth:`recognize`.

        Args:
            image_path: Path of the file handed to the PaddleOCR engine.

        Returns:
            Recognized text joined with newlines and stripped; an empty
            string when nothing was recognized.

        Raises:
            RuntimeError: If the ``paddleocr`` package cannot be imported.
        """
        try:
            from paddleocr import PaddleOCR  # type: ignore
        except ImportError as exc:
            raise RuntimeError(
                "未安装 PaddleOCR,请执行: pip install paddleocr paddlepaddle"
            ) from exc

        if self._engine is None:
            # NOTE(review): these keyword arguments (and ``cls=True`` below)
            # follow the PaddleOCR 2.x API -- confirm the installed version
            # still accepts them.
            self._engine = PaddleOCR(
                use_angle_cls=True, lang=self.lang, show_log=False
            )

        result = self._engine.ocr(str(image_path), cls=True)
        return "\n".join(self._collect_text(result)).strip()

    @staticmethod
    def _collect_text(result: list | None) -> list[str]:
        """Extract the recognized strings from a raw ``ocr()`` result.

        The result is treated as a list of pages, each page being a list of
        entries whose second item carries the text. Every page is read, so
        text on pages after the first is kept rather than discarded.

        Args:
            result: Value returned by ``engine.ocr(...)``; may be ``None`` or
                contain ``None``/empty pages.

        Returns:
            The recognized strings in the order the engine reported them.
        """
        texts: list[str] = []
        for page in result or ():
            if not page:
                # A page with no detections is reported as None or empty.
                continue
            for entry in page:
                if not entry or len(entry) < 2:
                    continue
                text_part = entry[1]
                if isinstance(text_part, (list, tuple)) and text_part:
                    # Sequence form: the text is the first element.
                    texts.append(str(text_part[0]))
                elif isinstance(text_part, str):
                    texts.append(text_part)
        return texts
|