Initial commit: snapAna 截图智能整理工具
包含 FastAPI 后端、React 前端、队列/OCR/标签/待办等完整功能。 Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -0,0 +1,43 @@
|
||||
"""PaddleOCR 本地 OCR(可选依赖)。"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
from pathlib import Path
|
||||
|
||||
from .base import OCRProvider
|
||||
|
||||
|
||||
class PaddleOCRProvider(OCRProvider):
    """OCR provider that recognizes text locally through PaddleOCR.

    PaddleOCR is an optional dependency; install it with
    ``pip install paddleocr paddlepaddle``. The package is imported lazily
    and the engine is created on the first recognition call, so
    constructing this provider does not import PaddleOCR.
    """

    name = "paddleocr"

    def __init__(self, lang: str = "ch") -> None:
        """Store the language code; the engine is created on first use.

        Args:
            lang: Value forwarded as ``lang`` to the ``PaddleOCR``
                constructor.
        """
        # Populated by the first ``_sync_recognize`` call.
        self._engine = None
        self.lang = lang

    async def recognize(self, image_path: Path) -> str:
        """Recognize text in *image_path* without blocking the event loop.

        The synchronous OCR call is run in a worker thread via
        ``asyncio.to_thread``.

        Args:
            image_path: Path of the image to run OCR on.

        Returns:
            The recognized text, one detected line per row, stripped of
            leading/trailing whitespace; ``""`` when nothing is recognized.

        Raises:
            RuntimeError: If the ``paddleocr`` package cannot be imported.
        """
        recognized = await asyncio.to_thread(self._sync_recognize, image_path)
        return recognized

    def _sync_recognize(self, image_path: Path) -> str:
        """Run PaddleOCR synchronously and join the recognized lines.

        Args:
            image_path: Path of the image to run OCR on.

        Returns:
            Recognized lines joined with ``"\\n"`` and stripped.

        Raises:
            RuntimeError: If the ``paddleocr`` package cannot be imported.
        """
        try:
            from paddleocr import PaddleOCR  # type: ignore
        except ImportError as exc:
            message = "未安装 PaddleOCR，请执行: pip install paddleocr paddlepaddle"
            raise RuntimeError(message) from exc

        # Build the engine once and reuse it for later calls.
        # NOTE(review): this lazy init is not guarded by a lock; concurrent
        # first calls from worker threads may each build an engine — confirm
        # whether that matters for this deployment.
        if self._engine is None:
            self._engine = PaddleOCR(
                use_angle_cls=True,
                lang=self.lang,
                show_log=False,
            )

        pages = self._engine.ocr(str(image_path), cls=True)

        # Only the first element of the result is read; an empty or missing
        # first element means no text.
        if not pages or not pages[0]:
            return ""

        texts: list[str] = []
        for entry in pages[0]:
            # Each entry must carry at least two items; the second holds the
            # text (presumably ``(box, (text, score))`` — verify against the
            # installed PaddleOCR version).
            if not entry or len(entry) < 2:
                continue
            payload = entry[1]
            if isinstance(payload, str):
                texts.append(payload)
            elif isinstance(payload, (list, tuple)) and payload:
                texts.append(str(payload[0]))

        return "\n".join(texts).strip()
|
||||
Reference in New Issue
Block a user