Initial commit: snapAna 截图智能整理工具
包含 FastAPI 后端、React 前端、队列/OCR/标签/待办等完整功能。 Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -0,0 +1,52 @@
|
||||
"""视觉大模型 OCR:用多模态 API 从截图中提取文字。"""
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
from .base import OCRProvider
|
||||
from .openai_vision_client import chat_completions, safe_parse_json
|
||||
|
||||
|
||||
# System prompt sent with every vision OCR request: it asks the model to
# extract as much text as possible from the screenshot and to reply with JSON
# only, shaped {"text": "..."} (line breaks kept), using an empty string for
# "text" when nothing is recognizable.
_VISION_OCR_SYSTEM = """你是 OCR 助手。用户会给你一张截图,请尽可能完整地提取其中的文字。
|
||||
只输出 JSON,格式:{"text": "提取到的全部文字,保留换行"}
|
||||
如果没有可识别文字,text 填空字符串。"""
|
||||
|
||||
|
||||
class VisionOCR(OCRProvider):
    """OCR provider backed by an OpenAI-compatible vision model.

    Works with any endpoint speaking the OpenAI chat-completions dialect
    (GLM-4V / GPT-4o / Qwen-VL / Ollama, etc.).
    """

    name = "vision"

    def __init__(
        self,
        base_url: str,
        api_key: str,
        model: str,
        timeout: float = 60.0,
        allow_upload: bool = True,
    ) -> None:
        """Store the endpoint settings used by every ``recognize`` call.

        Args:
            base_url: Forwarded to ``chat_completions`` as ``base_url``.
            api_key: Forwarded to ``chat_completions`` as ``api_key``.
            model: Forwarded to ``chat_completions`` as ``model``.
            timeout: Forwarded to ``chat_completions`` as ``timeout``.
            allow_upload: When false, ``recognize`` refuses to run and raises
                ``RuntimeError`` before any request is made.
        """
        self.base_url = base_url
        self.api_key = api_key
        self.model = model
        self.timeout = timeout
        self.allow_upload = allow_upload

    async def recognize(self, image_path: Path) -> str:
        """Ask the vision model to extract the text from a screenshot.

        Args:
            image_path: Path of the image handed to ``chat_completions``.

        Returns:
            The extracted text with surrounding whitespace stripped. An empty
            string is returned when the model reports no recognizable text.
            If the reply does not carry a ``text`` / ``ocr_text`` field, the
            raw reply is returned (stripped) instead.

        Raises:
            RuntimeError: If this provider was created with
                ``allow_upload=False``.
        """
        if not self.allow_upload:
            raise RuntimeError("敏感目录禁止上传图片,无法使用视觉 OCR")

        content = await chat_completions(
            base_url=self.base_url,
            api_key=self.api_key,
            model=self.model,
            system_prompt=_VISION_OCR_SYSTEM,
            user_text="请提取这张截图中的所有文字。",
            image_path=image_path,
            # The guard above already rejected the upload-disabled case.
            allow_upload=True,
            timeout=self.timeout,
            json_mode=True,
        )
        return self._extract_text(safe_parse_json(content), content)

    @staticmethod
    def _extract_text(parsed: object, raw: object) -> str:
        """Pick the OCR text out of the parsed reply, falling back to ``raw``.

        The system prompt tells the model to answer ``{"text": ""}`` when the
        image has no legible text. That empty value is a valid answer and must
        not be mistaken for "field missing", otherwise the raw JSON string
        would be returned as if it were the recognized text.
        """
        # NOTE(review): safe_parse_json is assumed to return a dict; any other
        # shape is treated as "no structured answer" rather than crashing.
        if isinstance(parsed, dict):
            for key in ("text", "ocr_text"):
                value = parsed.get(key)
                if value:
                    return str(value).strip()
            # A known key is present but empty/null: the model found no text.
            if "text" in parsed or "ocr_text" in parsed:
                return ""
        return str(raw).strip()
|
||||
Reference in New Issue
Block a user