Initial commit: snapAna 截图智能整理工具

包含 FastAPI 后端、React 前端、队列/OCR/标签/待办等完整功能。 Co-authored-by: Cursor <cursoragent@cursor.com>
2026-05-27 15:45:50 +08:00
commit 5c028d7952
76 changed files with 10467 additions and 0 deletions
@@ -0,0 +1,52 @@
+"""视觉大模型 OCR：用多模态 API 从截图中提取文字。"""
+from __future__ import annotations
+
+from pathlib import Path
+
+from .base import OCRProvider
+from .openai_vision_client import chat_completions, safe_parse_json
+
+
+# System prompt passed to the vision model on every OCR request. It asks the
+# model to reply with a single JSON object of the form {"text": "..."} (line
+# breaks preserved) and to use an empty string when no text is recognisable.
+# VisionOCR.recognize reads the "text" key of the reply, so the prompt and the
+# parser must be kept in sync.
+_VISION_OCR_SYSTEM = """你是 OCR 助手。用户会给你一张截图，请尽可能完整地提取其中的文字。
+只输出 JSON，格式：{"text": "提取到的全部文字，保留换行"}
+如果没有可识别文字，text 填空字符串。"""
+
+
+class VisionOCR(OCRProvider):
+    """OCR provider backed by an OpenAI-compatible vision model.
+
+    Intended for multimodal chat endpoints such as GLM-4V, GPT-4o, Qwen-VL
+    or Ollama.
+    """
+
+    name = "vision"
+
+    def __init__(
+        self,
+        base_url: str,
+        api_key: str,
+        model: str,
+        timeout: float = 60.0,
+        allow_upload: bool = True,
+    ) -> None:
+        """Store the endpoint settings used by :meth:`recognize`.
+
+        Args:
+            base_url: Base URL forwarded to ``chat_completions``.
+            api_key: API key forwarded to ``chat_completions``.
+            model: Model identifier forwarded to ``chat_completions``.
+            timeout: Request timeout forwarded to ``chat_completions``.
+            allow_upload: When false, :meth:`recognize` refuses to send the
+                image and raises instead.
+        """
+        self.base_url = base_url
+        self.api_key = api_key
+        self.model = model
+        self.timeout = timeout
+        self.allow_upload = allow_upload
+
+    async def recognize(self, image_path: Path) -> str:
+        """Extract the text of a screenshot via the vision model.
+
+        Args:
+            image_path: Path of the image handed to ``chat_completions``.
+
+        Returns:
+            The recognised text with surrounding whitespace stripped. An
+            empty string is returned when the model reports no text. If the
+            reply contains neither a ``text`` nor an ``ocr_text`` key, the
+            raw reply is returned instead.
+
+        Raises:
+            RuntimeError: If uploads are disabled for this provider.
+        """
+        if not self.allow_upload:
+            raise RuntimeError("敏感目录禁止上传图片，无法使用视觉 OCR")
+
+        content = await chat_completions(
+            base_url=self.base_url,
+            api_key=self.api_key,
+            model=self.model,
+            system_prompt=_VISION_OCR_SYSTEM,
+            user_text="请提取这张截图中的所有文字。",
+            image_path=image_path,
+            # The guard above already rejected the upload-forbidden case.
+            allow_upload=True,
+            timeout=self.timeout,
+            json_mode=True,
+        )
+        parsed = safe_parse_json(content)
+
+        if isinstance(parsed, dict):
+            present = [parsed[key] for key in ("text", "ocr_text") if key in parsed]
+            # Prefer the first non-empty value, "text" before "ocr_text".
+            for value in present:
+                if value:
+                    return str(value).strip()
+            # A known key exists but is empty: the model found no text. Do
+            # not fall back to the raw reply, which would leak the JSON
+            # payload (e.g. '{"text": ""}') into the OCR result.
+            if present:
+                return ""
+
+        # No usable key (or unparsable reply): keep the raw model output.
+        return str(content).strip()