Initial commit: snapAna 截图智能整理工具
包含 FastAPI 后端、React 前端、队列/OCR/标签/待办等完整功能。 Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -0,0 +1,107 @@
|
||||
"""OpenAI 兼容 VLM 实现:覆盖 Ollama / GLM / MiniMax / Moonshot / OpenRouter / OpenAI。"""
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from app.core.logger import get_logger
|
||||
|
||||
from .base import VLMProvider, VLMResult
|
||||
from .openai_vision_client import chat_completions, safe_parse_json
|
||||
|
||||
|
||||
# Module-level logger named after this module; `get_logger` is imported from
# `app.core.logger`.
logger = get_logger(__name__)
|
||||
|
||||
|
||||
# System prompt passed as `system_prompt` to `chat_completions` by
# `OpenAICompatVLM.analyze`. Written in Chinese, it tells the model to act as a
# screenshot-organising assistant and to answer with JSON only, using the keys
# `title`, `summary`, `category`, `tags`, `todos` and `suggestion` -- the same
# keys `_to_vlm_result` reads back out of the reply.
# NOTE(review): the literal currently contains standalone `|` / `||||` lines
# that look like diff-viewer residue rather than intended prompt text, and the
# JSON template has no indentation -- confirm against the committed file.
_SYSTEM_PROMPT = """你是一个截图整理助手。用户会给你一张截图(可能附带 OCR 文本)。
|
||||
请用简洁的中文,按以下 JSON 结构返回分析结果,**只输出 JSON,不要解释**:
|
||||
|
||||
{
|
||||
"title": "一句话标题,不超过 24 个字",
|
||||
"summary": "2-3 句话总结这张截图的内容、要点或笑点",
|
||||
"category": "从给定分类列表中选一个最贴切的名字;如果都不符合就填'其他'",
|
||||
"tags": ["3-6 个能帮助检索的细分标签"],
|
||||
"todos": [
|
||||
{"title": "如果截图里出现'待看/待读/待办/想试试/记一下'的内容,抽成一条 todo", "kind": "待读|待看|待办|学习", "note": "可空"}
|
||||
],
|
||||
"suggestion": "可选:给用户的进一步行动建议或同类资源提示,可空"
|
||||
}
|
||||
|
||||
要求:
|
||||
- 标题要可读,不要复述"这是一张..."。
|
||||
- summary 不要超过 80 字。
|
||||
- todos 没有可识别项时给空数组。"""
|
||||
|
||||
|
||||
class OpenAICompatVLM(VLMProvider):
    """VLM provider that talks to an OpenAI-compatible chat-completions API.

    All requests go through the shared ``chat_completions`` helper. The
    original author's note says the endpoint is ``/v1/chat/completions`` and
    that the image travels as a base64 data URL; that encoding happens inside
    the helper and is not visible in this class.
    """

    name = "openai_compat"

    def __init__(
        self,
        base_url: str,
        api_key: str,
        model: str,
        timeout: float = 60.0,
    ) -> None:
        """Remember the connection settings used for every request.

        Args:
            base_url: Root URL of the service; trailing slashes are removed.
            api_key: Credential forwarded to ``chat_completions``.
            model: Model identifier forwarded to ``chat_completions``.
            timeout: Timeout value forwarded to ``chat_completions``.
        """
        # Strip trailing slashes once so later URL joining stays predictable.
        trimmed_url = base_url.rstrip("/")
        self.base_url = trimmed_url
        self.api_key = api_key
        self.model = model
        self.timeout = timeout

    async def analyze(
        self,
        image_path: Path,
        ocr_text: str,
        categories: list[str],
        allow_upload: bool,
    ) -> VLMResult:
        """Ask the model about one screenshot and return the structured result.

        Args:
            image_path: Location of the screenshot; only forwarded when
                ``allow_upload`` is true.
            ocr_text: OCR output for the screenshot; may be empty.
            categories: Category names the model is asked to choose from.
            allow_upload: Whether the image itself may be sent to the service.

        Returns:
            The ``VLMResult`` built from the model's JSON reply.
        """
        category_line = "可选分类:" + ", ".join(categories)
        # An empty OCR result is replaced with an explicit placeholder.
        ocr_section = f"OCR 文本(可能不完整或为空):\n{ocr_text or '(无)'}"
        user_text = f"{category_line}\n\n{ocr_section}"

        # Never hand the path to the client when uploading is not permitted.
        if allow_upload:
            outgoing_image = image_path
        else:
            outgoing_image = None

        request_kwargs = {
            "base_url": self.base_url,
            "api_key": self.api_key,
            "model": self.model,
            "system_prompt": _SYSTEM_PROMPT,
            "user_text": user_text,
            "image_path": outgoing_image,
            "allow_upload": allow_upload,
            "timeout": self.timeout,
            "json_mode": True,
        }
        reply = await chat_completions(**request_kwargs)
        return _to_vlm_result(safe_parse_json(reply))
|
||||
|
||||
|
||||
def _to_vlm_result(data: dict[str, Any]) -> VLMResult:
    """Convert the model's parsed JSON payload into a ``VLMResult``.

    The conversion is deliberately forgiving: missing keys, ``null`` values
    and wrongly-typed ``todos`` / ``tags`` fall back to empty defaults instead
    of raising.

    Args:
        data: Parsed JSON object returned by the model.

    Returns:
        A ``VLMResult`` whose ``title`` is capped at 128 characters, whose
        ``tags`` hold at most 8 non-empty entries, and whose ``todos`` are
        dicts with the string keys ``title`` (capped at 512 characters),
        ``kind`` (defaults to "待办") and ``note``. ``category`` is ``None``
        when absent or empty. The untouched payload is kept in ``raw``.
    """
    todos: list[dict[str, str]] = []
    todos_raw = data.get("todos") or []
    if isinstance(todos_raw, list):
        for item in todos_raw:
            if isinstance(item, dict):
                title = item.get("title")
                if not title:
                    # A todo without a title carries nothing actionable.
                    continue
                todos.append(
                    {
                        "title": str(title)[:512],
                        # Apply `or ""` before str(): a JSON null would
                        # otherwise become the literal text "None" and
                        # bypass the default kind.
                        "kind": str(item.get("kind") or "") or "待办",
                        "note": str(item.get("note") or ""),
                    }
                )
            elif isinstance(item, str) and item:
                # Bare-string todos follow the same rules as dict todos:
                # skip empty titles and cap the length at 512 characters.
                todos.append({"title": item[:512], "kind": "待办", "note": ""})

    tags_raw = data.get("tags") or []
    if not isinstance(tags_raw, list):
        tags_raw = []

    return VLMResult(
        title=str(data.get("title") or "")[:128],
        summary=str(data.get("summary") or ""),
        # An empty or missing category is reported as None, not "".
        category=str(data.get("category") or "") or None,
        tags=[str(tag) for tag in tags_raw if tag][:8],
        todos=todos,
        suggestion=str(data.get("suggestion") or ""),
        raw=data,
    )
|
||||
Reference in New Issue
Block a user