Files
SnapAndAnaly/backend/app/providers/base.py
T
congsh 5c028d7952 Initial commit: snapAna 截图智能整理工具
包含 FastAPI 后端、React 前端、队列/OCR/标签/待办等完整功能。

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-05-27 15:45:50 +08:00

47 lines
1.1 KiB
Python

"""OCR / VLM Provider 抽象接口。"""
from __future__ import annotations
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any
@dataclass
class VLMResult:
    """Structured analysis result produced by a VLM.

    Every field has a default, so ``VLMResult()`` is a valid empty result.
    The list/dict fields use ``default_factory`` so that instances never
    share the same mutable object.
    """

    title: str = ""
    summary: str = ""
    category: str | None = None
    tags: list[str] = field(default_factory=list)
    # Entry shape: [{title, kind, note}]
    todos: list[dict[str, str]] = field(default_factory=list)
    suggestion: str = ""
    # NOTE(review): presumably the unprocessed provider payload - confirm
    # against the concrete providers.
    raw: dict[str, Any] = field(default_factory=dict)
class OCRProvider(ABC):
    """OCR interface: takes an image path and returns the recognized text."""

    # Class-level provider name; defaults to "ocr".
    name: str = "ocr"

    @abstractmethod
    async def recognize(self, image_path: Path) -> str:
        """Recognize the text in the image at ``image_path``.

        Args:
            image_path: Path of the image to run OCR on.

        Returns:
            The recognized text.
        """
        ...
class VLMProvider(ABC):
    """Multimodal interface: builds a structured analysis from an image plus its OCR text."""

    # Class-level provider name; defaults to "vlm".
    name: str = "vlm"

    @abstractmethod
    async def analyze(
        self,
        image_path: Path,
        ocr_text: str,
        categories: list[str],
        allow_upload: bool,
    ) -> VLMResult:
        """Analyze one image together with its OCR text.

        Args:
            image_path: Path of the image to analyze.
            ocr_text: OCR text associated with the image.
            categories: Category names passed to the provider.
                NOTE(review): presumably the candidate set for
                ``VLMResult.category`` - confirm against implementations.
            allow_upload: NOTE(review): presumably whether the image itself
                may be sent to the provider - confirm against implementations.

        Returns:
            The structured analysis as a ``VLMResult``.
        """
        ...