Files
SnapAndAnaly/backend/app/providers/base.py
T

47 lines
1.1 KiB
Python
Raw Normal View History

"""OCR / VLM Provider 抽象接口。"""
from __future__ import annotations
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any
@dataclass
class VLMResult:
    """Structured analysis result produced by a VLM provider.

    Every field has a default, so ``VLMResult()`` yields an empty result.
    Mutable fields use ``default_factory`` so instances never share state.

    Attributes:
        title: Title text; empty string by default.
        summary: Summary text; empty string by default.
        category: Category label, or ``None`` when no category is set.
        tags: List of tag strings.
        todos: List of string-to-string dicts; per the original note each
            entry is shaped like ``{title, kind, note}``.
        suggestion: Suggestion text; empty string by default.
        raw: Free-form dict (presumably the unprocessed provider payload;
            confirm against concrete providers).
    """

    title: str = ""
    summary: str = ""
    category: str | None = None
    tags: list[str] = field(default_factory=list)
    todos: list[dict[str, str]] = field(default_factory=list)  # [{title, kind, note}]
    suggestion: str = ""
    raw: dict[str, Any] = field(default_factory=dict)
class OCRProvider(ABC):
    """OCR interface: takes an image path and returns text.

    Concrete providers subclass this and implement :meth:`recognize`.
    """

    # Provider identifier; subclasses may override the default.
    name: str = "ocr"

    @abstractmethod
    async def recognize(self, image_path: Path) -> str:
        """Return the text recognized in the image at *image_path*.

        Args:
            image_path: Path of the image to run OCR on.

        Returns:
            The recognized text.
        """
        ...
class VLMProvider(ABC):
    """Multimodal interface: build a structured analysis from image + OCR text.

    Concrete providers subclass this and implement :meth:`analyze`.
    """

    # Provider identifier; subclasses may override the default.
    name: str = "vlm"

    @abstractmethod
    async def analyze(
        self,
        image_path: Path,
        ocr_text: str,
        categories: list[str],
        allow_upload: bool,
    ) -> VLMResult:
        """Analyze an image together with its OCR text.

        Args:
            image_path: Path of the image to analyze.
            ocr_text: OCR text for the image.
            categories: Category names (presumably the candidates the
                result's ``category`` is picked from; confirm with callers).
            allow_upload: Flag whose name suggests it gates sending the
                image to a remote service. NOTE(review): confirm against
                concrete providers.

        Returns:
            The structured analysis as a ``VLMResult``.
        """
        ...