# SnapAndAnaly/backend/app/providers/base.py

"""OCR / VLM Provider 抽象接口。"""
from __future__ import annotations

from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any


@dataclass
class VLMResult:
    """Structured analysis result returned by a VLM provider.

    Every field has a default, so ``VLMResult()`` is a valid, empty result.
    The list/dict fields use ``default_factory`` so each instance gets its
    own container instead of sharing one mutable default.

    NOTE(review): the per-field notes below are inferred from the field
    names; confirm them against the concrete providers.
    """

    title: str = ""
    summary: str = ""
    # None means no category was set (presumably: nothing matched -- confirm).
    category: str | None = None
    tags: list[str] = field(default_factory=list)
    # Each entry is expected to be a dict with "title", "kind" and "note" keys.
    todos: list[dict[str, str]] = field(default_factory=list)
    suggestion: str = ""
    # Presumably the unprocessed provider payload -- TODO confirm.
    raw: dict[str, Any] = field(default_factory=dict)


class OCRProvider(ABC):
    """Contract for OCR backends: given an image path, produce its text."""

    # Provider identifier; "ocr" unless a subclass overrides it.
    name: str = "ocr"

    @abstractmethod
    async def recognize(self, image_path: Path) -> str:
        """Recognize the text contained in the image at ``image_path``.

        Args:
            image_path: Location of the image to run OCR on.

        Returns:
            The recognized text.
        """


class VLMProvider(ABC):
    """Contract for multimodal backends.

    Implementations turn an image plus its OCR text into a structured
    ``VLMResult``.
    """

    # Provider identifier; "vlm" unless a subclass overrides it.
    name: str = "vlm"

    @abstractmethod
    async def analyze(
        self,
        image_path: Path,
        ocr_text: str,
        categories: list[str],
        allow_upload: bool,
    ) -> VLMResult:
        """Produce a structured analysis for one image.

        Args:
            image_path: Location of the image to analyze.
            ocr_text: Text previously extracted from the image by OCR.
            categories: Category names supplied by the caller (presumably
                the candidates for ``VLMResult.category`` -- confirm).
            allow_upload: Caller-supplied flag (presumably whether the image
                may be sent to a remote service -- confirm).

        Returns:
            The structured analysis result.
        """