5c028d7952
包含 FastAPI 后端、React 前端、队列/OCR/标签/待办等完整功能。 Co-authored-by: Cursor <cursoragent@cursor.com>
47 lines
1.1 KiB
Python
47 lines
1.1 KiB
Python
"""OCR / VLM Provider 抽象接口。"""
|
|
from __future__ import annotations
|
|
|
|
from abc import ABC, abstractmethod
|
|
from dataclasses import dataclass, field
|
|
from pathlib import Path
|
|
from typing import Any
|
|
|
|
|
|
@dataclass
class VLMResult:
    """Structured analysis result produced by a VLM.

    Every field has a default, so ``VLMResult()`` is a valid, empty result.
    The list/dict fields use ``default_factory`` so that instances never
    share the same mutable container.
    """

    title: str = ""
    summary: str = ""
    category: str | None = None
    tags: list[str] = field(default_factory=list)
    # Each todo is a string-to-string mapping: [{title, kind, note}]
    todos: list[dict[str, str]] = field(default_factory=list)
    suggestion: str = ""
    # NOTE(review): presumably the provider's unprocessed payload — confirm
    # against the concrete provider implementations.
    raw: dict[str, Any] = field(default_factory=dict)
|
|
|
|
|
|
class OCRProvider(ABC):
    """OCR interface: takes an image path and returns the recognized text.

    Concrete providers subclass this and implement :meth:`recognize`.
    """

    # Provider identifier; "ocr" is the default value defined on the base class.
    name: str = "ocr"

    @abstractmethod
    async def recognize(self, image_path: Path) -> str:
        """Return the text recognized in the image at ``image_path``.

        Args:
            image_path: Path of the image to run OCR on.

        Returns:
            The recognized text.
        """
        ...
|
|
|
|
|
|
class VLMProvider(ABC):
    """Multimodal interface: build a structured analysis from an image plus its OCR text.

    Concrete providers subclass this and implement :meth:`analyze`.
    """

    # Provider identifier; "vlm" is the default value defined on the base class.
    name: str = "vlm"

    @abstractmethod
    async def analyze(
        self,
        image_path: Path,
        ocr_text: str,
        categories: list[str],
        allow_upload: bool,
    ) -> VLMResult:
        """Analyze an image together with its OCR text.

        Args:
            image_path: Path of the image to analyze.
            ocr_text: OCR text associated with the image.
            categories: Category names supplied by the caller.
                NOTE(review): presumably the candidate set the result's
                ``category`` is chosen from — confirm with implementations.
            allow_upload: NOTE(review): presumably whether the image itself
                may be sent to the provider — confirm with implementations.

        Returns:
            The structured analysis as a ``VLMResult``.
        """
        ...
|