"""通用 HTTP OCR:向自定义 REST 接口 POST 图片并解析文本。""" from __future__ import annotations import base64 import json from pathlib import Path from typing import Any import httpx from .base import OCRProvider class HttpOCR(OCRProvider): """POST JSON {"image_base64": "..."} 到指定 URL,从响应 JSON 取文本。 extra 配置项: - text_path: 点分路径,如 "data.text" 或 "result",默认 "text" - headers: 额外请求头 dict """ name = "http" def __init__( self, base_url: str, api_key: str = "", text_path: str = "text", headers: dict[str, str] | None = None, timeout: float = 30.0, ) -> None: self.base_url = base_url.rstrip("/") self.api_key = api_key self.text_path = text_path self.headers = headers or {} self.timeout = timeout async def recognize(self, image_path: Path) -> str: with open(image_path, "rb") as f: encoded = base64.b64encode(f.read()).decode("ascii") headers = {"Content-Type": "application/json", **self.headers} if self.api_key: headers["Authorization"] = f"Bearer {self.api_key}" payload = {"image_base64": encoded, "image": encoded} async with httpx.AsyncClient(timeout=self.timeout) as client: resp = await client.post(self.base_url, json=payload, headers=headers) resp.raise_for_status() data = resp.json() return str(_dig(data, self.text_path) or "").strip() def _dig(obj: Any, path: str) -> Any: """按点分路径从嵌套 dict 取值。""" cur = obj for part in path.split("."): if not isinstance(cur, dict): return None cur = cur.get(part) return cur