"""OpenAI 兼容视觉 API 的公共封装:图片编码 + chat/completions 调用。""" from __future__ import annotations import base64 import json from io import BytesIO from pathlib import Path from typing import Any import httpx from PIL import Image from app.core.config import settings from app.core.logger import get_logger logger = get_logger(__name__) def image_to_data_url(image_path: Path, max_side: int | None = None) -> str: """将图片压缩并编码为 data URL。""" max_side = max_side or settings.vlm_max_side with Image.open(image_path) as img: img = img.convert("RGB") w, h = img.size scale = max(w, h) / max_side if scale > 1: img = img.resize((int(w / scale), int(h / scale)), Image.LANCZOS) buf = BytesIO() img.save(buf, format="JPEG", quality=82) encoded = base64.b64encode(buf.getvalue()).decode("ascii") return f"data:image/jpeg;base64,{encoded}" def safe_parse_json(content: str) -> dict[str, Any]: """解析模型 JSON 输出,兼容 markdown 包裹。""" text = content.strip() if text.startswith("```"): text = text.strip("`") if text.lower().startswith("json"): text = text[4:].strip() try: return json.loads(text) except json.JSONDecodeError: start = text.find("{") end = text.rfind("}") if start >= 0 and end > start: try: return json.loads(text[start : end + 1]) except json.JSONDecodeError: pass return {"text": content} async def chat_completions( *, base_url: str, api_key: str, model: str, system_prompt: str, user_text: str, image_path: Path | None = None, allow_upload: bool = True, timeout: float = 60.0, json_mode: bool = True, ) -> str: """调用 /v1/chat/completions,返回 message.content 字符串。""" user_content: list[dict[str, Any]] = [{"type": "text", "text": user_text}] if image_path is not None and allow_upload: data_url = image_to_data_url(image_path) user_content.append({"type": "image_url", "image_url": {"url": data_url}}) payload: dict[str, Any] = { "model": model, "messages": [ {"role": "system", "content": system_prompt}, {"role": "user", "content": user_content}, ], "temperature": 0.2, } if json_mode: payload["response_format"] = {"type": "json_object"} headers = {"Content-Type": "application/json"} if api_key: headers["Authorization"] = f"Bearer {api_key}" url = f"{base_url.rstrip('/')}/chat/completions" async with httpx.AsyncClient(timeout=timeout) as client: try: resp = await client.post(url, json=payload, headers=headers) except httpx.HTTPError as exc: logger.warning("视觉 API 请求失败,尝试移除 response_format:%s", exc) payload.pop("response_format", None) resp = await client.post(url, json=payload, headers=headers) if resp.status_code == 400 and "response_format" in resp.text: payload.pop("response_format", None) resp = await client.post(url, json=payload, headers=headers) resp.raise_for_status() data = resp.json() try: return data["choices"][0]["message"]["content"] except (KeyError, IndexError) as exc: raise RuntimeError(f"视觉 API 返回结构异常: {data}") from exc