# backend/app/providers/openai_vision_client.py

"""OpenAI 兼容视觉 API 的公共封装：图片编码 + chat/completions 调用。"""
from __future__ import annotations

import base64
import json
from io import BytesIO
from pathlib import Path
from typing import Any

import httpx
from PIL import Image

from app.core.config import settings
from app.core.logger import get_logger


# Module-level logger, obtained from the project's get_logger helper and
# named after this module.
logger = get_logger(__name__)


def image_to_data_url(image_path: Path, max_side: int | None = None) -> str:
    """Downscale an image if needed and encode it as a JPEG ``data:`` URL.

    The image is converted to RGB, shrunk so that its longer side does not
    exceed the limit (aspect ratio preserved, never upscaled), re-encoded as
    JPEG at quality 82 and embedded as base64.

    Args:
        image_path: Path of the image file to open with Pillow.
        max_side: Maximum length, in pixels, of the longer side. ``None`` or
            ``0`` falls back to ``settings.vlm_max_side``. If the resulting
            limit is missing or not positive, the image is not resized.

    Returns:
        A string of the form ``data:image/jpeg;base64,<payload>``.
    """
    limit = max_side or settings.vlm_max_side
    with Image.open(image_path) as source:
        rgb = source.convert("RGB")
        width, height = rgb.size
        longest = max(width, height)
        if limit and limit > 0 and longest > limit:
            scale = longest / limit
            # Clamp to 1 px: for very elongated images the shorter side would
            # otherwise truncate to 0, which Pillow rejects.
            target = (
                max(1, int(width / scale)),
                max(1, int(height / scale)),
            )
            rgb = rgb.resize(target, Image.LANCZOS)
        buf = BytesIO()
        rgb.save(buf, format="JPEG", quality=82)
        encoded = base64.b64encode(buf.getvalue()).decode("ascii")
    return f"data:image/jpeg;base64,{encoded}"


def safe_parse_json(content: str) -> dict[str, Any]:
    """Parse a model's JSON output into a dict, tolerating markdown fences.

    Parsing is attempted in this order:

    1. Strip surrounding whitespace; if the text starts with a code fence,
       strip the backticks and an optional leading ``json`` language tag.
    2. Parse the remaining text as JSON.
    3. If that fails, decode the first JSON value that starts at the first
       ``{`` in the text, ignoring anything before or after it.

    Args:
        content: Raw text returned by the model. ``None`` is treated as an
            empty string.

    Returns:
        The decoded JSON object. If nothing could be decoded, or the decoded
        value is not an object (list, string, number, ``null``...), returns
        ``{"text": content}`` so callers always receive a dict.
    """
    raw = content or ""
    text = raw.strip()
    if text.startswith("```"):
        text = text.strip("`")
        if text.lower().startswith("json"):
            text = text[4:].strip()

    try:
        parsed = json.loads(text)
    except json.JSONDecodeError:
        parsed = None
        start = text.find("{")
        if start >= 0:
            try:
                # raw_decode stops at the end of the first complete value, so
                # trailing prose (even prose containing "}") is ignored.
                parsed, _ = json.JSONDecoder().raw_decode(text, start)
            except json.JSONDecodeError:
                parsed = None

    # Valid JSON that is not an object would violate the declared return type.
    return parsed if isinstance(parsed, dict) else {"text": raw}


async def chat_completions(
    *,
    base_url: str,
    api_key: str,
    model: str,
    system_prompt: str,
    user_text: str,
    image_path: Path | None = None,
    allow_upload: bool = True,
    timeout: float = 60.0,
    json_mode: bool = True,
) -> str:
    """POST to ``{base_url}/chat/completions`` and return the reply content.

    The request carries a system message and a user message. The user message
    holds ``user_text`` and, when ``image_path`` is given and ``allow_upload``
    is true, the image embedded as a JPEG data URL.

    Args:
        base_url: API base URL; a trailing ``/`` is stripped before
            ``/chat/completions`` is appended.
        api_key: Sent as a ``Bearer`` token; no ``Authorization`` header is
            sent when empty.
        model: Model name placed in the payload.
        system_prompt: Content of the system message.
        user_text: Text part of the user message.
        image_path: Optional image to attach.
        allow_upload: When false, the image is never attached.
        timeout: Timeout passed to ``httpx.AsyncClient``.
        json_mode: When true, requests
            ``response_format={"type": "json_object"}``.

    Returns:
        ``choices[0].message.content`` from the response, returned as-is.
        NOTE(review): the value is not validated; a server that sends
        ``null`` content makes this return ``None``. Confirm callers cope.

    Raises:
        httpx.HTTPError: If the request fails again after the single retry,
            or the final response has an error status.
        RuntimeError: If the response JSON lacks the expected structure.
    """
    user_content: list[dict[str, Any]] = [{"type": "text", "text": user_text}]
    if image_path is not None and allow_upload:
        # NOTE: image_to_data_url is synchronous, so the image encoding runs
        # inline in this coroutine.
        data_url = image_to_data_url(image_path)
        user_content.append({"type": "image_url", "image_url": {"url": data_url}})

    payload: dict[str, Any] = {
        "model": model,
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_content},
        ],
        "temperature": 0.2,
    }
    if json_mode:
        payload["response_format"] = {"type": "json_object"}

    headers = {"Content-Type": "application/json"}
    if api_key:
        headers["Authorization"] = f"Bearer {api_key}"

    url = f"{base_url.rstrip('/')}/chat/completions"
    async with httpx.AsyncClient(timeout=timeout) as client:
        try:
            resp = await client.post(url, json=payload, headers=headers)
        except httpx.HTTPError as exc:
            # Best-effort: on any request-level failure, retry once without
            # response_format. A second failure propagates to the caller.
            logger.warning("视觉 API 请求失败，尝试移除 response_format：%s", exc)
            payload.pop("response_format", None)
            resp = await client.post(url, json=payload, headers=headers)

        # Retry only while response_format is still in the payload; otherwise
        # the resend would be an identical request.
        rejected_response_format = (
            resp.status_code == 400
            and "response_format" in payload
            and "response_format" in resp.text
        )
        if rejected_response_format:
            payload.pop("response_format", None)
            resp = await client.post(url, json=payload, headers=headers)

        resp.raise_for_status()
        data = resp.json()

    try:
        return data["choices"][0]["message"]["content"]
    except (KeyError, IndexError, TypeError) as exc:
        # TypeError covers bodies such as {"choices": null} or a non-dict
        # top-level value, which are structural errors as well.
        raise RuntimeError(f"视觉 API 返回结构异常: {data}") from exc