Files
SnapAndAnaly/backend/app/providers/openai_vision_client.py
T
congsh 5c028d7952 Initial commit: snapAna 截图智能整理工具
包含 FastAPI 后端、React 前端、队列/OCR/标签/待办等完整功能。

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-05-27 15:45:50 +08:00

108 lines
3.4 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""OpenAI 兼容视觉 API 的公共封装:图片编码 + chat/completions 调用。"""
from __future__ import annotations
import base64
import json
from io import BytesIO
from pathlib import Path
from typing import Any
import httpx
from PIL import Image
from app.core.config import settings
from app.core.logger import get_logger
logger = get_logger(__name__)
def image_to_data_url(image_path: Path, max_side: int | None = None) -> str:
    """Downscale an image and encode it as a JPEG ``data:`` URL.

    The image is converted to RGB, shrunk (never enlarged) so that its longer
    side is at most ``max_side`` pixels, re-encoded as JPEG at quality 82 and
    base64-encoded.

    Args:
        image_path: Path of the image file to open.
        max_side: Upper bound for the longer side, in pixels. A falsy value
            (``None`` or ``0``) falls back to ``settings.vlm_max_side``.

    Returns:
        A string of the form ``data:image/jpeg;base64,<payload>``.
    """
    max_side = max_side or settings.vlm_max_side
    with Image.open(image_path) as img:
        img = img.convert("RGB")
        w, h = img.size
        scale = max(w, h) / max_side
        if scale > 1:
            # Clamp each side to at least 1 px: for very elongated images the
            # shorter side would otherwise truncate to 0, which resize()
            # rejects with a ValueError.
            new_size = (max(1, int(w / scale)), max(1, int(h / scale)))
            img = img.resize(new_size, Image.LANCZOS)
        buf = BytesIO()
        img.save(buf, format="JPEG", quality=82)
    encoded = base64.b64encode(buf.getvalue()).decode("ascii")
    return f"data:image/jpeg;base64,{encoded}"
def safe_parse_json(content: str) -> dict[str, Any]:
    """Parse a model's JSON reply into a dict, tolerating Markdown fences.

    Parsing is attempted on the whole reply (after removing surrounding
    backticks and a leading ``json`` language tag), then on the span from the
    first ``{`` to the last ``}``. The first attempt that yields a JSON
    *object* wins.

    Args:
        content: Raw reply text from the model. ``None`` or an empty string
            is accepted and treated as an unparseable reply.

    Returns:
        The parsed JSON object, or ``{"text": content}`` when no JSON object
        can be extracted (including replies that are valid JSON but not an
        object, such as a list or a bare number).
    """
    if not content:
        # Nothing to parse; keep the fallback shape instead of failing on None.
        return {"text": content or ""}

    text = content.strip()
    if text.startswith("```"):
        text = text.strip("`")
        if text.lower().startswith("json"):
            text = text[4:].strip()

    candidates = [text]
    start = text.find("{")
    end = text.rfind("}")
    if start >= 0 and end > start:
        # Second chance: the reply may wrap the object in extra prose.
        candidates.append(text[start : end + 1])

    for candidate in candidates:
        try:
            parsed = json.loads(candidate)
        except json.JSONDecodeError:
            continue
        # Only a JSON object satisfies the declared dict return type.
        if isinstance(parsed, dict):
            return parsed
    return {"text": content}
async def chat_completions(
    *,
    base_url: str,
    api_key: str,
    model: str,
    system_prompt: str,
    user_text: str,
    image_path: Path | None = None,
    allow_upload: bool = True,
    timeout: float = 60.0,
    json_mode: bool = True,
) -> str:
    """POST to ``{base_url}/chat/completions`` and return ``message.content``.

    Args:
        base_url: API root; a trailing slash is stripped before
            ``/chat/completions`` is appended.
        api_key: Bearer token. When empty, no ``Authorization`` header is sent.
        model: Model name placed in the request payload.
        system_prompt: Content of the system message.
        user_text: Text part of the user message.
        image_path: Optional image to attach as a data URL.
        allow_upload: When false, ``image_path`` is ignored and only the text
            part is sent.
        timeout: Timeout passed to ``httpx.AsyncClient``.
        json_mode: When true, requests ``response_format = json_object``.

    Returns:
        The ``content`` of the first choice's message.

    Raises:
        httpx.HTTPError: If the retried request also fails at transport level.
        httpx.HTTPStatusError: If the final response has an error status.
        RuntimeError: If the response JSON lacks the expected
            ``choices[0].message.content`` structure.
    """
    import asyncio  # local import; only needed to offload image encoding

    user_content: list[dict[str, Any]] = [{"type": "text", "text": user_text}]
    if image_path is not None and allow_upload:
        # Decoding, resizing and re-encoding the image is blocking work; run
        # it in a worker thread so the event loop is not stalled meanwhile.
        data_url = await asyncio.to_thread(image_to_data_url, image_path)
        user_content.append({"type": "image_url", "image_url": {"url": data_url}})

    payload: dict[str, Any] = {
        "model": model,
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_content},
        ],
        "temperature": 0.2,
    }
    if json_mode:
        payload["response_format"] = {"type": "json_object"}

    headers = {"Content-Type": "application/json"}
    if api_key:
        headers["Authorization"] = f"Bearer {api_key}"

    url = f"{base_url.rstrip('/')}/chat/completions"
    async with httpx.AsyncClient(timeout=timeout) as client:
        try:
            resp = await client.post(url, json=payload, headers=headers)
        except httpx.HTTPError as exc:
            # Best-effort: retry once without response_format. A failure of
            # this second attempt propagates to the caller.
            logger.warning("视觉 API 请求失败,尝试移除 response_format: %s", exc)
            payload.pop("response_format", None)
            resp = await client.post(url, json=payload, headers=headers)
        if resp.status_code == 400 and "response_format" in resp.text:
            # The backend rejected response_format; retry without it.
            payload.pop("response_format", None)
            resp = await client.post(url, json=payload, headers=headers)
        resp.raise_for_status()
        data = resp.json()

    try:
        return data["choices"][0]["message"]["content"]
    except (KeyError, IndexError, TypeError) as exc:
        # TypeError covers a non-dict body or a null "choices"/"message".
        raise RuntimeError(f"视觉 API 返回结构异常: {data}") from exc