5c028d7952
包含 FastAPI 后端、React 前端、队列/OCR/标签/待办等完整功能。 Co-authored-by: Cursor <cursoragent@cursor.com>
108 lines
3.4 KiB
Python
108 lines
3.4 KiB
Python
"""OpenAI 兼容视觉 API 的公共封装:图片编码 + chat/completions 调用。"""
|
||
from __future__ import annotations
|
||
|
||
import base64
|
||
import json
|
||
from io import BytesIO
|
||
from pathlib import Path
|
||
from typing import Any
|
||
|
||
import httpx
|
||
from PIL import Image
|
||
|
||
from app.core.config import settings
|
||
from app.core.logger import get_logger
|
||
|
||
|
||
logger = get_logger(__name__)
|
||
|
||
|
||
def image_to_data_url(image_path: Path, max_side: int | None = None) -> str:
    """Compress an image and encode it as a JPEG ``data:`` URL.

    The image is converted to RGB, shrunk (aspect ratio preserved) when its
    longer side exceeds ``max_side``, saved as JPEG at quality 82 and
    base64-encoded. Images that already fit are never upscaled.

    Args:
        image_path: Path of the image file to read.
        max_side: Upper bound, in pixels, for the longer side. A falsy value
            (``None`` or ``0``) falls back to ``settings.vlm_max_side``.

    Returns:
        A string of the form ``data:image/jpeg;base64,<payload>``.
    """
    max_side = max_side or settings.vlm_max_side
    with Image.open(image_path) as img:
        img = img.convert("RGB")
        w, h = img.size
        scale = max(w, h) / max_side
        if scale > 1:
            # Clamp each side to at least 1px: for a very elongated image the
            # short side would otherwise truncate to 0, which resize() rejects.
            new_size = (max(1, int(w / scale)), max(1, int(h / scale)))
            img = img.resize(new_size, Image.LANCZOS)
        buf = BytesIO()
        img.save(buf, format="JPEG", quality=82)
    encoded = base64.b64encode(buf.getvalue()).decode("ascii")
    return f"data:image/jpeg;base64,{encoded}"
|
||
|
||
|
||
def safe_parse_json(content: str) -> dict[str, Any]:
    """Parse a model's JSON reply into a dict, tolerating markdown wrapping.

    Parsing is attempted in this order:

    1. The whole reply, after stripping surrounding whitespace, backtick
       fences and a leading ``json`` language tag.
    2. The substring from the first ``{`` to the last ``}``, which recovers
       an object embedded in surrounding prose.

    Only JSON objects are accepted. Valid JSON of another type (list, number,
    string, ``null``) is not returned as-is, so the result always honours the
    declared ``dict`` return type.

    Args:
        content: Raw text returned by the model.

    Returns:
        The parsed JSON object, or ``{"text": content}`` with the original,
        unmodified reply when no JSON object could be extracted.
    """
    text = content.strip()
    if text.startswith("```"):
        text = text.strip("`")
        if text.lower().startswith("json"):
            text = text[4:].strip()

    candidates = [text]
    start = text.find("{")
    end = text.rfind("}")
    if start >= 0 and end > start:
        candidates.append(text[start : end + 1])

    for candidate in candidates:
        try:
            parsed = json.loads(candidate)
        except json.JSONDecodeError:
            continue
        # Callers rely on a mapping; skip scalars and lists.
        if isinstance(parsed, dict):
            return parsed
    return {"text": content}
|
||
|
||
|
||
async def chat_completions(
    *,
    base_url: str,
    api_key: str,
    model: str,
    system_prompt: str,
    user_text: str,
    image_path: Path | None = None,
    allow_upload: bool = True,
    timeout: float = 60.0,
    json_mode: bool = True,
) -> str:
    """POST to ``{base_url}/chat/completions`` and return ``message.content``.

    The request carries one system message and one user message. The user
    message always contains ``user_text``; the image is attached as a data URL
    only when ``image_path`` is given and ``allow_upload`` is true.

    Args:
        base_url: API root; a trailing ``/`` is stripped before the
            ``/chat/completions`` path is appended.
        api_key: Sent as a ``Bearer`` token when non-empty; otherwise no
            ``Authorization`` header is sent.
        model: Model identifier placed in the payload.
        system_prompt: Content of the system message.
        user_text: Text part of the user message.
        image_path: Optional image to encode and attach.
        allow_upload: When false, the image is never attached.
        timeout: Timeout passed to ``httpx.AsyncClient``.
        json_mode: When true, requests ``response_format`` of type
            ``json_object``.

    Returns:
        The ``content`` field of the first choice's message.

    Raises:
        httpx.HTTPError: If the request fails again after the single retry,
            or if the final response has an error status.
        RuntimeError: If the response body does not have the expected
            ``choices[0].message.content`` structure.
    """
    user_content: list[dict[str, Any]] = [{"type": "text", "text": user_text}]
    if image_path is not None and allow_upload:
        data_url = image_to_data_url(image_path)
        user_content.append({"type": "image_url", "image_url": {"url": data_url}})

    payload: dict[str, Any] = {
        "model": model,
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_content},
        ],
        "temperature": 0.2,
    }
    if json_mode:
        payload["response_format"] = {"type": "json_object"}

    headers = {"Content-Type": "application/json"}
    if api_key:
        headers["Authorization"] = f"Bearer {api_key}"

    url = f"{base_url.rstrip('/')}/chat/completions"
    async with httpx.AsyncClient(timeout=timeout) as client:
        try:
            resp = await client.post(url, json=payload, headers=headers)
        except httpx.HTTPError as exc:
            # Best-effort: retry once without response_format.
            logger.warning("视觉 API 请求失败,尝试移除 response_format:%s", exc)
            payload.pop("response_format", None)
            resp = await client.post(url, json=payload, headers=headers)

        # Some backends reject response_format with a 400. Retry without it,
        # but only if it is still in the payload; otherwise the retry would be
        # a byte-identical request.
        if (
            resp.status_code == 400
            and "response_format" in payload
            and "response_format" in resp.text
        ):
            payload.pop("response_format", None)
            resp = await client.post(url, json=payload, headers=headers)

        resp.raise_for_status()
        data = resp.json()

    try:
        return data["choices"][0]["message"]["content"]
    except (KeyError, IndexError, TypeError) as exc:
        # TypeError covers null or non-container values on the path, e.g.
        # {"choices": null}.
        raise RuntimeError(f"视觉 API 返回结构异常: {data}") from exc
|