feat: 任务进度实时展示、接口测试、暗色主题重构及多项 bug 修复
后端 - 新增 app/task_progress.py 线程安全进度注册表 - 任务改为后台线程异步执行(_run_task_background),手动触发立即返回 task_key - 6 个任务函数(summarizer/tagger/scorer/deduplicator/brief/taxonomy)循环内上报进度 - scheduler 定时任务同步上报进度(trigger=scheduled) - 新增 GET /api/tasks/progress 与 POST /api/tasks/progress/reset 接口 - 新增 POST /api/test-connection 接口连通性测试(独立短超时客户端) - 修复 ai_client/rss_client 配置在 import 时固化的 bug(改为 property 运行时读取 settings), 导致实际任务用 .env 假 key 调 LLM 401 - 修复 ai_client 对 reasoning 模型(MiniMax-M3 等)输出 <think> 块的 JSON 解析失败 - 修复 taxonomy bootstrap:LLM 超时(改用 300s 专用 client)、MiniMax 输出审查 (精简样本仅标题 + 约束生成中性类目名)、失败误报 success(改抛异常如实标记) - 修复 models.py 双外键关系映射启动崩溃(显式 foreign_keys) - 修复 main.py SPA 路由 404、ArticleOut.published_at 序列化 500 - 移除 lifespan 同步 bootstrap 阻塞启动,改由 scheduler 后台异步执行 前端 - Deep Ink 高对比度暗色主题重构,修复 Element Plus 暗色模式对比度问题 - Tasks 页面任务进度实时展示(进度条/阶段/计数/状态/触发来源)+ 1.5s 轮询 - 接口测试面板(rssKeeper / LLM 连通性 + 延迟) - 修复 nextJobs jobId 映射 bug 部署与文档 - Dockerfile 优化(BuildKit 缓存挂载、预编译 wheel、去 gcc、阿里云镜像源) - 新增 API.md 接口文档 Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
+84
-21
@@ -1,6 +1,7 @@
|
||||
"""LLM API 客户端,兼容 OpenAI API 格式"""
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
from typing import Optional
|
||||
|
||||
from openai import OpenAI, APIError
|
||||
@@ -9,9 +10,57 @@ from config import settings
|
||||
|
||||
logger = logging.getLogger(__name__)

# Matches a closed <think>...</think> reasoning block as emitted by reasoning
# models (MiniMax-M3 / DeepSeek-R1 / GLM-Z1, etc.). DOTALL lets the block span
# multiple lines; the non-greedy body keeps separate blocks separate.
_THINK_RE = re.compile(r"<think>.*?</think>", re.DOTALL)


def _json_candidates(text: str):
    """Yield substrings of *text* that may hold the JSON payload, best guess first.

    Order:
      1. the whole text;
      2. if a stray ``</think>`` is present, the text after the last one,
         followed by the ``{...}`` / ``[...]`` slices of that tail;
      3. the first-``{``-to-last-``}`` slice of the whole text;
      4. the first-``[``-to-last-``]`` slice of the whole text.
    """
    yield text

    regions = [text]
    if "</think>" in text:
        # A closing tag without an opener: everything before it is reasoning.
        # The whole text was already yielded above, so a payload that merely
        # contains "</think>" inside a string value is never damaged by this.
        tail = text.rsplit("</think>", 1)[-1].strip()
        yield tail
        regions.insert(0, tail)

    for region in regions:
        for opener, closer in (("{", "}"), ("[", "]")):
            start = region.find(opener)
            end = region.rfind(closer)
            if start != -1 and end > start:
                yield region[start : end + 1]


def _parse_llm_json(content: str) -> dict:
    """Extract the JSON value from raw LLM output.

    Tolerates reasoning models that still emit ``<think>...</think>`` blocks in
    json_object mode, a closing ``</think>`` whose opening tag is missing, and
    extra text (prose, code fences) before or after the JSON.

    Args:
        content: Raw text returned by the model.

    Returns:
        The decoded JSON value. This is normally an object (``dict``); when
        only an array can be recovered the decoded ``list`` is returned.

    Raises:
        ValueError: If *content* is empty/blank, or no candidate substring
            decodes as JSON. Every failure is logged and raised the same way;
            the array fallback no longer leaks a raw ``json.JSONDecodeError``
            (which is itself a ``ValueError`` subclass).
    """
    if not content or not content.strip():
        raise ValueError("LLM 返回空内容,无法解析 JSON")

    # Remove every closed reasoning block before looking for the payload.
    text = _THINK_RE.sub("", content.strip()).strip()

    for candidate in _json_candidates(text):
        try:
            return json.loads(candidate)
        except json.JSONDecodeError:
            continue

    logger.error("无法从 LLM 输出提取 JSON: %s", content[:500])
    raise ValueError("LLM 输出无法解析为 JSON")
|
||||
|
||||
|
||||
class AIClient:
|
||||
"""封装 LLM 调用,支持重试和 JSON 输出"""
|
||||
"""封装 LLM 调用,支持重试和 JSON 输出。
|
||||
|
||||
配置以 property 形式运行时从 settings 读取,避免模块 import 时
|
||||
固化旧值(settings 在 FastAPI lifespan 启动后才会被数据库配置覆盖)。
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
@@ -21,24 +70,42 @@ class AIClient:
|
||||
timeout: Optional[int] = None,
|
||||
max_retries: Optional[int] = None,
|
||||
):
|
||||
self.api_key = api_key or settings.OPENAI_API_KEY
|
||||
self.base_url = base_url or settings.OPENAI_BASE_URL
|
||||
self.model = model or settings.OPENAI_MODEL
|
||||
self.timeout = timeout or settings.OPENAI_TIMEOUT
|
||||
self.max_retries = max_retries or settings.OPENAI_MAX_RETRIES
|
||||
# 仅保存显式传入的覆盖值;为 None 时运行时回退到 settings
|
||||
self._api_key = api_key
|
||||
self._base_url = base_url
|
||||
self._model = model
|
||||
self._timeout = timeout
|
||||
self._max_retries = max_retries
|
||||
|
||||
self._client: Optional[OpenAI] = None
|
||||
@property
def api_key(self) -> str:
    """API key to use: the explicit override if truthy, else ``settings.OPENAI_API_KEY``.

    ``settings`` is read on every access rather than cached on the instance.
    """
    override = self._api_key
    if override:
        return override
    return settings.OPENAI_API_KEY
|
||||
|
||||
@property
def base_url(self) -> str:
    """Base URL to use: the explicit override if truthy, else ``settings.OPENAI_BASE_URL``.

    ``settings`` is read on every access rather than cached on the instance.
    """
    override = self._base_url
    return override if override else settings.OPENAI_BASE_URL
|
||||
|
||||
@property
def model(self) -> str:
    """Model name to use: the explicit override if truthy, else ``settings.OPENAI_MODEL``.

    ``settings`` is read on every access rather than cached on the instance.
    """
    if not self._model:
        return settings.OPENAI_MODEL
    return self._model
|
||||
|
||||
@property
def timeout(self) -> int:
    """Request timeout: the explicit override if truthy, else ``settings.OPENAI_TIMEOUT``.

    ``settings`` is read on every access rather than cached on the instance.
    A falsy override (``None`` or ``0``) falls back to the settings value.
    """
    override = self._timeout
    if override:
        return override
    return settings.OPENAI_TIMEOUT
|
||||
|
||||
@property
def max_retries(self) -> int:
    """Retry count: the explicit override if one was given, else ``settings.OPENAI_MAX_RETRIES``.

    Only ``None`` means "not overridden". An explicit ``0`` is honoured so a
    caller can disable retries; the previous ``or`` fallback silently replaced
    it with the settings value. ``settings`` is read on every access rather
    than cached on the instance.
    """
    if self._max_retries is not None:
        return self._max_retries
    return settings.OPENAI_MAX_RETRIES
|
||||
|
||||
@property
|
||||
def client(self) -> OpenAI:
|
||||
if self._client is None:
|
||||
self._client = OpenAI(
|
||||
api_key=self.api_key,
|
||||
base_url=self.base_url,
|
||||
timeout=self.timeout,
|
||||
max_retries=self.max_retries,
|
||||
)
|
||||
return self._client
|
||||
# 每次按最新配置创建,确保用到启动后覆盖的真实配置
|
||||
return OpenAI(
|
||||
api_key=self.api_key,
|
||||
base_url=self.base_url,
|
||||
timeout=self.timeout,
|
||||
max_retries=self.max_retries,
|
||||
)
|
||||
|
||||
def chat_completion(
|
||||
self,
|
||||
@@ -75,18 +142,14 @@ class AIClient:
|
||||
user_prompt: str,
|
||||
temperature: float = 0.3,
|
||||
) -> dict:
|
||||
"""调用 LLM 并解析返回的 JSON"""
|
||||
"""调用 LLM 并解析返回的 JSON(兼容 reasoning 模型的 <think> 块)"""
|
||||
content = self.chat_completion(
|
||||
system_prompt=system_prompt,
|
||||
user_prompt=user_prompt,
|
||||
temperature=temperature,
|
||||
json_mode=True,
|
||||
)
|
||||
try:
|
||||
return json.loads(content)
|
||||
except json.JSONDecodeError as exc:
|
||||
logger.error("LLM 返回不是合法 JSON: %s - content=%s", exc, content[:500])
|
||||
raise
|
||||
return _parse_llm_json(content)
|
||||
|
||||
|
||||
ai_client = AIClient()
|
||||
|
||||
Reference in New Issue
Block a user