feat: 任务进度实时展示、接口测试、暗色主题重构及多项 bug 修复
后端
- 新增 app/task_progress.py 线程安全进度注册表
- 任务改为后台线程异步执行(_run_task_background),手动触发立即返回 task_key
- 6 个任务函数(summarizer/tagger/scorer/deduplicator/brief/taxonomy)循环内上报进度
- scheduler 定时任务同步上报进度(trigger=scheduled)
- 新增 GET /api/tasks/progress 与 POST /api/tasks/progress/reset 接口
- 新增 POST /api/test-connection 接口连通性测试(独立短超时客户端)
- 修复 ai_client/rss_client 配置在 import 时固化的 bug:该 bug 导致实际任务用 .env 假 key 调 LLM 返回 401(改为 property 运行时读取 settings)
- 修复 ai_client 对 reasoning 模型(MiniMax-M3 等)输出 <think> 块的 JSON 解析失败
- 修复 taxonomy bootstrap:LLM 超时(改用 300s 专用 client)、MiniMax 输出审查(精简样本仅标题 + 约束生成中性类目名)、失败误报 success(改抛异常如实标记)
- 修复 models.py 双外键关系映射启动崩溃(显式 foreign_keys)
- 修复 main.py SPA 路由 404、ArticleOut.published_at 序列化 500
- 移除 lifespan 同步 bootstrap 阻塞启动,改由 scheduler 后台异步执行

前端
- Deep Ink 高对比度暗色主题重构,修复 Element Plus 暗色模式对比度问题
- Tasks 页面任务进度实时展示(进度条/阶段/计数/状态/触发来源)+ 1.5s 轮询
- 接口测试面板(rssKeeper / LLM 连通性 + 延迟)
- 修复 nextJobs jobId 映射 bug

部署与文档
- Dockerfile 优化(BuildKit 缓存挂载、预编译 wheel、去 gcc、阿里云镜像源)
- 新增 API.md 接口文档

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
+7
-1
@@ -12,6 +12,7 @@ import numpy as np
|
||||
|
||||
from config import settings
|
||||
from models import EnrichedArticle, DuplicateGroup
|
||||
from app.task_progress import update_progress, report_loop_progress
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -172,8 +173,11 @@ def deduplicate_articles(
|
||||
|
||||
if not articles:
|
||||
logger.info("日期 %s 无文章可去重", date_str)
|
||||
update_progress("tag_score_dedup", status="running", stage="去重", current=0, total=0, message="无文章可去重")
|
||||
return {"total": 0, "duplicate_groups": 0, "representatives": 0}
|
||||
|
||||
update_progress("tag_score_dedup", status="running", stage="计算相似度并去重", current=0, total=0)
|
||||
|
||||
# 先 URL 去重:相同 link 只保留一篇
|
||||
unique_articles: List[EnrichedArticle] = []
|
||||
seen_links: set = set()
|
||||
@@ -194,8 +198,9 @@ def deduplicate_articles(
|
||||
)
|
||||
|
||||
stats = {"total": len(articles), "duplicate_groups": len(clusters), "representatives": 0}
|
||||
update_progress("tag_score_dedup", status="running", stage="写入重复组", current=0, total=len(clusters))
|
||||
|
||||
for cluster in clusters:
|
||||
for ci, cluster in enumerate(clusters):
|
||||
representative = _pick_representative(unique_articles, cluster)
|
||||
member_ids = [unique_articles[i].id for i in cluster]
|
||||
|
||||
@@ -214,6 +219,7 @@ def deduplicate_articles(
|
||||
art.is_representative = (art.id == representative.id)
|
||||
|
||||
stats["representatives"] += 1
|
||||
report_loop_progress("tag_score_dedup", ci + 1, len(clusters), "写入重复组")
|
||||
|
||||
db.commit()
|
||||
logger.info(
|
||||
|
||||
Reference in New Issue
Block a user