Files
dataClean/app/task_progress.py
T
congsh 778ccefb22 feat: 任务进度实时展示、接口测试、暗色主题重构及多项 bug 修复
后端
- 新增 app/task_progress.py 线程安全进度注册表
- 任务改为后台线程异步执行(_run_task_background),手动触发立即返回 task_key
- 6 个任务函数(summarizer/tagger/scorer/deduplicator/brief/taxonomy)循环内上报进度
- scheduler 定时任务同步上报进度(trigger=scheduled)
- 新增 GET /api/tasks/progress 与 POST /api/tasks/progress/reset 接口
- 新增 POST /api/test-connection 接口连通性测试(独立短超时客户端)
- 修复 ai_client/rss_client 配置在 import 时固化的 bug(改为 property 运行时读取 settings),
  导致实际任务用 .env 假 key 调 LLM 401
- 修复 ai_client 对 reasoning 模型(MiniMax-M3 等)输出 <think> 块的 JSON 解析失败
- 修复 taxonomy bootstrap:LLM 超时(改用 300s 专用 client)、MiniMax 输出审查
  (精简样本仅标题 + 约束生成中性类目名)、失败误报 success(改抛异常如实标记)
- 修复 models.py 双外键关系映射启动崩溃(显式 foreign_keys)
- 修复 main.py SPA 路由 404、ArticleOut.published_at 序列化 500
- 移除 lifespan 同步 bootstrap 阻塞启动,改由 scheduler 后台异步执行

前端
- Deep Ink 高对比度暗色主题重构,修复 Element Plus 暗色模式对比度问题
- Tasks 页面任务进度实时展示(进度条/阶段/计数/状态/触发来源)+ 1.5s 轮询
- 接口测试面板(rssKeeper / LLM 连通性 + 延迟)
- 修复 nextJobs jobId 映射 bug

部署与文档
- Dockerfile 优化(BuildKit 缓存挂载、预编译 wheel、去 gcc、阿里云镜像源)
- 新增 API.md 接口文档

Co-Authored-By: Claude <noreply@anthropic.com>
2026-06-14 15:14:40 +08:00

118 lines
3.5 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""任务进度注册表(进程内内存,线程安全)。
供手动任务、定时任务在执行过程中上报进度,前端通过
GET /api/tasks/progress 轮询读取展示。
单 worker(uvicorn --workers 1)前提下,所有请求/任务线程共享同一份内存。
"""
import copy
import threading
from datetime import datetime, timezone
from typing import Optional
# The four stable task keys that are pre-registered in the progress registry.
TASK_KEYS = ("summarize", "tag_score_dedup", "generate_daily_brief", "bootstrap_taxonomy")
# In-memory registry: task key -> progress entry (a dict of status fields).
_progress: dict = {}
# Lock taken by update_progress / get_progress / reset_progress around registry access.
_lock = threading.Lock()
def _now_iso() -> str:
return datetime.now(timezone.utc).isoformat()
def _init() -> None:
    """Register every key in TASK_KEYS with a fresh idle progress entry.

    Each key receives its own dict, so entries never share state.
    Keys already present in the registry but not in TASK_KEYS are kept.
    """
    _progress.update(
        {
            task_key: {
                "status": "idle",
                "stage": "",
                "current": 0,
                "total": 0,
                "message": None,
                "started_at": None,
                "updated_at": None,
                "finished_at": None,
                "trigger": None,
            }
            for task_key in TASK_KEYS
        }
    )
# Populate the registry at import time so every key in TASK_KEYS starts as idle.
_init()
def update_progress(
    task_key: str,
    *,
    status: Optional[str] = None,
    stage: Optional[str] = None,
    current: Optional[int] = None,
    total: Optional[int] = None,
    message: Optional[str] = None,
    trigger: Optional[str] = None,
) -> None:
    """Merge the non-None fields into a task's progress entry and stamp times.

    Args:
        task_key: Registry key of the task. An unknown key gets a fresh idle
            entry created on the fly.
        status: New status. "running" starts/continues a run; "success" and
            "error" mark the run as finished.
        stage: Label of the current stage.
        current: Number of items processed so far.
        total: Total number of items to process.
        message: Free-form message. Passing None leaves the stored message
            unchanged (it cannot be cleared through this function).
        trigger: What started the run.

    Timestamp rules:
        * ``started_at`` is stamped whenever the entry *enters* "running",
          i.e. its previous status was not "running" (or no start time was
          recorded yet). Repeated "running" updates within one run keep the
          original start time, while a new run that begins after a finished
          one gets a fresh start time instead of inheriting the old one.
        * ``finished_at`` is stamped on "success"/"error" and cleared when
          the entry is "running".
        * ``updated_at`` is stamped on every call.
    """
    with _lock:
        entry = _progress.get(task_key)
        if entry is None:
            entry = {
                "status": "idle", "stage": "", "current": 0, "total": 0,
                "message": None, "started_at": None, "updated_at": None,
                "finished_at": None, "trigger": None,
            }
            _progress[task_key] = entry

        now = _now_iso()

        if status == "running":
            # Compare against the *previous* status: only a transition into
            # "running" begins a new run and deserves a new start time.
            entering_run = entry.get("status") != "running"
            if entering_run or entry.get("started_at") is None:
                entry["started_at"] = now
            # A running task has no terminal timestamp.
            entry["finished_at"] = None
        elif status in ("success", "error"):
            entry["finished_at"] = now

        supplied = (
            ("status", status),
            ("stage", stage),
            ("current", current),
            ("total", total),
            ("message", message),
            ("trigger", trigger),
        )
        for field_name, value in supplied:
            if value is not None:
                entry[field_name] = value

        entry["updated_at"] = now
def report_loop_progress(
    task_key: str,
    index: int,
    total: int,
    stage: str,
    message: Optional[str] = None,
    every: int = 5,
) -> None:
    """Report loop progress sparingly to limit lock acquisitions.

    A report is forwarded to ``update_progress`` (with status "running") only
    when ``index`` is a multiple of ``every`` or when ``index >= total``
    (the final iteration).

    Args:
        task_key: Registry key of the task.
        index: Current loop position.
        total: Total number of iterations.
        stage: Label of the current stage.
        message: Optional message stored with the report.
        every: Reporting interval. A non-positive value is treated as 1
            (report on every call) rather than raising ZeroDivisionError
            for ``every=0``.
    """
    interval = every if every > 0 else 1
    is_last = index >= total
    if is_last or index % interval == 0:
        update_progress(
            task_key,
            status="running",
            stage=stage,
            current=index,
            total=total,
            message=message,
        )
def get_progress(task_key: Optional[str] = None) -> Optional[dict]:
    """Return a deep-copied snapshot of the progress registry.

    The copy is taken while holding the lock, so the caller can serialize it
    without being affected by concurrent updates.

    Args:
        task_key: When given, only that task's entry is returned; when None,
            the whole registry (task key -> entry) is returned.

    Returns:
        A deep copy of the requested entry or of the full registry. For a
        ``task_key`` that is not registered the result is None.
    """
    with _lock:
        source = _progress if task_key is None else _progress.get(task_key)
        return copy.deepcopy(source)
def reset_progress(task_key: str) -> None:
    """Replace a registered task's entry with a fresh idle one.

    Unknown keys are ignored; no entry is created for them.
    """
    with _lock:
        if task_key not in _progress:
            return
        _progress[task_key] = {
            "status": "idle",
            "stage": "",
            "current": 0,
            "total": 0,
            "message": None,
            "started_at": None,
            "updated_at": None,
            "finished_at": None,
            "trigger": None,
        }