feat: 任务进度实时展示、接口测试、暗色主题重构及多项 bug 修复

后端
- 新增 app/task_progress.py 线程安全进度注册表
- 任务改为后台线程异步执行(_run_task_background),手动触发立即返回 task_key
- 6 个任务函数(summarizer/tagger/scorer/deduplicator/brief/taxonomy)循环内上报进度
- scheduler 定时任务同步上报进度(trigger=scheduled)
- 新增 GET /api/tasks/progress 与 POST /api/tasks/progress/reset 接口
- 新增 POST /api/test-connection 接口连通性测试(独立短超时客户端)
- 修复 ai_client/rss_client 配置在 import 时固化、导致实际任务用 .env 假 key
  调用 LLM 返回 401 的 bug(改为通过 property 在运行时读取 settings)
- 修复 ai_client 对 reasoning 模型(MiniMax-M3 等)输出 <think> 块的 JSON 解析失败
- 修复 taxonomy bootstrap:LLM 超时(改用 300s 专用 client)、MiniMax 输出审查
  (精简样本仅标题 + 约束生成中性类目名)、失败误报 success(改抛异常如实标记)
- 修复 models.py 双外键关系映射启动崩溃(显式 foreign_keys)
- 修复 main.py SPA 路由 404、ArticleOut.published_at 序列化 500
- 移除 lifespan 同步 bootstrap 阻塞启动,改由 scheduler 后台异步执行

前端
- Deep Ink 高对比度暗色主题重构,修复 Element Plus 暗色模式对比度问题
- Tasks 页面任务进度实时展示(进度条/阶段/计数/状态/触发来源)+ 1.5s 轮询
- 接口测试面板(rssKeeper / LLM 连通性 + 延迟)
- 修复 nextJobs jobId 映射 bug

部署与文档
- Dockerfile 优化(BuildKit 缓存挂载、预编译 wheel、去 gcc、阿里云镜像源)
- 新增 API.md 接口文档

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
congsh
2026-06-14 15:14:40 +08:00
parent bae47a2411
commit 778ccefb22
24 changed files with 1853 additions and 312 deletions
+161 -31
View File
@@ -1,6 +1,7 @@
"""dataClean FastAPI 入口"""
import logging
import os
import threading
from contextlib import asynccontextmanager
from datetime import datetime, timedelta, timezone
from typing import Optional, List
@@ -8,6 +9,7 @@ from typing import Optional, List
from fastapi import FastAPI, Depends, HTTPException, Query, Body, Security, status
from fastapi.middleware.cors import CORSMiddleware
from fastapi.staticfiles import StaticFiles
from fastapi.responses import FileResponse, Response
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
from pydantic import BaseModel, ConfigDict
from sqlalchemy.orm import Session
@@ -15,7 +17,10 @@ from sqlalchemy.orm import Session
from config import settings
from database import init_db, get_db, SessionLocal
from scheduler import init_scheduler, stop_scheduler, get_scheduler, get_task_lock
from app.taxonomy import bootstrap_taxonomy, list_taxonomy, ensure_taxonomy
from app.taxonomy import bootstrap_taxonomy, list_taxonomy
from app.rss_client import rss_client, RSSKeeperClient
from app.ai_client import ai_client, AIClient
from app import task_progress
from app.summarizer import fetch_and_summarize
from app.tagger import tag_articles
from app.deduplicator import deduplicate_articles
@@ -69,15 +74,38 @@ def verify_token(credentials: Optional[HTTPAuthorizationCredentials] = Security(
return credentials.credentials
def _run_task_locked(func, db: Session):
    """Run ``func(db)`` synchronously while holding the global task lock.

    The lock is requested without blocking. If it is already held, an
    HTTP 409 is raised instead of waiting. The lock is released whether
    ``func`` returns or raises.
    """
    # Guard clause: refuse right away when another task owns the lock.
    if not get_task_lock().acquire(blocking=False):
        raise HTTPException(status_code=409, detail="已有任务正在执行,请稍后再试")
    try:
        outcome = func(db)
    finally:
        get_task_lock().release()
    return outcome
def _run_task_background(task_key: str, trigger: str, fn) -> bool:
    """Submit a task to a background thread and return immediately.

    The calling thread takes the global task lock without blocking and hands
    ownership to the worker thread, which releases it once the task is done.
    Because the lock is taken before the thread is spawned, there is no gap
    between "check that nothing is running" and "start running".

    Args:
        task_key: Key under which progress is reported to ``task_progress``.
        trigger: Trigger source stored with the progress entry.
        fn: Callable invoked as ``fn(db)`` with a session owned by the worker.

    Returns:
        True when the worker was started, False when the lock is already held
        (callers turn that into an HTTP 409).

    Raises:
        Exception: Whatever ``Thread.start()`` raises if the worker cannot be
            started; the lock is released before re-raising.
    """
    task_lock = get_task_lock()
    if not task_lock.acquire(blocking=False):
        return False  # lock is busy; the caller answers with 409

    def _worker():
        db = None
        try:
            # Session creation and the first progress report live inside the
            # try block so a failure here still reaches ``finally`` and the
            # lock is never left held.
            db = SessionLocal()
            task_progress.update_progress(
                task_key, status="running", trigger=trigger,
                stage="初始化", current=0, total=0, message=None,
            )
            fn(db)
            task_progress.update_progress(
                task_key, status="success", stage="完成", message="任务执行成功"
            )
        except Exception as exc:
            logger.error("后台任务 %s 失败: %s", task_key, exc, exc_info=True)
            task_progress.update_progress(
                task_key, status="error", stage="失败", message=str(exc)[:500]
            )
        finally:
            try:
                if db is not None:
                    db.close()
            finally:
                # Release even if closing the session fails.
                task_lock.release()

    try:
        threading.Thread(target=_worker, name=f"task-{task_key}", daemon=True).start()
    except Exception:
        # The worker never ran, so nobody else would release the lock.
        task_lock.release()
        raise
    return True
@asynccontextmanager
@@ -92,8 +120,8 @@ async def lifespan(app: FastAPI):
init_default_settings(db)
# 用数据库配置覆盖全局 settings
apply_db_settings_to_config(db)
# 首次启动时确保 taxonomy 表存在
ensure_taxonomy(db)
# 注意:taxonomy 初始化交由 scheduler 的 bootstrap job 后台异步执行,
# 避免在启动时同步调用 LLM 阻塞服务就绪(进度可在前端实时查看)。
except Exception as exc:
logger.error("启动初始化失败: %s", exc)
finally:
@@ -138,7 +166,7 @@ class ArticleOut(BaseModel):
composite_score: float
ai_summary: str
is_representative: bool
published_at: Optional[str]
published_at: Optional[datetime]
model_config = ConfigDict(from_attributes=True)
@@ -199,6 +227,18 @@ class StatsOut(BaseModel):
next_jobs: dict
class ConnectionTestResult(BaseModel):
    # Outcome of probing one upstream service.
    # Display name of the probed service ("rssKeeper" / "LLM" in this file).
    name: str
    # "ok" on success, "error" otherwise.
    status: str
    # Round-trip time in milliseconds; only filled in when the probe succeeds.
    latency_ms: Optional[float] = None
    # Truncated exception text; only filled in when the probe fails.
    error: Optional[str] = None
class ConnectionTestResponse(BaseModel):
    # Response body of POST /api/test-connection: one result per upstream.
    rss_keeper: ConnectionTestResult
    llm: ConnectionTestResult
# ---------- 健康检查 ----------
@app.get("/health")
@@ -292,42 +332,108 @@ def get_taxonomy(kind: Optional[str] = Query(None), db: Session = Depends(get_db
@app.post("/api/taxonomy/bootstrap")
def trigger_taxonomy_bootstrap(
    force: bool = False,
    db: Session = Depends(get_db),
    _=Depends(verify_token),
):
    """Start taxonomy bootstrap in the background and return at once.

    The work runs through ``_run_task_background`` under the task key
    ``bootstrap_taxonomy``; the response only confirms that it was started.

    Args:
        force: Passed through to ``bootstrap_taxonomy``.
        db: Request-scoped session. It is not used by the handler, because
            the background worker opens its own session.

    Raises:
        HTTPException: 409 when another task already holds the task lock.
    """
    def _run(session):
        # A falsy result is turned into an exception so the progress entry is
        # marked as failed rather than successful.
        if not bootstrap_taxonomy(session, force=force):
            raise RuntimeError("taxonomy 已存在或初始化失败,请检查日志")

    if not _run_task_background("bootstrap_taxonomy", "manual", _run):
        raise HTTPException(status_code=409, detail="已有任务正在执行,请稍后再试")
    return {"message": "taxonomy 初始化已开始", "task_key": "bootstrap_taxonomy"}
# ---------- 手动触发任务接口 ----------
# ---------- 手动触发任务接口(后台执行,立即返回,前端轮询进度) ----------
@app.post("/api/tasks/summarize")
def task_summarize(_=Depends(verify_token)):
    """Start the summarize task in the background and return at once.

    Progress is reported under the task key ``summarize``.

    Raises:
        HTTPException: 409 when another task already holds the task lock.
    """
    def _run(session):
        fetch_and_summarize(session, hours=24, limit=200)

    if not _run_task_background("summarize", "manual", _run):
        raise HTTPException(status_code=409, detail="已有任务正在执行,请稍后再试")
    return {"message": "摘要任务已开始", "task_key": "summarize"}
@app.post("/api/tasks/tag-score-dedup")
def task_tag_score_dedup(_=Depends(verify_token)):
    """Start the tag / dedup / score pipeline in the background.

    The three steps run in sequence inside one background task, reported
    under the task key ``tag_score_dedup``.

    Raises:
        HTTPException: 409 when another task already holds the task lock.
    """
    def _run(session):
        tag_articles(session)
        # Deduplication is scoped to the current UTC date.
        today = datetime.now(timezone.utc).strftime("%Y-%m-%d")
        deduplicate_articles(session, date_str=today)
        score_articles(session, update_duplication=True)

    if not _run_task_background("tag_score_dedup", "manual", _run):
        raise HTTPException(status_code=409, detail="已有任务正在执行,请稍后再试")
    return {"message": "分类/去重/打分任务已开始", "task_key": "tag_score_dedup"}
@app.post("/api/tasks/brief")
def task_brief(_=Depends(verify_token)):
    """Start generation of today's brief in the background.

    The brief is generated for the current UTC date with ``force=True`` and
    reported under the task key ``generate_daily_brief``.

    Raises:
        HTTPException: 409 when another task already holds the task lock.
    """
    def _run(session):
        today = datetime.now(timezone.utc).strftime("%Y-%m-%d")
        generate_daily_brief(session, date_str=today, force=True)

    if not _run_task_background("generate_daily_brief", "manual", _run):
        raise HTTPException(status_code=409, detail="已有任务正在执行,请稍后再试")
    return {"message": "简报生成任务已开始", "task_key": "generate_daily_brief"}
@app.get("/api/tasks/progress")
def get_task_progress(_=Depends(verify_token)):
    """Return the live progress of all tasks (polled by the frontend)."""
    snapshot = task_progress.get_progress()
    return snapshot
@app.post("/api/tasks/progress/reset")
def reset_task_progress(task_key: str = Query(...), _=Depends(verify_token)):
    """Reset the displayed progress of the given task to idle."""
    task_progress.reset_progress(task_key)
    response = {"message": "已重置"}
    return response
# ---------- 接口连通性测试 ----------
def _probe_connection(name: str, probe) -> dict:
    """Run one connectivity probe and describe the outcome.

    Args:
        name: Display name stored in the result.
        probe: Zero-argument callable; any exception it raises marks the
            probe as failed.

    Returns:
        A dict with ``name``, ``status`` ("ok" / "error"), ``latency_ms``
        (set only on success) and ``error`` (set only on failure).
    """
    import time

    result = {"name": name, "status": "error", "latency_ms": None, "error": None}
    started = time.monotonic()
    try:
        probe()
    except Exception as exc:
        # Some exceptions carry no message; fall back to the class name so
        # the caller never sees an empty error string.
        result["error"] = (str(exc) or type(exc).__name__)[:200]
    else:
        result["status"] = "ok"
        result["latency_ms"] = round((time.monotonic() - started) * 1000, 1)
    return result


@app.post("/api/test-connection", response_model=ConnectionTestResponse)
def test_connection(_=Depends(verify_token)):
    """Probe rssKeeper and the LLM API; report status and latency for each.

    Both probes use dedicated clients with a 10 second timeout (and no
    retries for the LLM) so the request cannot hang for long. A failing probe
    is reported in the response body rather than raised.
    """
    def _probe_rss():
        client = RSSKeeperClient(base_url=settings.RSSKEEPER_BASE_URL, timeout=10)
        client._get("/api/v1/external/feeds", params={"limit": 1})

    def _probe_llm():
        client = AIClient(timeout=10, max_retries=0)
        client.chat_completion(
            system_prompt="You are a connectivity test.",
            user_prompt="Reply with exactly: ok",
            temperature=0.0,
        )

    return {
        "rss_keeper": _probe_connection("rssKeeper", _probe_rss),
        "llm": _probe_connection("LLM", _probe_llm),
    }
# ---------- 配置管理接口 ----------
@@ -408,7 +514,7 @@ def get_stats(db: Session = Depends(get_db)):
}
# ---------- 静态文件托管(生产环境) ----------
# ---------- 静态文件托管(生产环境 SPA) ----------
static_dir = os.path.join(os.path.dirname(__file__), "static")
if not os.path.isdir(static_dir):
@@ -418,7 +524,31 @@ if not os.path.isdir(static_dir):
static_dir = frontend_dist
if os.path.isdir(static_dir):
    # Imported here because it is only needed when static hosting is enabled.
    from starlette.requests import Request

    # Canonical root, resolved once, used for the containment check below.
    static_root = os.path.realpath(static_dir)

    def _resolve_static_file(relative_path: str) -> Optional[str]:
        """Return the real path of a file inside the static root, else None.

        Rejects anything that resolves outside the static root (``..``
        segments, absolute paths, symlinks pointing elsewhere) as well as
        paths the OS cannot represent.
        """
        try:
            candidate = os.path.realpath(os.path.join(static_root, relative_path))
            if os.path.commonpath([static_root, candidate]) != static_root:
                return None
        except ValueError:
            # Raised for embedded NUL bytes or paths on different drives.
            return None
        return candidate if os.path.isfile(candidate) else None

    # Static assets (JS/CSS/images) are mounted under the /assets sub-path.
    # A catch-all mount on "/" is deliberately not used: it would be matched
    # before the routes registered below.
    assets_dir = os.path.join(static_dir, "assets")
    if os.path.isdir(assets_dir):
        app.mount("/assets", StaticFiles(directory=assets_dir), name="assets")

    # Root-level static files of the SPA (favicon, vite.svg).
    @app.get("/favicon.ico")
    @app.get("/vite.svg")
    async def serve_static_root(request: Request):
        """Serve a root-level static file, or 404 when it does not exist."""
        # The ``Request`` annotation is required: without it FastAPI treats
        # the parameter as a query string field instead of the request.
        filename = os.path.basename(str(request.url.path))
        file_path = os.path.join(static_dir, filename)
        if os.path.isfile(file_path):
            return FileResponse(file_path)
        return Response(status_code=404)

    # Every otherwise unmatched GET route falls back to index.html so the
    # client-side router can handle it.
    @app.get("/{full_path:path}")
    async def serve_spa(full_path: str):
        """Serve a file from the static root, falling back to index.html."""
        # The URL path is untrusted; only serve files that really live
        # inside the static root.
        file_path = _resolve_static_file(full_path) if full_path else None
        if file_path is not None:
            return FileResponse(file_path)
        return FileResponse(os.path.join(static_dir, "index.html"))
if __name__ == "__main__":