feat: 任务进度实时展示、接口测试、暗色主题重构及多项 bug 修复
后端 - 新增 app/task_progress.py 线程安全进度注册表 - 任务改为后台线程异步执行(_run_task_background),手动触发立即返回 task_key - 6 个任务函数(summarizer/tagger/scorer/deduplicator/brief/taxonomy)循环内上报进度 - scheduler 定时任务同步上报进度(trigger=scheduled) - 新增 GET /api/tasks/progress 与 POST /api/tasks/progress/reset 接口 - 新增 POST /api/test-connection 接口连通性测试(独立短超时客户端) - 修复 ai_client/rss_client 配置在 import 时固化的 bug(改为 property 运行时读取 settings), 导致实际任务用 .env 假 key 调 LLM 401 - 修复 ai_client 对 reasoning 模型(MiniMax-M3 等)输出 <think> 块的 JSON 解析失败 - 修复 taxonomy bootstrap:LLM 超时(改用 300s 专用 client)、MiniMax 输出审查 (精简样本仅标题 + 约束生成中性类目名)、失败误报 success(改抛异常如实标记) - 修复 models.py 双外键关系映射启动崩溃(显式 foreign_keys) - 修复 main.py SPA 路由 404、ArticleOut.published_at 序列化 500 - 移除 lifespan 同步 bootstrap 阻塞启动,改由 scheduler 后台异步执行 前端 - Deep Ink 高对比度暗色主题重构,修复 Element Plus 暗色模式对比度问题 - Tasks 页面任务进度实时展示(进度条/阶段/计数/状态/触发来源)+ 1.5s 轮询 - 接口测试面板(rssKeeper / LLM 连通性 + 延迟) - 修复 nextJobs jobId 映射 bug 部署与文档 - Dockerfile 优化(BuildKit 缓存挂载、预编译 wheel、去 gcc、阿里云镜像源) - 新增 API.md 接口文档 Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
"""dataClean FastAPI 入口"""
|
||||
import logging
|
||||
import os
|
||||
import threading
|
||||
from contextlib import asynccontextmanager
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from typing import Optional, List
|
||||
@@ -8,6 +9,7 @@ from typing import Optional, List
|
||||
from fastapi import FastAPI, Depends, HTTPException, Query, Body, Security, status
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.staticfiles import StaticFiles
|
||||
from fastapi.responses import FileResponse, Response
|
||||
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
|
||||
from pydantic import BaseModel, ConfigDict
|
||||
from sqlalchemy.orm import Session
|
||||
@@ -15,7 +17,10 @@ from sqlalchemy.orm import Session
|
||||
from config import settings
|
||||
from database import init_db, get_db, SessionLocal
|
||||
from scheduler import init_scheduler, stop_scheduler, get_scheduler, get_task_lock
|
||||
from app.taxonomy import bootstrap_taxonomy, list_taxonomy, ensure_taxonomy
|
||||
from app.taxonomy import bootstrap_taxonomy, list_taxonomy
|
||||
from app.rss_client import rss_client, RSSKeeperClient
|
||||
from app.ai_client import ai_client, AIClient
|
||||
from app import task_progress
|
||||
from app.summarizer import fetch_and_summarize
|
||||
from app.tagger import tag_articles
|
||||
from app.deduplicator import deduplicate_articles
|
||||
@@ -69,15 +74,38 @@ def verify_token(credentials: Optional[HTTPAuthorizationCredentials] = Security(
|
||||
return credentials.credentials
|
||||
|
||||
|
||||
def _run_task_locked(func, db: Session):
    """Run ``func(db)`` synchronously while holding the global task lock.

    The lock is taken without blocking, so a concurrent caller is rejected
    immediately instead of queueing behind the running task.

    Args:
        func: Callable invoked with ``db`` once the lock is held.
        db: Database session handed through to ``func``.

    Returns:
        Whatever ``func(db)`` returns.

    Raises:
        HTTPException: 409 when the task lock is already held.
    """
    if get_task_lock().acquire(blocking=False):
        try:
            result = func(db)
        finally:
            # Release on both success and failure so a crashing task
            # cannot leave the lock held.
            get_task_lock().release()
        return result
    raise HTTPException(status_code=409, detail="已有任务正在执行,请稍后再试")
|
||||
def _run_task_background(task_key: str, trigger: str, fn) -> bool:
    """Submit a task to a background thread and return immediately.

    The calling (request) thread takes the global task lock without blocking
    and hands ownership of it to the worker thread, so there is no window
    between "check the lock" and "start the work". The worker opens its own
    ``SessionLocal`` session, reports progress through ``task_progress``,
    runs ``fn(db)`` and finally releases the lock.

    Args:
        task_key: Key under which progress is reported; also used in the
            worker thread's name.
        trigger: Origin of the run (e.g. ``"manual"``), stored with the
            initial progress record.
        fn: Callable invoked as ``fn(db)`` inside the worker.

    Returns:
        ``True`` when the worker was started, ``False`` when the lock is
        already held (callers translate that into HTTP 409).

    Raises:
        RuntimeError: If the worker thread cannot be started; the lock is
            released before the error propagates.
    """
    lock = get_task_lock()
    if not lock.acquire(blocking=False):
        return False  # lock is busy; the caller answers with 409

    def _worker():
        db = None
        try:
            # Session creation and the first progress report live inside the
            # try block: if either raises, the lock must still be released,
            # otherwise every later trigger would get 409 forever.
            db = SessionLocal()
            task_progress.update_progress(
                task_key, status="running", trigger=trigger,
                stage="初始化", current=0, total=0, message=None,
            )
            fn(db)
            task_progress.update_progress(
                task_key, status="success", stage="完成", message="任务执行成功"
            )
        except Exception as exc:
            logger.error("后台任务 %s 失败: %s", task_key, exc, exc_info=True)
            task_progress.update_progress(
                task_key, status="error", stage="失败", message=str(exc)[:500]
            )
        finally:
            try:
                if db is not None:
                    db.close()
            finally:
                # Nested so a failing close() cannot skip the release.
                lock.release()

    try:
        threading.Thread(target=_worker, name=f"task-{task_key}", daemon=True).start()
    except Exception:
        # The worker never ran, so nobody else will release the lock.
        lock.release()
        raise
    return True
|
||||
|
||||
|
||||
@asynccontextmanager
|
||||
@@ -92,8 +120,8 @@ async def lifespan(app: FastAPI):
|
||||
init_default_settings(db)
|
||||
# 用数据库配置覆盖全局 settings
|
||||
apply_db_settings_to_config(db)
|
||||
# 首次启动时确保 taxonomy 表存在
|
||||
ensure_taxonomy(db)
|
||||
# 注意:taxonomy 初始化交由 scheduler 的 bootstrap job 后台异步执行,
|
||||
# 避免在启动时同步调用 LLM 阻塞服务就绪(进度可在前端实时查看)。
|
||||
except Exception as exc:
|
||||
logger.error("启动初始化失败: %s", exc)
|
||||
finally:
|
||||
@@ -138,7 +166,7 @@ class ArticleOut(BaseModel):
|
||||
composite_score: float
|
||||
ai_summary: str
|
||||
is_representative: bool
|
||||
published_at: Optional[str]
|
||||
published_at: Optional[datetime]
|
||||
|
||||
model_config = ConfigDict(from_attributes=True)
|
||||
|
||||
@@ -199,6 +227,18 @@ class StatsOut(BaseModel):
|
||||
next_jobs: dict
|
||||
|
||||
|
||||
class ConnectionTestResult(BaseModel):
    """Outcome of a single connectivity probe against one upstream service."""

    # Display name of the probed service.
    name: str
    # Probe outcome marker, reported as a plain string.
    status: str
    # Measured latency in milliseconds; left as None when not measured.
    latency_ms: Optional[float] = None
    # Error text for a failed probe; None otherwise.
    error: Optional[str] = None
|
||||
|
||||
|
||||
class ConnectionTestResponse(BaseModel):
    """Response body of the connectivity test: one result per upstream."""

    # Result of the rssKeeper probe.
    rss_keeper: ConnectionTestResult
    # Result of the LLM API probe.
    llm: ConnectionTestResult
|
||||
|
||||
|
||||
# ---------- 健康检查 ----------
|
||||
|
||||
@app.get("/health")
|
||||
@@ -292,42 +332,108 @@ def get_taxonomy(kind: Optional[str] = Query(None), db: Session = Depends(get_db
|
||||
@app.post("/api/taxonomy/bootstrap")
|
||||
def trigger_taxonomy_bootstrap(
|
||||
force: bool = False,
|
||||
db: Session = Depends(get_db),
|
||||
_=Depends(verify_token),
|
||||
):
|
||||
ok = bootstrap_taxonomy(db, force=force)
|
||||
if not ok:
|
||||
return {"message": "taxonomy 已存在或初始化失败,请检查日志"}
|
||||
return {"message": "taxonomy 初始化成功"}
|
||||
def _run(session):
|
||||
ok = bootstrap_taxonomy(session, force=force)
|
||||
if not ok:
|
||||
raise RuntimeError("taxonomy 已存在或初始化失败,请检查日志")
|
||||
if not _run_task_background("bootstrap_taxonomy", "manual", _run):
|
||||
raise HTTPException(status_code=409, detail="已有任务正在执行,请稍后再试")
|
||||
return {"message": "taxonomy 初始化已开始", "task_key": "bootstrap_taxonomy"}
|
||||
|
||||
|
||||
# ---------- 手动触发任务接口 ----------
|
||||
|
||||
# ---------- 手动触发任务接口(后台执行,立即返回,前端轮询进度) ----------
|
||||
|
||||
@app.post("/api/tasks/summarize")
|
||||
def task_summarize(db: Session = Depends(get_db), _=Depends(verify_token)):
|
||||
stats = _run_task_locked(lambda session: fetch_and_summarize(session, hours=24, limit=200), db)
|
||||
return {"message": "摘要任务完成", "stats": stats}
|
||||
def task_summarize(_=Depends(verify_token)):
|
||||
def _run(session):
|
||||
fetch_and_summarize(session, hours=24, limit=200)
|
||||
if not _run_task_background("summarize", "manual", _run):
|
||||
raise HTTPException(status_code=409, detail="已有任务正在执行,请稍后再试")
|
||||
return {"message": "摘要任务已开始", "task_key": "summarize"}
|
||||
|
||||
|
||||
@app.post("/api/tasks/tag-score-dedup")
|
||||
def task_tag_score_dedup(db: Session = Depends(get_db), _=Depends(verify_token)):
|
||||
def task_tag_score_dedup(_=Depends(verify_token)):
|
||||
def _run(session):
|
||||
tag_articles(session)
|
||||
today = datetime.now(timezone.utc).strftime("%Y-%m-%d")
|
||||
deduplicate_articles(session, date_str=today)
|
||||
score_articles(session, update_duplication=True)
|
||||
return None
|
||||
_run_task_locked(_run, db)
|
||||
return {"message": "分类/去重/打分任务完成"}
|
||||
if not _run_task_background("tag_score_dedup", "manual", _run):
|
||||
raise HTTPException(status_code=409, detail="已有任务正在执行,请稍后再试")
|
||||
return {"message": "分类/去重/打分任务已开始", "task_key": "tag_score_dedup"}
|
||||
|
||||
|
||||
@app.post("/api/tasks/brief")
|
||||
def task_brief(db: Session = Depends(get_db), _=Depends(verify_token)):
|
||||
def task_brief(_=Depends(verify_token)):
|
||||
def _run(session):
|
||||
today = datetime.now(timezone.utc).strftime("%Y-%m-%d")
|
||||
return generate_daily_brief(session, date_str=today, force=True)
|
||||
data = _run_task_locked(_run, db)
|
||||
return {"message": "简报生成任务完成", "data": data}
|
||||
generate_daily_brief(session, date_str=today, force=True)
|
||||
if not _run_task_background("generate_daily_brief", "manual", _run):
|
||||
raise HTTPException(status_code=409, detail="已有任务正在执行,请稍后再试")
|
||||
return {"message": "简报生成任务已开始", "task_key": "generate_daily_brief"}
|
||||
|
||||
|
||||
@app.get("/api/tasks/progress")
def get_task_progress(_=Depends(verify_token)):
    """Return the current progress of all tasks (polled by the frontend)."""
    snapshot = task_progress.get_progress()
    return snapshot
|
||||
|
||||
|
||||
@app.post("/api/tasks/progress/reset")
def reset_task_progress(task_key: str = Query(...), _=Depends(verify_token)):
    """Reset the progress display of the given task back to idle.

    Args:
        task_key: Required query parameter naming the task to reset.
    """
    task_progress.reset_progress(task_key)
    response = {"message": "已重置"}
    return response
|
||||
|
||||
|
||||
# ---------- 接口连通性测试 ----------
|
||||
|
||||
def _probe_connection(name: str, probe) -> dict:
    """Run one connectivity probe and describe its outcome.

    Args:
        name: Display name placed in the result.
        probe: Zero-argument callable that performs the request; any
            exception it raises marks the probe as failed.

    Returns:
        A dict with ``name``, ``status`` (``"ok"`` or ``"error"``),
        ``latency_ms`` (rounded to 0.1 ms, ``None`` on failure) and
        ``error`` (exception text capped at 200 characters, ``None`` on
        success).
    """
    import time  # local import keeps the helper independent of module-level imports

    started = time.monotonic()
    try:
        probe()
    except Exception as exc:
        # A failed probe is a normal result here, not a server error.
        return {
            "name": name,
            "status": "error",
            "latency_ms": None,
            "error": str(exc)[:200],
        }
    return {
        "name": name,
        "status": "ok",
        "latency_ms": round((time.monotonic() - started) * 1000, 1),
        "error": None,
    }


@app.post("/api/test-connection", response_model=ConnectionTestResponse)
def test_connection(_=Depends(verify_token)):
    """Test connectivity to rssKeeper and the LLM API.

    Both probes use dedicated short-timeout clients so that a dead upstream
    cannot stall the request for long. Client construction happens inside the
    probe, so configuration errors are reported as a failed probe as well.

    Returns:
        A mapping with ``rss_keeper`` and ``llm`` results, each carrying the
        status, latency and error text of its probe.
    """

    def _check_rss_keeper():
        # Throwaway client with a short timeout instead of the shared one.
        client = RSSKeeperClient(base_url=settings.RSSKEEPER_BASE_URL, timeout=10)
        client._get("/api/v1/external/feeds", params={"limit": 1})

    def _check_llm():
        # Short timeout and no retries: one failed attempt is the answer.
        client = AIClient(timeout=10, max_retries=0)
        client.chat_completion(
            system_prompt="You are a connectivity test.",
            user_prompt="Reply with exactly: ok",
            temperature=0.0,
        )

    return {
        "rss_keeper": _probe_connection("rssKeeper", _check_rss_keeper),
        "llm": _probe_connection("LLM", _check_llm),
    }
|
||||
|
||||
|
||||
# ---------- 配置管理接口 ----------
|
||||
@@ -408,7 +514,7 @@ def get_stats(db: Session = Depends(get_db)):
|
||||
}
|
||||
|
||||
|
||||
# ---------- 静态文件托管(生产环境) ----------
|
||||
# ---------- 静态文件托管(生产环境 SPA) ----------
|
||||
|
||||
static_dir = os.path.join(os.path.dirname(__file__), "static")
|
||||
if not os.path.isdir(static_dir):
|
||||
@@ -418,7 +524,31 @@ if not os.path.isdir(static_dir):
|
||||
static_dir = frontend_dist
|
||||
|
||||
if os.path.isdir(static_dir):
|
||||
app.mount("/", StaticFiles(directory=static_dir, html=True), name="static")
|
||||
# 静态资源(JS/CSS/图片等)走 /assets 子路径挂载
|
||||
assets_dir = os.path.join(static_dir, "assets")
|
||||
if os.path.isdir(assets_dir):
|
||||
app.mount("/assets", StaticFiles(directory=assets_dir), name="assets")
|
||||
|
||||
# SPA favicon、vite.svg 等根级静态文件
|
||||
# Imported ahead of the definition: FastAPI only injects the request object
# when the parameter is annotated with Request. An unannotated ``request``
# is treated as a required query parameter and the route answers 422.
from starlette.requests import Request


@app.get("/favicon.ico")
@app.get("/vite.svg")
async def serve_static_root(request: Request):
    """Serve root-level static files (favicon, vite.svg) from ``static_dir``.

    Args:
        request: Incoming request; only the last component of its URL path
            is used to pick the file.

    Returns:
        The file as a ``FileResponse`` when it exists in ``static_dir``,
        otherwise an empty 404 response.
    """
    # basename() keeps the lookup confined to static_dir itself.
    filename = os.path.basename(request.url.path)
    file_path = os.path.join(static_dir, filename)
    if os.path.isfile(file_path):
        return FileResponse(file_path)
    return Response(status_code=404)
|
||||
|
||||
# 所有未匹配的路由 → 返回 index.html(SPA 客户端路由)
|
||||
@app.get("/{full_path:path}")
async def serve_spa(full_path: str):
    """Catch-all route: serve a static file if one matches, else the SPA shell.

    Args:
        full_path: The unmatched request path, without the leading slash.

    Returns:
        A ``FileResponse`` for the matching file inside ``static_dir``, or
        for ``index.html`` so the client-side router can handle the path.
    """
    root = os.path.abspath(static_dir)
    if full_path:
        # full_path is attacker-controlled. An absolute value ("//etc/passwd"
        # arrives here as "/etc/passwd") makes os.path.join discard the root,
        # and ".." segments climb out of it. Normalise the joined path and
        # require it to stay below the static root before touching the disk.
        candidate = os.path.abspath(os.path.join(root, full_path))
        inside_root = candidate.startswith(root.rstrip(os.sep) + os.sep)
        if inside_root and os.path.isfile(candidate):
            return FileResponse(candidate)
    # Anything else is handed to the frontend router via index.html.
    return FileResponse(os.path.join(static_dir, "index.html"))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
Reference in New Issue
Block a user