Files
congsh 778ccefb22 feat: 任务进度实时展示、接口测试、暗色主题重构及多项 bug 修复
后端
- 新增 app/task_progress.py 线程安全进度注册表
- 任务改为后台线程异步执行(_run_task_background),手动触发立即返回 task_key
- 6 个任务函数(summarizer/tagger/scorer/deduplicator/brief/taxonomy)循环内上报进度
- scheduler 定时任务同步上报进度(trigger=scheduled)
- 新增 GET /api/tasks/progress 与 POST /api/tasks/progress/reset 接口
- 新增 POST /api/test-connection 接口连通性测试(独立短超时客户端)
- 修复 ai_client/rss_client 配置在 import 时固化的 bug(改为 property 运行时读取 settings),
  导致实际任务用 .env 假 key 调 LLM 401
- 修复 ai_client 对 reasoning 模型(MiniMax-M3 等)输出 <think> 块的 JSON 解析失败
- 修复 taxonomy bootstrap:LLM 超时(改用 300s 专用 client)、MiniMax 输出审查
  (精简样本仅标题 + 约束生成中性类目名)、失败误报 success(改抛异常如实标记)
- 修复 models.py 双外键关系映射启动崩溃(显式 foreign_keys)
- 修复 main.py SPA 路由 404、ArticleOut.published_at 序列化 500
- 移除 lifespan 同步 bootstrap 阻塞启动,改由 scheduler 后台异步执行

前端
- Deep Ink 高对比度暗色主题重构,修复 Element Plus 暗色模式对比度问题
- Tasks 页面任务进度实时展示(进度条/阶段/计数/状态/触发来源)+ 1.5s 轮询
- 接口测试面板(rssKeeper / LLM 连通性 + 延迟)
- 修复 nextJobs jobId 映射 bug

部署与文档
- Dockerfile 优化(BuildKit 缓存挂载、预编译 wheel、去 gcc、阿里云镜像源)
- 新增 API.md 接口文档

Co-Authored-By: Claude <noreply@anthropic.com>
2026-06-14 15:14:40 +08:00

176 lines
6.4 KiB
Python

"""Daily brief generation."""
import json
import logging
from datetime import datetime, timedelta, timezone
from pathlib import Path
from typing import Any, Dict, List, Optional

from sqlalchemy.orm import Session

from app.task_progress import update_progress
from config import settings
from models import EnrichedArticle, DailyBrief
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
def _format_article(article: EnrichedArticle) -> Dict[str, Any]:
    """Convert an enriched article into the plain dict used as a brief entry.

    Text fields fall back to an empty string and ``tags`` to an empty list
    when unset. The summary prefers the AI summary over the original one.
    Scores are copied through unchanged, and ``published_at`` is rendered with
    ``isoformat()`` (``None`` when the article has no publication time).
    """
    entry: Dict[str, Any] = {
        "id": article.id,
        "rk_article_id": article.rk_article_id,
    }

    # Optional text attributes: normalise falsy values to "".
    for field in ("title", "link", "author", "feed_title"):
        entry[field] = getattr(article, field) or ""

    entry["summary"] = article.ai_summary or article.original_summary or ""
    entry["tags"] = article.tags or []

    # Scores are passed along verbatim, without any fallback.
    for field in (
        "heat_score",
        "importance_score",
        "duplication_score",
        "composite_score",
    ):
        entry[field] = getattr(article, field)

    published = article.published_at
    entry["published_at"] = published.isoformat() if published else None
    return entry
def _build_markdown(date_str: str, by_category: Dict[str, List[Dict[str, Any]]], stats: Dict[str, int]) -> str:
    """Render the daily brief as a Markdown document.

    Args:
        date_str: Date shown in the document title.
        by_category: Mapping of category name to its list of article entries.
            Each entry must provide ``title``, ``feed_title``, ``tags``,
            ``summary``, ``link`` and the four ``*_score`` keys; ``author`` is
            optional.
        stats: Must contain ``total_articles`` and ``unique_articles``.

    Returns:
        The Markdown text. Categories appear sorted by name; entries keep the
        order they have in ``by_category``.
    """
    lines = [
        f"# RSS 每日简报 ({date_str})",
        "",
        f"- 去重前文章数: {stats['total_articles']}",
        f"- 去重后文章数: {stats['unique_articles']}",
        f"- 生成分类数: {len(by_category)}",
        "",
        "---",
        "",
    ]
    for category in sorted(by_category):
        lines.append(f"## {category}")
        lines.append("")
        for item in by_category[category]:
            # Missing/empty tags render as an empty tag line.
            tags = " ".join(f"`{t}`" for t in item["tags"] or [])
            # A score may be None (e.g. an article that has not been scored);
            # formatting None with ":.1f" would raise TypeError.
            heat = _format_score(item["heat_score"])
            importance = _format_score(item["importance_score"])
            duplication = _format_score(item["duplication_score"])
            composite = _format_score(item["composite_score"])

            lines.append(f"### {item['title']}")
            lines.append(f"- 来源: {item['feed_title']} | 作者: {item.get('author') or '未知'}")
            lines.append(f"- 标签: {tags}")
            lines.append(f"- 热度: {heat} | 重要性: {importance} | 重复度: {duplication} | 综合: {composite}")
            if item["summary"]:
                lines.append(f"- 摘要: {item['summary']}")
            if item["link"]:
                lines.append(f"- [阅读原文]({item['link']})")
            lines.append("")
    return "\n".join(lines)


def _format_score(value: Any) -> str:
    """Format a score with one decimal place, or ``-`` when it is ``None``."""
    return "-" if value is None else f"{value:.1f}"
def generate_daily_brief(db: Session, date_str: Optional[str] = None, force: bool = False) -> Dict[str, Any]:
    """Generate (or regenerate) the daily brief for one day.

    Side effects when a brief is built: writes
    ``<brief_output_dir_path>/<date_str>/daily-brief.md``, sets ``brief_date``
    on every selected article, inserts or updates the ``DailyBrief`` row and
    commits the session. Progress is reported through ``update_progress``
    under the task key ``generate_daily_brief``.

    Args:
        db: SQLAlchemy session used for all reads and for the final commit.
        date_str: Day to process, formatted ``YYYY-MM-DD``. Defaults to the
            current UTC date.
        force: Rebuild and overwrite the brief even if one is already stored
            for ``date_str``.

    Returns:
        A dict with ``date``, ``total_articles``, ``unique_articles``,
        ``by_category`` and ``markdown_path``. When a stored brief is reused
        (it exists and ``force`` is false) the values come from that row.

    Raises:
        ValueError: If a brief has to be built and ``date_str`` does not parse
            with ``%Y-%m-%d``.
    """
    if date_str is None:
        date_str = datetime.now(timezone.utc).strftime("%Y-%m-%d")

    # Reuse the stored brief unless the caller explicitly forces a rebuild.
    existing = db.query(DailyBrief).filter(DailyBrief.brief_date == date_str).first()
    if existing and not force:
        logger.info("日期 %s 简报已存在,跳过生成", date_str)
        update_progress("generate_daily_brief", status="running", stage="简报已存在", current=0, total=0, message="简报已存在,跳过生成")
        return {
            "date": date_str,
            "total_articles": existing.total_articles,
            "unique_articles": existing.unique_articles,
            # Included so this result has the same keys as a fresh build.
            "by_category": existing.by_category,
            "markdown_path": existing.markdown_path,
        }

    # NOTE(review): these bounds are naive datetimes; this presumably relies on
    # fetched_at being stored as naive UTC — confirm against the model.
    day_start = datetime.strptime(date_str, "%Y-%m-%d")
    day_end = day_start + timedelta(days=1)

    update_progress("generate_daily_brief", status="running", stage="加载文章", current=0, total=0)
    # Every article fetched within [day_start, day_end).
    query = (
        db.query(EnrichedArticle)
        .filter(
            EnrichedArticle.fetched_at >= day_start,
            EnrichedArticle.fetched_at < day_end,
        )
    )
    # Keep only group representatives and articles outside any duplicate
    # group, best composite score first.
    representative_articles = (
        query.filter(
            EnrichedArticle.is_representative.is_(True)
            | EnrichedArticle.duplicate_group_id.is_(None)
        )
        .order_by(EnrichedArticle.composite_score.desc())
        .all()
    )

    # Group by category; the score ordering is preserved within each group.
    update_progress("generate_daily_brief", status="running", stage="按分类整理", current=0, total=0)
    by_category: Dict[str, List[Dict[str, Any]]] = {}
    for art in representative_articles:
        by_category.setdefault(art.category or "未分类", []).append(_format_article(art))

    # Keep only the TOP N entries of each category.
    top_n = settings.BRIEF_TOP_N_PER_CATEGORY
    for cat in by_category:
        by_category[cat] = by_category[cat][:top_n]

    total_before_dedup = query.count()
    # NOTE(review): this counts the entries kept after the TOP-N cut, so it can
    # be lower than the number of deduplicated articles — confirm that this is
    # the intended meaning of unique_articles.
    unique_count = sum(len(items) for items in by_category.values())
    stats = {
        "total_articles": total_before_dedup,
        "unique_articles": unique_count,
    }

    # Write the Markdown file.
    update_progress("generate_daily_brief", status="running", stage="生成 Markdown", current=0, total=0)
    output_dir = settings.brief_output_dir_path / date_str
    output_dir.mkdir(parents=True, exist_ok=True)
    markdown_path = output_dir / "daily-brief.md"
    markdown_content = _build_markdown(date_str, by_category, stats)
    markdown_path.write_text(markdown_content, encoding="utf-8")

    # Stamp the brief date on every selected article (including those dropped
    # by the TOP-N cut).
    update_progress("generate_daily_brief", status="running", stage="保存简报", current=0, total=0)
    for art in representative_articles:
        art.brief_date = date_str

    # Persist the brief: update the existing row on a forced rebuild,
    # otherwise insert a new one.
    brief_data = {
        "date": date_str,
        "total_articles": stats["total_articles"],
        "unique_articles": stats["unique_articles"],
        "by_category": by_category,
        "markdown_path": str(markdown_path),
    }
    if existing:
        existing.total_articles = stats["total_articles"]
        existing.unique_articles = stats["unique_articles"]
        existing.by_category = by_category
        existing.markdown_path = str(markdown_path)
        existing.updated_at = datetime.now(timezone.utc)
    else:
        db.add(
            DailyBrief(
                brief_date=date_str,
                total_articles=stats["total_articles"],
                unique_articles=stats["unique_articles"],
                by_category=by_category,
                markdown_path=str(markdown_path),
            )
        )
    db.commit()
    logger.info("简报生成完成: 日期=%s, 去重前=%d, 去重后=%d", date_str, stats["total_articles"], stats["unique_articles"])
    return brief_data