Files
dataClean/models.py
T
congsh 778ccefb22 feat: 任务进度实时展示、接口测试、暗色主题重构及多项 bug 修复
后端
- 新增 app/task_progress.py 线程安全进度注册表
- 任务改为后台线程异步执行(_run_task_background),手动触发立即返回 task_key
- 6 个任务函数(summarizer/tagger/scorer/deduplicator/brief/taxonomy)循环内上报进度
- scheduler 定时任务同步上报进度(trigger=scheduled)
- 新增 GET /api/tasks/progress 与 POST /api/tasks/progress/reset 接口
- 新增 POST /api/test-connection 接口连通性测试(独立短超时客户端)
- 修复 ai_client/rss_client 配置在 import 时固化的 bug(该 bug 导致实际任务使用 .env 中的假 key 调用 LLM 并返回 401;
  现改为通过 property 在运行时读取 settings)
- 修复 ai_client 对 reasoning 模型(MiniMax-M3 等)输出 <think> 块的 JSON 解析失败
- 修复 taxonomy bootstrap:LLM 超时(改用 300s 专用 client)、MiniMax 输出审查
  (精简样本仅标题 + 约束生成中性类目名)、失败误报 success(改抛异常如实标记)
- 修复 models.py 双外键关系映射启动崩溃(显式 foreign_keys)
- 修复 main.py SPA 路由 404、ArticleOut.published_at 序列化 500
- 移除 lifespan 同步 bootstrap 阻塞启动,改由 scheduler 后台异步执行

前端
- Deep Ink 高对比度暗色主题重构,修复 Element Plus 暗色模式对比度问题
- Tasks 页面任务进度实时展示(进度条/阶段/计数/状态/触发来源)+ 1.5s 轮询
- 接口测试面板(rssKeeper / LLM 连通性 + 延迟)
- 修复 nextJobs jobId 映射 bug

部署与文档
- Dockerfile 优化(BuildKit 缓存挂载、预编译 wheel、去 gcc、阿里云镜像源)
- 新增 API.md 接口文档

Co-Authored-By: Claude <noreply@anthropic.com>
2026-06-14 15:14:40 +08:00

110 lines
4.1 KiB
Python

"""SQLAlchemy 数据模型"""
from datetime import datetime, timezone
from sqlalchemy import Column, Integer, String, Text, Boolean, DateTime, Float, ForeignKey, JSON
from sqlalchemy.orm import relationship
from database import Base
def _utc_now():
return datetime.now(timezone.utc)
class EnrichedArticle(Base):
    """Processed article: stores the AI summary, category, tags, scores and dedup info."""

    __tablename__ = "articles_enriched"

    # --- Identity ---------------------------------------------------------
    id = Column(Integer, primary_key=True, index=True)
    # Required and unique; presumably the article's ID in the upstream
    # source system (TODO confirm against the ingest code).
    rk_article_id = Column(Integer, unique=True, nullable=False, index=True)

    # --- Source metadata --------------------------------------------------
    title = Column(String(1024), default="", index=True)
    link = Column(String(2048), default="", index=True)
    feed_id = Column(Integer, nullable=False, index=True)
    feed_title = Column(String(512), default="")
    feed_category = Column(String(128), default="")
    author = Column(String(256), default="")
    published_at = Column(DateTime, nullable=True, index=True)
    # NOTE(review): _utc_now returns an aware datetime while the column is a
    # plain DateTime (no timezone=True); whether tzinfo survives a round trip
    # depends on the database backend - confirm.
    fetched_at = Column(DateTime, default=_utc_now, index=True)

    # --- Content ----------------------------------------------------------
    original_summary = Column(Text, default="")
    content = Column(Text, default="")

    # --- AI enrichment ----------------------------------------------------
    ai_summary = Column(Text, default="")
    category = Column(String(128), default="", index=True)
    # Callable default so every row gets its own fresh list.
    tags = Column(JSON, default=lambda: list())

    # --- Scores (all default to 0.0) --------------------------------------
    heat_score = Column(Float, default=0.0)
    importance_score = Column(Float, default=0.0)
    duplication_score = Column(Float, default=0.0)
    composite_score = Column(Float, default=0.0)

    # --- Deduplication ----------------------------------------------------
    # Nullable link to the duplicate group; set to NULL if the group is deleted.
    duplicate_group_id = Column(
        Integer,
        ForeignKey("duplicate_groups.id", ondelete="SET NULL"),
        nullable=True,
        index=True,
    )
    is_representative = Column(Boolean, default=False, index=True)
    # 10-character string; presumably an ISO date "YYYY-MM-DD" (TODO confirm).
    brief_date = Column(String(10), default="", index=True)

    # --- Row timestamps ---------------------------------------------------
    created_at = Column(DateTime, default=_utc_now)
    updated_at = Column(DateTime, default=_utc_now, onupdate=_utc_now)

    # --- Relationships ----------------------------------------------------
    # foreign_keys is explicit because duplicate_groups also holds a foreign
    # key back to this table, so the join column must be named.
    duplicate_group = relationship(
        "DuplicateGroup",
        back_populates="articles",
        foreign_keys=[duplicate_group_id],
    )
class Taxonomy(Base):
    """Table of categories, tags and scoring rules."""

    __tablename__ = "taxonomy"

    id = Column(Integer, primary_key=True, index=True)
    name = Column(String(128), nullable=False, index=True)
    # Entry type: category/tag/heat_rule/importance_rule/duplication_rule
    kind = Column(String(32), nullable=False, index=True)
    description = Column(Text, default="")
    # List of keywords or rules; callable default gives each row a new list.
    keywords = Column(JSON, default=list)
    weight = Column(Float, default=1.0)
    created_by_ai = Column(Boolean, default=False)
    created_at = Column(DateTime, default=_utc_now)
class DuplicateGroup(Base):
    """Group of duplicate articles."""

    __tablename__ = "duplicate_groups"

    id = Column(Integer, primary_key=True, index=True)
    # Nullable pointer to one article row; set to NULL if that article is deleted.
    representative_article_id = Column(
        Integer,
        ForeignKey("articles_enriched.id", ondelete="SET NULL"),
        nullable=True,
    )
    # JSON columns use callable defaults so each row gets its own container.
    member_article_ids = Column(JSON, default=lambda: list())
    similarity_matrix = Column(JSON, default=lambda: dict())
    # 10-character string; presumably an ISO date "YYYY-MM-DD" (TODO confirm).
    brief_date = Column(String(10), default="", index=True)
    created_at = Column(DateTime, default=_utc_now)

    # Two foreign-key paths connect this table and articles_enriched
    # (representative_article_id here, duplicate_group_id there), so the
    # column backing this collection is named explicitly.
    articles = relationship(
        "EnrichedArticle",
        back_populates="duplicate_group",
        foreign_keys="EnrichedArticle.duplicate_group_id",
    )
class DailyBrief(Base):
    """Daily brief: one row per ``brief_date`` (the column is unique)."""

    __tablename__ = "daily_briefs"

    id = Column(Integer, primary_key=True, index=True)
    # Required and unique 10-character string; presumably "YYYY-MM-DD" (TODO confirm).
    brief_date = Column(String(10), unique=True, nullable=False, index=True)
    total_articles = Column(Integer, default=0)
    unique_articles = Column(Integer, default=0)
    # JSON object defaulting to a fresh empty dict per row; presumably
    # per-category counts (TODO confirm against the brief generator).
    by_category = Column(JSON, default=lambda: dict())
    markdown_path = Column(String(512), default="")
    created_at = Column(DateTime, default=_utc_now)
    updated_at = Column(DateTime, default=_utc_now, onupdate=_utc_now)
class AppSetting(Base):
    """Runtime configuration table: one row per unique setting ``key``."""

    __tablename__ = "app_settings"

    id = Column(Integer, primary_key=True, index=True)
    key = Column(String(128), unique=True, nullable=False, index=True)
    # Stored as text; any type conversion is left to the code reading it.
    value = Column(Text, default="")
    description = Column(Text, default="")
    # Flags the value as sensitive; how consumers treat it is not visible here.
    is_sensitive = Column(Boolean, default=False)
    updated_at = Column(DateTime, default=_utc_now, onupdate=_utc_now)