Files
dataClean/models.py
T

110 lines
4.1 KiB
Python
Raw Normal View History

2026-06-12 16:04:03 +08:00
"""SQLAlchemy 数据模型"""
from datetime import datetime, timezone
from sqlalchemy import Column, Integer, String, Text, Boolean, DateTime, Float, ForeignKey, JSON
from sqlalchemy.orm import relationship
from database import Base
def _utc_now() -> datetime:
    """Return the current time as a timezone-aware ``datetime`` in UTC.

    Passed (uncalled) as the ``default`` / ``onupdate`` of the timestamp
    columns in this module, so a fresh value is produced whenever the
    callable is invoked rather than once at import time.
    """
    return datetime.now(tz=timezone.utc)
class EnrichedArticle(Base):
    """Processed article: AI summary, category, tags, scores and de-duplication info."""

    __tablename__ = "articles_enriched"

    # -- Identity --
    id = Column(Integer, primary_key=True, index=True)
    # Required and unique per row.
    # NOTE(review): presumably the article's id in the upstream source — confirm.
    rk_article_id = Column(Integer, unique=True, nullable=False, index=True)

    # -- Source metadata --
    title = Column(String(1024), default="", index=True)
    link = Column(String(2048), default="", index=True)
    feed_id = Column(Integer, nullable=False, index=True)
    feed_title = Column(String(512), default="")
    feed_category = Column(String(128), default="")
    author = Column(String(256), default="")
    # NOTE(review): the DateTime columns in this class are declared without
    # timezone=True while _utc_now supplies timezone-aware values — confirm
    # how the configured backend round-trips them.
    published_at = Column(DateTime, nullable=True, index=True)
    fetched_at = Column(DateTime, default=_utc_now, index=True)

    # -- Text --
    original_summary = Column(Text, default="")
    content = Column(Text, default="")
    ai_summary = Column(Text, default="")

    # -- Classification --
    category = Column(String(128), default="", index=True)
    # Callable default: every new row gets its own empty list.
    tags = Column(JSON, default=list)

    # -- Scores (each defaults to 0.0) --
    heat_score = Column(Float, default=0.0)
    importance_score = Column(Float, default=0.0)
    duplication_score = Column(Float, default=0.0)
    composite_score = Column(Float, default=0.0)

    # -- De-duplication --
    # Optional link to the duplicate group; set to NULL if the group row is deleted.
    duplicate_group_id = Column(
        Integer,
        ForeignKey("duplicate_groups.id", ondelete="SET NULL"),
        nullable=True,
        index=True,
    )
    is_representative = Column(Boolean, default=False, index=True)
    # NOTE(review): 10 characters fits an ISO date (YYYY-MM-DD) — confirm the format.
    brief_date = Column(String(10), default="", index=True)

    # -- Row timestamps --
    created_at = Column(DateTime, default=_utc_now)
    updated_at = Column(DateTime, default=_utc_now, onupdate=_utc_now)

    # Paired with DuplicateGroup.articles; the join column is named explicitly
    # via foreign_keys.
    duplicate_group = relationship(
        "DuplicateGroup",
        back_populates="articles",
        foreign_keys=[duplicate_group_id],
    )
2026-06-12 16:04:03 +08:00
class Taxonomy(Base):
    """Table of categories, tags and scoring rules."""

    __tablename__ = "taxonomy"

    id = Column(Integer, primary_key=True, index=True)
    name = Column(String(128), nullable=False, index=True)
    # Entry type, one of:
    # category / tag / heat_rule / importance_rule / duplication_rule
    kind = Column(String(32), nullable=False, index=True)
    description = Column(Text, default="")
    # List of keywords or rules; the callable default gives each row its own list.
    keywords = Column(JSON, default=list)
    weight = Column(Float, default=1.0)
    created_by_ai = Column(Boolean, default=False)
    created_at = Column(DateTime, default=_utc_now)
class DuplicateGroup(Base):
    """Group of articles identified as duplicates of one another.

    ``duplicate_groups`` and ``articles_enriched`` reference each other:
    ``EnrichedArticle.duplicate_group_id`` points at this table and
    ``representative_article_id`` points back, so the two tables form a
    foreign-key cycle.
    """

    __tablename__ = "duplicate_groups"

    id = Column(Integer, primary_key=True, index=True)
    # This back-reference is the side chosen to break the foreign-key cycle.
    # ``use_alter=True`` keeps it out of table-dependency sorting, and the
    # explicit name lets the constraint be dropped on its own, which
    # SQLAlchemy requires to order DROP statements for mutually dependent
    # tables on backends that use ALTER.
    representative_article_id = Column(
        Integer,
        ForeignKey(
            "articles_enriched.id",
            ondelete="SET NULL",
            use_alter=True,
            name="fk_duplicate_groups_representative_article_id",
        ),
        nullable=True,
    )
    # NOTE(review): which article identifier is stored in this JSON list
    # (articles_enriched.id or rk_article_id) is not visible here — confirm.
    member_article_ids = Column(JSON, default=list)
    similarity_matrix = Column(JSON, default=dict)
    # NOTE(review): 10 characters fits an ISO date (YYYY-MM-DD) — confirm the format.
    brief_date = Column(String(10), default="", index=True)
    created_at = Column(DateTime, default=_utc_now)

    # Paired with EnrichedArticle.duplicate_group. The join column is given as
    # a string so it is resolved lazily, and it must be explicit because two
    # foreign keys connect the tables.
    articles = relationship(
        "EnrichedArticle",
        back_populates="duplicate_group",
        foreign_keys="EnrichedArticle.duplicate_group_id",
    )
2026-06-12 16:04:03 +08:00
class DailyBrief(Base):
    """Daily brief: one row per brief date."""

    __tablename__ = "daily_briefs"

    id = Column(Integer, primary_key=True, index=True)
    # Required and unique, so at most one brief exists per date value.
    # NOTE(review): 10 characters fits an ISO date (YYYY-MM-DD) — confirm the format.
    brief_date = Column(String(10), unique=True, nullable=False, index=True)

    # -- Counters --
    total_articles = Column(Integer, default=0)
    unique_articles = Column(Integer, default=0)
    # Callable default: every new row gets its own empty dict.
    # NOTE(review): presumably maps category name to a count — confirm.
    by_category = Column(JSON, default=dict)

    # NOTE(review): presumably the location of the rendered Markdown file — confirm.
    markdown_path = Column(String(512), default="")

    # -- Row timestamps --
    created_at = Column(DateTime, default=_utc_now)
    updated_at = Column(DateTime, default=_utc_now, onupdate=_utc_now)
class AppSetting(Base):
    """Runtime configuration table: one row per setting key."""

    __tablename__ = "app_settings"

    id = Column(Integer, primary_key=True, index=True)
    # Required and unique setting name.
    key = Column(String(128), unique=True, nullable=False, index=True)
    # Stored as text; any parsing into other types happens outside this model.
    value = Column(Text, default="")
    description = Column(Text, default="")
    # NOTE(review): how this flag is consumed (e.g. masking in output) is not
    # visible here — confirm against callers.
    is_sensitive = Column(Boolean, default=False)
    # Set on insert and refreshed on every update.
    updated_at = Column(DateTime, default=_utc_now, onupdate=_utc_now)