Files
dataClean/models.py
T
2026-06-12 16:04:03 +08:00

110 lines
4.0 KiB
Python

"""SQLAlchemy 数据模型"""
from datetime import datetime, timezone
from sqlalchemy import Column, Integer, String, Text, Boolean, DateTime, Float, ForeignKey, JSON
from sqlalchemy.orm import relationship
from database import Base
def _utc_now():
    """Return the current moment as a timezone-aware datetime in UTC.

    Used as the callable for column ``default`` / ``onupdate`` values so the
    timestamp is computed per row rather than once at import time.
    """
    return datetime.now(tz=timezone.utc)
class EnrichedArticle(Base):
    """Processed article: AI summary, category, tags, scores and de-duplication info.

    Mapped to the ``articles_enriched`` table. Each row may belong to at most
    one ``DuplicateGroup`` through ``duplicate_group_id``.
    """

    __tablename__ = "articles_enriched"

    id = Column(Integer, primary_key=True, index=True)
    # Required and unique; presumably the article id in the upstream source
    # system — verify against the ingestion code.
    rk_article_id = Column(Integer, unique=True, nullable=False, index=True)
    title = Column(String(1024), default="", index=True)
    link = Column(String(2048), default="", index=True)

    # Feed the article came from.
    feed_id = Column(Integer, nullable=False, index=True)
    feed_title = Column(String(512), default="")
    feed_category = Column(String(128), default="")
    author = Column(String(256), default="")

    # NOTE(review): _utc_now yields timezone-aware values while these columns
    # are plain DateTime (no timezone=True) — confirm the backend stores and
    # returns them as intended.
    published_at = Column(DateTime, nullable=True, index=True)
    fetched_at = Column(DateTime, default=_utc_now, index=True)

    # Original text and AI-produced enrichment.
    original_summary = Column(Text, default="")
    content = Column(Text, default="")
    ai_summary = Column(Text, default="")
    category = Column(String(128), default="", index=True)
    # Callable default: every new row gets its own empty list.
    tags = Column(JSON, default=list)

    # Scores, all defaulting to 0.0.
    heat_score = Column(Float, default=0.0)
    importance_score = Column(Float, default=0.0)
    duplication_score = Column(Float, default=0.0)
    composite_score = Column(Float, default=0.0)

    # De-duplication: the FK is nulled (not cascaded) when the group is deleted.
    duplicate_group_id = Column(
        Integer,
        ForeignKey("duplicate_groups.id", ondelete="SET NULL"),
        nullable=True,
        index=True,
    )
    is_representative = Column(Boolean, default=False, index=True)

    # 10-character string; presumably an ISO date such as "YYYY-MM-DD" — verify.
    brief_date = Column(String(10), default="", index=True)
    created_at = Column(DateTime, default=_utc_now)
    updated_at = Column(DateTime, default=_utc_now, onupdate=_utc_now)

    # foreign_keys is spelled out because the two tables reference each other
    # (duplicate_groups.representative_article_id points back at this table),
    # which leaves SQLAlchemy with two candidate join paths to choose from.
    duplicate_group = relationship(
        "DuplicateGroup",
        back_populates="articles",
        foreign_keys=[duplicate_group_id],
    )


class Taxonomy(Base):
    """Table of categories, tags and scoring rules."""

    __tablename__ = "taxonomy"

    id = Column(Integer, primary_key=True, index=True)
    name = Column(String(128), nullable=False, index=True)
    kind = Column(String(32), nullable=False, index=True)  # category/tag/heat_rule/importance_rule/duplication_rule
    description = Column(Text, default="")
    keywords = Column(JSON, default=list)  # list of keywords or rules
    weight = Column(Float, default=1.0)
    created_by_ai = Column(Boolean, default=False)
    created_at = Column(DateTime, default=_utc_now)


class DuplicateGroup(Base):
    """Group of duplicate articles.

    Mapped to the ``duplicate_groups`` table. ``articles`` is the collection
    of ``EnrichedArticle`` rows whose ``duplicate_group_id`` points at this
    group.
    """

    __tablename__ = "duplicate_groups"

    id = Column(Integer, primary_key=True, index=True)
    # NOTE(review): together with EnrichedArticle.duplicate_group_id this forms
    # a foreign-key cycle between the two tables; if CREATE/DROP ordering
    # becomes a problem on the target backend, consider a named constraint
    # with use_alter=True.
    representative_article_id = Column(
        Integer,
        ForeignKey("articles_enriched.id", ondelete="SET NULL"),
        nullable=True,
    )
    # Callable defaults: each new row gets its own empty list / dict.
    member_article_ids = Column(JSON, default=list)
    similarity_matrix = Column(JSON, default=dict)
    # 10-character string; presumably an ISO date such as "YYYY-MM-DD" — verify.
    brief_date = Column(String(10), default="", index=True)
    created_at = Column(DateTime, default=_utc_now)

    # Explicit foreign_keys: representative_article_id is a second FK path
    # between the same two tables, so the join column must be named.
    articles = relationship(
        "EnrichedArticle",
        back_populates="duplicate_group",
        foreign_keys="EnrichedArticle.duplicate_group_id",
    )
class DailyBrief(Base):
    """Daily brief.

    One row per ``brief_date`` (the column is unique and required) in the
    ``daily_briefs`` table.
    """

    __tablename__ = "daily_briefs"

    id = Column(Integer, primary_key=True, index=True)
    # 10-character string; presumably an ISO date such as "YYYY-MM-DD" — verify.
    brief_date = Column(
        String(10),
        unique=True,
        nullable=False,
        index=True,
    )

    # Article counters, both starting at 0.
    total_articles = Column(Integer, default=0)
    unique_articles = Column(Integer, default=0)
    # Callable default so each row starts with its own empty dict;
    # presumably keyed by category name — verify against the writer.
    by_category = Column(JSON, default=dict)
    # Presumably the location of the rendered markdown file — verify.
    markdown_path = Column(String(512), default="")

    # Timestamps: both set on insert, updated_at refreshed on every update.
    created_at = Column(DateTime, default=_utc_now)
    updated_at = Column(
        DateTime,
        default=_utc_now,
        onupdate=_utc_now,
    )
class AppSetting(Base):
    """Runtime configuration table.

    Key/value rows stored in ``app_settings``; ``key`` is unique and required.
    """

    __tablename__ = "app_settings"

    id = Column(Integer, primary_key=True, index=True)
    key = Column(
        String(128),
        unique=True,
        nullable=False,
        index=True,
    )
    # Values are stored as text; any typing is left to the caller.
    value = Column(Text, default="")
    description = Column(Text, default="")
    # Presumably marks values that should be masked when displayed — verify.
    is_sensitive = Column(Boolean, default=False)
    # Set on insert and refreshed on every update.
    updated_at = Column(
        DateTime,
        default=_utc_now,
        onupdate=_utc_now,
    )