"""Article models: raw and cleaned.""" from datetime import datetime from sqlalchemy import Boolean, DateTime, Float, ForeignKey, Integer, JSON, String, Text from sqlalchemy.orm import Mapped, mapped_column, relationship from app.models.base import Base, TimestampMixin, UUIDMixin class RawArticle(Base, UUIDMixin, TimestampMixin): """Raw article fetched from RSS feed.""" __tablename__ = "raw_articles" feed_id: Mapped[str] = mapped_column( ForeignKey("feeds.id", ondelete="CASCADE"), nullable=False, index=True ) external_id: Mapped[str | None] = mapped_column(String(255), nullable=True, index=True) title: Mapped[str | None] = mapped_column(String(1024), default="", index=True) link: Mapped[str] = mapped_column(String(2048), nullable=False, index=True) author: Mapped[str | None] = mapped_column(String(256), default="") published_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True, index=True) fetched_at: Mapped[datetime] = mapped_column( DateTime(timezone=True), nullable=False, index=True ) content: Mapped[str | None] = mapped_column(Text, default="") summary: Mapped[str | None] = mapped_column(Text, default="") raw_html: Mapped[str | None] = mapped_column(Text, default="") content_hash: Mapped[str | None] = mapped_column(String(64), default="") language: Mapped[str | None] = mapped_column(String(16), default="") status: Mapped[str] = mapped_column(String(32), default="pending", nullable=False, index=True) feed: Mapped["Feed"] = relationship("Feed", back_populates="raw_articles") cleaned_article: Mapped["CleanedArticle | None"] = relationship( "CleanedArticle", back_populates="raw_article", uselist=False ) class CleanedArticle(Base, UUIDMixin, TimestampMixin): """Cleaned and AI-enriched article.""" __tablename__ = "cleaned_articles" raw_article_id: Mapped[str | None] = mapped_column( ForeignKey("raw_articles.id", ondelete="SET NULL"), nullable=True, index=True ) feed_id: Mapped[str] = mapped_column( ForeignKey("feeds.id", ondelete="CASCADE"), nullable=False, index=True ) title: Mapped[str | None] = mapped_column(String(1024), default="", index=True) link: Mapped[str] = mapped_column(String(2048), default="", index=True) author: Mapped[str | None] = mapped_column(String(256), default="") feed_title: Mapped[str | None] = mapped_column(String(512), default="") feed_category: Mapped[str | None] = mapped_column(String(128), default="") published_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True, index=True) fetched_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), nullable=False, index=True) content: Mapped[str | None] = mapped_column(Text, default="") content_length: Mapped[int] = mapped_column(Integer, default=0, nullable=False) original_summary: Mapped[str | None] = mapped_column(Text, default="") ai_summary: Mapped[str | None] = mapped_column(Text, default="") category: Mapped[str | None] = mapped_column(String(128), default="", index=True) tags: Mapped[list] = mapped_column(JSON, default=list, nullable=False) heat_score: Mapped[float] = mapped_column(Float, default=0.0, nullable=False) importance_score: Mapped[float] = mapped_column(Float, default=0.0, nullable=False) duplication_score: Mapped[float] = mapped_column(Float, default=0.0, nullable=False) composite_score: Mapped[float] = mapped_column(Float, default=0.0, nullable=False) duplicate_group_id: Mapped[str | None] = mapped_column( ForeignKey("duplicate_groups.id", ondelete="SET NULL"), nullable=True, index=True ) is_representative: Mapped[bool] = mapped_column(Boolean, default=True, nullable=False, index=True) reference_links: Mapped[list] = mapped_column(JSON, default=list, nullable=False) processing_status: Mapped[str] = mapped_column(String(32), default="pending", nullable=False, index=True) raw_article: Mapped["RawArticle | None"] = relationship("RawArticle", back_populates="cleaned_article") duplicate_group: Mapped["DuplicateGroup | None"] = relationship("DuplicateGroup", back_populates="articles")