82 lines
4.2 KiB
Python
82 lines
4.2 KiB
Python
|
|
"""Article models: raw and cleaned."""
|
||
|
|
from datetime import datetime
|
||
|
|
|
||
|
|
from sqlalchemy import Boolean, DateTime, Float, ForeignKey, Integer, JSON, String, Text
|
||
|
|
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
||
|
|
|
||
|
|
from app.models.base import Base, TimestampMixin, UUIDMixin
|
||
|
|
|
||
|
|
|
||
|
|
class RawArticle(Base, UUIDMixin, TimestampMixin):
    """Unprocessed article exactly as it was fetched from an RSS feed.

    Rows live in the ``raw_articles`` table. Each row belongs to one feed and
    may be linked to a single ``CleanedArticle`` through ``cleaned_article``.
    """

    __tablename__ = "raw_articles"

    # --- Ownership -------------------------------------------------------
    # Foreign key to feeds.id, declared with ON DELETE CASCADE.
    feed_id: Mapped[str] = mapped_column(
        ForeignKey("feeds.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )

    # --- Entry metadata --------------------------------------------------
    external_id: Mapped[str | None] = mapped_column(
        String(255),
        nullable=True,
        index=True,
    )
    title: Mapped[str | None] = mapped_column(
        String(1024),
        default="",
        index=True,
    )
    link: Mapped[str] = mapped_column(
        String(2048),
        nullable=False,
        index=True,
    )
    author: Mapped[str | None] = mapped_column(
        String(256),
        default="",
    )

    # --- Timestamps (timezone-aware) -------------------------------------
    published_at: Mapped[datetime | None] = mapped_column(
        DateTime(timezone=True),
        nullable=True,
        index=True,
    )
    # Required and has no column default, so it must be supplied on insert.
    fetched_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        index=True,
    )

    # --- Payload; each of these defaults to an empty string --------------
    content: Mapped[str | None] = mapped_column(Text, default="")
    summary: Mapped[str | None] = mapped_column(Text, default="")
    raw_html: Mapped[str | None] = mapped_column(Text, default="")
    content_hash: Mapped[str | None] = mapped_column(String(64), default="")
    language: Mapped[str | None] = mapped_column(String(16), default="")

    # --- Processing state; new rows start as "pending" -------------------
    status: Mapped[str] = mapped_column(
        String(32),
        default="pending",
        nullable=False,
        index=True,
    )

    # --- Relationships ---------------------------------------------------
    feed: Mapped["Feed"] = relationship(
        "Feed",
        back_populates="raw_articles",
    )
    # Scalar (not list) side of the link to CleanedArticle.raw_article.
    cleaned_article: Mapped["CleanedArticle | None"] = relationship(
        "CleanedArticle",
        back_populates="raw_article",
        uselist=False,
    )
|
||
|
|
|
||
|
|
|
||
|
|
class CleanedArticle(Base, UUIDMixin, TimestampMixin):
    """Article after cleaning and AI enrichment.

    Rows live in the ``cleaned_articles`` table. A row may point back to the
    ``RawArticle`` it came from and may belong to a duplicate group.
    """

    __tablename__ = "cleaned_articles"

    # --- Source links ----------------------------------------------------
    # Nullable with ON DELETE SET NULL: the reference is cleared rather than
    # the row removed when the referenced raw_articles row is deleted.
    raw_article_id: Mapped[str | None] = mapped_column(
        ForeignKey("raw_articles.id", ondelete="SET NULL"),
        nullable=True,
        index=True,
    )
    # Foreign key to feeds.id, declared with ON DELETE CASCADE.
    feed_id: Mapped[str] = mapped_column(
        ForeignKey("feeds.id", ondelete="CASCADE"),
        nullable=False,
        index=True,
    )

    # --- Article and feed metadata stored on this row --------------------
    title: Mapped[str | None] = mapped_column(
        String(1024),
        default="",
        index=True,
    )
    link: Mapped[str] = mapped_column(
        String(2048),
        default="",
        index=True,
    )
    author: Mapped[str | None] = mapped_column(String(256), default="")
    feed_title: Mapped[str | None] = mapped_column(String(512), default="")
    feed_category: Mapped[str | None] = mapped_column(String(128), default="")

    # --- Timestamps (timezone-aware) -------------------------------------
    published_at: Mapped[datetime | None] = mapped_column(
        DateTime(timezone=True),
        nullable=True,
        index=True,
    )
    # Required and has no column default, so it must be supplied on insert.
    fetched_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        index=True,
    )

    # --- Body and summaries ----------------------------------------------
    content: Mapped[str | None] = mapped_column(Text, default="")
    content_length: Mapped[int] = mapped_column(
        Integer,
        default=0,
        nullable=False,
    )
    original_summary: Mapped[str | None] = mapped_column(Text, default="")
    ai_summary: Mapped[str | None] = mapped_column(Text, default="")

    # --- Classification --------------------------------------------------
    category: Mapped[str | None] = mapped_column(
        String(128),
        default="",
        index=True,
    )
    # ``list`` is passed as a callable default rather than a shared literal.
    tags: Mapped[list] = mapped_column(JSON, default=list, nullable=False)

    # --- Scores; every one is required and starts at 0.0 -----------------
    heat_score: Mapped[float] = mapped_column(
        Float,
        default=0.0,
        nullable=False,
    )
    importance_score: Mapped[float] = mapped_column(
        Float,
        default=0.0,
        nullable=False,
    )
    duplication_score: Mapped[float] = mapped_column(
        Float,
        default=0.0,
        nullable=False,
    )
    composite_score: Mapped[float] = mapped_column(
        Float,
        default=0.0,
        nullable=False,
    )

    # --- De-duplication --------------------------------------------------
    # Nullable with ON DELETE SET NULL, like raw_article_id above.
    duplicate_group_id: Mapped[str | None] = mapped_column(
        ForeignKey("duplicate_groups.id", ondelete="SET NULL"),
        nullable=True,
        index=True,
    )
    # Defaults to True.
    is_representative: Mapped[bool] = mapped_column(
        Boolean,
        default=True,
        nullable=False,
        index=True,
    )
    reference_links: Mapped[list] = mapped_column(
        JSON,
        default=list,
        nullable=False,
    )

    # --- Processing state; new rows start as "pending" -------------------
    processing_status: Mapped[str] = mapped_column(
        String(32),
        default="pending",
        nullable=False,
        index=True,
    )

    # --- Relationships ---------------------------------------------------
    raw_article: Mapped["RawArticle | None"] = relationship(
        "RawArticle",
        back_populates="cleaned_article",
    )
    duplicate_group: Mapped["DuplicateGroup | None"] = relationship(
        "DuplicateGroup",
        back_populates="articles",
    )
|