""" 新聞來源與文章資料模型 """ from datetime import datetime from sqlalchemy import String, Boolean, ForeignKey, Text, JSON, Enum as SQLEnum, UniqueConstraint, Index from sqlalchemy.orm import Mapped, mapped_column, relationship from typing import Optional, List import enum from app.db.session import Base class SourceType(str, enum.Enum): """來源類型""" SUBSCRIPTION = "subscription" PUBLIC = "public" class CrawlStatus(str, enum.Enum): """抓取任務狀態""" PENDING = "pending" RUNNING = "running" COMPLETED = "completed" FAILED = "failed" class NewsSource(Base): """新聞來源表""" __tablename__ = "news_sources" id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True) code: Mapped[str] = mapped_column(String(30), unique=True, nullable=False, comment="來源代碼") name: Mapped[str] = mapped_column(String(100), nullable=False, comment="來源名稱") base_url: Mapped[str] = mapped_column(String(255), nullable=False, comment="網站基礎URL") source_type: Mapped[SourceType] = mapped_column(SQLEnum(SourceType), nullable=False, comment="來源類型") login_username: Mapped[Optional[str]] = mapped_column(String(100), comment="登入帳號") login_password_encrypted: Mapped[Optional[str]] = mapped_column(String(255), comment="加密後密碼") is_active: Mapped[bool] = mapped_column(Boolean, default=True, comment="是否啟用") crawl_config: Mapped[Optional[dict]] = mapped_column(JSON, comment="爬蟲設定") created_at: Mapped[datetime] = mapped_column(default=datetime.utcnow) updated_at: Mapped[datetime] = mapped_column(default=datetime.utcnow, onupdate=datetime.utcnow) # 關聯 articles: Mapped[List["NewsArticle"]] = relationship(back_populates="source") crawl_jobs: Mapped[List["CrawlJob"]] = relationship(back_populates="source") class NewsArticle(Base): """新聞文章表""" __tablename__ = "news_articles" __table_args__ = ( UniqueConstraint("source_id", "external_id", name="uk_source_external"), Index("idx_articles_published", "published_at"), Index("idx_articles_crawled", "crawled_at"), ) id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True) source_id: Mapped[int] = mapped_column(ForeignKey("news_sources.id"), nullable=False, comment="來源ID") external_id: Mapped[Optional[str]] = mapped_column(String(100), comment="外部文章ID") title: Mapped[str] = mapped_column(String(500), nullable=False, comment="文章標題") content: Mapped[Optional[str]] = mapped_column(Text, comment="文章全文") summary: Mapped[Optional[str]] = mapped_column(Text, comment="原文摘要") url: Mapped[str] = mapped_column(String(500), nullable=False, comment="原文連結") author: Mapped[Optional[str]] = mapped_column(String(100), comment="作者") published_at: Mapped[Optional[datetime]] = mapped_column(comment="發布時間") crawled_at: Mapped[datetime] = mapped_column(default=datetime.utcnow, comment="抓取時間") created_at: Mapped[datetime] = mapped_column(default=datetime.utcnow) # 關聯 source: Mapped["NewsSource"] = relationship(back_populates="articles") group_matches: Mapped[List["ArticleGroupMatch"]] = relationship(back_populates="article", cascade="all, delete-orphan") report_articles: Mapped[List["ReportArticle"]] = relationship(back_populates="article") class CrawlJob(Base): """抓取任務記錄表""" __tablename__ = "crawl_jobs" __table_args__ = ( Index("idx_crawl_jobs_status", "status"), Index("idx_crawl_jobs_scheduled", "scheduled_at"), ) id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True) source_id: Mapped[int] = mapped_column(ForeignKey("news_sources.id"), nullable=False, comment="來源ID") status: Mapped[CrawlStatus] = mapped_column(SQLEnum(CrawlStatus), default=CrawlStatus.PENDING) scheduled_at: Mapped[datetime] = mapped_column(nullable=False, comment="排程時間") started_at: Mapped[Optional[datetime]] = mapped_column(comment="開始時間") completed_at: Mapped[Optional[datetime]] = mapped_column(comment="完成時間") articles_count: Mapped[int] = mapped_column(default=0, comment="抓取文章數") error_message: Mapped[Optional[str]] = mapped_column(Text, comment="錯誤訊息") retry_count: Mapped[int] = mapped_column(default=0, comment="重試次數") created_at: Mapped[datetime] = mapped_column(default=datetime.utcnow) # 關聯 source: Mapped["NewsSource"] = relationship(back_populates="crawl_jobs") # 避免循環引入 from app.models.group import ArticleGroupMatch from app.models.report import ReportArticle