Files
daily-news-app/app/models/news.py
donald db0f0bbfe7 Initial commit: Daily News App
企業內部新聞彙整與分析系統
- 自動新聞抓取 (Digitimes, 經濟日報, 工商時報)
- AI 智慧摘要 (OpenAI/Claude/Ollama)
- 群組管理與訂閱通知
- 已清理 Python 快取檔案

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-12-03 23:53:24 +08:00

101 lines
4.7 KiB
Python

"""
新聞來源與文章資料模型
"""
import enum
from datetime import datetime, timezone
from typing import List, Optional

from sqlalchemy import (
    JSON,
    Boolean,
    Enum as SQLEnum,
    ForeignKey,
    Index,
    String,
    Text,
    UniqueConstraint,
)
from sqlalchemy.orm import Mapped, mapped_column, relationship

from app.db.session import Base
class SourceType(str, enum.Enum):
    """Kind of news source.

    Members are also plain ``str`` instances, so they compare equal to
    their lowercase string values.
    """

    # presumably a source that needs a subscription/login — confirm with crawler code
    SUBSCRIPTION = "subscription"
    # presumably a source readable without credentials — confirm with crawler code
    PUBLIC = "public"
class CrawlStatus(str, enum.Enum):
    """State of a crawl job.

    Members are also plain ``str`` instances, so they compare equal to
    their lowercase string values.
    """

    PENDING = "pending"      # default status of a newly created CrawlJob
    RUNNING = "running"
    COMPLETED = "completed"
    FAILED = "failed"
class NewsSource(Base):
    """ORM model for the ``news_sources`` table: one row per news source.

    Timestamp defaults produce naive datetimes holding the current UTC
    time. They are built from ``datetime.now(timezone.utc)`` with the
    tzinfo stripped because ``datetime.utcnow`` is deprecated since
    Python 3.12; the stored values are the same naive-UTC values.
    """

    __tablename__ = "news_sources"

    id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
    # Short unique code identifying the source.
    code: Mapped[str] = mapped_column(
        String(30), unique=True, nullable=False, comment="來源代碼"
    )
    # Display name of the source.
    name: Mapped[str] = mapped_column(String(100), nullable=False, comment="來源名稱")
    # Base URL of the source's website.
    base_url: Mapped[str] = mapped_column(
        String(255), nullable=False, comment="網站基礎URL"
    )
    source_type: Mapped[SourceType] = mapped_column(
        SQLEnum(SourceType), nullable=False, comment="來源類型"
    )
    # Optional login account name.
    login_username: Mapped[Optional[str]] = mapped_column(
        String(100), comment="登入帳號"
    )
    # Optional password; the column comment says it holds the encrypted form.
    # No encryption is performed by this model itself.
    login_password_encrypted: Mapped[Optional[str]] = mapped_column(
        String(255), comment="加密後密碼"
    )
    # Whether the source is enabled; defaults to True.
    is_active: Mapped[bool] = mapped_column(Boolean, default=True, comment="是否啟用")
    # Optional free-form crawler settings stored as JSON.
    crawl_config: Mapped[Optional[dict]] = mapped_column(JSON, comment="爬蟲設定")
    created_at: Mapped[datetime] = mapped_column(
        default=lambda: datetime.now(timezone.utc).replace(tzinfo=None),
    )
    # Refreshed on every UPDATE via ``onupdate``.
    updated_at: Mapped[datetime] = mapped_column(
        default=lambda: datetime.now(timezone.utc).replace(tzinfo=None),
        onupdate=lambda: datetime.now(timezone.utc).replace(tzinfo=None),
    )

    # Relationships
    articles: Mapped[List["NewsArticle"]] = relationship(back_populates="source")
    crawl_jobs: Mapped[List["CrawlJob"]] = relationship(back_populates="source")
class NewsArticle(Base):
    """ORM model for the ``news_articles`` table: one row per article.

    Timestamp defaults produce naive datetimes holding the current UTC
    time. They are built from ``datetime.now(timezone.utc)`` with the
    tzinfo stripped because ``datetime.utcnow`` is deprecated since
    Python 3.12; the stored values are the same naive-UTC values.
    """

    __tablename__ = "news_articles"
    __table_args__ = (
        # NOTE(review): external_id is nullable; many databases treat NULLs as
        # distinct in a unique constraint, so rows without an external_id may
        # not be deduplicated by this constraint — confirm this is intended.
        UniqueConstraint("source_id", "external_id", name="uk_source_external"),
        Index("idx_articles_published", "published_at"),
        Index("idx_articles_crawled", "crawled_at"),
    )

    id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
    # Owning news source (FK to news_sources.id).
    source_id: Mapped[int] = mapped_column(
        ForeignKey("news_sources.id"), nullable=False, comment="來源ID"
    )
    # Optional article identifier on the external site.
    external_id: Mapped[Optional[str]] = mapped_column(
        String(100), comment="外部文章ID"
    )
    # Article title.
    title: Mapped[str] = mapped_column(String(500), nullable=False, comment="文章標題")
    # Optional full text of the article.
    content: Mapped[Optional[str]] = mapped_column(Text, comment="文章全文")
    # Optional summary taken from the original article.
    summary: Mapped[Optional[str]] = mapped_column(Text, comment="原文摘要")
    # Link to the original article.
    url: Mapped[str] = mapped_column(String(500), nullable=False, comment="原文連結")
    author: Mapped[Optional[str]] = mapped_column(String(100), comment="作者")
    # Optional publication time.
    published_at: Mapped[Optional[datetime]] = mapped_column(comment="發布時間")
    # Time the article was crawled; defaults to the current UTC time.
    crawled_at: Mapped[datetime] = mapped_column(
        default=lambda: datetime.now(timezone.utc).replace(tzinfo=None),
        comment="抓取時間",
    )
    created_at: Mapped[datetime] = mapped_column(
        default=lambda: datetime.now(timezone.utc).replace(tzinfo=None),
    )

    # Relationships
    source: Mapped["NewsSource"] = relationship(back_populates="articles")
    # Group matches are cascaded with the article, including orphan deletion.
    group_matches: Mapped[List["ArticleGroupMatch"]] = relationship(
        back_populates="article", cascade="all, delete-orphan"
    )
    report_articles: Mapped[List["ReportArticle"]] = relationship(
        back_populates="article"
    )
class CrawlJob(Base):
    """ORM model for the ``crawl_jobs`` table: one row per crawl job record.

    The ``created_at`` default produces a naive datetime holding the
    current UTC time. It is built from ``datetime.now(timezone.utc)``
    with the tzinfo stripped because ``datetime.utcnow`` is deprecated
    since Python 3.12; the stored value is the same naive-UTC value.
    """

    __tablename__ = "crawl_jobs"
    __table_args__ = (
        Index("idx_crawl_jobs_status", "status"),
        Index("idx_crawl_jobs_scheduled", "scheduled_at"),
    )

    id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
    # News source this job crawls (FK to news_sources.id).
    source_id: Mapped[int] = mapped_column(
        ForeignKey("news_sources.id"), nullable=False, comment="來源ID"
    )
    # Job state; new rows default to PENDING.
    status: Mapped[CrawlStatus] = mapped_column(
        SQLEnum(CrawlStatus), default=CrawlStatus.PENDING
    )
    # Scheduled time of the job (required).
    scheduled_at: Mapped[datetime] = mapped_column(nullable=False, comment="排程時間")
    # Optional start and completion times.
    started_at: Mapped[Optional[datetime]] = mapped_column(comment="開始時間")
    completed_at: Mapped[Optional[datetime]] = mapped_column(comment="完成時間")
    # Number of articles crawled; defaults to 0.
    articles_count: Mapped[int] = mapped_column(default=0, comment="抓取文章數")
    # Optional error message.
    error_message: Mapped[Optional[str]] = mapped_column(Text, comment="錯誤訊息")
    # Number of retries; defaults to 0.
    retry_count: Mapped[int] = mapped_column(default=0, comment="重試次數")
    created_at: Mapped[datetime] = mapped_column(
        default=lambda: datetime.now(timezone.utc).replace(tzinfo=None),
    )

    # Relationships
    source: Mapped["NewsSource"] = relationship(back_populates="crawl_jobs")
# 避免循環引入
from app.models.group import ArticleGroupMatch
from app.models.report import ReportArticle