Initial commit: Daily News App
企業內部新聞彙整與分析系統 - 自動新聞抓取 (Digitimes, 經濟日報, 工商時報) - AI 智慧摘要 (OpenAI/Claude/Ollama) - 群組管理與訂閱通知 - 已清理 Python 快取檔案 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
100
app/models/news.py
Normal file
100
app/models/news.py
Normal file
@@ -0,0 +1,100 @@
|
||||
"""
|
||||
新聞來源與文章資料模型
|
||||
"""
|
||||
import enum
from datetime import datetime, timezone
from typing import Optional, List

from sqlalchemy import String, Boolean, ForeignKey, Text, JSON, Enum as SQLEnum, UniqueConstraint, Index
from sqlalchemy.orm import Mapped, mapped_column, relationship

from app.db.session import Base
|
||||
|
||||
|
||||
@enum.unique
class SourceType(str, enum.Enum):
    """Kind of news source.

    Members are also ``str`` instances, so they compare equal to their
    lowercase string values. ``@enum.unique`` rejects accidental aliases
    (two names bound to the same value) at import time.
    """

    SUBSCRIPTION = "subscription"
    PUBLIC = "public"
|
||||
|
||||
|
||||
@enum.unique
class CrawlStatus(str, enum.Enum):
    """Status of a crawl job.

    Members are also ``str`` instances, so they compare equal to their
    lowercase string values. ``@enum.unique`` rejects accidental aliases
    (two names bound to the same value) at import time.
    """

    PENDING = "pending"
    RUNNING = "running"
    COMPLETED = "completed"
    FAILED = "failed"
|
||||
|
||||
|
||||
class NewsSource(Base):
    """News source table (``news_sources``).

    One row per news site, holding its base URL, source type, optional
    login credentials and optional crawler settings. ``created_at`` and
    ``updated_at`` are filled client-side with naive UTC datetimes.
    """

    __tablename__ = "news_sources"

    id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
    # Unique short code for the source.
    code: Mapped[str] = mapped_column(String(30), unique=True, nullable=False, comment="來源代碼")
    # Display name of the source.
    name: Mapped[str] = mapped_column(String(100), nullable=False, comment="來源名稱")
    # Base URL of the site.
    base_url: Mapped[str] = mapped_column(String(255), nullable=False, comment="網站基礎URL")
    # Source type (see SourceType).
    source_type: Mapped[SourceType] = mapped_column(SQLEnum(SourceType), nullable=False, comment="來源類型")
    # Login account name, if any.
    login_username: Mapped[Optional[str]] = mapped_column(String(100), comment="登入帳號")
    # Password in encrypted form; the encryption itself is not done in this module.
    login_password_encrypted: Mapped[Optional[str]] = mapped_column(String(255), comment="加密後密碼")
    # Whether the source is enabled.
    is_active: Mapped[bool] = mapped_column(Boolean, default=True, comment="是否啟用")
    # Crawler settings stored as JSON.
    crawl_config: Mapped[Optional[dict]] = mapped_column(JSON, comment="爬蟲設定")
    # datetime.utcnow() is deprecated since Python 3.12; the lambdas below
    # produce the same naive-UTC value without the deprecated call.
    created_at: Mapped[datetime] = mapped_column(
        default=lambda: datetime.now(timezone.utc).replace(tzinfo=None),
    )
    updated_at: Mapped[datetime] = mapped_column(
        default=lambda: datetime.now(timezone.utc).replace(tzinfo=None),
        onupdate=lambda: datetime.now(timezone.utc).replace(tzinfo=None),
    )

    # Relationships
    articles: Mapped[List["NewsArticle"]] = relationship(back_populates="source")
    crawl_jobs: Mapped[List["CrawlJob"]] = relationship(back_populates="source")
|
||||
|
||||
|
||||
class NewsArticle(Base):
    """News article table (``news_articles``).

    Each row is one article belonging to a ``NewsSource``. The pair
    ``(source_id, external_id)`` is declared unique, and ``published_at``
    and ``crawled_at`` are indexed. ``crawled_at`` and ``created_at`` are
    filled client-side with naive UTC datetimes.
    """

    __tablename__ = "news_articles"
    __table_args__ = (
        # NOTE(review): external_id is nullable, and most databases treat
        # NULLs as distinct in a unique constraint, so rows without an
        # external_id are not deduplicated here — confirm this is intended.
        UniqueConstraint("source_id", "external_id", name="uk_source_external"),
        Index("idx_articles_published", "published_at"),
        Index("idx_articles_crawled", "crawled_at"),
    )

    id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
    # Owning source (FK to news_sources.id).
    source_id: Mapped[int] = mapped_column(ForeignKey("news_sources.id"), nullable=False, comment="來源ID")
    # Article identifier on the external site, if known.
    external_id: Mapped[Optional[str]] = mapped_column(String(100), comment="外部文章ID")
    # Article title.
    title: Mapped[str] = mapped_column(String(500), nullable=False, comment="文章標題")
    # Full article text.
    content: Mapped[Optional[str]] = mapped_column(Text, comment="文章全文")
    # Summary taken from the original article.
    summary: Mapped[Optional[str]] = mapped_column(Text, comment="原文摘要")
    # Link to the original article.
    url: Mapped[str] = mapped_column(String(500), nullable=False, comment="原文連結")
    # Author name.
    author: Mapped[Optional[str]] = mapped_column(String(100), comment="作者")
    # Publication time.
    published_at: Mapped[Optional[datetime]] = mapped_column(comment="發布時間")
    # Crawl time. datetime.utcnow() is deprecated since Python 3.12; the
    # lambdas below produce the same naive-UTC value.
    crawled_at: Mapped[datetime] = mapped_column(
        default=lambda: datetime.now(timezone.utc).replace(tzinfo=None),
        comment="抓取時間",
    )
    created_at: Mapped[datetime] = mapped_column(
        default=lambda: datetime.now(timezone.utc).replace(tzinfo=None),
    )

    # Relationships
    source: Mapped["NewsSource"] = relationship(back_populates="articles")
    # Group matches are owned by the article: the cascade removes them when
    # the article is deleted or when they are detached from it.
    group_matches: Mapped[List["ArticleGroupMatch"]] = relationship(
        back_populates="article",
        cascade="all, delete-orphan",
    )
    report_articles: Mapped[List["ReportArticle"]] = relationship(back_populates="article")
|
||||
|
||||
|
||||
class CrawlJob(Base):
    """Crawl job record table (``crawl_jobs``).

    Each row records one crawl job for a ``NewsSource``: its status,
    scheduling and run timestamps, article count, error message and
    retry count. ``status`` and ``scheduled_at`` are indexed.
    """

    __tablename__ = "crawl_jobs"
    __table_args__ = (
        Index("idx_crawl_jobs_status", "status"),
        Index("idx_crawl_jobs_scheduled", "scheduled_at"),
    )

    id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
    # Source being crawled (FK to news_sources.id).
    source_id: Mapped[int] = mapped_column(ForeignKey("news_sources.id"), nullable=False, comment="來源ID")
    # Job status; new rows default to PENDING.
    status: Mapped[CrawlStatus] = mapped_column(SQLEnum(CrawlStatus), default=CrawlStatus.PENDING)
    # Scheduled time.
    scheduled_at: Mapped[datetime] = mapped_column(nullable=False, comment="排程時間")
    # Start time.
    started_at: Mapped[Optional[datetime]] = mapped_column(comment="開始時間")
    # Completion time.
    completed_at: Mapped[Optional[datetime]] = mapped_column(comment="完成時間")
    # Number of articles crawled.
    articles_count: Mapped[int] = mapped_column(default=0, comment="抓取文章數")
    # Error message.
    error_message: Mapped[Optional[str]] = mapped_column(Text, comment="錯誤訊息")
    # Retry count.
    retry_count: Mapped[int] = mapped_column(default=0, comment="重試次數")
    # datetime.utcnow() is deprecated since Python 3.12; the lambda below
    # produces the same naive-UTC value without the deprecated call.
    created_at: Mapped[datetime] = mapped_column(
        default=lambda: datetime.now(timezone.utc).replace(tzinfo=None),
    )

    # Relationships
    source: Mapped["NewsSource"] = relationship(back_populates="crawl_jobs")
|
||||
|
||||
|
||||
# Imported at the bottom of the module to avoid circular imports (避免循環引入)
|
||||
from app.models.group import ArticleGroupMatch
|
||||
from app.models.report import ReportArticle
|
||||
Reference in New Issue
Block a user