Initial commit: Daily News App

企業內部新聞彙整與分析系統
- 自動新聞抓取 (Digitimes, 經濟日報, 工商時報)
- AI 智慧摘要 (OpenAI/Claude/Ollama)
- 群組管理與訂閱通知
- 已清理 Python 快取檔案

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
donald
2025-12-03 23:53:24 +08:00
commit db0f0bbfe7
50 changed files with 11883 additions and 0 deletions

View File

@@ -0,0 +1,151 @@
"""
認證 API 端點
"""
from datetime import datetime
from fastapi import APIRouter, Depends, HTTPException, status
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
from sqlalchemy.orm import Session
from app.db.session import get_db
from app.core.security import (
verify_password,
get_password_hash,
create_access_token,
decode_access_token,
verify_ldap_credentials
)
from app.models import User, Role
from app.schemas.user import LoginRequest, LoginResponse, UserResponse
router = APIRouter()
security = HTTPBearer()
def get_current_user(
    credentials: HTTPAuthorizationCredentials = Depends(security),
    db: Session = Depends(get_db)
) -> User:
    """Resolve the request's bearer token to an active user (FastAPI dependency).

    Args:
        credentials: Bearer credentials extracted by the ``HTTPBearer`` scheme.
        db: Database session.

    Returns:
        The ``User`` row whose id equals the token's ``user_id`` claim.

    Raises:
        HTTPException: 401 if the token does not decode to a payload or no
            user matches the claim; 403 if the matched user is not active.
    """
    claims = decode_access_token(credentials.credentials)
    if not claims:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="無效的認證憑證",
            headers={"WWW-Authenticate": "Bearer"}
        )

    account = db.query(User).filter(User.id == claims.get("user_id")).first()
    if not account:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="用戶不存在"
        )

    if account.is_active:
        return account
    raise HTTPException(
        status_code=status.HTTP_403_FORBIDDEN,
        detail="用戶已停用"
    )
def require_roles(*roles: str):
    """Build a FastAPI dependency that admits only users with one of ``roles``.

    Args:
        *roles: Accepted role codes, compared against ``user.role.code``.

    Returns:
        A callable suitable for ``Depends`` that returns the authenticated
        user, or raises HTTP 403 when the user's role code is not accepted.
    """
    def role_checker(current_user: User = Depends(get_current_user)) -> User:
        if current_user.role.code in roles:
            return current_user
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="權限不足"
        )

    return role_checker
@router.post("/login", response_model=LoginResponse)
def login(request: LoginRequest, db: Session = Depends(get_db)):
    """Authenticate a user through AD/LDAP or a local password and issue a token.

    For ``auth_type == "ad"`` the credentials are checked with
    ``verify_ldap_credentials``; on the first successful AD login a user row
    with the ``reader`` role is created. Any other ``auth_type`` is treated as
    a local login and verified against the stored password hash.

    Args:
        request: Login payload (username, password, auth_type).
        db: Database session.

    Returns:
        ``LoginResponse`` carrying the access token and the user data.

    Raises:
        HTTPException: 401 on failed authentication; 400 when the account is
            registered for the other authentication type; 403 when the
            account is deactivated; 500 when the ``reader`` role is missing.
    """
    user = db.query(User).filter(User.username == request.username).first()

    if request.auth_type == "ad":
        # AD/LDAP authentication
        ldap_result = verify_ldap_credentials(request.username, request.password)
        if not ldap_result:
            raise HTTPException(
                status_code=status.HTTP_401_UNAUTHORIZED,
                detail="AD 認證失敗"
            )

        # Mirror the local branch: a directory login must not be able to take
        # over a local account that merely shares the same username.
        if user and user.auth_type.value != "ad":
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail="此帳號請使用本地登入"
            )

        # First AD login: create the user automatically with the reader role.
        if not user:
            reader_role = db.query(Role).filter(Role.code == "reader").first()
            if not reader_role:
                raise HTTPException(
                    status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                    detail="系統角色未初始化"
                )
            user = User(
                username=request.username,
                display_name=ldap_result.get("display_name", request.username),
                email=ldap_result.get("email"),
                auth_type="ad",
                role_id=reader_role.id
            )
            db.add(user)
            db.commit()
            db.refresh(user)
    else:
        # Local authentication
        if not user:
            raise HTTPException(
                status_code=status.HTTP_401_UNAUTHORIZED,
                detail="帳號或密碼錯誤"
            )
        if user.auth_type.value != "local":
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail="此帳號請使用 AD 登入"
            )
        if not verify_password(request.password, user.password_hash):
            raise HTTPException(
                status_code=status.HTTP_401_UNAUTHORIZED,
                detail="帳號或密碼錯誤"
            )

    # Do not hand out tokens to deactivated accounts; get_current_user would
    # reject them on every later request anyway.
    if not user.is_active:
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="用戶已停用"
        )

    # Record the last login time.
    user.last_login_at = datetime.utcnow()
    db.commit()

    # Issue the token.
    token = create_access_token({
        "user_id": user.id,
        "username": user.username,
        "role": user.role.code
    })
    return LoginResponse(
        token=token,
        user=UserResponse.model_validate(user)
    )
@router.post("/logout")
def logout(current_user: User = Depends(get_current_user)):
    """Acknowledge a logout request from an authenticated user.

    The handler changes no state; per the original note, the JWT is
    stateless, so this endpoint only returns a confirmation message.
    """
    confirmation = {"message": "登出成功"}
    return confirmation
@router.get("/me", response_model=UserResponse)
def get_current_user_info(current_user: User = Depends(get_current_user)):
    """Return the authenticated user, serialized through ``UserResponse``."""
    me = current_user
    return me

View File

@@ -0,0 +1,239 @@
"""
群組管理 API 端點
"""
from typing import Optional
from fastapi import APIRouter, Depends, HTTPException, status, Query
from sqlalchemy.orm import Session
from sqlalchemy import func
from app.db.session import get_db
from app.models import User, Group, Keyword, Subscription
from app.schemas.group import (
GroupCreate, GroupUpdate, GroupResponse, GroupDetailResponse,
GroupListResponse, KeywordCreate, KeywordResponse
)
from app.schemas.user import PaginationResponse
from app.api.v1.endpoints.auth import get_current_user, require_roles
router = APIRouter()
@router.get("", response_model=GroupListResponse)
def list_groups(
    page: int = Query(1, ge=1),
    limit: int = Query(20, ge=1, le=100),
    category: Optional[str] = None,
    active_only: bool = True,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """Return one page of groups with their keyword and subscriber counts.

    Args:
        page: 1-based page number.
        limit: Page size (1-100).
        category: Optional category filter.
        active_only: When true, only active groups are listed.
        db: Database session.
        current_user: Authenticated caller (any role).

    Returns:
        ``GroupListResponse`` with the page of groups and pagination data.
    """
    query = db.query(Group)
    if category:
        query = query.filter(Group.category == category)
    if active_only:
        query = query.filter(Group.is_active == True)  # noqa: E712 (SQL expression)

    total = query.count()
    # OFFSET/LIMIT without ORDER BY has no defined row order, so pages could
    # overlap or skip rows; order by primary key for stable paging.
    groups = (
        query.order_by(Group.id)
        .offset((page - 1) * limit)
        .limit(limit)
        .all()
    )

    # Fetch the counts for the whole page with two grouped queries instead of
    # two COUNT queries per group.
    group_ids = [g.id for g in groups]
    keyword_counts = {}
    subscriber_counts = {}
    if group_ids:
        keyword_counts = {
            gid: count
            for gid, count in db.query(Keyword.group_id, func.count())
            .filter(Keyword.group_id.in_(group_ids))
            .group_by(Keyword.group_id)
            .all()
        }
        subscriber_counts = {
            gid: count
            for gid, count in db.query(Subscription.group_id, func.count())
            .filter(Subscription.group_id.in_(group_ids))
            .group_by(Subscription.group_id)
            .all()
        }

    result = [
        GroupResponse(
            id=g.id,
            name=g.name,
            description=g.description,
            category=g.category.value,
            is_active=g.is_active,
            keyword_count=keyword_counts.get(g.id, 0),
            subscriber_count=subscriber_counts.get(g.id, 0),
        )
        for g in groups
    ]
    return GroupListResponse(
        data=result,
        pagination=PaginationResponse(
            page=page, limit=limit, total=total,
            total_pages=(total + limit - 1) // limit
        )
    )
@router.post("", response_model=GroupResponse, status_code=status.HTTP_201_CREATED)
def create_group(
    group_in: GroupCreate,
    db: Session = Depends(get_db),
    current_user: User = Depends(require_roles("admin", "editor"))
):
    """Create a group together with its initial keywords (admin/editor only).

    The group and its keywords are written in a single transaction so that a
    failure while inserting keywords cannot leave a group without them.

    Args:
        group_in: Group attributes and optional list of keywords.
        db: Database session.
        current_user: Authenticated admin or editor; recorded as creator.

    Returns:
        ``GroupResponse`` for the new group, with a subscriber count of 0.
    """
    group = Group(
        name=group_in.name,
        description=group_in.description,
        category=group_in.category,
        ai_background=group_in.ai_background,
        ai_prompt=group_in.ai_prompt,
        created_by=current_user.id
    )
    db.add(group)
    # flush() sends the INSERT so group.id is populated, without committing.
    db.flush()

    keywords = group_in.keywords or []
    db.add_all(Keyword(group_id=group.id, keyword=kw) for kw in keywords)

    db.commit()
    db.refresh(group)

    return GroupResponse(
        id=group.id,
        name=group.name,
        description=group.description,
        category=group.category.value,
        is_active=group.is_active,
        keyword_count=len(keywords),
        subscriber_count=0
    )
@router.get("/{group_id}", response_model=GroupDetailResponse)
def get_group(
    group_id: int,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """Return one group with its keywords and subscriber count.

    Raises:
        HTTPException: 404 if no group has ``group_id``.
    """
    group = db.query(Group).filter(Group.id == group_id).first()
    if not group:
        raise HTTPException(status_code=404, detail="群組不存在")

    keyword_rows = db.query(Keyword).filter(Keyword.group_id == group_id).all()
    subscribers = (
        db.query(Subscription)
        .filter(Subscription.group_id == group_id)
        .count()
    )
    keyword_models = [KeywordResponse.model_validate(row) for row in keyword_rows]

    return GroupDetailResponse(
        id=group.id,
        name=group.name,
        description=group.description,
        category=group.category.value,
        is_active=group.is_active,
        ai_background=group.ai_background,
        ai_prompt=group.ai_prompt,
        keywords=keyword_models,
        keyword_count=len(keyword_rows),
        subscriber_count=subscribers,
        created_at=group.created_at,
        updated_at=group.updated_at
    )
@router.put("/{group_id}", response_model=GroupResponse)
def update_group(
    group_id: int,
    group_in: GroupUpdate,
    db: Session = Depends(get_db),
    current_user: User = Depends(require_roles("admin", "editor"))
):
    """Apply the explicitly provided fields of ``group_in`` to a group.

    Only fields set in the request (``exclude_unset=True``) are written.

    Raises:
        HTTPException: 404 if no group has ``group_id``.
    """
    group = db.query(Group).filter(Group.id == group_id).first()
    if not group:
        raise HTTPException(status_code=404, detail="群組不存在")

    changes = group_in.model_dump(exclude_unset=True)
    for attr_name, new_value in changes.items():
        setattr(group, attr_name, new_value)
    db.commit()
    db.refresh(group)

    keywords_total = db.query(Keyword).filter(Keyword.group_id == group_id).count()
    subscribers_total = (
        db.query(Subscription)
        .filter(Subscription.group_id == group_id)
        .count()
    )
    return GroupResponse(
        id=group.id,
        name=group.name,
        description=group.description,
        category=group.category.value,
        is_active=group.is_active,
        keyword_count=keywords_total,
        subscriber_count=subscribers_total
    )
@router.delete("/{group_id}", status_code=status.HTTP_204_NO_CONTENT)
def delete_group(
    group_id: int,
    db: Session = Depends(get_db),
    current_user: User = Depends(require_roles("admin"))
):
    """Delete a group (admin only).

    Raises:
        HTTPException: 404 if no group has ``group_id``.
    """
    target = db.query(Group).filter(Group.id == group_id).first()
    if target:
        db.delete(target)
        db.commit()
        return
    raise HTTPException(status_code=404, detail="群組不存在")
# ===== 關鍵字管理 =====
@router.get("/{group_id}/keywords", response_model=list[KeywordResponse])
def list_keywords(
    group_id: int,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """Return all keywords whose ``group_id`` matches.

    The group itself is not looked up, so an unknown ``group_id`` yields an
    empty list rather than a 404.
    """
    rows = db.query(Keyword).filter(Keyword.group_id == group_id).all()
    return list(map(KeywordResponse.model_validate, rows))
@router.post("/{group_id}/keywords", response_model=KeywordResponse, status_code=status.HTTP_201_CREATED)
def add_keyword(
    group_id: int,
    keyword_in: KeywordCreate,
    db: Session = Depends(get_db),
    current_user: User = Depends(require_roles("admin", "editor"))
):
    """Add a keyword to a group (admin/editor only).

    Raises:
        HTTPException: 404 if the group does not exist; 400 if the group
            already has the same keyword.
    """
    if not db.query(Group).filter(Group.id == group_id).first():
        raise HTTPException(status_code=404, detail="群組不存在")

    # Reject duplicates within the same group.
    duplicate = (
        db.query(Keyword)
        .filter(
            Keyword.group_id == group_id,
            Keyword.keyword == keyword_in.keyword
        )
        .first()
    )
    if duplicate:
        raise HTTPException(status_code=400, detail="關鍵字已存在")

    created = Keyword(group_id=group_id, keyword=keyword_in.keyword)
    db.add(created)
    db.commit()
    db.refresh(created)
    return created
@router.delete("/{group_id}/keywords/{keyword_id}", status_code=status.HTTP_204_NO_CONTENT)
def delete_keyword(
    group_id: int,
    keyword_id: int,
    db: Session = Depends(get_db),
    current_user: User = Depends(require_roles("admin", "editor"))
):
    """Delete a keyword that belongs to the given group (admin/editor only).

    Raises:
        HTTPException: 404 if no keyword matches both ids.
    """
    lookup = db.query(Keyword).filter(
        Keyword.id == keyword_id,
        Keyword.group_id == group_id
    )
    target = lookup.first()
    if target:
        db.delete(target)
        db.commit()
        return
    raise HTTPException(status_code=404, detail="關鍵字不存在")

View File

@@ -0,0 +1,320 @@
"""
報告管理 API 端點
"""
from datetime import date, datetime
from typing import Optional
from fastapi import APIRouter, Depends, HTTPException, status, Query
from fastapi.responses import StreamingResponse
from sqlalchemy.orm import Session
from sqlalchemy import func
import io
from app.db.session import get_db
from app.models import User, Report, ReportArticle, Group, NewsArticle, Favorite, Comment
from app.schemas.report import (
ReportUpdate, ReportResponse, ReportDetailResponse, ReportReviewResponse,
ReportListResponse, PublishResponse, RegenerateSummaryResponse,
ArticleInReport, GroupBrief
)
from app.schemas.user import PaginationResponse
from app.api.v1.endpoints.auth import get_current_user, require_roles
from app.services.llm_service import generate_summary
from app.services.notification_service import send_report_notifications
router = APIRouter()
@router.get("", response_model=ReportListResponse)
def list_reports(
    page: int = Query(1, ge=1),
    limit: int = Query(20, ge=1, le=100),
    group_id: Optional[int] = None,
    status: Optional[str] = None,
    date_from: Optional[date] = None,
    date_to: Optional[date] = None,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """Return one page of reports, newest report date first.

    Optional filters narrow by group, status and report-date range. Users
    whose role code is ``reader`` are additionally restricted to published
    reports.

    Returns:
        ``ReportListResponse`` with the page of reports and pagination data.
    """
    conditions = []
    if group_id:
        conditions.append(Report.group_id == group_id)
    if status:
        conditions.append(Report.status == status)
    if date_from:
        conditions.append(Report.report_date >= date_from)
    if date_to:
        conditions.append(Report.report_date <= date_to)
    # Readers may only see published reports.
    if current_user.role.code == "reader":
        conditions.append(Report.status == "published")

    query = db.query(Report).join(Group).filter(*conditions)
    total = query.count()
    page_rows = (
        query.order_by(Report.report_date.desc())
        .offset((page - 1) * limit)
        .limit(limit)
        .all()
    )

    def included_count(report_id):
        # Number of articles currently marked as included in the report.
        return db.query(ReportArticle).filter(
            ReportArticle.report_id == report_id,
            ReportArticle.is_included == True
        ).count()

    items = []
    for report in page_rows:
        count = included_count(report.id)
        items.append(ReportResponse(
            id=report.id,
            title=report.title,
            report_date=report.report_date,
            status=report.status.value,
            group=GroupBrief(
                id=report.group.id,
                name=report.group.name,
                category=report.group.category.value
            ),
            article_count=count,
            published_at=report.published_at
        ))

    pagination = PaginationResponse(
        page=page, limit=limit, total=total,
        total_pages=(total + limit - 1) // limit
    )
    return ReportListResponse(data=items, pagination=pagination)
@router.get("/today", response_model=list[ReportReviewResponse])
def get_today_reports(
    db: Session = Depends(get_db),
    current_user: User = Depends(require_roles("admin", "editor"))
):
    """Return today's reports with their articles, for editorial review.

    "Today" is ``date.today()`` on the server. Each report lists every linked
    article that still exists, with its ``is_included`` flag.
    """
    todays_reports = (
        db.query(Report)
        .filter(Report.report_date == date.today())
        .all()
    )

    reviews = []
    for report in todays_reports:
        links = (
            db.query(ReportArticle)
            .filter(ReportArticle.report_id == report.id)
            .all()
        )
        articles = []
        for link in links:
            article = (
                db.query(NewsArticle)
                .filter(NewsArticle.id == link.article_id)
                .first()
            )
            if not article:
                # Link points at an article that no longer exists.
                continue
            articles.append(ArticleInReport(
                id=article.id,
                title=article.title,
                source_name=article.source.name,
                url=article.url,
                published_at=article.published_at,
                is_included=link.is_included
            ))

        included_total = sum(1 for a in articles if a.is_included)
        reviews.append(ReportReviewResponse(
            id=report.id,
            title=report.title,
            report_date=report.report_date,
            status=report.status.value,
            group=GroupBrief(
                id=report.group.id,
                name=report.group.name,
                category=report.group.category.value
            ),
            article_count=included_total,
            published_at=report.published_at,
            ai_summary=report.ai_summary,
            edited_summary=report.edited_summary,
            articles=articles
        ))
    return reviews
@router.get("/{report_id}", response_model=ReportDetailResponse)
def get_report(
    report_id: int,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """Return a report with its articles, favorite flag and comment count.

    Raises:
        HTTPException: 404 if the report does not exist; 403 if a reader
            requests a report that is not published.
    """
    report = db.query(Report).filter(Report.id == report_id).first()
    if not report:
        raise HTTPException(status_code=404, detail="報告不存在")

    # Readers may only view published reports.
    is_reader = current_user.role.code == "reader"
    if is_reader and report.status.value != "published":
        raise HTTPException(status_code=403, detail="無權限查看此報告")

    # Collect the linked articles that still exist.
    links = (
        db.query(ReportArticle)
        .filter(ReportArticle.report_id == report_id)
        .all()
    )
    articles = []
    for link in links:
        article = (
            db.query(NewsArticle)
            .filter(NewsArticle.id == link.article_id)
            .first()
        )
        if not article:
            continue
        articles.append(ArticleInReport(
            id=article.id,
            title=article.title,
            source_name=article.source.name,
            url=article.url,
            published_at=article.published_at,
            is_included=link.is_included
        ))

    # Has the caller marked this report as a favorite?
    favorite = db.query(Favorite).filter(
        Favorite.user_id == current_user.id,
        Favorite.report_id == report_id
    ).first()

    # Comments that are not flagged as deleted.
    visible_comments = db.query(Comment).filter(
        Comment.report_id == report_id,
        Comment.is_deleted == False
    ).count()

    included_total = sum(1 for a in articles if a.is_included)
    return ReportDetailResponse(
        id=report.id,
        title=report.title,
        report_date=report.report_date,
        status=report.status.value,
        group=GroupBrief(
            id=report.group.id,
            name=report.group.name,
            category=report.group.category.value
        ),
        article_count=included_total,
        published_at=report.published_at,
        ai_summary=report.ai_summary,
        edited_summary=report.edited_summary,
        articles=articles,
        is_favorited=favorite is not None,
        comment_count=visible_comments,
        created_at=report.created_at,
        updated_at=report.updated_at
    )
@router.put("/{report_id}", response_model=ReportResponse)
def update_report(
    report_id: int,
    report_in: ReportUpdate,
    db: Session = Depends(get_db),
    current_user: User = Depends(require_roles("admin", "editor"))
):
    """Update a report's title, edited summary and article selection.

    A falsy title is ignored; ``edited_summary`` is written whenever it is
    not ``None``. Selections that reference an article not linked to the
    report are skipped silently.

    Raises:
        HTTPException: 404 if the report does not exist.
    """
    report = db.query(Report).filter(Report.id == report_id).first()
    if not report:
        raise HTTPException(status_code=404, detail="報告不存在")

    if report_in.title:
        report.title = report_in.title
    if report_in.edited_summary is not None:
        report.edited_summary = report_in.edited_summary

    # Apply the include/exclude choice for each referenced article.
    for selection in report_in.article_selections or []:
        link = db.query(ReportArticle).filter(
            ReportArticle.report_id == report_id,
            ReportArticle.article_id == selection["article_id"]
        ).first()
        if link:
            link.is_included = selection["is_included"]

    db.commit()
    db.refresh(report)

    included_total = db.query(ReportArticle).filter(
        ReportArticle.report_id == report_id,
        ReportArticle.is_included == True
    ).count()
    group_brief = GroupBrief(
        id=report.group.id,
        name=report.group.name,
        category=report.group.category.value
    )
    return ReportResponse(
        id=report.id,
        title=report.title,
        report_date=report.report_date,
        status=report.status.value,
        group=group_brief,
        article_count=included_total,
        published_at=report.published_at
    )
@router.post("/{report_id}/publish", response_model=PublishResponse)
def publish_report(
    report_id: int,
    db: Session = Depends(get_db),
    current_user: User = Depends(require_roles("admin", "editor"))
):
    """Mark a report as published and send subscriber notifications.

    The status change is committed before ``send_report_notifications`` is
    called.

    Raises:
        HTTPException: 404 if the report does not exist; 400 if it is
            already published.
    """
    report = db.query(Report).filter(Report.id == report_id).first()
    if not report:
        raise HTTPException(status_code=404, detail="報告不存在")

    already_published = report.status.value == "published"
    if already_published:
        raise HTTPException(status_code=400, detail="報告已發布")

    report.status = "published"
    report.published_at = datetime.utcnow()
    report.published_by = current_user.id
    db.commit()

    # Notify after the publish has been persisted.
    sent = send_report_notifications(db, report)
    return PublishResponse(
        published_at=report.published_at,
        notifications_sent=sent
    )
@router.post("/{report_id}/regenerate-summary", response_model=RegenerateSummaryResponse)
def regenerate_summary(
    report_id: int,
    db: Session = Depends(get_db),
    current_user: User = Depends(require_roles("admin", "editor"))
):
    """Regenerate the AI summary from the report's included articles.

    The result of ``generate_summary`` overwrites ``report.ai_summary``.

    Raises:
        HTTPException: 404 if the report does not exist.
    """
    report = db.query(Report).filter(Report.id == report_id).first()
    if not report:
        raise HTTPException(status_code=404, detail="報告不存在")

    # Only articles currently marked as included feed the summary.
    included_links = db.query(ReportArticle).filter(
        ReportArticle.report_id == report_id,
        ReportArticle.is_included == True
    ).all()

    source_articles = []
    for link in included_links:
        found = (
            db.query(NewsArticle)
            .filter(NewsArticle.id == link.article_id)
            .first()
        )
        if found:
            source_articles.append(found)

    new_summary = generate_summary(report.group, source_articles)
    report.ai_summary = new_summary
    db.commit()
    return RegenerateSummaryResponse(ai_summary=new_summary)
@router.get("/{report_id}/export")
def export_report_pdf(
    report_id: int,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """Export a report as a downloadable file.

    PDF generation is not implemented yet (see TODO); the response is a
    UTF-8 plain-text attachment containing the title, date, group name and
    the edited summary (falling back to the AI summary).

    Raises:
        HTTPException: 404 if the report does not exist; 403 if a reader
            requests a report that is not published.
    """
    report = db.query(Report).filter(Report.id == report_id).first()
    if not report:
        raise HTTPException(status_code=404, detail="報告不存在")

    # Readers may only export published reports.
    is_reader = current_user.role.code == "reader"
    if is_reader and report.status.value != "published":
        raise HTTPException(status_code=403, detail="無權限匯出此報告")

    # TODO: implement real PDF generation; plain text is returned for now.
    body = f"""
{report.title}
日期:{report.report_date}
群組:{report.group.name}
{report.edited_summary or report.ai_summary or '無摘要內容'}
"""
    download_headers = {
        "Content-Disposition": f"attachment; filename=report_{report_id}.txt"
    }
    return StreamingResponse(
        io.BytesIO(body.encode('utf-8')),
        media_type="text/plain",
        headers=download_headers
    )

View File

@@ -0,0 +1,295 @@
"""
系統設定 API 端點
"""
from fastapi import APIRouter, Depends, HTTPException, status, UploadFile, File
from sqlalchemy.orm import Session
from pydantic import BaseModel
from typing import Optional
import os
import hashlib
from pathlib import Path
import logging
from app.db.session import get_db
from app.models import User, SystemSetting
from app.api.v1.endpoints.auth import get_current_user, require_roles
from app.services.llm_service import test_llm_connection
logger = logging.getLogger(__name__)
router = APIRouter()
class SystemSettingsResponse(BaseModel):
    # Response body of the settings GET endpoint. Every field is optional
    # and defaults to None. The secret fields accepted by
    # SystemSettingsUpdate (llm_api_key, smtp_password) are not part of it.

    # Scheduling
    crawl_schedule_time: Optional[str] = None
    publish_deadline: Optional[str] = None
    # LLM configuration
    llm_provider: Optional[str] = None
    llm_model: Optional[str] = None
    llm_ollama_endpoint: Optional[str] = None
    # Data retention
    data_retention_days: Optional[int] = None
    # PDF output
    pdf_logo_path: Optional[str] = None
    pdf_header_text: Optional[str] = None
    pdf_footer_text: Optional[str] = None
    # Outgoing mail
    smtp_host: Optional[str] = None
    smtp_port: Optional[int] = None
    smtp_username: Optional[str] = None
    smtp_from_email: Optional[str] = None
    smtp_from_name: Optional[str] = None
class SystemSettingsUpdate(BaseModel):
    # Request body of the settings PUT endpoint. Every field is optional;
    # update_settings only writes fields that were sent and are not None.

    # Scheduling
    crawl_schedule_time: Optional[str] = None
    publish_deadline: Optional[str] = None
    # LLM configuration (llm_api_key is write-only: absent from the response)
    llm_provider: Optional[str] = None
    llm_api_key: Optional[str] = None
    llm_model: Optional[str] = None
    llm_ollama_endpoint: Optional[str] = None
    # Data retention
    data_retention_days: Optional[int] = None
    # PDF output (the logo path is set by the logo upload endpoint instead)
    pdf_header_text: Optional[str] = None
    pdf_footer_text: Optional[str] = None
    # Outgoing mail (smtp_password is write-only: absent from the response)
    smtp_host: Optional[str] = None
    smtp_port: Optional[int] = None
    smtp_username: Optional[str] = None
    smtp_password: Optional[str] = None
    smtp_from_email: Optional[str] = None
    smtp_from_name: Optional[str] = None
class LLMTestResponse(BaseModel):
    # Result of the LLM connection test endpoint.
    success: bool  # taken from the "success" key of the test result
    provider: str  # provider that was tested
    model: str  # model that was tested
    response_time_ms: int  # test_llm fills in 0 when the result has no timing
    message: Optional[str] = None  # optional detail from the test result
def get_setting_value(db: Session, key: str) -> Optional[str]:
    """Return the stored value for ``key``, or ``None`` if no row exists."""
    row = (
        db.query(SystemSetting)
        .filter(SystemSetting.setting_key == key)
        .first()
    )
    if row:
        return row.setting_value
    return None
def set_setting_value(db: Session, key: str, value: str, user_id: int):
    """Create or update the setting ``key`` without committing.

    Args:
        db: Database session; the caller is responsible for ``commit()``.
        key: Setting key to write.
        value: New value.
        user_id: Id recorded in ``updated_by``.
    """
    existing = (
        db.query(SystemSetting)
        .filter(SystemSetting.setting_key == key)
        .first()
    )
    if not existing:
        db.add(SystemSetting(setting_key=key, setting_value=value, updated_by=user_id))
        return
    existing.setting_value = value
    existing.updated_by = user_id
@router.get("", response_model=SystemSettingsResponse)
def get_settings(
    db: Session = Depends(get_db),
    current_user: User = Depends(require_roles("admin"))
):
    """Return the current system settings (admin only).

    Values are stored as strings; ``data_retention_days`` and ``smtp_port``
    are converted to ``int`` when a non-empty value is stored.
    """
    def read(key: str) -> Optional[str]:
        return get_setting_value(db, key)

    def read_int(key: str) -> Optional[int]:
        raw = read(key)
        return int(raw) if raw else None

    retention_days = read_int("data_retention_days")
    port = read_int("smtp_port")

    return SystemSettingsResponse(
        crawl_schedule_time=read("crawl_schedule_time"),
        publish_deadline=read("publish_deadline"),
        llm_provider=read("llm_provider"),
        llm_model=read("llm_model"),
        llm_ollama_endpoint=read("llm_ollama_endpoint"),
        data_retention_days=retention_days,
        pdf_logo_path=read("pdf_logo_path"),
        pdf_header_text=read("pdf_header_text"),
        pdf_footer_text=read("pdf_footer_text"),
        smtp_host=read("smtp_host"),
        smtp_port=port,
        smtp_username=read("smtp_username"),
        smtp_from_email=read("smtp_from_email"),
        smtp_from_name=read("smtp_from_name")
    )
@router.put("")
def update_settings(
    settings_in: SystemSettingsUpdate,
    db: Session = Depends(get_db),
    current_user: User = Depends(require_roles("admin"))
):
    """Persist the settings that were sent with a non-None value (admin only).

    Fields left unset or sent as ``None`` are not written. The two secret
    fields are stored under different keys than their field names.
    """
    # Storage keys used for the secret fields.
    # NOTE(review): despite the "_encrypted" suffix the value is written as
    # plain str(value); no encryption happens here. Confirm against the
    # code that reads these keys before changing the names.
    secret_storage_keys = {
        "llm_api_key": "llm_api_encrypted",
        "smtp_password": "smtp_encrypted",
    }

    submitted = settings_in.model_dump(exclude_unset=True)
    for field_name, value in submitted.items():
        if value is None:
            continue
        storage_key = secret_storage_keys.get(field_name, field_name)
        set_setting_value(db, storage_key, str(value), current_user.id)

    db.commit()
    return {"message": "設定更新成功"}
@router.post("/llm/test", response_model=LLMTestResponse)
def test_llm(
    db: Session = Depends(get_db),
    current_user: User = Depends(require_roles("admin"))
):
    """Test the connection to the configured LLM (admin only).

    Falls back to provider ``claude`` and model ``claude-3-sonnet`` when the
    corresponding settings are missing or empty.
    """
    configured_provider = get_setting_value(db, "llm_provider")
    configured_model = get_setting_value(db, "llm_model")
    provider = configured_provider or "claude"
    model = configured_model or "claude-3-sonnet"

    outcome = test_llm_connection(provider, model)
    elapsed_ms = outcome.get("response_time_ms", 0)
    return LLMTestResponse(
        success=outcome["success"],
        provider=provider,
        model=model,
        response_time_ms=elapsed_ms,
        message=outcome.get("message")
    )
@router.post("/pdf/logo")
async def upload_pdf_logo(
    logo: UploadFile = File(...),
    db: Session = Depends(get_db),
    current_user: User = Depends(require_roles("admin"))
):
    """Validate and store an uploaded logo image for PDF output (admin only).

    The upload must be at most 5 MB, declare a PNG/JPEG/SVG content type,
    carry a matching file extension, and its leading bytes must identify the
    same format as the extension. The file is saved under
    ``uploads/logos/`` with a content-hash based name and the relative path
    is stored in the ``pdf_logo_path`` setting.

    Args:
        logo: Uploaded image file.
        db: Database session.
        current_user: Authenticated admin; recorded as the setting's updater.

    Returns:
        ``{"logo_path": <relative path>}``.

    Raises:
        HTTPException: 400 when any validation step fails; 500 when the file
            cannot be written.
    """
    # 1. Size limit (5 MB). Reading one byte past the limit is enough to
    #    detect an oversized upload without loading all of it into memory.
    MAX_FILE_SIZE = 5 * 1024 * 1024  # 5MB
    content = await logo.read(MAX_FILE_SIZE + 1)
    if len(content) > MAX_FILE_SIZE:
        raise HTTPException(status_code=400, detail="檔案大小超過 5MB 限制")

    # 2. Declared content type (client supplied, so only a first filter).
    allowed_content_types = ["image/png", "image/jpeg", "image/svg+xml"]
    if logo.content_type not in allowed_content_types:
        raise HTTPException(status_code=400, detail=f"不支援的檔案類型: {logo.content_type},僅支援 PNG、JPEG、SVG")

    # 3. File extension. The filename may be missing entirely.
    filename = logo.filename or ""
    file_ext = filename.rsplit(".", 1)[-1].lower() if "." in filename else ""
    allowed_extensions = ["png", "jpg", "jpeg", "svg"]
    if file_ext not in allowed_extensions:
        raise HTTPException(status_code=400, detail=f"不支援的檔案副檔名: {file_ext}")

    # 4. Sniff the real format from the leading bytes.
    #    PNG: 89 50 4E 47 0D 0A 1A 0A, JPEG: FF D8 FF, SVG: XML text with <svg.
    if content.startswith(b'\x89PNG\r\n\x1a\n'):
        file_type_detected = 'png'
    elif content.startswith(b'\xff\xd8\xff'):
        file_type_detected = 'jpg'
    else:
        # Tolerate a UTF-8 BOM / leading whitespace, but require an <svg
        # element near the start rather than accepting any XML document.
        head = content[:4096].lstrip(b'\xef\xbb\xbf \t\r\n').lower()
        is_markup = head.startswith((b'<?xml', b'<!doctype', b'<!--', b'<svg'))
        file_type_detected = 'svg' if is_markup and b'<svg' in head else None

    # Unrecognised content must be rejected, not waved through.
    if file_type_detected is None:
        raise HTTPException(status_code=400, detail="無法辨識檔案內容,僅支援 PNG、JPEG、SVG")

    # "jpeg" and "jpg" name the same format; compare on the canonical form so
    # valid .jpeg uploads are not rejected.
    expected_type = 'jpg' if file_ext == 'jpeg' else file_ext
    if file_type_detected != expected_type:
        raise HTTPException(
            status_code=400,
            detail=f"檔案類型與副檔名不符:檢測到 {file_type_detected},但副檔名為 {file_ext}"
        )

    # NOTE(review): SVG content is stored as uploaded; SVG files can embed
    # scripts, so confirm how the logo is served/rendered.

    # 5. Server-generated file name (content hash) - never the client's name.
    file_hash = hashlib.sha256(content).hexdigest()[:16]
    safe_filename = f"company_logo_{file_hash}.{file_ext}"

    # 6. Absolute upload directory derived from this file's location.
    project_root = Path(__file__).parent.parent.parent.parent.resolve()
    upload_dir = (project_root / "uploads" / "logos").resolve()
    upload_dir.mkdir(parents=True, exist_ok=True)
    file_path = upload_dir / safe_filename

    # 7. Defence in depth: the target must stay inside the upload directory.
    try:
        file_path.resolve().relative_to(upload_dir.resolve())
    except ValueError:
        raise HTTPException(status_code=400, detail="無效的檔案路徑")

    # 8. Store the file.
    try:
        with open(file_path, "wb") as f:
            f.write(content)
    except OSError as e:
        logger.error("Logo 儲存失敗: %s", e, exc_info=True)
        raise HTTPException(status_code=500, detail="檔案儲存失敗") from e
    logger.info("Logo 上傳成功: %s", safe_filename)

    # 9. Record the relative path in the settings.
    relative_path = f"uploads/logos/{safe_filename}"
    set_setting_value(db, "pdf_logo_path", relative_path, current_user.id)
    db.commit()
    return {"logo_path": relative_path}
# ===== Dashboard =====
class AdminDashboardResponse(BaseModel):
    # Response body of the admin dashboard endpoint.
    today_articles: int  # articles with crawled_at on or after today's date
    active_users: int  # users with is_active set
    pending_reports: int  # reports whose status is "draft" or "pending"
    system_health: list[dict]  # entries with "name", "status" and "last_run"
@router.get("/dashboard/admin", response_model=AdminDashboardResponse)
def get_admin_dashboard(
    db: Session = Depends(get_db),
    current_user: User = Depends(require_roles("admin"))
):
    """Return the figures shown on the admin dashboard (admin only).

    Includes today's crawled article count, the number of active users, the
    number of draft/pending reports and one health entry per active news
    source based on its most recently created crawl job. The LLM entry is
    always reported as healthy; no connection test is performed here.
    """
    from datetime import date
    from app.models import CrawlJob, NewsArticle, NewsSource, Report

    today = date.today()

    # Articles crawled today.
    articles_today = (
        db.query(NewsArticle)
        .filter(NewsArticle.crawled_at >= today)
        .count()
    )
    # Active users.
    users_active = db.query(User).filter(User.is_active == True).count()
    # Reports still waiting to be published.
    reports_pending = (
        db.query(Report)
        .filter(Report.status.in_(["draft", "pending"]))
        .count()
    )

    # Crawler status per active source.
    health_entries = []
    active_sources = db.query(NewsSource).filter(NewsSource.is_active == True).all()
    for source in active_sources:
        latest_job = (
            db.query(CrawlJob)
            .filter(CrawlJob.source_id == source.id)
            .order_by(CrawlJob.created_at.desc())
            .first()
        )
        if latest_job and latest_job.status.value == "completed":
            job_status = "正常"
        else:
            job_status = "異常"
        if latest_job and latest_job.completed_at:
            last_run = latest_job.completed_at.strftime("%H:%M")
        else:
            last_run = "-"
        health_entries.append({
            "name": f"{source.name} 爬蟲",
            "status": job_status,
            "last_run": last_run
        })

    # LLM status entry.
    provider_label = get_setting_value(db, 'llm_provider') or 'Claude'
    health_entries.append({
        "name": f"LLM 服務 ({provider_label})",
        "status": "正常",
        "last_run": "-"
    })

    return AdminDashboardResponse(
        today_articles=articles_today,
        active_users=users_active,
        pending_reports=reports_pending,
        system_health=health_entries
    )

View File

@@ -0,0 +1,91 @@
"""
訂閱管理 API 端點
"""
from fastapi import APIRouter, Depends, HTTPException, status
from sqlalchemy.orm import Session
from pydantic import BaseModel
from typing import Optional
from app.db.session import get_db
from app.models import User, Group, Subscription
from app.api.v1.endpoints.auth import get_current_user
router = APIRouter()
class SubscriptionResponse(BaseModel):
    # One subscription of the current user, as returned by the list endpoint.
    group_id: int  # id of the subscribed group
    group_name: str  # name of the subscribed group
    category: str  # value of the group's category enum
    email_notify: bool  # email_notify flag stored on the subscription

    class Config:
        # Allow building the model from attribute-bearing objects.
        from_attributes = True
class SubscriptionItem(BaseModel):
    # One entry of a batch subscription update.
    group_id: int  # group the entry refers to
    subscribed: bool  # True: create/update the subscription; False: remove it
    email_notify: Optional[bool] = True  # written to the subscription's email_notify
class SubscriptionUpdateRequest(BaseModel):
    # Request body of the batch subscription update endpoint.
    subscriptions: list[SubscriptionItem]  # entries are processed in order
@router.get("", response_model=list[SubscriptionResponse])
def get_my_subscriptions(
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """Return the current user's subscriptions.

    Subscriptions whose group can no longer be found are left out.
    """
    my_subscriptions = (
        db.query(Subscription)
        .filter(Subscription.user_id == current_user.id)
        .all()
    )

    entries = []
    for subscription in my_subscriptions:
        group = db.query(Group).filter(Group.id == subscription.group_id).first()
        if not group:
            continue
        entries.append(SubscriptionResponse(
            group_id=group.id,
            group_name=group.name,
            category=group.category.value,
            email_notify=subscription.email_notify
        ))
    return entries
@router.put("")
def update_subscriptions(
    request: SubscriptionUpdateRequest,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """Apply a batch of subscribe/unsubscribe changes for the current user.

    Entries that reference a missing or inactive group are skipped silently.
    All changes are committed together at the end.
    """
    for entry in request.subscriptions:
        # Only existing, active groups can be (un)subscribed.
        group = db.query(Group).filter(
            Group.id == entry.group_id,
            Group.is_active == True
        ).first()
        if not group:
            continue

        current = db.query(Subscription).filter(
            Subscription.user_id == current_user.id,
            Subscription.group_id == entry.group_id
        ).first()

        if not entry.subscribed:
            if current:
                db.delete(current)
            continue

        if current:
            current.email_notify = entry.email_notify
            continue

        db.add(Subscription(
            user_id=current_user.id,
            group_id=entry.group_id,
            email_notify=entry.email_notify
        ))

    db.commit()
    return {"message": "訂閱更新成功"}

View File

@@ -0,0 +1,195 @@
"""
用戶管理 API 端點
"""
from typing import Optional
from fastapi import APIRouter, Depends, HTTPException, status, Query
from sqlalchemy.orm import Session
from sqlalchemy import func
from app.db.session import get_db
from app.core.security import get_password_hash
from app.models import User, Role
from app.schemas.user import (
UserCreate, UserUpdate, UserResponse, UserListResponse, PaginationResponse
)
from app.api.v1.endpoints.auth import get_current_user, require_roles
router = APIRouter()
@router.get("", response_model=UserListResponse)
def list_users(
    page: int = Query(1, ge=1),
    limit: int = Query(20, ge=1, le=100),
    role: Optional[str] = None,
    auth_type: Optional[str] = None,
    search: Optional[str] = None,
    current_user: User = Depends(require_roles("admin")),
    db: Session = Depends(get_db)
):
    """Return one page of users (admin only).

    Args:
        page: 1-based page number.
        limit: Page size (1-100).
        role: Optional role code filter.
        auth_type: Optional authentication type filter.
        search: Optional case-insensitive substring matched against the
            username and display name (at most 100 characters are used).
        current_user: Authenticated admin.
        db: Database session, injected so that it is closed by the
            dependency after the request.

    Returns:
        ``UserListResponse`` with the page of users and pagination data.
    """
    query = db.query(User)

    # Filters
    if role:
        query = query.join(Role).filter(Role.code == role)
    if auth_type:
        query = query.filter(User.auth_type == auth_type)
    if search:
        term = search.strip()[:100]
        # The value is bound as a parameter; escaping is only needed so that
        # LIKE wildcards typed by the user match literally. Backslash is
        # escaped first and declared as the escape character explicitly,
        # because not every database treats it as one by default.
        escaped = (
            term.replace("\\", "\\\\")
            .replace("%", "\\%")
            .replace("_", "\\_")
        )
        pattern = f"%{escaped}%"
        query = query.filter(
            (User.username.ilike(pattern, escape="\\")) |
            (User.display_name.ilike(pattern, escape="\\"))
        )

    total = query.count()
    # Order by primary key so OFFSET/LIMIT paging is deterministic.
    users = (
        query.order_by(User.id)
        .offset((page - 1) * limit)
        .limit(limit)
        .all()
    )

    return UserListResponse(
        data=[UserResponse.model_validate(u) for u in users],
        pagination=PaginationResponse(
            page=page,
            limit=limit,
            total=total,
            total_pages=(total + limit - 1) // limit
        )
    )
@router.post("", response_model=UserResponse, status_code=status.HTTP_201_CREATED)
def create_user(
    user_in: UserCreate,
    db: Session = Depends(get_db),
    current_user: User = Depends(require_roles("admin"))
):
    """Create a user (admin only).

    Raises:
        HTTPException: 400 if the username is taken, the role does not
            exist, or a local account is requested without a password.
    """
    def bad_request(message: str) -> HTTPException:
        return HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=message
        )

    # Username must be unique.
    if db.query(User).filter(User.username == user_in.username).first():
        raise bad_request("帳號已存在")

    # Role must exist.
    if not db.query(Role).filter(Role.id == user_in.role_id).first():
        raise bad_request("角色不存在")

    # Local accounts need a password.
    if user_in.auth_type == "local" and not user_in.password:
        raise bad_request("本地帳號必須設定密碼")

    hashed = get_password_hash(user_in.password) if user_in.password else None
    new_user = User(
        username=user_in.username,
        display_name=user_in.display_name,
        email=user_in.email,
        auth_type=user_in.auth_type,
        role_id=user_in.role_id,
        password_hash=hashed
    )
    db.add(new_user)
    db.commit()
    db.refresh(new_user)
    return new_user
@router.get("/{user_id}", response_model=UserResponse)
def get_user(
    user_id: int,
    db: Session = Depends(get_db),
    current_user: User = Depends(require_roles("admin"))
):
    """Return a single user by id (admin only).

    Raises:
        HTTPException: 404 if no user has ``user_id``.
    """
    found = db.query(User).filter(User.id == user_id).first()
    if found:
        return found
    raise HTTPException(
        status_code=status.HTTP_404_NOT_FOUND,
        detail="用戶不存在"
    )
@router.put("/{user_id}", response_model=UserResponse)
def update_user(
    user_id: int,
    user_in: UserUpdate,
    db: Session = Depends(get_db),
    current_user: User = Depends(require_roles("admin"))
):
    """Update the provided (non-None) fields of a user (admin only).

    Raises:
        HTTPException: 404 if the user does not exist; 400 if the new role
            does not exist or a password is supplied for a non-local
            account.
    """
    target = db.query(User).filter(User.id == user_id).first()
    if not target:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="用戶不存在"
        )

    if user_in.display_name is not None:
        target.display_name = user_in.display_name

    if user_in.email is not None:
        target.email = user_in.email

    if user_in.role_id is not None:
        role_exists = db.query(Role).filter(Role.id == user_in.role_id).first()
        if not role_exists:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail="角色不存在"
            )
        target.role_id = user_in.role_id

    if user_in.is_active is not None:
        target.is_active = user_in.is_active

    if user_in.password is not None:
        # Only local accounts keep a password hash in this table.
        if target.auth_type.value != "local":
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail="AD 帳號無法修改密碼"
            )
        target.password_hash = get_password_hash(user_in.password)

    db.commit()
    db.refresh(target)
    return target
@router.delete("/{user_id}", status_code=status.HTTP_204_NO_CONTENT)
def delete_user(
    user_id: int,
    db: Session = Depends(get_db),
    current_user: User = Depends(require_roles("admin"))
):
    """Delete a user (administrators only).

    Responds with HTTP 404 when the user does not exist and HTTP 400 when the
    caller tries to delete their own account.
    """
    target = db.query(User).filter(User.id == user_id).first()
    if not target:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="用戶不存在",
        )

    # The caller may not remove the account they are logged in with.
    deleting_self = target.id == current_user.id
    if deleting_self:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="無法刪除自己的帳號",
        )

    db.delete(target)
    db.commit()

26
app/api/v1/router.py Normal file
View File

@@ -0,0 +1,26 @@
"""
API v1 路由總管理
"""
from fastapi import APIRouter
from app.api.v1.endpoints import auth, users, groups, reports, subscriptions, settings as settings_ep
api_router = APIRouter()

# Sub-routers are mounted in this order: (router, URL prefix, OpenAPI tag).
for _sub_router, _prefix, _tag in (
    (auth.router, "/auth", "Auth"),                             # authentication
    (users.router, "/users", "Users"),                          # user management
    (groups.router, "/groups", "Groups"),                       # group management
    (reports.router, "/reports", "Reports"),                    # report management
    (subscriptions.router, "/subscriptions", "Subscriptions"),  # subscriptions
    (settings_ep.router, "/settings", "Settings"),              # system settings
):
    api_router.include_router(_sub_router, prefix=_prefix, tags=[_tag])

137
app/core/config.py Normal file
View File

@@ -0,0 +1,137 @@
"""
應用程式設定模組
使用 Pydantic Settings 管理環境變數
"""
from functools import lru_cache
from typing import Literal
from pydantic import Field
from pydantic_settings import BaseSettings, SettingsConfigDict
class Settings(BaseSettings):
    """Application settings, populated from environment variables / ``.env``.

    Field names are matched case-insensitively (``case_sensitive=False``).
    The two ``*database_url`` properties assemble SQLAlchemy connection URLs
    from the individual ``db_*`` fields.
    """

    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        case_sensitive=False
    )

    # Application
    app_name: str = "每日報導APP"
    app_env: Literal["development", "staging", "production"] = "development"
    debug: bool = Field(
        default=False,  # off by default, which is the safer choice
        description="除錯模式,僅開發環境使用"
    )
    secret_key: str = Field(
        default="change-me-in-production",
        description="應用程式密鑰,生產環境必須透過環境變數設定"
    )

    # Database
    db_host: str = "localhost"
    db_port: int = 3306
    db_name: str = "daily_news_app"
    db_user: str = "root"
    db_password: str = ""

    @property
    def database_url(self) -> str:
        """Return the synchronous SQLAlchemy URL.

        ``db_host == "sqlite"`` selects a local SQLite file named after
        ``db_name``; anything else yields a ``mysql+pymysql`` URL. The user
        name and password are percent-encoded so that characters such as
        ``@``, ``:``, ``/`` or ``#`` cannot corrupt the URL. Supply the raw
        (not pre-encoded) credentials in the environment.
        """
        if self.db_host == "sqlite":
            return f"sqlite:///{self.db_name}.db"
        from urllib.parse import quote

        user = quote(self.db_user, safe="")
        password = quote(self.db_password, safe="")
        return f"mysql+pymysql://{user}:{password}@{self.db_host}:{self.db_port}/{self.db_name}?charset=utf8mb4"

    @property
    def async_database_url(self) -> str:
        """Return the asynchronous SQLAlchemy URL (``aiosqlite`` / ``aiomysql``).

        Credentials are percent-encoded exactly as in ``database_url``.
        """
        if self.db_host == "sqlite":
            return f"sqlite+aiosqlite:///{self.db_name}.db"
        from urllib.parse import quote

        user = quote(self.db_user, safe="")
        password = quote(self.db_password, safe="")
        return f"mysql+aiomysql://{user}:{password}@{self.db_host}:{self.db_port}/{self.db_name}?charset=utf8mb4"

    # JWT
    jwt_secret_key: str = Field(
        default="change-me",
        description="JWT 簽章密鑰,生產環境必須透過環境變數設定"
    )
    jwt_algorithm: str = "HS256"
    jwt_access_token_expire_minutes: int = Field(
        default=480,  # development default
        description="JWT Token 過期時間(分鐘),建議生產環境設為 60-120 分鐘"
    )

    # LDAP
    ldap_server: str = ""
    ldap_port: int = 389
    ldap_base_dn: str = ""
    ldap_bind_dn: str = ""
    ldap_bind_password: str = ""

    # LLM
    llm_provider: Literal["gemini", "openai", "ollama"] = "gemini"
    gemini_api_key: str = ""
    gemini_model: str = "gemini-1.5-pro"
    openai_api_key: str = ""
    openai_model: str = "gpt-4o"
    ollama_endpoint: str = "http://localhost:11434"
    ollama_model: str = "llama3"

    # SMTP
    smtp_host: str = ""
    smtp_port: int = 587
    smtp_username: str = ""
    smtp_password: str = ""
    smtp_from_email: str = ""
    smtp_from_name: str = "每日報導系統"

    # Crawler
    crawl_schedule_time: str = "08:00"
    crawl_request_delay: int = 3
    crawl_max_retries: int = 3

    # Digitimes
    digitimes_username: str = ""
    digitimes_password: str = ""

    # Data retention
    data_retention_days: int = 60

    # PDF
    pdf_logo_path: str = ""
    pdf_header_text: str = ""
    pdf_footer_text: str = "本報告僅供內部參考使用"

    # CORS
    cors_origins: list[str] = Field(
        default=["http://localhost:3000", "http://localhost:8000"],
        description="允許的 CORS 來源列表,生產環境必須明確指定,不能使用 *"
    )

    # Default administrator password
    admin_password: str = Field(
        default="admin123",
        description="管理員預設密碼"
    )
def validate_secrets():
    """Check the secret-related settings when running in production.

    Outside production this is a no-op. In production a ``ValueError`` is
    raised when ``secret_key`` or ``jwt_secret_key`` still holds its built-in
    default or is shorter than 32 characters; a token lifetime above 120
    minutes only triggers a warning.
    """
    if settings.app_env != "production":
        return

    # Built-in placeholder values must have been overridden.
    if settings.secret_key == "change-me-in-production":
        raise ValueError("生產環境必須設定 SECRET_KEY 環境變數")
    if settings.jwt_secret_key == "change-me":
        raise ValueError("生產環境必須設定 JWT_SECRET_KEY 環境變數")

    # Minimum length for both keys.
    if len(settings.secret_key) < 32:
        raise ValueError("SECRET_KEY 長度必須至少 32 字元")
    if len(settings.jwt_secret_key) < 32:
        raise ValueError("JWT_SECRET_KEY 長度必須至少 32 字元")

    # Long-lived tokens are allowed but flagged.
    if settings.jwt_access_token_expire_minutes > 120:
        import warnings
        warnings.warn("生產環境 JWT Token 過期時間建議不超過 120 分鐘")
@lru_cache
def get_settings() -> Settings:
    """Build the ``Settings`` object once; later calls return the cached instance."""
    loaded = Settings()
    return loaded


# Module-level instance that other modules import.
settings = get_settings()

View File

@@ -0,0 +1,81 @@
"""
日誌系統設定模組
"""
import logging
import sys
from pathlib import Path
from app.core.config import settings
class SensitiveFilter(logging.Filter):
    """Logging filter that redacts records which appear to mention credentials.

    When the rendered message (compared case-insensitively) mentions a
    password, secret, token or key, the record's message is replaced by a
    fixed placeholder followed by the logger name and its arguments are
    cleared. Records are never dropped: ``filter`` always returns True.
    """

    # Terms that trigger redaction wherever they occur in the message.
    _SUBSTRING_KEYWORDS = ('password', 'secret', 'token')

    @staticmethod
    def _mentions_key(text):
        """Return True if *text* contains 'key'/'keys' not continued by a letter.

        'api_key', 'key=...' and 'keys' match, while ordinary words that
        merely start with 'key' (such as 'keyword' or 'keyboard') do not.
        """
        pos = text.find('key')
        while pos != -1:
            rest = text[pos + 3:]
            if rest.startswith('s'):
                rest = rest[1:]  # tolerate the plural form
            if not ('a' <= rest[:1] <= 'z'):
                return True
            pos = text.find('key', pos + 1)
        return False

    def filter(self, record):
        """Redact *record* in place when it looks sensitive; always return True."""
        try:
            msg = str(record.getMessage()).lower()
        except Exception:
            # The log call had mismatched format arguments. Inspect the raw
            # template and arguments instead, so that the filter itself never
            # raises into the code that issued the log call.
            msg = f"{record.msg} {record.args}".lower()
        if any(keyword in msg for keyword in self._SUBSTRING_KEYWORDS) or self._mentions_key(msg):
            # Keep only the logger name, not the original content.
            record.msg = f"[敏感資訊已過濾] {record.name}"
            record.args = ()
        return True
def setup_logging():
    """Configure root logging and return this module's logger.

    Creates the ``logs`` directory if needed, logs to stdout and
    ``logs/app.log`` (plus an ERROR-only ``logs/error.log`` in production),
    uses DEBUG level when ``settings.debug`` is set and INFO otherwise,
    attaches a ``SensitiveFilter`` to every root handler and raises the
    ``httpx`` / ``urllib3`` loggers to WARNING.
    """
    # Make sure the log directory exists before opening file handlers.
    Path("logs").mkdir(exist_ok=True)

    sinks = [
        logging.StreamHandler(sys.stdout),
        logging.FileHandler('logs/app.log', encoding='utf-8'),
    ]
    # Production additionally keeps errors in a dedicated file.
    if settings.app_env == "production":
        errors_only = logging.FileHandler('logs/error.log', encoding='utf-8')
        errors_only.setLevel(logging.ERROR)
        sinks.append(errors_only)

    logging.basicConfig(
        level=logging.DEBUG if settings.debug else logging.INFO,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S',
        handlers=sinks,
    )

    # One shared redaction filter on every handler of the root logger.
    redactor = SensitiveFilter()
    for root_handler in logging.root.handlers:
        root_handler.addFilter(redactor)

    # Quieten chatty third-party libraries.
    for noisy_name in ("httpx", "urllib3"):
        logging.getLogger(noisy_name).setLevel(logging.WARNING)

    return logging.getLogger(__name__)


# Logging is initialised as soon as this module is imported.
logger = setup_logging()

118
app/core/security.py Normal file
View File

@@ -0,0 +1,118 @@
"""
安全認證模組
處理密碼雜湊、JWT Token、LDAP 認證
"""
from datetime import datetime, timedelta
from typing import Optional, Any
from jose import JWTError, jwt
import bcrypt
from ldap3 import Server, Connection, ALL, NTLM
from ldap3.core.exceptions import LDAPException
from ldap3.utils.conv import escape_filter_chars
import logging
from app.core.config import settings
logger = logging.getLogger(__name__)
def verify_password(plain_password: str, hashed_password: str) -> bool:
    """Check a plaintext password against a stored bcrypt hash.

    Returns True only when the password matches. A missing password, a
    missing/empty hash, or a hash that bcrypt rejects as malformed all count
    as a failed verification instead of raising.
    """
    # Accounts without a stored hash can never match.
    if plain_password is None or not hashed_password:
        return False
    try:
        return bcrypt.checkpw(
            plain_password.encode('utf-8'),
            hashed_password.encode('utf-8')
        )
    except ValueError:
        # bcrypt raises ValueError (e.g. "Invalid salt") for a corrupted or
        # non-bcrypt hash; treat it as a mismatch rather than a server error.
        return False
def get_password_hash(password: str) -> str:
    """Hash *password* with bcrypt using a freshly generated salt; returns text."""
    raw_password = password.encode('utf-8')
    digest = bcrypt.hashpw(raw_password, bcrypt.gensalt())
    return digest.decode('utf-8')
def create_access_token(data: dict, expires_delta: Optional[timedelta] = None) -> str:
    """Encode *data* as a signed JWT with an ``exp`` claim.

    The expiry is now (UTC) plus *expires_delta* when one is given (and
    truthy), otherwise plus ``settings.jwt_access_token_expire_minutes``.
    The input dict is not modified.
    """
    lifetime = (
        expires_delta
        if expires_delta
        else timedelta(minutes=settings.jwt_access_token_expire_minutes)
    )
    claims = {**data, "exp": datetime.utcnow() + lifetime}
    return jwt.encode(claims, settings.jwt_secret_key, algorithm=settings.jwt_algorithm)
def decode_access_token(token: str) -> Optional[dict]:
    """Decode and verify a JWT; returns its payload, or None on any ``JWTError``."""
    try:
        return jwt.decode(
            token,
            settings.jwt_secret_key,
            algorithms=[settings.jwt_algorithm],
        )
    except JWTError:
        return None
def verify_ldap_credentials(username: str, password: str) -> Optional[dict]:
    """Verify a username/password pair against the configured LDAP/AD server.

    Returns:
        A dict describing the user on success: ``username``, ``display_name``
        and, when the directory entry was found, ``email`` and
        ``department``. Returns None when either credential is blank, when no
        LDAP server is configured, or when binding fails for any reason.
    """
    # Never hand blank credentials to the directory server.
    if not username or not password:
        return None
    if not settings.ldap_server:
        return None

    conn = None
    try:
        server = Server(settings.ldap_server, port=settings.ldap_port, get_info=ALL)

        # Derive the DNS domain from the DC components of the base DN
        # ("DC=corp,DC=example" -> "corp.example"), ignoring case, surrounding
        # whitespace and non-DC components such as OU=.
        domain = ".".join(
            value.strip()
            for key, _, value in (part.partition("=") for part in settings.ldap_base_dn.split(","))
            if key.strip().lower() == "dc"
        )
        user_dn = f"{username}@{domain}"

        # NOTE(review): ldap3's NTLM mechanism appears to expect
        # "DOMAIN\\user" style names; confirm this UPN-style name binds
        # successfully against the target directory.
        conn = Connection(
            server,
            user=user_dn,
            password=password,
            authentication=NTLM,
            auto_bind=True
        )
        if not conn.bound:
            return None

        # Look up the user's profile; escape the name to prevent LDAP
        # filter injection.
        safe_username = escape_filter_chars(username)
        conn.search(
            settings.ldap_base_dn,
            f"(sAMAccountName={safe_username})",
            attributes=['displayName', 'mail', 'department']
        )
        if conn.entries:
            entry = conn.entries[0]
            return {
                "username": username,
                "display_name": str(entry.displayName) if hasattr(entry, 'displayName') else username,
                "email": str(entry.mail) if hasattr(entry, 'mail') else None,
                "department": str(entry.department) if hasattr(entry, 'department') else None
            }
        # Bind succeeded but no entry was found: fall back to the login name.
        return {"username": username, "display_name": username}
    except LDAPException:
        # The traceback is logged; the credentials themselves are not.
        logger.error("LDAP 認證失敗", exc_info=True)
        return None
    except Exception:
        logger.error("LDAP 連線錯誤", exc_info=True)
        return None
    finally:
        # Always release the connection, on success and on failure alike.
        if conn is not None and conn.bound:
            try:
                conn.unbind()
            except Exception:
                # Best-effort cleanup; the authentication result is already decided.
                logger.debug("LDAP unbind failed", exc_info=True)
class TokenData:
    """Plain container for the identity fields carried by a token."""

    def __init__(self, user_id: int, username: str, role: str):
        self.user_id, self.username, self.role = user_id, username, role

49
app/db/session.py Normal file
View File

@@ -0,0 +1,49 @@
"""
資料庫連線與 Session 管理
"""
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker, Session, DeclarativeBase
from sqlalchemy.pool import QueuePool
from typing import Generator
from app.core.config import settings
# Engine construction. SQLite URLs get check_same_thread=False passed to the driver.
connect_args = (
    {"check_same_thread": False}
    if settings.database_url.startswith("sqlite")
    else {}
)
engine = create_engine(
    settings.database_url,
    poolclass=QueuePool,
    pool_size=10,
    max_overflow=20,
    pool_pre_ping=True,
    echo=settings.debug,
    connect_args=connect_args,
)

# Session factory bound to the engine above.
SessionLocal = sessionmaker(bind=engine, autoflush=False, autocommit=False)
class Base(DeclarativeBase):
    """Declarative base class that the ORM models inherit from."""
def get_db() -> Generator[Session, None, None]:
    """Yield a database session (for dependency injection) and close it afterwards."""
    # Leaving the ``with`` block closes the session, whether the consumer
    # finished normally or raised.
    with SessionLocal() as db:
        yield db
def init_db() -> None:
    """Initialise the database by creating every table known to ``Base.metadata``."""
    # Imported only for their side effect (presumably registering the model
    # tables on Base.metadata); the names themselves are unused.
    from app.models import (  # noqa: F401
        user,
        news,
        group,
        report,
        interaction,
        system,
    )
    Base.metadata.create_all(bind=engine)

136
app/main.py Normal file
View File

@@ -0,0 +1,136 @@
"""
每日報導 APP - FastAPI 主應用程式
"""
from contextlib import asynccontextmanager
from pathlib import Path
from fastapi import FastAPI
from fastapi.responses import HTMLResponse
from fastapi.staticfiles import StaticFiles
from fastapi.middleware.cors import CORSMiddleware
from app.core.config import settings, validate_secrets
from app.core.logging_config import logger
from app.api.v1.router import api_router
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan hook.

    Before yielding: validates the secret settings (logging and re-raising
    any ``ValueError``) and logs the app name, environment and a masked
    database target. After yielding: logs the shutdown.
    """
    # --- startup ---
    try:
        validate_secrets()
    except ValueError as exc:
        logger.error(f"設定驗證失敗: {exc}")
        raise

    logger.info(f"🚀 {settings.app_name} 啟動中...")
    logger.info(f"📊 環境: {settings.app_env}")
    # Only the first three characters of the database name are shown.
    db_target = f"{settings.db_host}:{settings.db_port}/{settings.db_name[:3]}***"
    logger.info(f"🔗 資料庫連線: {db_target}")

    yield

    # --- shutdown ---
    logger.info(f"👋 {settings.app_name} 關閉中...")
def create_app() -> FastAPI:
    """Build and configure the FastAPI application.

    Forces ``settings.debug`` off in production (with a warning), exposes the
    interactive docs only in debug mode, installs the CORS middleware (a
    ``*`` origin in production raises ``ValueError``), mounts the v1 API and
    the optional static JS directory, and registers the ``/``, ``/api/info``
    and ``/health`` routes.
    """
    in_production = settings.app_env == "production"
    project_root = Path(__file__).parent.parent

    # Debug mode is never allowed to stay on in production.
    if in_production and settings.debug:
        import warnings
        warnings.warn("生產環境不應啟用 Debug 模式,已自動關閉")
        settings.debug = False

    app = FastAPI(
        title=settings.app_name,
        description="企業內部新聞彙整與分析系統 API",
        version="1.0.0",
        docs_url="/docs" if settings.debug else None,    # hidden unless debugging
        redoc_url="/redoc" if settings.debug else None,  # hidden unless debugging
        lifespan=lifespan,
    )

    # CORS: production must list its origins explicitly.
    if in_production and "*" in settings.cors_origins:
        raise ValueError("生產環境不允許使用 CORS origins = ['*']")
    allowed_origins = ["*"] if settings.debug else settings.cors_origins
    app.add_middleware(
        CORSMiddleware,
        allow_origins=allowed_origins,
        allow_credentials=True,
        allow_methods=["GET", "POST", "PUT", "DELETE", "OPTIONS"],
        allow_headers=["Content-Type", "Authorization"],
        max_age=3600,
    )

    # API routes
    app.include_router(api_router, prefix="/api/v1")

    # Static JS assets are mounted only when the directory exists.
    static_dir = project_root / "templates" / "js"
    if static_dir.exists():
        app.mount("/static/js", StaticFiles(directory=str(static_dir)), name="static_js")

    # Root path: serves the UI
    @app.get("/", response_class=HTMLResponse)
    async def root():
        """Return the UI page."""
        ui_file = project_root / "templates" / "index.html"
        if ui_file.exists():
            return ui_file.read_text(encoding="utf-8")
        # No UI file available: fall back to a minimal built-in page.
        return HTMLResponse(content=f"""
<!DOCTYPE html>
<html>
<head><title>{settings.app_name}</title></head>
<body>
<h1>{settings.app_name}</h1>
<p>版本: 1.0.0</p>
<p>企業內部新聞彙整與分析系統</p>
<ul>
<li><a href="/docs">API 文檔 (Swagger)</a></li>
<li><a href="/redoc">API 文檔 (ReDoc)</a></li>
<li><a href="/health">健康檢查</a></li>
<li><a href="/api/v1">API 端點</a></li>
</ul>
</body>
</html>
""")

    # API information endpoint
    @app.get("/api/info")
    async def api_info():
        """API information."""
        docs_path = "/docs" if settings.debug else None
        redoc_path = "/redoc" if settings.debug else None
        return {
            "app": settings.app_name,
            "version": "1.0.0",
            "description": "企業內部新聞彙整與分析系統",
            "docs": docs_path,
            "redoc": redoc_path,
            "health": "/health",
            "api": "/api/v1",
        }

    # Health-check endpoint
    @app.get("/health")
    async def health_check():
        return {"status": "healthy", "app": settings.app_name}

    return app
# ASGI application object, referenced below as "app.main:app".
app = create_app()

if __name__ == "__main__":
    # Direct execution: serve on localhost, with auto-reload tied to debug mode.
    import uvicorn

    uvicorn.run("app.main:app", host="127.0.0.1", port=8000, reload=settings.debug)

25
app/models/__init__.py Normal file
View File

@@ -0,0 +1,25 @@
"""
資料模型模組
匯出所有 SQLAlchemy 模型
"""
from app.models.user import User, Role, AuthType
from app.models.news import NewsSource, NewsArticle, CrawlJob, SourceType, CrawlStatus
from app.models.group import Group, Keyword, ArticleGroupMatch, GroupCategory
from app.models.report import Report, ReportArticle, ReportStatus
from app.models.interaction import Subscription, Favorite, Comment, Note
from app.models.system import SystemSetting, AuditLog, NotificationLog, SettingType, NotificationType, NotificationStatus
# Public names re-exported by this package, grouped by model module.
__all__ = [
    # User
    "User",
    "Role",
    "AuthType",
    # News
    "NewsSource",
    "NewsArticle",
    "CrawlJob",
    "SourceType",
    "CrawlStatus",
    # Group
    "Group",
    "Keyword",
    "ArticleGroupMatch",
    "GroupCategory",
    # Report
    "Report",
    "ReportArticle",
    "ReportStatus",
    # Interaction
    "Subscription",
    "Favorite",
    "Comment",
    "Note",
    # System
    "SystemSetting",
    "AuditLog",
    "NotificationLog",
    "SettingType",
    "NotificationType",
    "NotificationStatus",
]

82
app/models/group.py Normal file
View File

@@ -0,0 +1,82 @@
"""
群組與關鍵字資料模型
"""
from datetime import datetime
from sqlalchemy import String, Boolean, ForeignKey, Text, JSON, Enum as SQLEnum, UniqueConstraint, Index, DECIMAL
from sqlalchemy.orm import Mapped, mapped_column, relationship
from typing import Optional, List
import enum
from app.db.session import Base
class GroupCategory(str, enum.Enum):
    """String-valued enumeration of group categories."""

    INDUSTRY = "industry"
    TOPIC = "topic"
class Group(Base):
    """ORM model for the ``groups`` table.

    Keywords, article matches and subscriptions are configured with
    ``cascade="all, delete-orphan"``; reports are related without a cascade.
    """

    __tablename__ = "groups"

    # Columns
    id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
    name: Mapped[str] = mapped_column(String(100), nullable=False, comment="群組名稱")
    description: Mapped[Optional[str]] = mapped_column(Text, comment="群組描述")
    category: Mapped[GroupCategory] = mapped_column(
        SQLEnum(GroupCategory), nullable=False, comment="分類"
    )
    ai_background: Mapped[Optional[str]] = mapped_column(Text, comment="AI背景資訊設定")
    ai_prompt: Mapped[Optional[str]] = mapped_column(Text, comment="AI摘要方向提示")
    is_active: Mapped[bool] = mapped_column(Boolean, default=True, comment="是否啟用")
    created_by: Mapped[Optional[int]] = mapped_column(
        ForeignKey("users.id"), comment="建立者ID"
    )
    created_at: Mapped[datetime] = mapped_column(default=datetime.utcnow)
    updated_at: Mapped[datetime] = mapped_column(
        default=datetime.utcnow, onupdate=datetime.utcnow
    )

    # Relationships
    keywords: Mapped[List["Keyword"]] = relationship(
        back_populates="group", cascade="all, delete-orphan"
    )
    article_matches: Mapped[List["ArticleGroupMatch"]] = relationship(
        back_populates="group", cascade="all, delete-orphan"
    )
    reports: Mapped[List["Report"]] = relationship(back_populates="group")
    subscriptions: Mapped[List["Subscription"]] = relationship(
        back_populates="group", cascade="all, delete-orphan"
    )
class Keyword(Base):
    """ORM model for the ``keywords`` table: a keyword belonging to one group.

    The (group_id, keyword) pair is unique and ``keyword`` is indexed.
    """

    __tablename__ = "keywords"
    __table_args__ = (
        UniqueConstraint("group_id", "keyword", name="uk_group_keyword"),
        Index("idx_keywords_keyword", "keyword"),
    )

    # Columns
    id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
    group_id: Mapped[int] = mapped_column(
        ForeignKey("groups.id", ondelete="CASCADE"),
        nullable=False,
        comment="所屬群組ID",
    )
    keyword: Mapped[str] = mapped_column(String(100), nullable=False, comment="關鍵字")
    is_active: Mapped[bool] = mapped_column(Boolean, default=True)
    created_at: Mapped[datetime] = mapped_column(default=datetime.utcnow)

    # Relationships
    group: Mapped["Group"] = relationship(back_populates="keywords")
class ArticleGroupMatch(Base):
    """ORM model for ``article_group_matches``: links a news article to a group.

    Each (article_id, group_id) pair is unique; ``group_id`` is indexed.
    """

    __tablename__ = "article_group_matches"
    __table_args__ = (
        UniqueConstraint("article_id", "group_id", name="uk_article_group"),
        Index("idx_matches_group", "group_id"),
    )

    # Columns
    id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
    article_id: Mapped[int] = mapped_column(
        ForeignKey("news_articles.id", ondelete="CASCADE"), nullable=False
    )
    group_id: Mapped[int] = mapped_column(
        ForeignKey("groups.id", ondelete="CASCADE"), nullable=False
    )
    matched_keywords: Mapped[Optional[list]] = mapped_column(
        JSON, comment="匹配到的關鍵字列表"
    )
    match_score: Mapped[Optional[float]] = mapped_column(
        DECIMAL(5, 2), comment="匹配分數"
    )
    created_at: Mapped[datetime] = mapped_column(default=datetime.utcnow)

    # Relationships
    article: Mapped["NewsArticle"] = relationship(back_populates="group_matches")
    group: Mapped["Group"] = relationship(back_populates="article_matches")
# 避免循環引入
from app.models.news import NewsArticle
from app.models.report import Report
from app.models.interaction import Subscription

90
app/models/interaction.py Normal file
View File

@@ -0,0 +1,90 @@
"""
讀者互動資料模型(訂閱、收藏、留言、筆記)
"""
from datetime import datetime
from sqlalchemy import String, Boolean, ForeignKey, Text, UniqueConstraint, Index
from sqlalchemy.orm import Mapped, mapped_column, relationship
from typing import Optional, List, TYPE_CHECKING
from app.db.session import Base
if TYPE_CHECKING:
from app.models.user import User
from app.models.group import Group
from app.models.report import Report
class Subscription(Base):
    """ORM model for ``subscriptions``: a user's subscription to a group.

    Each (user_id, group_id) pair is unique.
    """

    __tablename__ = "subscriptions"
    __table_args__ = (
        UniqueConstraint("user_id", "group_id", name="uk_user_group"),
    )

    # Columns
    id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
    user_id: Mapped[int] = mapped_column(
        ForeignKey("users.id", ondelete="CASCADE"), nullable=False
    )
    group_id: Mapped[int] = mapped_column(
        ForeignKey("groups.id", ondelete="CASCADE"), nullable=False
    )
    email_notify: Mapped[bool] = mapped_column(
        Boolean, default=True, comment="是否Email通知"
    )
    created_at: Mapped[datetime] = mapped_column(default=datetime.utcnow)

    # Relationships
    user: Mapped["User"] = relationship(back_populates="subscriptions")
    group: Mapped["Group"] = relationship(back_populates="subscriptions")
class Favorite(Base):
    """ORM model for ``favorites``: a report bookmarked by a user.

    Each (user_id, report_id) pair is unique.
    """

    __tablename__ = "favorites"
    __table_args__ = (
        UniqueConstraint("user_id", "report_id", name="uk_user_report"),
    )

    # Columns
    id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
    user_id: Mapped[int] = mapped_column(
        ForeignKey("users.id", ondelete="CASCADE"), nullable=False
    )
    report_id: Mapped[int] = mapped_column(
        ForeignKey("reports.id", ondelete="CASCADE"), nullable=False
    )
    created_at: Mapped[datetime] = mapped_column(default=datetime.utcnow)

    # Relationships
    user: Mapped["User"] = relationship(back_populates="favorites")
    report: Mapped["Report"] = relationship(back_populates="favorites")
class Comment(Base):
    """ORM model for ``comments``: a user's comment on a report.

    ``parent_id`` optionally points at another comment; the self-referential
    ``parent`` relationship exposes the children as ``replies`` via a backref.
    ``report_id`` is indexed.
    """

    __tablename__ = "comments"
    __table_args__ = (
        Index("idx_comments_report", "report_id"),
    )

    # Columns
    id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
    report_id: Mapped[int] = mapped_column(
        ForeignKey("reports.id", ondelete="CASCADE"), nullable=False
    )
    user_id: Mapped[int] = mapped_column(ForeignKey("users.id"), nullable=False)
    content: Mapped[str] = mapped_column(Text, nullable=False, comment="留言內容")
    parent_id: Mapped[Optional[int]] = mapped_column(
        ForeignKey("comments.id"), comment="父留言ID"
    )
    is_deleted: Mapped[bool] = mapped_column(Boolean, default=False)
    created_at: Mapped[datetime] = mapped_column(default=datetime.utcnow)
    updated_at: Mapped[datetime] = mapped_column(
        default=datetime.utcnow, onupdate=datetime.utcnow
    )

    # Relationships
    report: Mapped["Report"] = relationship(back_populates="comments")
    user: Mapped["User"] = relationship(back_populates="comments")
    parent: Mapped[Optional["Comment"]] = relationship(
        remote_side=[id], backref="replies"
    )
class Note(Base):
    """ORM model for ``notes``: a user's personal note on a report.

    Indexed on (user_id, report_id).
    """

    __tablename__ = "notes"
    __table_args__ = (
        Index("idx_notes_user_report", "user_id", "report_id"),
    )

    # Columns
    id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
    user_id: Mapped[int] = mapped_column(
        ForeignKey("users.id", ondelete="CASCADE"), nullable=False
    )
    report_id: Mapped[int] = mapped_column(
        ForeignKey("reports.id", ondelete="CASCADE"), nullable=False
    )
    content: Mapped[str] = mapped_column(Text, nullable=False, comment="筆記內容")
    created_at: Mapped[datetime] = mapped_column(default=datetime.utcnow)
    updated_at: Mapped[datetime] = mapped_column(
        default=datetime.utcnow, onupdate=datetime.utcnow
    )

    # Relationships
    user: Mapped["User"] = relationship(back_populates="notes")
    report: Mapped["Report"] = relationship(back_populates="notes")

100
app/models/news.py Normal file
View File

@@ -0,0 +1,100 @@
"""
新聞來源與文章資料模型
"""
from datetime import datetime
from sqlalchemy import String, Boolean, ForeignKey, Text, JSON, Enum as SQLEnum, UniqueConstraint, Index
from sqlalchemy.orm import Mapped, mapped_column, relationship
from typing import Optional, List
import enum
from app.db.session import Base
class SourceType(str, enum.Enum):
    """String-valued enumeration of news source types."""

    SUBSCRIPTION = "subscription"
    PUBLIC = "public"
class CrawlStatus(str, enum.Enum):
    """String-valued enumeration of crawl job states."""

    PENDING = "pending"
    RUNNING = "running"
    COMPLETED = "completed"
    FAILED = "failed"
class NewsSource(Base):
    """ORM model for ``news_sources``: a site that articles are crawled from.

    ``code`` is unique. Related to its articles and its crawl jobs.
    """

    __tablename__ = "news_sources"

    # Columns
    id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
    code: Mapped[str] = mapped_column(
        String(30), unique=True, nullable=False, comment="來源代碼"
    )
    name: Mapped[str] = mapped_column(String(100), nullable=False, comment="來源名稱")
    base_url: Mapped[str] = mapped_column(
        String(255), nullable=False, comment="網站基礎URL"
    )
    source_type: Mapped[SourceType] = mapped_column(
        SQLEnum(SourceType), nullable=False, comment="來源類型"
    )
    login_username: Mapped[Optional[str]] = mapped_column(String(100), comment="登入帳號")
    login_password_encrypted: Mapped[Optional[str]] = mapped_column(
        String(255), comment="加密後密碼"
    )
    is_active: Mapped[bool] = mapped_column(Boolean, default=True, comment="是否啟用")
    crawl_config: Mapped[Optional[dict]] = mapped_column(JSON, comment="爬蟲設定")
    created_at: Mapped[datetime] = mapped_column(default=datetime.utcnow)
    updated_at: Mapped[datetime] = mapped_column(
        default=datetime.utcnow, onupdate=datetime.utcnow
    )

    # Relationships
    articles: Mapped[List["NewsArticle"]] = relationship(back_populates="source")
    crawl_jobs: Mapped[List["CrawlJob"]] = relationship(back_populates="source")
class NewsArticle(Base):
    """ORM model for ``news_articles``: one crawled article.

    Each (source_id, external_id) pair is unique; ``published_at`` and
    ``crawled_at`` are indexed. Group matches are configured with
    ``cascade="all, delete-orphan"``.
    """

    __tablename__ = "news_articles"
    __table_args__ = (
        UniqueConstraint("source_id", "external_id", name="uk_source_external"),
        Index("idx_articles_published", "published_at"),
        Index("idx_articles_crawled", "crawled_at"),
    )

    # Columns
    id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
    source_id: Mapped[int] = mapped_column(
        ForeignKey("news_sources.id"), nullable=False, comment="來源ID"
    )
    external_id: Mapped[Optional[str]] = mapped_column(String(100), comment="外部文章ID")
    title: Mapped[str] = mapped_column(String(500), nullable=False, comment="文章標題")
    content: Mapped[Optional[str]] = mapped_column(Text, comment="文章全文")
    summary: Mapped[Optional[str]] = mapped_column(Text, comment="原文摘要")
    url: Mapped[str] = mapped_column(String(500), nullable=False, comment="原文連結")
    author: Mapped[Optional[str]] = mapped_column(String(100), comment="作者")
    published_at: Mapped[Optional[datetime]] = mapped_column(comment="發布時間")
    crawled_at: Mapped[datetime] = mapped_column(
        default=datetime.utcnow, comment="抓取時間"
    )
    created_at: Mapped[datetime] = mapped_column(default=datetime.utcnow)

    # Relationships
    source: Mapped["NewsSource"] = relationship(back_populates="articles")
    group_matches: Mapped[List["ArticleGroupMatch"]] = relationship(
        back_populates="article", cascade="all, delete-orphan"
    )
    report_articles: Mapped[List["ReportArticle"]] = relationship(
        back_populates="article"
    )
class CrawlJob(Base):
    """ORM model for ``crawl_jobs``: the record of one crawl run for a source.

    ``status`` defaults to ``CrawlStatus.PENDING``; ``status`` and
    ``scheduled_at`` are indexed.
    """

    __tablename__ = "crawl_jobs"
    __table_args__ = (
        Index("idx_crawl_jobs_status", "status"),
        Index("idx_crawl_jobs_scheduled", "scheduled_at"),
    )

    # Columns
    id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
    source_id: Mapped[int] = mapped_column(
        ForeignKey("news_sources.id"), nullable=False, comment="來源ID"
    )
    status: Mapped[CrawlStatus] = mapped_column(
        SQLEnum(CrawlStatus), default=CrawlStatus.PENDING
    )
    scheduled_at: Mapped[datetime] = mapped_column(nullable=False, comment="排程時間")
    started_at: Mapped[Optional[datetime]] = mapped_column(comment="開始時間")
    completed_at: Mapped[Optional[datetime]] = mapped_column(comment="完成時間")
    articles_count: Mapped[int] = mapped_column(default=0, comment="抓取文章數")
    error_message: Mapped[Optional[str]] = mapped_column(Text, comment="錯誤訊息")
    retry_count: Mapped[int] = mapped_column(default=0, comment="重試次數")
    created_at: Mapped[datetime] = mapped_column(default=datetime.utcnow)

    # Relationships
    source: Mapped["NewsSource"] = relationship(back_populates="crawl_jobs")
# 避免循環引入
from app.models.group import ArticleGroupMatch
from app.models.report import ReportArticle

79
app/models/report.py Normal file
View File

@@ -0,0 +1,79 @@
"""
報告資料模型
"""
from datetime import datetime, date
from sqlalchemy import String, Boolean, ForeignKey, Text, Date, Enum as SQLEnum, UniqueConstraint, Index
from sqlalchemy.orm import Mapped, mapped_column, relationship
from typing import Optional, List
import enum
from app.db.session import Base
class ReportStatus(str, enum.Enum):
    """String-valued enumeration of report states."""

    DRAFT = "draft"
    PENDING = "pending"
    PUBLISHED = "published"
    DELAYED = "delayed"
class Report(Base):
    """ORM model for ``reports``: one report per group and report date.

    Each (group_id, report_date) pair is unique; ``status`` and
    ``report_date`` are indexed. Report articles, favorites, comments and
    notes are configured with ``cascade="all, delete-orphan"``.
    """

    __tablename__ = "reports"
    __table_args__ = (
        UniqueConstraint("group_id", "report_date", name="uk_group_date"),
        Index("idx_reports_status", "status"),
        Index("idx_reports_date", "report_date"),
    )

    # Columns
    id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
    group_id: Mapped[int] = mapped_column(
        ForeignKey("groups.id"), nullable=False, comment="所屬群組ID"
    )
    title: Mapped[str] = mapped_column(String(200), nullable=False, comment="報告標題")
    report_date: Mapped[date] = mapped_column(Date, nullable=False, comment="報告日期")
    ai_summary: Mapped[Optional[str]] = mapped_column(Text, comment="AI綜合摘要")
    edited_summary: Mapped[Optional[str]] = mapped_column(Text, comment="編輯後摘要")
    status: Mapped[ReportStatus] = mapped_column(
        SQLEnum(ReportStatus), default=ReportStatus.DRAFT, comment="狀態"
    )
    published_at: Mapped[Optional[datetime]] = mapped_column(comment="發布時間")
    published_by: Mapped[Optional[int]] = mapped_column(
        ForeignKey("users.id"), comment="發布者ID"
    )
    created_at: Mapped[datetime] = mapped_column(default=datetime.utcnow)
    updated_at: Mapped[datetime] = mapped_column(
        default=datetime.utcnow, onupdate=datetime.utcnow
    )

    # Relationships
    group: Mapped["Group"] = relationship(back_populates="reports")
    report_articles: Mapped[List["ReportArticle"]] = relationship(
        back_populates="report", cascade="all, delete-orphan"
    )
    favorites: Mapped[List["Favorite"]] = relationship(
        back_populates="report", cascade="all, delete-orphan"
    )
    comments: Mapped[List["Comment"]] = relationship(
        back_populates="report", cascade="all, delete-orphan"
    )
    notes: Mapped[List["Note"]] = relationship(
        back_populates="report", cascade="all, delete-orphan"
    )
    notifications: Mapped[List["NotificationLog"]] = relationship(
        back_populates="report"
    )

    @property
    def final_summary(self) -> str:
        """Return the edited summary if set, else the AI summary, else ``""``."""
        for candidate in (self.edited_summary, self.ai_summary):
            if candidate:
                return candidate
        return ""
class ReportArticle(Base):
    """ORM model for ``report_articles``: links a news article to a report.

    Each (report_id, article_id) pair is unique.
    """

    __tablename__ = "report_articles"
    __table_args__ = (
        UniqueConstraint("report_id", "article_id", name="uk_report_article"),
    )

    # Columns
    id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
    report_id: Mapped[int] = mapped_column(
        ForeignKey("reports.id", ondelete="CASCADE"), nullable=False
    )
    article_id: Mapped[int] = mapped_column(
        ForeignKey("news_articles.id"), nullable=False
    )
    is_included: Mapped[bool] = mapped_column(
        Boolean, default=True, comment="是否納入報告"
    )
    display_order: Mapped[int] = mapped_column(default=0, comment="顯示順序")
    created_at: Mapped[datetime] = mapped_column(default=datetime.utcnow)

    # Relationships
    report: Mapped["Report"] = relationship(back_populates="report_articles")
    article: Mapped["NewsArticle"] = relationship(back_populates="report_articles")
# 避免循環引入
from app.models.group import Group
from app.models.news import NewsArticle
from app.models.interaction import Favorite, Comment, Note
from app.models.system import NotificationLog

103
app/models/system.py Normal file
View File

@@ -0,0 +1,103 @@
"""
系統設定與日誌資料模型
"""
from datetime import datetime
from sqlalchemy import String, ForeignKey, Text, JSON, Enum as SQLEnum, Index
from sqlalchemy.orm import Mapped, mapped_column, relationship
from typing import Optional, TYPE_CHECKING
import enum
from app.db.session import Base
if TYPE_CHECKING:
from app.models.user import User
from app.models.report import Report
class SettingType(str, enum.Enum):
    """String-valued enumeration of the value types a setting can hold."""

    STRING = "string"
    NUMBER = "number"
    BOOLEAN = "boolean"
    JSON = "json"
class NotificationType(str, enum.Enum):
    """String-valued enumeration of notification channels."""

    EMAIL = "email"
    SYSTEM = "system"
class NotificationStatus(str, enum.Enum):
    """String-valued enumeration of notification delivery states."""

    PENDING = "pending"
    SENT = "sent"
    FAILED = "failed"
class SystemSetting(Base):
    """ORM model for ``system_settings``: one key/value setting per row.

    The value is stored as text; ``setting_type`` says how ``get_value``
    should convert it. ``setting_key`` is unique.
    """

    __tablename__ = "system_settings"

    # Columns
    id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
    setting_key: Mapped[str] = mapped_column(
        String(50), unique=True, nullable=False, comment="設定鍵"
    )
    setting_value: Mapped[Optional[str]] = mapped_column(Text, comment="設定值")
    setting_type: Mapped[SettingType] = mapped_column(
        SQLEnum(SettingType), default=SettingType.STRING
    )
    description: Mapped[Optional[str]] = mapped_column(String(200), comment="設定描述")
    updated_by: Mapped[Optional[int]] = mapped_column(
        ForeignKey("users.id"), comment="更新者ID"
    )
    updated_at: Mapped[datetime] = mapped_column(
        default=datetime.utcnow, onupdate=datetime.utcnow
    )

    def get_value(self):
        """Return the stored value converted according to ``setting_type``.

        ``None`` stays ``None``. NUMBER becomes ``float`` when the text
        contains a ``.`` and ``int`` otherwise; BOOLEAN is true for
        ``true`` / ``1`` / ``yes`` (case-insensitive); JSON is parsed with
        ``json.loads``; any other type returns the raw text.
        """
        raw = self.setting_value
        if raw is None:
            return None

        kind = self.setting_type
        if kind == SettingType.NUMBER:
            return float(raw) if '.' in raw else int(raw)
        if kind == SettingType.BOOLEAN:
            return raw.lower() in ('true', '1', 'yes')
        if kind == SettingType.JSON:
            import json
            return json.loads(raw)
        return raw
class AuditLog(Base):
    """ORM model for ``audit_logs``: one recorded operation.

    Indexed on ``user_id``, ``action`` and ``created_at``.
    """

    __tablename__ = "audit_logs"
    __table_args__ = (
        Index("idx_audit_user", "user_id"),
        Index("idx_audit_action", "action"),
        Index("idx_audit_created", "created_at"),
    )

    # Columns
    id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
    user_id: Mapped[Optional[int]] = mapped_column(
        ForeignKey("users.id"), comment="操作用戶ID"
    )
    action: Mapped[str] = mapped_column(String(50), nullable=False, comment="操作類型")
    target_type: Mapped[Optional[str]] = mapped_column(String(50), comment="目標類型")
    target_id: Mapped[Optional[str]] = mapped_column(String(50), comment="目標ID")
    details: Mapped[Optional[dict]] = mapped_column(JSON, comment="操作詳情")
    ip_address: Mapped[Optional[str]] = mapped_column(String(45), comment="IP地址")
    user_agent: Mapped[Optional[str]] = mapped_column(String(500), comment="User Agent")
    created_at: Mapped[datetime] = mapped_column(default=datetime.utcnow)
class NotificationLog(Base):
    """Notification record table."""
    __tablename__ = "notification_logs"
    # Secondary index on the delivery status.
    __table_args__ = (
        Index("idx_notification_status", "status"),
    )

    id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
    # Recipient user (foreign key to users.id), required.
    user_id: Mapped[int] = mapped_column(ForeignKey("users.id"), nullable=False)
    # Related report (foreign key to reports.id); optional.
    report_id: Mapped[Optional[int]] = mapped_column(ForeignKey("reports.id"), comment="關聯報告ID")
    # Delivery channel; defaults to EMAIL.
    notification_type: Mapped[NotificationType] = mapped_column(SQLEnum(NotificationType), default=NotificationType.EMAIL)
    subject: Mapped[Optional[str]] = mapped_column(String(200), comment="通知標題")
    content: Mapped[Optional[str]] = mapped_column(Text, comment="通知內容")
    # Delivery status; defaults to PENDING.
    status: Mapped[NotificationStatus] = mapped_column(SQLEnum(NotificationStatus), default=NotificationStatus.PENDING)
    # Nullable timestamp and error text; no defaults are defined here.
    sent_at: Mapped[Optional[datetime]] = mapped_column()
    error_message: Mapped[Optional[str]] = mapped_column(Text)
    created_at: Mapped[datetime] = mapped_column(default=datetime.utcnow)

    # Relationships
    # Paired with the "notifications" relationship on Report.
    report: Mapped[Optional["Report"]] = relationship(back_populates="notifications")

59
app/models/user.py Normal file
View File

@@ -0,0 +1,59 @@
"""
用戶與角色資料模型
"""
from datetime import datetime
from sqlalchemy import String, Boolean, ForeignKey, Text, Enum as SQLEnum
from sqlalchemy.orm import Mapped, mapped_column, relationship
from typing import Optional, List
import enum
from app.db.session import Base
class AuthType(str, enum.Enum):
    """Authentication type of a user account (string-valued enum)."""
    AD = "ad"
    LOCAL = "local"
class Role(Base):
    """Role table."""
    __tablename__ = "roles"

    id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
    # Unique role code, required.
    code: Mapped[str] = mapped_column(String(20), unique=True, nullable=False, comment="角色代碼")
    # Role name, required.
    name: Mapped[str] = mapped_column(String(50), nullable=False, comment="角色名稱")
    description: Mapped[Optional[str]] = mapped_column(String(200), comment="角色描述")
    created_at: Mapped[datetime] = mapped_column(default=datetime.utcnow)
    # Refreshed from datetime.utcnow on every update.
    updated_at: Mapped[datetime] = mapped_column(default=datetime.utcnow, onupdate=datetime.utcnow)

    # Relationships
    # Users holding this role; paired with User.role.
    users: Mapped[List["User"]] = relationship(back_populates="role")
class User(Base):
    """User table."""
    __tablename__ = "users"

    id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
    # Unique, indexed login account name.
    username: Mapped[str] = mapped_column(String(50), unique=True, nullable=False, index=True, comment="用戶帳號")
    # Password hash; nullable.
    password_hash: Mapped[Optional[str]] = mapped_column(String(255), comment="密碼雜湊")
    display_name: Mapped[str] = mapped_column(String(100), nullable=False, comment="顯示名稱")
    email: Mapped[Optional[str]] = mapped_column(String(100), comment="電子郵件")
    # Authentication type; defaults to LOCAL.
    auth_type: Mapped[AuthType] = mapped_column(SQLEnum(AuthType), default=AuthType.LOCAL, nullable=False, comment="認證類型")
    # Role of the user (foreign key to roles.id), required.
    role_id: Mapped[int] = mapped_column(ForeignKey("roles.id"), nullable=False, comment="角色ID")
    # Whether the account is enabled; defaults to True.
    is_active: Mapped[bool] = mapped_column(Boolean, default=True, comment="是否啟用")
    last_login_at: Mapped[Optional[datetime]] = mapped_column(comment="最後登入時間")
    created_at: Mapped[datetime] = mapped_column(default=datetime.utcnow)
    updated_at: Mapped[datetime] = mapped_column(default=datetime.utcnow, onupdate=datetime.utcnow)

    # Relationships
    role: Mapped["Role"] = relationship(back_populates="users")
    # Subscriptions, favorites and notes use the "all, delete-orphan" cascade;
    # comments do not declare a cascade.
    subscriptions: Mapped[List["Subscription"]] = relationship(back_populates="user", cascade="all, delete-orphan")
    favorites: Mapped[List["Favorite"]] = relationship(back_populates="user", cascade="all, delete-orphan")
    comments: Mapped[List["Comment"]] = relationship(back_populates="user")
    notes: Mapped[List["Note"]] = relationship(back_populates="user", cascade="all, delete-orphan")
# 避免循環引入
from app.models.interaction import Subscription, Favorite, Comment, Note

70
app/schemas/group.py Normal file
View File

@@ -0,0 +1,70 @@
"""
群組與關鍵字 Pydantic Schema
"""
from datetime import datetime
from typing import Optional, Literal
from pydantic import BaseModel, Field
from app.schemas.user import PaginationResponse
# ===== Keyword =====
class KeywordBase(BaseModel):
    """Fields shared by all keyword schemas."""
    # Required keyword text, at most 100 characters.
    keyword: str = Field(..., max_length=100)
class KeywordCreate(KeywordBase):
    """Payload for creating a keyword; adds nothing to KeywordBase."""
    pass
class KeywordResponse(KeywordBase):
    """Keyword as returned to clients."""
    id: int
    is_active: bool

    class Config:
        # Allow populating the schema from object attributes.
        from_attributes = True
# ===== Group =====
class GroupBase(BaseModel):
    """Fields shared by group create and response schemas."""
    # Required group name, at most 100 characters.
    name: str = Field(..., max_length=100)
    description: Optional[str] = None
    # Must be exactly "industry" or "topic".
    category: Literal["industry", "topic"]
class GroupCreate(GroupBase):
    """Payload for creating a group; all extra fields are optional."""
    ai_background: Optional[str] = None
    ai_prompt: Optional[str] = None
    # Optional list of keyword strings supplied with the new group.
    keywords: Optional[list[str]] = None
class GroupUpdate(BaseModel):
    """Payload for updating a group; every field is optional and defaults to None."""
    name: Optional[str] = Field(None, max_length=100)
    description: Optional[str] = None
    category: Optional[Literal["industry", "topic"]] = None
    ai_background: Optional[str] = None
    ai_prompt: Optional[str] = None
    is_active: Optional[bool] = None
class GroupResponse(GroupBase):
    """Group summary as returned to clients."""
    id: int
    is_active: bool
    # Counters default to 0 when not provided.
    keyword_count: Optional[int] = 0
    subscriber_count: Optional[int] = 0

    class Config:
        # Allow populating the schema from object attributes.
        from_attributes = True
class GroupDetailResponse(GroupResponse):
    """Group detail: the summary fields plus AI settings, keywords and timestamps."""
    ai_background: Optional[str] = None
    ai_prompt: Optional[str] = None
    # Defaults to an empty list.
    keywords: list[KeywordResponse] = []
    created_at: datetime
    updated_at: datetime
class GroupListResponse(BaseModel):
    """Paginated list of groups."""
    data: list[GroupResponse]
    pagination: PaginationResponse

126
app/schemas/report.py Normal file
View File

@@ -0,0 +1,126 @@
"""
報告相關 Pydantic Schema
"""
from datetime import datetime, date
from typing import Optional, Literal
from pydantic import BaseModel, Field
from app.schemas.user import PaginationResponse
# ===== Article (簡化版) =====
class ArticleBrief(BaseModel):
    """Simplified article representation."""
    id: int
    title: str
    source_name: str
    url: str
    published_at: Optional[datetime] = None

    class Config:
        # Allow populating the schema from object attributes.
        from_attributes = True
class ArticleInReport(ArticleBrief):
    """Article brief plus its inclusion flag within a report."""
    # Defaults to True.
    is_included: bool = True
# ===== Report =====
class ReportBase(BaseModel):
    """Fields shared by report response schemas."""
    # Required title, at most 200 characters.
    title: str = Field(..., max_length=200)
class ReportUpdate(BaseModel):
    """Payload for updating a report; every field is optional and defaults to None."""
    title: Optional[str] = Field(None, max_length=200)
    edited_summary: Optional[str] = None
    # NOTE: the dict shape below is documented only; this schema does not validate it.
    article_selections: Optional[list[dict]] = None  # [{article_id: int, is_included: bool}]
class GroupBrief(BaseModel):
    """Minimal group representation embedded in report responses."""
    id: int
    name: str
    category: str

    class Config:
        # Allow populating the schema from object attributes.
        from_attributes = True
class ReportResponse(ReportBase):
    """Report summary as returned to clients."""
    id: int
    report_date: date
    # One of the four literal status strings.
    status: Literal["draft", "pending", "published", "delayed"]
    group: GroupBrief
    # Defaults to 0 when not provided.
    article_count: Optional[int] = 0
    published_at: Optional[datetime] = None

    class Config:
        # Allow populating the schema from object attributes.
        from_attributes = True
class ReportDetailResponse(ReportResponse):
    """Report detail: summary fields plus summaries, articles, reader flags and timestamps."""
    ai_summary: Optional[str] = None
    edited_summary: Optional[str] = None
    # Defaults to an empty list.
    articles: list[ArticleInReport] = []
    is_favorited: Optional[bool] = False
    comment_count: Optional[int] = 0
    created_at: datetime
    updated_at: datetime
class ReportReviewResponse(ReportResponse):
    """Report view used by specialists when reviewing."""
    ai_summary: Optional[str] = None
    edited_summary: Optional[str] = None
    # Defaults to an empty list.
    articles: list[ArticleInReport] = []
class ReportListResponse(BaseModel):
    """Paginated list of reports."""
    data: list[ReportResponse]
    pagination: PaginationResponse
class PublishResponse(BaseModel):
    """Result of publishing a report: publish time and number of notifications sent."""
    published_at: datetime
    notifications_sent: int
class RegenerateSummaryResponse(BaseModel):
    """Result of regenerating a summary: the AI summary text."""
    ai_summary: str
# ===== Article Full =====
class ArticleSourceBrief(BaseModel):
    """Minimal news-source representation embedded in article responses."""
    id: int
    name: str

    class Config:
        # Allow populating the schema from object attributes.
        from_attributes = True
class ArticleResponse(BaseModel):
    """Article as returned in list views."""
    id: int
    title: str
    source: ArticleSourceBrief
    url: str
    published_at: Optional[datetime] = None
    crawled_at: datetime

    class Config:
        # Allow populating the schema from object attributes.
        from_attributes = True
class MatchedGroup(BaseModel):
    """A group an article matched, together with the keywords that matched."""
    group_id: int
    group_name: str
    matched_keywords: list[str]
class ArticleDetailResponse(ArticleResponse):
    """Article detail: list fields plus content, summary, author and matched groups."""
    content: Optional[str] = None
    summary: Optional[str] = None
    author: Optional[str] = None
    # Defaults to an empty list.
    matched_groups: list[MatchedGroup] = []
class ArticleListResponse(BaseModel):
    """Paginated list of articles."""
    data: list[ArticleResponse]
    pagination: PaginationResponse

88
app/schemas/user.py Normal file
View File

@@ -0,0 +1,88 @@
"""
用戶相關 Pydantic Schema
"""
from datetime import datetime
from typing import Optional, Literal
from pydantic import BaseModel, EmailStr, Field
# ===== Pagination =====
class PaginationResponse(BaseModel):
    """Pagination metadata attached to list responses."""
    page: int
    limit: int
    total: int
    total_pages: int
# ===== Role =====
class RoleBase(BaseModel):
    """Fields shared by role schemas."""
    code: str
    name: str
    description: Optional[str] = None
class RoleResponse(RoleBase):
    """Role as returned to clients."""
    id: int

    class Config:
        # Allow populating the schema from object attributes.
        from_attributes = True
# ===== User =====
class UserBase(BaseModel):
    """Fields shared by user create and response schemas."""
    # Required, 2 to 50 characters.
    username: str = Field(..., min_length=2, max_length=50)
    # Required, 1 to 100 characters.
    display_name: str = Field(..., min_length=1, max_length=100)
    # Optional; validated as an e-mail address when given.
    email: Optional[EmailStr] = None
class UserCreate(UserBase):
    """Payload for creating a user."""
    # Optional at schema level, at least 6 characters when given.
    # NOTE(review): the description says it is required for local accounts,
    # but this schema does not enforce that — confirm the caller checks it.
    password: Optional[str] = Field(None, min_length=6, description="本地帳號必填")
    # Defaults to "local".
    auth_type: Literal["ad", "local"] = "local"
    role_id: int
class UserUpdate(BaseModel):
    """Payload for updating a user; every field is optional and defaults to None."""
    display_name: Optional[str] = Field(None, max_length=100)
    email: Optional[EmailStr] = None
    role_id: Optional[int] = None
    is_active: Optional[bool] = None
    # At least 6 characters when given.
    # NOTE(review): the description restricts this to local accounts; that
    # restriction is not enforced by this schema.
    password: Optional[str] = Field(None, min_length=6, description="僅本地帳號可修改")
class UserResponse(UserBase):
    """User as returned to clients."""
    id: int
    auth_type: str
    role: RoleResponse
    is_active: bool
    last_login_at: Optional[datetime] = None
    created_at: datetime

    class Config:
        # Allow populating the schema from object attributes.
        from_attributes = True
class UserListResponse(BaseModel):
    """Paginated list of users."""
    data: list[UserResponse]
    # Written as a string (forward-reference style) annotation.
    pagination: "PaginationResponse"
# ===== Auth =====
class LoginRequest(BaseModel):
    """Login payload."""
    username: str
    password: str
    # Defaults to "ad" (note: UserCreate defaults to "local").
    auth_type: Literal["ad", "local"] = "ad"
class LoginResponse(BaseModel):
    """Login result: the issued token and the user it belongs to."""
    token: str
    user: UserResponse
class TokenPayload(BaseModel):
    """Fields carried in a token payload."""
    user_id: int
    username: str
    role: str
    exp: datetime

19
app/services/__init__.py Normal file
View File

@@ -0,0 +1,19 @@
"""
服務模組
"""
from app.services.llm_service import generate_summary, test_llm_connection
from app.services.notification_service import send_email, send_report_notifications
from app.services.crawler_service import get_crawler, BaseCrawler
from app.services.scheduler_service import init_scheduler, shutdown_scheduler, run_daily_crawl
__all__ = [
"generate_summary",
"test_llm_connection",
"send_email",
"send_report_notifications",
"get_crawler",
"BaseCrawler",
"init_scheduler",
"shutdown_scheduler",
"run_daily_crawl"
]

View File

@@ -0,0 +1,322 @@
"""
新聞爬蟲服務模組
支援 Digitimes、經濟日報、工商時報
"""
import time
import re
from datetime import datetime, date
from typing import Optional, List, Dict, Any
from abc import ABC, abstractmethod
import httpx
from bs4 import BeautifulSoup
from tenacity import retry, stop_after_attempt, wait_exponential
import logging
from app.core.config import settings
logger = logging.getLogger(__name__)
class BaseCrawler(ABC):
    """Abstract base class for news crawlers.

    Owns one ``httpx.Client`` (30 s timeout, browser-like User-Agent) and the
    delay between requests taken from ``settings.crawl_request_delay``.
    Subclasses implement the two abstract fetch methods.
    """

    def __init__(self):
        browser_headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
        }
        self.session = httpx.Client(timeout=30, headers=browser_headers)
        self.delay = settings.crawl_request_delay

    def _wait(self):
        """Sleep for the configured delay between requests."""
        time.sleep(self.delay)

    @abstractmethod
    def get_article_list(self, keywords: List[str]) -> List[Dict[str, Any]]:
        """Return the list of articles for the given keywords."""
        ...

    @abstractmethod
    def get_article_content(self, url: str) -> Optional[str]:
        """Return the content of the article at ``url``."""
        ...

    def close(self):
        """Close the underlying HTTP client."""
        self.session.close()
class DigitimesCrawler(BaseCrawler):
    """Crawler for Digitimes (paid subscription; logs in with member credentials)."""

    BASE_URL = "https://www.digitimes.com.tw"

    def __init__(self, username: str, password: str):
        """Store the member credentials; no request is made until needed."""
        super().__init__()
        self.username = username
        self.password = password
        self.is_logged_in = False

    def login(self) -> bool:
        """Log in to Digitimes and record the outcome in ``is_logged_in``.

        Returns:
            True when the login request is considered successful, False when
            it is not or when any exception occurs (the exception is logged).
        """
        try:
            # Fetch the login page first; the response itself is not used
            # (presumably needed to obtain session cookies — TODO confirm).
            self.session.get(f"{self.BASE_URL}/member/login.asp")
            # Submit the login form.
            login_data = {
                "uid": self.username,
                "pwd": self.password,
                "remember": "1"
            }
            response = self.session.post(
                f"{self.BASE_URL}/member/login_check.asp",
                data=login_data,
                follow_redirects=True
            )
            # NOTE(review): because of the `or`, any HTTP 200 response counts
            # as a successful login, so the "logout" text check never decides
            # the outcome for 200 responses. Left unchanged because the
            # site's real success marker is not known here.
            self.is_logged_in = "logout" in response.text.lower() or response.status_code == 200
            return self.is_logged_in
        except Exception:
            logger.error("Digitimes 登入失敗", exc_info=True)
            return False

    @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=2, max=10))
    def get_article_list(self, keywords: List[str]) -> List[Dict[str, Any]]:
        """Search Digitimes for each keyword and return today's articles.

        Args:
            keywords: Search terms; one search request is made per keyword.

        Returns:
            A list of dicts with keys ``title``, ``url``, ``published_at`` and
            ``source``. Only items whose parsed date equals today are kept.
            A failure for one keyword is logged and does not stop the others.
        """
        if not self.is_logged_in:
            self.login()
        today = date.today()
        articles = []
        for keyword in keywords:
            self._wait()
            try:
                # Pass the keyword via `params` so it is URL-encoded; building
                # the query string by hand breaks on '&', '#', spaces, etc.
                response = self.session.get(
                    f"{self.BASE_URL}/search/search_result.asp",
                    params={"query": keyword}
                )
                soup = BeautifulSoup(response.text, "lxml")
                # Parse the search results.
                for item in soup.select(".search-result-item, .news-item"):
                    title_elem = item.select_one("h3 a, .title a")
                    if not title_elem:
                        continue
                    title = title_elem.get_text(strip=True)
                    url = title_elem.get("href", "")
                    if not url.startswith("http"):
                        url = f"{self.BASE_URL}{url}"
                    # Parse the publication date; unparseable dates leave
                    # pub_date as None, which excludes the item below.
                    date_elem = item.select_one(".date, .time")
                    pub_date = None
                    if date_elem:
                        date_text = date_elem.get_text(strip=True)
                        try:
                            pub_date = datetime.strptime(date_text, "%Y/%m/%d")
                        except ValueError:
                            pass
                    # Keep only today's news.
                    if pub_date and pub_date.date() == today:
                        articles.append({
                            "title": title,
                            "url": url,
                            "published_at": pub_date,
                            "source": "digitimes"
                        })
            except Exception:
                logger.warning(f"Digitimes 抓取失敗 (關鍵字: {keyword})", exc_info=True)
        return articles

    @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=2, max=10))
    def get_article_content(self, url: str) -> Optional[str]:
        """Fetch an article page and return its text content.

        Returns:
            The text of the first matching content container with scripts,
            styles and ad elements removed, or None when no container
            matches or the fetch fails (the failure is logged).
        """
        if not self.is_logged_in:
            self.login()
        try:
            self._wait()
            response = self.session.get(url)
            soup = BeautifulSoup(response.text, "lxml")
            # Try several content selectors in order; the first hit wins.
            content_selectors = [".article-body", ".content", "#article-content", ".main-content"]
            for selector in content_selectors:
                content_elem = soup.select_one(selector)
                if content_elem:
                    # Remove elements that are not article text.
                    for unwanted in content_elem.select("script, style, .ad, .advertisement"):
                        unwanted.decompose()
                    return content_elem.get_text(separator="\n", strip=True)
            return None
        except Exception:
            logger.warning("Digitimes 內容抓取失敗", exc_info=True)
            return None
class UDNCrawler(BaseCrawler):
    """Crawler for Economic Daily News (money.udn.com)."""

    BASE_URL = "https://money.udn.com"

    @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=2, max=10))
    def get_article_list(self, keywords: List[str]) -> List[Dict[str, Any]]:
        """Search the site for each keyword and return today's articles.

        Args:
            keywords: Search terms; one search request is made per keyword.

        Returns:
            A list of dicts with keys ``title``, ``url``, ``published_at`` and
            ``source``. Only items whose parsed date equals today are kept.
            A failure for one keyword is logged and does not stop the others.
        """
        from urllib.parse import quote

        today = date.today()
        articles = []
        for keyword in keywords:
            self._wait()
            try:
                # The keyword is a path segment here, so quote it fully
                # (including '/') to keep '?', '#' or '/' in a keyword from
                # changing the URL structure.
                search_url = f"{self.BASE_URL}/search/result/1/{quote(keyword, safe='')}"
                response = self.session.get(search_url)
                soup = BeautifulSoup(response.text, "lxml")
                for item in soup.select(".story-list__news, .news-item"):
                    title_elem = item.select_one("h3 a, .story-list__text a")
                    if not title_elem:
                        continue
                    title = title_elem.get_text(strip=True)
                    url = title_elem.get("href", "")
                    if not url.startswith("http"):
                        url = f"{self.BASE_URL}{url}"
                    # Only the first 10 characters (YYYY-MM-DD) are parsed;
                    # unparseable dates leave pub_date as None.
                    date_elem = item.select_one("time, .story-list__time")
                    pub_date = None
                    if date_elem:
                        date_text = date_elem.get_text(strip=True)
                        try:
                            pub_date = datetime.strptime(date_text[:10], "%Y-%m-%d")
                        except ValueError:
                            pass
                    if pub_date and pub_date.date() == today:
                        articles.append({
                            "title": title,
                            "url": url,
                            "published_at": pub_date,
                            "source": "udn"
                        })
            except Exception:
                logger.warning(f"經濟日報抓取失敗 (關鍵字: {keyword})", exc_info=True)
        return articles

    @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=2, max=10))
    def get_article_content(self, url: str) -> Optional[str]:
        """Fetch an article page and return its text content.

        Returns:
            The text of the content container with scripts, styles and ad
            elements removed, or None when no container matches or the fetch
            fails (the failure is logged).
        """
        try:
            self._wait()
            response = self.session.get(url)
            soup = BeautifulSoup(response.text, "lxml")
            content_elem = soup.select_one("#story_body_content, .article-content")
            if content_elem:
                for unwanted in content_elem.select("script, style, .ad"):
                    unwanted.decompose()
                return content_elem.get_text(separator="\n", strip=True)
            return None
        except Exception:
            logger.warning("經濟日報內容抓取失敗", exc_info=True)
            return None
class CTEECrawler(BaseCrawler):
    """Crawler for Commercial Times (ctee.com.tw)."""

    BASE_URL = "https://ctee.com.tw"

    @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=2, max=10))
    def get_article_list(self, keywords: List[str]) -> List[Dict[str, Any]]:
        """Search the site for each keyword and return today's articles.

        Args:
            keywords: Search terms; one search request is made per keyword.

        Returns:
            A list of dicts with keys ``title``, ``url``, ``published_at`` and
            ``source``. Only items whose parsed date equals today are kept.
            A failure for one keyword is logged and does not stop the others.
        """
        today = date.today()
        articles = []
        for keyword in keywords:
            self._wait()
            try:
                # Pass the keyword via `params` so it is URL-encoded.
                response = self.session.get(f"{self.BASE_URL}/", params={"s": keyword})
                soup = BeautifulSoup(response.text, "lxml")
                for item in soup.select(".post-item, article.post"):
                    title_elem = item.select_one("h2 a, .post-title a")
                    if not title_elem:
                        continue
                    title = title_elem.get_text(strip=True)
                    url = title_elem.get("href", "")
                    # Make relative links absolute, as the other crawlers do.
                    if not url.startswith("http"):
                        url = f"{self.BASE_URL}{url}"
                    # Prefer the element's `datetime` attribute and fall back
                    # to its text; only the first 10 characters are parsed.
                    date_elem = item.select_one("time, .post-date")
                    pub_date = None
                    if date_elem:
                        date_text = date_elem.get("datetime", date_elem.get_text(strip=True))
                        try:
                            pub_date = datetime.fromisoformat(date_text[:10])
                        except (ValueError, TypeError):
                            pass
                    if pub_date and pub_date.date() == today:
                        articles.append({
                            "title": title,
                            "url": url,
                            "published_at": pub_date,
                            "source": "ctee"
                        })
            except Exception:
                logger.warning(f"工商時報抓取失敗 (關鍵字: {keyword})", exc_info=True)
        return articles

    @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=2, max=10))
    def get_article_content(self, url: str) -> Optional[str]:
        """Fetch an article page and return its text content.

        Returns:
            The text of the content container with scripts, styles and ad
            elements removed, or None when no container matches or the fetch
            fails (the failure is logged).
        """
        try:
            self._wait()
            response = self.session.get(url)
            soup = BeautifulSoup(response.text, "lxml")
            content_elem = soup.select_one(".entry-content, .post-content")
            if content_elem:
                for unwanted in content_elem.select("script, style, .ad"):
                    unwanted.decompose()
                return content_elem.get_text(separator="\n", strip=True)
            return None
        except Exception:
            logger.warning("工商時報內容抓取失敗", exc_info=True)
            return None
def get_crawler(source_code: str) -> BaseCrawler:
    """Build the crawler instance that corresponds to a news-source code.

    Args:
        source_code: One of "digitimes", "udn" or "ctee".

    Returns:
        A new crawler; the Digitimes crawler receives its credentials from
        ``settings``.

    Raises:
        ValueError: if the source code is not supported.
    """
    if source_code == "digitimes":
        username = settings.digitimes_username
        password = settings.digitimes_password
        return DigitimesCrawler(username, password)
    if source_code == "udn":
        return UDNCrawler()
    if source_code == "ctee":
        return CTEECrawler()
    raise ValueError(f"不支援的新聞來源: {source_code}")

176
app/services/llm_service.py Normal file
View File

@@ -0,0 +1,176 @@
"""
LLM 服務模組
支援 Google Gemini、OpenAI、Ollama
"""
import time
from typing import Optional
import httpx
from app.core.config import settings
def get_llm_client():
    """Return a client for the provider named by ``settings.llm_provider``.

    Returns:
        The configured ``google.generativeai`` module for "gemini", an
        ``OpenAI`` client for "openai", or None for "ollama" (which is called
        directly over HTTP).

    Raises:
        ValueError: if the provider is not supported.
    """
    provider = settings.llm_provider

    if provider == "ollama":
        # Ollama is called directly with httpx, so there is no client object.
        return None

    if provider == "openai":
        from openai import OpenAI
        return OpenAI(api_key=settings.openai_api_key)

    if provider == "gemini":
        import google.generativeai as genai
        genai.configure(api_key=settings.gemini_api_key)
        return genai

    raise ValueError(f"不支援的 LLM 提供者: {provider}")
def generate_summary(group, articles: list) -> str:
    """Generate an AI summary for a group's articles.

    Args:
        group: Group object; its ``ai_background`` and ``ai_prompt`` are
            inserted into the system prompt (with fallbacks when empty).
        articles: News articles; each contributes its title, source name and
            the first 1000 characters of its content (or its summary).

    Returns:
        The summary text. Always a string: a fixed message when ``articles``
        is empty, and a message starting with "摘要產生失敗:" when the
        provider call fails or the configured provider is not supported.
    """
    if not articles:
        return "無相關新聞可供摘要。"

    # Assemble the article section of the prompt.
    article_blocks = []
    for i, article in enumerate(articles, 1):
        source_name = article.source.name if article.source else '未知'
        body = article.content[:1000] if article.content else article.summary or '無內容'
        article_blocks.append(f"""
---
新聞 {i}:{article.title}
來源:{source_name}
內容:{body}
---
""")
    articles_text = "".join(article_blocks)

    # Build the prompts.
    system_prompt = f"""你是一位專業的產業分析師,負責彙整每日新聞並產出精闢的綜合分析報告。
背景資訊:
{group.ai_background or '無特定背景資訊'}
摘要方向:
{group.ai_prompt or '請綜合分析以下新聞的重點、趨勢與潛在影響。'}
"""
    user_prompt = f"""請根據以下 {len(articles)} 則新聞,產出一份繁體中文的綜合分析報告:
{articles_text}
請注意:
1. 使用繁體中文
2. 整合相關主題,避免逐條列舉
3. 突出重要趨勢與影響
4. 控制在 500 字以內
"""
    provider = settings.llm_provider
    try:
        if provider == "gemini":
            import google.generativeai as genai
            genai.configure(api_key=settings.gemini_api_key)
            model = genai.GenerativeModel(settings.gemini_model or "gemini-1.5-pro")
            response = model.generate_content(
                f"{system_prompt}\n\n{user_prompt}",
                generation_config={
                    "temperature": 0.7,
                    "max_output_tokens": 2048,
                    "top_p": 0.95,
                    "top_k": 40
                }
            )
            return response.text
        if provider == "openai":
            from openai import OpenAI
            client = OpenAI(api_key=settings.openai_api_key)
            response = client.chat.completions.create(
                model=settings.openai_model or "gpt-4o",
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_prompt}
                ],
                max_tokens=2048,
                temperature=0.7
            )
            # `content` can be None; keep the declared str return type.
            return response.choices[0].message.content or ""
        if provider == "ollama":
            response = httpx.post(
                f"{settings.ollama_endpoint}/api/generate",
                json={
                    "model": settings.ollama_model or "llama3",
                    "prompt": f"{system_prompt}\n\n{user_prompt}",
                    "stream": False,
                    "options": {
                        "temperature": 0.7,
                        "num_predict": 2048,
                        "top_p": 0.9,
                        "top_k": 40
                    }
                },
                timeout=120
            )
            # Surface HTTP errors as a failure message instead of silently
            # returning an empty summary.
            response.raise_for_status()
            return response.json().get("response", "")
        # Unknown provider: report it rather than falling through to None.
        return f"摘要產生失敗:不支援的 LLM 提供者: {provider}"
    except Exception as e:
        return f"摘要產生失敗:{str(e)}"
def test_llm_connection(provider: str, model: str) -> dict:
    """Test connectivity to an LLM provider by sending a short "Hello" prompt.

    Args:
        provider: "gemini", "openai" or "ollama".
        model: Model name passed to the provider.

    Returns:
        ``{"success": bool, "response_time_ms": int, "message": str}``.
        All three keys are present on every path; ``message`` is "OK" on
        success and the error text otherwise. Exceptions are not raised.
    """
    start_time = time.time()

    def _result(success: bool, message: str) -> dict:
        # Build the result with the time elapsed since the call started.
        elapsed = int((time.time() - start_time) * 1000)
        return {"success": success, "response_time_ms": elapsed, "message": message}

    try:
        if provider == "gemini":
            import google.generativeai as genai
            genai.configure(api_key=settings.gemini_api_key)
            gen_model = genai.GenerativeModel(model)
            gen_model.generate_content(
                "Hello",
                generation_config={"max_output_tokens": 10}
            )
            return _result(True, "OK")
        if provider == "openai":
            from openai import OpenAI
            client = OpenAI(api_key=settings.openai_api_key)
            client.chat.completions.create(
                model=model,
                messages=[{"role": "user", "content": "Hello"}],
                max_tokens=10
            )
            return _result(True, "OK")
        if provider == "ollama":
            response = httpx.post(
                f"{settings.ollama_endpoint}/api/generate",
                json={"model": model, "prompt": "Hello", "stream": False},
                timeout=30
            )
            if response.status_code == 200:
                return _result(True, "OK")
            return _result(False, f"HTTP {response.status_code}")
        return _result(False, f"不支援的提供者: {provider}")
    except Exception as e:
        return _result(False, str(e))

View File

@@ -0,0 +1,203 @@
"""
通知服務模組
處理 Email 發送
"""
import smtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
from typing import Optional
from html import escape
from sqlalchemy.orm import Session
import logging
from app.core.config import settings
from app.models import Report, Subscription, User, NotificationLog, NotificationStatus
logger = logging.getLogger(__name__)
def send_email(to_email: str, subject: str, html_content: str) -> bool:
    """Send an HTML e-mail through the SMTP server configured in ``settings``.

    Args:
        to_email: Recipient address.
        subject: Message subject.
        html_content: HTML body, sent as UTF-8.

    Returns:
        True when the message was handed to the SMTP server; False when SMTP
        is not configured or sending failed (the failure is logged).
    """
    from email.utils import formataddr

    if not settings.smtp_host:
        logger.warning("SMTP 未設定,跳過發送")
        return False
    try:
        msg = MIMEMultipart("alternative")
        msg["Subject"] = subject
        # formataddr encodes only the display name, so a non-ASCII sender
        # name does not cause the address itself to be header-encoded.
        msg["From"] = formataddr((settings.smtp_from_name, settings.smtp_from_email))
        msg["To"] = to_email
        msg.attach(MIMEText(html_content, "html", "utf-8"))
        # A timeout keeps an unresponsive SMTP server from blocking the
        # caller indefinitely.
        with smtplib.SMTP(settings.smtp_host, settings.smtp_port, timeout=30) as server:
            server.starttls()
            if settings.smtp_username and settings.smtp_password:
                server.login(settings.smtp_username, settings.smtp_password)
            server.sendmail(settings.smtp_from_email, to_email, msg.as_string())
        return True
    except Exception:
        logger.error("Email 發送失敗", exc_info=True)
        return False
def create_report_email_content(report: Report, base_url: str = "") -> str:
    """Render the HTML body of a report notification e-mail.

    Args:
        report: Report to announce; its edited summary is preferred over the
            AI summary, with a fixed placeholder when both are empty.
        base_url: Prefix for the "read full report" link.

    Returns:
        The HTML document as a string. Title, group name, summary and base
        URL are HTML-escaped; the summary is cut to 500 characters plus "...".
    """
    excerpt = report.edited_summary or report.ai_summary or "無摘要內容"
    # Keep at most the first 500 characters of the summary.
    excerpt = excerpt if len(excerpt) <= 500 else excerpt[:500] + "..."

    # Escape HTML special characters to prevent XSS.
    title_html = escape(report.title)
    group_html = escape(report.group.name)
    summary_html = escape(excerpt)
    base_url_html = escape(base_url)

    return f"""
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<style>
body {{ font-family: Arial, sans-serif; line-height: 1.6; color: #333; }}
.container {{ max-width: 600px; margin: 0 auto; padding: 20px; }}
.header {{ background: #4a6fa5; color: white; padding: 20px; text-align: center; }}
.content {{ padding: 20px; background: #f9f9f9; }}
.summary {{ background: white; padding: 15px; border-left: 4px solid #4a6fa5; margin: 15px 0; }}
.button {{ display: inline-block; padding: 12px 24px; background: #4a6fa5; color: white; text-decoration: none; border-radius: 4px; }}
.footer {{ text-align: center; padding: 20px; color: #666; font-size: 12px; }}
</style>
</head>
<body>
<div class="container">
<div class="header">
<h1 style="margin:0;">每日報導</h1>
</div>
<div class="content">
<h2>{title_html}</h2>
<p>
<strong>群組:</strong>{group_html}<br>
<strong>日期:</strong>{report.report_date}
</p>
<div class="summary">
<h3>摘要</h3>
<p>{summary_html}</p>
</div>
<p style="text-align: center; margin-top: 30px;">
<a href="{base_url_html}/reports/{report.id}" class="button">閱讀完整報告</a>
</p>
</div>
<div class="footer">
<p>此郵件由每日報導系統自動發送</p>
<p>如不想收到通知,請至系統調整訂閱設定</p>
</div>
</div>
</body>
</html>
"""
def send_report_notifications(db: Session, report: Report) -> int:
    """Send the report notification e-mail to the group's subscribers.

    For every subscription of the report's group with e-mail notification
    enabled, whose user exists, is active and has an e-mail address, a
    NotificationLog row is created and marked SENT or FAILED according to
    the send result. All rows are committed once at the end.

    Args:
        db: Database session.
        report: The report being announced.

    Returns:
        Number of e-mails sent successfully.
    """
    from datetime import datetime

    # Subscriptions of this group that asked for e-mail notification.
    subscriptions = db.query(Subscription).filter(
        Subscription.group_id == report.group_id,
        Subscription.email_notify == True
    ).all()

    subject = f"【每日報導】{report.title}"
    # The body is identical for every recipient; build it once, lazily, so
    # nothing is rendered when there is no eligible recipient.
    # NOTE(review): no base_url is passed, so the link in the e-mail is
    # relative ("/reports/<id>") — confirm whether a site URL should be used.
    html_content = None
    sent_count = 0
    for sub in subscriptions:
        user = db.query(User).filter(User.id == sub.user_id).first()
        if not user or not user.email or not user.is_active:
            continue
        # notification_type is left to the column default
        # (NotificationType.EMAIL) instead of passing the raw string "email"
        # to an Enum-typed column.
        notification = NotificationLog(
            user_id=user.id,
            report_id=report.id,
            subject=subject,
            content=report.edited_summary or report.ai_summary
        )
        db.add(notification)
        if html_content is None:
            html_content = create_report_email_content(report)
        if send_email(user.email, subject, html_content):
            notification.status = NotificationStatus.SENT
            notification.sent_at = datetime.utcnow()
            sent_count += 1
        else:
            notification.status = NotificationStatus.FAILED
            notification.error_message = "發送失敗"
    db.commit()
    return sent_count
def send_delay_notification(db: Session, report: Report) -> int:
    """Notify a group's subscribers that today's report is delayed.

    Args:
        db: Database session.
        report: The delayed report; its group decides the recipients.

    Returns:
        Number of e-mails sent successfully.
    """
    opted_in = db.query(Subscription).filter(
        Subscription.group_id == report.group_id,
        Subscription.email_notify == True
    ).all()

    delivered = 0
    for subscription in opted_in:
        recipient = db.query(User).filter(User.id == subscription.user_id).first()
        # Skip missing users, users without an address and disabled users.
        if not (recipient and recipient.email and recipient.is_active):
            continue
        # Escape HTML special characters to prevent XSS.
        group_name_html = escape(report.group.name)
        body = f"""
<html>
<body>
<h2>報告延遲通知</h2>
<p>您訂閱的「{group_name_html}」今日報告延遲發布,敬請稍後。</p>
<p>造成不便,敬請見諒。</p>
</body>
</html>
"""
        mail_subject = f"【每日報導】{report.group.name} 報告延遲通知"
        if send_email(recipient.email, mail_subject, body):
            delivered += 1
    return delivered

View File

@@ -0,0 +1,277 @@
"""
排程服務模組
處理每日新聞抓取與報告產生
"""
from datetime import datetime, date
from typing import List
from apscheduler.schedulers.background import BackgroundScheduler
from apscheduler.triggers.cron import CronTrigger
from sqlalchemy.orm import Session
import logging
from app.db.session import SessionLocal
from app.core.config import settings
from app.models import (
NewsSource, NewsArticle, CrawlJob, CrawlStatus,
Group, Keyword, ArticleGroupMatch, Report, ReportArticle, ReportStatus
)
from app.services.crawler_service import get_crawler
from app.services.llm_service import generate_summary
from app.services.notification_service import send_delay_notification
logger = logging.getLogger(__name__)
scheduler = BackgroundScheduler()
def run_daily_crawl():
    """Run the daily crawl for every active news source, then build reports.

    For each active source a CrawlJob row is recorded, the source's crawler
    fetches today's articles for all active keywords, new articles are
    stored and matched to groups, and the job is marked COMPLETED or FAILED.
    A failing source does not stop the remaining sources. Afterwards the
    daily reports are generated. Unexpected errors are logged, not raised.
    """
    logger.info("開始每日新聞抓取...")
    db = SessionLocal()
    try:
        # All active news sources.
        sources = db.query(NewsSource).filter(NewsSource.is_active == True).all()
        # All active keywords, de-duplicated.
        all_keywords = db.query(Keyword).filter(Keyword.is_active == True).all()
        keywords_list = list({kw.keyword for kw in all_keywords})
        for source in sources:
            logger.info(f"抓取來源: {source.name}")
            # Record the crawl job.
            job = CrawlJob(
                source_id=source.id,
                status=CrawlStatus.RUNNING,
                scheduled_at=datetime.now(),
                started_at=datetime.now()
            )
            db.add(job)
            db.commit()
            crawler = None
            try:
                crawler = get_crawler(source.code)
                # Fetch the article list.
                articles_data = crawler.get_article_list(keywords_list)
                articles_count = 0
                for article_data in articles_data:
                    # Skip articles already stored for this source.
                    existing = db.query(NewsArticle).filter(
                        NewsArticle.source_id == source.id,
                        NewsArticle.url == article_data["url"]
                    ).first()
                    if existing:
                        continue
                    # Fetch the full text.
                    content = crawler.get_article_content(article_data["url"])
                    # Store the article.
                    article = NewsArticle(
                        source_id=source.id,
                        title=article_data["title"],
                        url=article_data["url"],
                        content=content,
                        published_at=article_data.get("published_at"),
                        crawled_at=datetime.now()
                    )
                    db.add(article)
                    db.commit()
                    db.refresh(article)
                    # Keyword matching.
                    match_article_to_groups(db, article)
                    articles_count += 1
                # Update the job status.
                job.status = CrawlStatus.COMPLETED
                job.completed_at = datetime.now()
                job.articles_count = articles_count
            except Exception as e:
                # Discard any half-finished transaction so the session can be
                # used again to record the failure and for the next source.
                db.rollback()
                job.status = CrawlStatus.FAILED
                job.completed_at = datetime.now()
                job.error_message = str(e)
                # Tolerate a NULL retry_count.
                job.retry_count = (job.retry_count or 0) + 1
                logger.error(f"抓取失敗 (來源: {source.name})", exc_info=True)
            finally:
                # Always release the crawler's HTTP client, also on failure.
                if crawler is not None:
                    crawler.close()
            db.commit()
        # Generate today's reports.
        generate_daily_reports(db)
        logger.info("每日新聞抓取完成")
    except Exception:
        logger.error("抓取過程發生錯誤", exc_info=True)
    finally:
        db.close()
def match_article_to_groups(db: Session, article: NewsArticle):
    """Match an article against every active group's active keywords.

    A keyword matches when it occurs, case-insensitively, in the article's
    title or content. For each group with at least one match an
    ArticleGroupMatch row is added whose score is the percentage of the
    group's (non-blank) keywords that matched. Commits once at the end.

    Args:
        db: Database session.
        article: The stored article to match.
    """
    groups = db.query(Group).filter(Group.is_active == True).all()
    # Lower-case the searchable text once instead of once per keyword.
    article_text = f"{article.title} {article.content or ''}".lower()
    for group in groups:
        keywords = db.query(Keyword).filter(
            Keyword.group_id == group.id,
            Keyword.is_active == True
        ).all()
        # Ignore blank keywords: an empty string is a substring of any text
        # and would match every article.
        terms = [kw.keyword for kw in keywords if kw.keyword and kw.keyword.strip()]
        matched_keywords = [term for term in terms if term.lower() in article_text]
        if matched_keywords:
            # Match score: share of the group's keywords that matched.
            score = len(matched_keywords) / len(terms) * 100
            match = ArticleGroupMatch(
                article_id=article.id,
                group_id=group.id,
                matched_keywords=matched_keywords,
                match_score=score
            )
            db.add(match)
    db.commit()
def generate_daily_reports(db: Session):
    """Create today's report for every active group that has matched articles.

    Groups that already have a report dated today, or that have no matches
    on articles crawled since midnight, are skipped. A new report starts as
    DRAFT, gets its articles attached, and moves to PENDING once the AI
    summary has been stored.

    Args:
        db: Database session.
    """
    logger.info("產生今日報告...")
    today = date.today()
    day_start = datetime.combine(today, datetime.min.time())

    active_groups = db.query(Group).filter(Group.is_active == True).all()
    for group in active_groups:
        # Skip groups whose report for today already exists.
        already_reported = db.query(Report).filter(
            Report.group_id == group.id,
            Report.report_date == today
        ).first()
        if already_reported:
            continue

        # Matches on articles crawled today.
        todays_matches = db.query(ArticleGroupMatch).filter(
            ArticleGroupMatch.group_id == group.id
        ).join(NewsArticle).filter(
            NewsArticle.crawled_at >= day_start
        ).all()
        if not todays_matches:
            continue

        # Create the report.
        report = Report(
            group_id=group.id,
            title=f"{group.name}日報 - {today.strftime('%Y/%m/%d')}",
            report_date=today,
            status=ReportStatus.DRAFT
        )
        db.add(report)
        db.commit()
        db.refresh(report)

        # Attach the matched articles.
        articles = []
        for match in todays_matches:
            article = db.query(NewsArticle).filter(NewsArticle.id == match.article_id).first()
            if not article:
                continue
            db.add(ReportArticle(
                report_id=report.id,
                article_id=article.id,
                is_included=True
            ))
            articles.append(article)
        db.commit()

        # Generate the AI summary.
        if articles:
            report.ai_summary = generate_summary(group, articles)
            report.status = ReportStatus.PENDING
            db.commit()
        logger.info(f"已產生報告: {report.title} ({len(articles)} 篇文章)")
def check_publish_deadline():
    """Mark today's unpublished reports as delayed and notify subscribers.

    Reports dated today that are still DRAFT or PENDING are set to DELAYED
    and a delay notification is sent for each; the changes are committed
    afterwards. The session is always closed.
    """
    db = SessionLocal()
    try:
        unpublished_states = [ReportStatus.DRAFT, ReportStatus.PENDING]
        # Reports for today that have not been published yet.
        overdue_reports = db.query(Report).filter(
            Report.report_date == date.today(),
            Report.status.in_(unpublished_states)
        ).all()
        for overdue in overdue_reports:
            overdue.status = ReportStatus.DELAYED
            send_delay_notification(db, overdue)
        db.commit()
    finally:
        db.close()
def _parse_hhmm(value: str) -> tuple:
    """Split an "HH:MM" string into an (hour, minute) pair of ints."""
    parts = value.split(":")
    return int(parts[0]), int(parts[1])


def init_scheduler(deadline_time: str = "09:00"):
    """Register the daily jobs and start the scheduler.

    Two cron jobs are (re)registered: the daily crawl at
    ``settings.crawl_schedule_time`` and the publish-deadline check at
    ``deadline_time``. The scheduler is started only if it is not already
    running.

    Args:
        deadline_time: Publish deadline as "HH:MM". Defaults to "09:00",
            the value that used to be hard-coded.

    Raises:
        ValueError, IndexError: if either time string is not "HH:MM".
    """
    crawl_hour, crawl_minute = _parse_hhmm(settings.crawl_schedule_time)
    deadline_hour, deadline_minute = _parse_hhmm(deadline_time)
    # Daily crawl job.
    scheduler.add_job(
        run_daily_crawl,
        CronTrigger(hour=crawl_hour, minute=crawl_minute),
        id="daily_crawl",
        replace_existing=True
    )
    # Publish-deadline check.
    scheduler.add_job(
        check_publish_deadline,
        CronTrigger(hour=deadline_hour, minute=deadline_minute),
        id="check_deadline",
        replace_existing=True
    )
    # Start the scheduler.
    if not scheduler.running:
        scheduler.start()
        logger.info(f"排程器已啟動: 每日 {settings.crawl_schedule_time} 抓取")
def shutdown_scheduler():
    """Shut the scheduler down; does nothing when it is not running."""
    if not scheduler.running:
        return
    scheduler.shutdown()