Initial commit: Daily News App

企業內部新聞彙整與分析系統
- 自動新聞抓取 (Digitimes, 經濟日報, 工商時報)
- AI 智慧摘要 (OpenAI/Claude/Ollama)
- 群組管理與訂閱通知
- 已清理 Python 快取檔案

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
donald
2025-12-03 23:53:24 +08:00
commit db0f0bbfe7
50 changed files with 11883 additions and 0 deletions

425
scripts/init.sql Normal file
View File

@@ -0,0 +1,425 @@
-- ============================================================
-- 每日報導 APP - 資料庫 Schema 設計
-- Database: MySQL 8.0+
-- Charset: utf8mb4
-- ============================================================
-- 建立資料庫
CREATE DATABASE IF NOT EXISTS daily_news_app
CHARACTER SET utf8mb4
COLLATE utf8mb4_unicode_ci;
USE daily_news_app;
-- ============================================================
-- 1. 用戶與權限相關
-- ============================================================
-- 1.1 角色表
CREATE TABLE roles (
id INT PRIMARY KEY AUTO_INCREMENT,
code VARCHAR(20) NOT NULL UNIQUE COMMENT '角色代碼: admin, editor, reader',
name VARCHAR(50) NOT NULL COMMENT '角色名稱',
description VARCHAR(200) COMMENT '角色描述',
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP
) COMMENT='系統角色表';
-- 預設角色資料
INSERT INTO roles (code, name, description) VALUES
('admin', '系統管理員', 'LLM設定、AD整合、群組管理、用戶管理、系統設定'),
('editor', '市場分析專員', '新聞抓取管理、篩選編輯、報告發布、群組內容設定'),
('reader', '讀者', '訂閱群組、閱讀報告、留言討論、個人收藏、匯出PDF');
-- 1.2 用戶表
CREATE TABLE users (
id INT PRIMARY KEY AUTO_INCREMENT,
username VARCHAR(50) NOT NULL UNIQUE COMMENT '用戶帳號',
password_hash VARCHAR(255) COMMENT '密碼雜湊(本地帳號使用)',
display_name VARCHAR(100) NOT NULL COMMENT '顯示名稱',
email VARCHAR(100) COMMENT '電子郵件',
auth_type ENUM('ad', 'local') NOT NULL DEFAULT 'local' COMMENT '認證類型',
role_id INT NOT NULL COMMENT '角色ID',
is_active BOOLEAN DEFAULT TRUE COMMENT '是否啟用',
last_login_at TIMESTAMP NULL COMMENT '最後登入時間',
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
FOREIGN KEY (role_id) REFERENCES roles(id)
) COMMENT='用戶表';
CREATE INDEX idx_users_username ON users(username);
CREATE INDEX idx_users_auth_type ON users(auth_type);
-- ============================================================
-- 2. 新聞來源與抓取相關
-- ============================================================
-- 2.1 新聞來源表
CREATE TABLE news_sources (
id INT PRIMARY KEY AUTO_INCREMENT,
code VARCHAR(30) NOT NULL UNIQUE COMMENT '來源代碼',
name VARCHAR(100) NOT NULL COMMENT '來源名稱',
base_url VARCHAR(255) NOT NULL COMMENT '網站基礎URL',
source_type ENUM('subscription', 'public') NOT NULL COMMENT '來源類型:付費訂閱/公開',
login_username VARCHAR(100) COMMENT '登入帳號(付費訂閱用)',
login_password_encrypted VARCHAR(255) COMMENT '加密後密碼',
is_active BOOLEAN DEFAULT TRUE COMMENT '是否啟用',
crawl_config JSON COMMENT '爬蟲設定(選擇器、間隔等)',
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP
) COMMENT='新聞來源表';
-- 預設新聞來源
INSERT INTO news_sources (code, name, base_url, source_type) VALUES
('digitimes', 'Digitimes', 'https://www.digitimes.com.tw', 'subscription'),
('udn', '經濟日報', 'https://money.udn.com', 'public'),
('ctee', '工商時報', 'https://ctee.com.tw', 'public');
-- 2.2 新聞文章表
CREATE TABLE news_articles (
id BIGINT PRIMARY KEY AUTO_INCREMENT,
source_id INT NOT NULL COMMENT '來源ID',
external_id VARCHAR(100) COMMENT '外部文章ID防重複抓取',
title VARCHAR(500) NOT NULL COMMENT '文章標題',
content LONGTEXT COMMENT '文章全文',
summary TEXT COMMENT '原文摘要',
url VARCHAR(500) NOT NULL COMMENT '原文連結',
author VARCHAR(100) COMMENT '作者',
published_at TIMESTAMP NULL COMMENT '發布時間',
crawled_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP COMMENT '抓取時間',
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (source_id) REFERENCES news_sources(id),
UNIQUE KEY uk_source_external (source_id, external_id)
) COMMENT='新聞文章表';
CREATE INDEX idx_articles_published ON news_articles(published_at);
CREATE INDEX idx_articles_crawled ON news_articles(crawled_at);
CREATE FULLTEXT INDEX ft_articles_content ON news_articles(title, content);
-- 2.3 抓取任務記錄表
CREATE TABLE crawl_jobs (
id BIGINT PRIMARY KEY AUTO_INCREMENT,
source_id INT NOT NULL COMMENT '來源ID',
status ENUM('pending', 'running', 'completed', 'failed') DEFAULT 'pending',
scheduled_at TIMESTAMP NOT NULL COMMENT '排程時間',
started_at TIMESTAMP NULL COMMENT '開始時間',
completed_at TIMESTAMP NULL COMMENT '完成時間',
articles_count INT DEFAULT 0 COMMENT '抓取文章數',
error_message TEXT COMMENT '錯誤訊息',
retry_count INT DEFAULT 0 COMMENT '重試次數',
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (source_id) REFERENCES news_sources(id)
) COMMENT='抓取任務記錄表';
CREATE INDEX idx_crawl_jobs_status ON crawl_jobs(status);
CREATE INDEX idx_crawl_jobs_scheduled ON crawl_jobs(scheduled_at);
-- ============================================================
-- 3. 群組與關鍵字相關
-- ============================================================
-- 3.1 群組表
CREATE TABLE groups (
id INT PRIMARY KEY AUTO_INCREMENT,
name VARCHAR(100) NOT NULL COMMENT '群組名稱',
description TEXT COMMENT '群組描述',
category ENUM('industry', 'topic') NOT NULL COMMENT '分類:產業別/議題',
ai_background TEXT COMMENT 'AI背景資訊設定',
ai_prompt TEXT COMMENT 'AI摘要方向提示',
is_active BOOLEAN DEFAULT TRUE COMMENT '是否啟用',
created_by INT COMMENT '建立者ID',
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
FOREIGN KEY (created_by) REFERENCES users(id)
) COMMENT='群組表';
-- 3.2 關鍵字表
CREATE TABLE keywords (
id INT PRIMARY KEY AUTO_INCREMENT,
group_id INT NOT NULL COMMENT '所屬群組ID',
keyword VARCHAR(100) NOT NULL COMMENT '關鍵字',
is_active BOOLEAN DEFAULT TRUE,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (group_id) REFERENCES groups(id) ON DELETE CASCADE,
UNIQUE KEY uk_group_keyword (group_id, keyword)
) COMMENT='關鍵字表';
CREATE INDEX idx_keywords_keyword ON keywords(keyword);
-- 3.3 新聞-群組關聯表(根據關鍵字匹配)
CREATE TABLE article_group_matches (
id BIGINT PRIMARY KEY AUTO_INCREMENT,
article_id BIGINT NOT NULL,
group_id INT NOT NULL,
matched_keywords JSON COMMENT '匹配到的關鍵字列表',
match_score DECIMAL(5,2) COMMENT '匹配分數',
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (article_id) REFERENCES news_articles(id) ON DELETE CASCADE,
FOREIGN KEY (group_id) REFERENCES groups(id) ON DELETE CASCADE,
UNIQUE KEY uk_article_group (article_id, group_id)
) COMMENT='新聞-群組匹配關聯表';
CREATE INDEX idx_matches_group ON article_group_matches(group_id);
-- ============================================================
-- 4. 報告相關
-- ============================================================
-- 4.1 報告表
CREATE TABLE reports (
id BIGINT PRIMARY KEY AUTO_INCREMENT,
group_id INT NOT NULL COMMENT '所屬群組ID',
title VARCHAR(200) NOT NULL COMMENT '報告標題',
report_date DATE NOT NULL COMMENT '報告日期',
ai_summary LONGTEXT COMMENT 'AI綜合摘要',
edited_summary LONGTEXT COMMENT '編輯後摘要(專員修改版)',
status ENUM('draft', 'pending', 'published', 'delayed') DEFAULT 'draft' COMMENT '狀態',
published_at TIMESTAMP NULL COMMENT '發布時間',
published_by INT COMMENT '發布者ID',
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
FOREIGN KEY (group_id) REFERENCES groups(id),
FOREIGN KEY (published_by) REFERENCES users(id),
UNIQUE KEY uk_group_date (group_id, report_date)
) COMMENT='報告表';
CREATE INDEX idx_reports_status ON reports(status);
CREATE INDEX idx_reports_date ON reports(report_date);
-- 4.2 報告-新聞關聯表
CREATE TABLE report_articles (
id BIGINT PRIMARY KEY AUTO_INCREMENT,
report_id BIGINT NOT NULL,
article_id BIGINT NOT NULL,
is_included BOOLEAN DEFAULT TRUE COMMENT '是否納入報告(專員篩選)',
display_order INT DEFAULT 0 COMMENT '顯示順序',
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (report_id) REFERENCES reports(id) ON DELETE CASCADE,
FOREIGN KEY (article_id) REFERENCES news_articles(id),
UNIQUE KEY uk_report_article (report_id, article_id)
) COMMENT='報告-新聞關聯表';
-- ============================================================
-- 5. 讀者互動相關
-- ============================================================
-- 5.1 訂閱表
CREATE TABLE subscriptions (
id INT PRIMARY KEY AUTO_INCREMENT,
user_id INT NOT NULL,
group_id INT NOT NULL,
email_notify BOOLEAN DEFAULT TRUE COMMENT '是否Email通知',
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (user_id) REFERENCES users(id) ON DELETE CASCADE,
FOREIGN KEY (group_id) REFERENCES groups(id) ON DELETE CASCADE,
UNIQUE KEY uk_user_group (user_id, group_id)
) COMMENT='訂閱表';
-- 5.2 收藏表
CREATE TABLE favorites (
id BIGINT PRIMARY KEY AUTO_INCREMENT,
user_id INT NOT NULL,
report_id BIGINT NOT NULL,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (user_id) REFERENCES users(id) ON DELETE CASCADE,
FOREIGN KEY (report_id) REFERENCES reports(id) ON DELETE CASCADE,
UNIQUE KEY uk_user_report (user_id, report_id)
) COMMENT='收藏表';
-- 5.3 留言表
CREATE TABLE comments (
id BIGINT PRIMARY KEY AUTO_INCREMENT,
report_id BIGINT NOT NULL,
user_id INT NOT NULL,
content TEXT NOT NULL COMMENT '留言內容',
parent_id BIGINT COMMENT '父留言ID回覆用',
is_deleted BOOLEAN DEFAULT FALSE,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
FOREIGN KEY (report_id) REFERENCES reports(id) ON DELETE CASCADE,
FOREIGN KEY (user_id) REFERENCES users(id),
FOREIGN KEY (parent_id) REFERENCES comments(id)
) COMMENT='留言表';
CREATE INDEX idx_comments_report ON comments(report_id);
-- 5.4 個人筆記表
CREATE TABLE notes (
id BIGINT PRIMARY KEY AUTO_INCREMENT,
user_id INT NOT NULL,
report_id BIGINT NOT NULL,
content TEXT NOT NULL COMMENT '筆記內容',
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
FOREIGN KEY (user_id) REFERENCES users(id) ON DELETE CASCADE,
FOREIGN KEY (report_id) REFERENCES reports(id) ON DELETE CASCADE
) COMMENT='個人筆記表';
CREATE INDEX idx_notes_user_report ON notes(user_id, report_id);
-- ============================================================
-- 6. 系統設定相關
-- ============================================================
-- 6.1 系統設定表
CREATE TABLE system_settings (
id INT PRIMARY KEY AUTO_INCREMENT,
setting_key VARCHAR(50) NOT NULL UNIQUE COMMENT '設定鍵',
setting_value TEXT COMMENT '設定值',
setting_type ENUM('string', 'number', 'boolean', 'json') DEFAULT 'string',
description VARCHAR(200) COMMENT '設定描述',
updated_by INT COMMENT '更新者ID',
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
FOREIGN KEY (updated_by) REFERENCES users(id)
) COMMENT='系統設定表';
-- 預設系統設定
INSERT INTO system_settings (setting_key, setting_value, setting_type, description) VALUES
('crawl_schedule_time', '08:00', 'string', '每日抓取排程時間'),
('publish_deadline', '09:00', 'string', '報告發布截止時間'),
('llm_provider', 'claude', 'string', 'LLM提供者: openai/claude/ollama'),
('llm_api_key_encrypted', '', 'string', '加密後的API Key'),
('llm_model', 'claude-3-sonnet', 'string', '使用的模型名稱'),
('llm_ollama_endpoint', 'http://localhost:11434', 'string', 'Ollama端點'),
('data_retention_days', '60', 'number', '資料保留天數'),
('pdf_logo_path', '', 'string', 'PDF Logo檔案路徑'),
('pdf_header_text', '', 'string', 'PDF頁首文字'),
('pdf_footer_text', '', 'string', 'PDF頁尾文字'),
('smtp_host', '', 'string', 'SMTP伺服器'),
('smtp_port', '587', 'number', 'SMTP埠號'),
('smtp_username', '', 'string', 'SMTP帳號'),
('smtp_password_encrypted', '', 'string', '加密後SMTP密碼'),
('smtp_from_email', '', 'string', '寄件者Email'),
('smtp_from_name', '每日報導系統', 'string', '寄件者名稱');
-- 6.2 操作日誌表
CREATE TABLE audit_logs (
id BIGINT PRIMARY KEY AUTO_INCREMENT,
user_id INT COMMENT '操作用戶ID',
action VARCHAR(50) NOT NULL COMMENT '操作類型',
target_type VARCHAR(50) COMMENT '目標類型',
target_id VARCHAR(50) COMMENT '目標ID',
details JSON COMMENT '操作詳情',
ip_address VARCHAR(45) COMMENT 'IP地址',
user_agent VARCHAR(500) COMMENT 'User Agent',
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (user_id) REFERENCES users(id)
) COMMENT='操作日誌表';
CREATE INDEX idx_audit_user ON audit_logs(user_id);
CREATE INDEX idx_audit_action ON audit_logs(action);
CREATE INDEX idx_audit_created ON audit_logs(created_at);
-- 6.3 通知記錄表
CREATE TABLE notification_logs (
id BIGINT PRIMARY KEY AUTO_INCREMENT,
user_id INT NOT NULL,
report_id BIGINT COMMENT '關聯報告ID',
notification_type ENUM('email', 'system') DEFAULT 'email',
subject VARCHAR(200) COMMENT '通知標題',
content TEXT COMMENT '通知內容',
status ENUM('pending', 'sent', 'failed') DEFAULT 'pending',
sent_at TIMESTAMP NULL,
error_message TEXT,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (user_id) REFERENCES users(id),
FOREIGN KEY (report_id) REFERENCES reports(id)
) COMMENT='通知記錄表';
CREATE INDEX idx_notification_status ON notification_logs(status);
-- ============================================================
-- 7. 資料清理事件60天保留
-- ============================================================
-- 建立事件排程器(需確保 event_scheduler=ON
DELIMITER //
CREATE EVENT IF NOT EXISTS cleanup_old_data
ON SCHEDULE EVERY 1 DAY
STARTS CURRENT_TIMESTAMP
DO
BEGIN
DECLARE retention_days INT DEFAULT 60;
-- 取得設定的保留天數
SELECT CAST(setting_value AS UNSIGNED) INTO retention_days
FROM system_settings
WHERE setting_key = 'data_retention_days';
-- 刪除過期的新聞文章(會連帶刪除關聯資料)
DELETE FROM news_articles
WHERE crawled_at < DATE_SUB(NOW(), INTERVAL retention_days DAY);
-- 刪除過期的抓取任務記錄
DELETE FROM crawl_jobs
WHERE created_at < DATE_SUB(NOW(), INTERVAL retention_days DAY);
-- 刪除過期的操作日誌
DELETE FROM audit_logs
WHERE created_at < DATE_SUB(NOW(), INTERVAL retention_days DAY);
-- 刪除過期的通知記錄
DELETE FROM notification_logs
WHERE created_at < DATE_SUB(NOW(), INTERVAL retention_days DAY);
END//
DELIMITER ;
-- ============================================================
-- 8. 視圖View- 常用查詢
-- ============================================================
-- 8.1 今日待審核報告視圖
CREATE VIEW v_pending_reports AS
SELECT
r.id,
r.title,
r.report_date,
r.status,
g.name AS group_name,
g.category,
COUNT(ra.id) AS article_count,
SUM(CASE WHEN ra.is_included = TRUE THEN 1 ELSE 0 END) AS included_count
FROM reports r
JOIN groups g ON r.group_id = g.id
LEFT JOIN report_articles ra ON r.id = ra.report_id
WHERE r.report_date = CURDATE()
GROUP BY r.id;
-- 8.2 用戶訂閱群組視圖
CREATE VIEW v_user_subscriptions AS
SELECT
u.id AS user_id,
u.display_name,
u.email,
g.id AS group_id,
g.name AS group_name,
g.category,
s.email_notify
FROM users u
JOIN subscriptions s ON u.id = s.user_id
JOIN groups g ON s.group_id = g.id
WHERE u.is_active = TRUE AND g.is_active = TRUE;
-- 8.3 報告統計視圖
CREATE VIEW v_report_stats AS
SELECT
r.id AS report_id,
r.title,
r.report_date,
r.status,
g.name AS group_name,
COUNT(DISTINCT f.user_id) AS favorite_count,
COUNT(DISTINCT c.id) AS comment_count,
COUNT(DISTINCT s.user_id) AS subscriber_count
FROM reports r
JOIN groups g ON r.group_id = g.id
LEFT JOIN favorites f ON r.id = f.report_id
LEFT JOIN comments c ON r.id = c.report_id AND c.is_deleted = FALSE
LEFT JOIN subscriptions s ON g.id = s.group_id
GROUP BY r.id;
-- ============================================================
-- END OF SCHEMA
-- ============================================================

130
scripts/init_db_sqlite.py Normal file
View File

@@ -0,0 +1,130 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
資料庫初始化腳本
建立所有表格並插入預設資料
"""
import sys
import os
# Add project root to python path
sys.path.append(os.getcwd())
from app.db.session import init_db, SessionLocal
from app.models import Role, User, NewsSource, SourceType
from app.core.security import get_password_hash
from app.core.config import settings
def seed_default_data():
"""插入預設資料"""
db = SessionLocal()
try:
# 1. 建立角色
roles_data = [
{"code": "admin", "name": "系統管理員", "description": "LLM 設定、AD 整合、群組管理、用戶管理、系統設定"},
{"code": "editor", "name": "市場分析專員", "description": "新聞抓取管理、篩選編輯、報告發布、群組內容設定"},
{"code": "reader", "name": "讀者", "description": "訂閱群組、閱讀報告、留言討論、個人收藏、匯出 PDF"},
]
for role_data in roles_data:
existing = db.query(Role).filter(Role.code == role_data["code"]).first()
if not existing:
role = Role(**role_data)
db.add(role)
print(f" 建立角色: {role_data['name']}")
db.commit()
# 2. 建立管理員帳號
admin_role = db.query(Role).filter(Role.code == "admin").first()
existing_admin = db.query(User).filter(User.username == "admin").first()
if not existing_admin and admin_role:
admin_user = User(
username="admin",
password_hash=get_password_hash(settings.admin_password),
display_name="系統管理員",
email="admin@example.com",
auth_type="local",
role_id=admin_role.id,
is_active=True
)
db.add(admin_user)
print(f" 建立管理員帳號: admin (密碼: {settings.admin_password})")
db.commit()
# 3. 建立新聞來源
sources_data = [
{
"code": "digitimes",
"name": "Digitimes",
"base_url": "https://www.digitimes.com.tw",
"source_type": SourceType.SUBSCRIPTION,
"is_active": True,
},
{
"code": "udn",
"name": "經濟日報",
"base_url": "https://money.udn.com",
"source_type": SourceType.PUBLIC,
"is_active": True,
},
{
"code": "ctee",
"name": "工商時報",
"base_url": "https://ctee.com.tw",
"source_type": SourceType.PUBLIC,
"is_active": True,
},
]
for source_data in sources_data:
existing = db.query(NewsSource).filter(NewsSource.code == source_data["code"]).first()
if not existing:
source = NewsSource(**source_data)
db.add(source)
print(f" 建立新聞來源: {source_data['name']}")
db.commit()
print("預設資料插入完成!")
except Exception as e:
db.rollback()
print(f"插入預設資料失敗: {e}")
raise
finally:
db.close()
if __name__ == "__main__":
print("=" * 50)
print("每日報導 APP - 資料庫初始化")
print("=" * 50)
print("\n1. 建立資料庫表格...")
try:
init_db()
print("資料庫表格建立成功!")
except Exception as e:
print(f"建立表格失敗: {e}")
sys.exit(1)
print("\n2. 插入預設資料...")
try:
seed_default_data()
except Exception as e:
print(f"Error: {e}")
sys.exit(1)
print("\n" + "=" * 50)
print("初始化完成!")
print("=" * 50)
print("\n登入資訊:")
print(f" 帳號: admin")
print(f" 密碼: {settings.admin_password}")
print("\n啟動應用程式:")
print(" python run.py")
print("=" * 50)