Initial commit: HBR 文章爬蟲專案
- Scrapy 爬蟲框架,爬取 HBR 繁體中文文章
- Flask Web 應用程式,提供文章查詢介面
- SQL Server 資料庫整合
- 自動化排程與郵件通知功能

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
235
templates/index.html
Normal file
235
templates/index.html
Normal file
@@ -0,0 +1,235 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="zh-TW">
|
||||
<head>
    <!-- Document metadata: character encoding, responsive viewport and page title. -->
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>HBR 爬蟲系統 - 文章查詢與統計</title>
    <!-- Site stylesheet; the URL is produced by the template's url_for() call. -->
    <link rel="stylesheet" href="{{ url_for('static', filename='style.css') }}">
    <!-- Chart.js 3.9.1 from the jsDelivr CDN, loaded as a blocking script in the head. -->
    <!-- NOTE(review): this third-party script has no integrity/crossorigin (SRI) attributes; consider adding a verified hash. -->
    <script src="https://cdn.jsdelivr.net/npm/chart.js@3.9.1/dist/chart.min.js"></script>
</head>
|
||||
<body>
|
||||
<div class="container">
|
||||
<header>
    <!-- Page header: the single top-level heading plus two page-wide action buttons. -->
    <h1>📚 HBR Taiwan 文章爬蟲系統</h1>
    <div class="header-actions">
        <!-- Explicit type="button" so these can never act as submit buttons if a form is wrapped around them later. -->
        <!-- Click handlers are not defined in this template; presumably bound by id in app.js (confirm). -->
        <button type="button" id="refreshBtn" class="btn btn-primary">🔄 重新整理</button>
        <button type="button" id="runCrawlerBtn" class="btn btn-success">🚀 執行爬蟲</button>
    </div>
</header>
|
||||
|
||||
<!-- Tab navigation: each button's data-tab value matches the id prefix of a "*-tab" content container below. -->
<div class="tabs">
    <!-- The "active" class marks the initially selected tab (dashboard). -->
    <button type="button" class="tab-btn active" data-tab="dashboard">📊 儀表板</button>
    <button type="button" class="tab-btn" data-tab="crawler-config">⚙️ 爬蟲設定</button>
</div>
|
||||
|
||||
<!-- 儀表板頁籤 -->
|
||||
<div id="dashboard-tab" class="tab-content active">
|
||||
<!-- Statistics panel: four summary counters followed by two chart canvases. -->
<section class="stats-section">
    <h2>📊 統計資訊</h2>
    <!-- Each stat-value starts as a "-" placeholder; presumably replaced with real numbers by app.js (confirm). -->
    <div class="stats-grid">
        <div class="stat-card">
            <div class="stat-label">文章總數</div>
            <div class="stat-value" id="totalArticles">-</div>
        </div>
        <div class="stat-card">
            <div class="stat-label">付費文章</div>
            <div class="stat-value" id="paywalledArticles">-</div>
        </div>
        <div class="stat-card">
            <div class="stat-label">免費文章</div>
            <div class="stat-value" id="freeArticles">-</div>
        </div>
        <div class="stat-card">
            <div class="stat-label">分類數量</div>
            <div class="stat-value" id="categoryCount">-</div>
        </div>
    </div>

    <!-- Chart canvases. A bare <canvas> exposes nothing to assistive technology, so each one
         gets role="img" and an aria-label that repeats its visible heading. -->
    <div class="charts-container">
        <div class="chart-box">
            <h3>分類分布</h3>
            <canvas id="categoryChart" role="img" aria-label="分類分布"></canvas>
        </div>
        <div class="chart-box">
            <h3>作者統計(Top 10)</h3>
            <canvas id="authorChart" role="img" aria-label="作者統計(Top 10)"></canvas>
        </div>
    </div>
</section>
|
||||
|
||||
<!-- Search panel: filter controls for the article list. There is no <form> element here,
     so nothing is submitted natively; the controls are presumably read by app.js (confirm). -->
<section class="search-section">
    <h2>🔍 文章查詢</h2>
    <div class="search-form">
        <!-- Row 1: keyword, category and tag filters. Every label is tied to its control via for/id. -->
        <div class="form-row">
            <div class="form-group">
                <label for="keyword">關鍵字</label>
                <input type="text" id="keyword" placeholder="搜尋標題、摘要、內容...">
            </div>
            <div class="form-group">
                <label for="category">分類</label>
                <!-- Only the "all" option is static; further options are presumably added at runtime (confirm). -->
                <select id="category">
                    <option value="">全部</option>
                </select>
            </div>
            <div class="form-group">
                <label for="tag">標籤</label>
                <input type="text" id="tag" placeholder="輸入標籤...">
            </div>
        </div>
        <!-- Row 2: date range, paywall status and the search/reset actions. -->
        <div class="form-row">
            <div class="form-group">
                <label for="startDate">開始日期</label>
                <input type="date" id="startDate">
            </div>
            <div class="form-group">
                <label for="endDate">結束日期</label>
                <input type="date" id="endDate">
            </div>
            <div class="form-group">
                <label for="isPaywalled">付費狀態</label>
                <!-- Empty value = no filter, "0" = free, "1" = paywalled. -->
                <select id="isPaywalled">
                    <option value="">全部</option>
                    <option value="0">免費</option>
                    <option value="1">付費</option>
                </select>
            </div>
            <div class="form-group">
                <!-- Explicit type="button": these trigger script actions, not a form submission. -->
                <button type="button" id="searchBtn" class="btn btn-primary">🔍 搜尋</button>
                <button type="button" id="resetBtn" class="btn btn-secondary">🔄 重置</button>
            </div>
        </div>
    </div>
</section>
|
||||
|
||||
<!-- Article list: heading, paging summary, loading indicator, result container and pager. -->
<section class="articles-section">
    <div class="articles-header">
        <h2>📄 文章列表</h2>
        <!-- Empty in the template; presumably filled with a paging summary by app.js (confirm). -->
        <div class="pagination-info" id="paginationInfo"></div>
    </div>
    <!-- Static "loading" text; its visibility is presumably toggled by script/CSS (confirm). -->
    <div id="loading" class="loading">載入中...</div>
    <!-- Empty containers for the rendered articles and the pagination controls. -->
    <div id="articlesList" class="articles-list"></div>
    <div id="pagination" class="pagination"></div>
</section>
|
||||
</div>
|
||||
|
||||
<!-- 爬蟲設定頁籤 -->
|
||||
<div id="crawler-config-tab" class="tab-content">
|
||||
<section class="config-section">
|
||||
<h2>⚙️ 爬蟲設定</h2>
|
||||
<div class="config-form">
|
||||
<div class="config-group">
    <!-- Start-URL settings: a script-populated list plus an "add URL" button. -->
    <h3>起始 URL 設定</h3>
    <div class="url-list-container">
        <div id="urlList" class="url-list">
            <!-- The URL list is generated dynamically by JavaScript -->
        </div>
        <!-- Explicit type="button": this triggers a script action, not a form submission. -->
        <button type="button" id="addUrlBtn" class="btn btn-secondary">➕ 新增 URL</button>
    </div>
</div>
|
||||
|
||||
<div class="config-group">
    <!-- Crawl settings: three numeric inputs with native min/max validation.
         Every label is tied to its input via for/id. -->
    <h3>爬取設定</h3>
    <div class="form-row">
        <div class="form-group">
            <label for="downloadDelay">下載延遲(秒)</label>
            <!-- 0-10 in steps of 0.5; default 1. -->
            <input type="number" id="downloadDelay" min="0" max="10" step="0.5" value="1">
            <small>建議值:1-3 秒(保守模式)</small>
        </div>
        <div class="form-group">
            <label for="maxDepth">最大深度</label>
            <!-- 1-10; default 3. -->
            <input type="number" id="maxDepth" min="1" max="10" value="3">
            <small>爬取的最大深度層級</small>
        </div>
        <div class="form-group">
            <label for="concurrentRequests">並發請求數</label>
            <!-- 1-32; default 16. -->
            <input type="number" id="concurrentRequests" min="1" max="32" value="16">
            <small>同時進行的請求數量</small>
        </div>
    </div>
</div>
|
||||
|
||||
<div class="config-group">
    <!-- Content-filter settings: three checkboxes, all checked by default.
         Each checkbox is wrapped by its <label>, so no for/id pairing is needed. -->
    <h3>內容過濾設定</h3>
    <div class="form-row">
        <div class="form-group">
            <label>
                <input type="checkbox" id="skipPaywalled" checked>
                跳過付費文章內容
            </label>
            <small>僅標記為付費,不爬取內容</small>
        </div>
        <div class="form-group">
            <label>
                <input type="checkbox" id="followPagination" checked>
                追蹤分頁連結
            </label>
            <small>自動追蹤「下一頁」連結</small>
        </div>
        <div class="form-group">
            <label>
                <input type="checkbox" id="obeyRobotsTxt" checked>
                遵守 robots.txt
            </label>
            <small>遵守網站的 robots.txt 規則</small>
        </div>
    </div>
</div>
|
||||
|
||||
<div class="config-group">
    <!-- Advanced CSS-selector settings. Each text input is pre-filled with a comma-separated
         selector list. Every label is tied to its input via for/id. -->
    <h3>CSS 選擇器設定(進階)</h3>
    <!-- Row 1: article-list and title selectors. -->
    <div class="form-row">
        <div class="form-group">
            <label for="articleListSelector">文章列表選擇器</label>
            <input type="text" id="articleListSelector" value=".articleItem, article, .article-item, .post-item, .content-item" placeholder=".articleItem, article, .article-item">
            <small>用於識別文章列表的 CSS 選擇器(已優化為 HBR Taiwan 實際結構)</small>
        </div>
        <div class="form-group">
            <label for="titleSelector">標題選擇器</label>
            <input type="text" id="titleSelector" value="h1.articleTitle, h1.article-title, h1, .article-title, .post-title" placeholder="h1.articleTitle, h1, .article-title">
            <small>用於提取文章標題的 CSS 選擇器(已優化為 HBR Taiwan 實際結構)</small>
        </div>
    </div>
    <!-- Row 2: author and content selectors. -->
    <div class="form-row">
        <div class="form-group">
            <label for="authorSelector">作者選擇器</label>
            <input type="text" id="authorSelector" value=".authorName, .author, .byline, .writer, .author-name" placeholder=".authorName, .author, .byline">
            <small>用於提取作者的 CSS 選擇器(已優化為 HBR Taiwan 實際結構)</small>
        </div>
        <div class="form-group">
            <label for="contentSelector">內容選擇器</label>
            <input type="text" id="contentSelector" value=".articleContent, .article-content, .post-content, .content, .articleText" placeholder=".articleContent, .article-content, .post-content">
            <small>用於提取文章內容的 CSS 選擇器(已優化為 HBR Taiwan 實際結構)</small>
        </div>
    </div>
</div>
|
||||
|
||||
<div class="config-actions">
    <!-- Configuration actions: save, load, reset to defaults, test.
         Explicit type="button": these trigger script actions, not a form submission. -->
    <button type="button" id="saveConfigBtn" class="btn btn-primary">💾 儲存設定</button>
    <button type="button" id="loadConfigBtn" class="btn btn-secondary">📂 載入設定</button>
    <button type="button" id="resetConfigBtn" class="btn btn-secondary">🔄 重置為預設值</button>
    <button type="button" id="testConfigBtn" class="btn btn-success">🧪 測試設定</button>
</div>

<!-- Empty status container; presumably filled with result messages by app.js (confirm).
     role="status" makes it a polite live region, so injected text is announced by screen readers. -->
<div id="configStatus" class="config-status" role="status"></div>
|
||||
</div>
|
||||
</section>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Modal dialog: a container with a close control and an empty article-detail placeholder. -->
<div id="articleModal" class="modal">
    <div class="modal-content">
        <!-- NOTE(review): the close control is a <span>, so it is not keyboard-focusable. Consider
             <button type="button" class="close" aria-label="..."> once the .close selectors in
             style.css / app.js have been checked. -->
        <span class="close">×</span>
        <!-- Empty in the template; presumably filled with the selected article by app.js (confirm). -->
        <div id="articleDetail"></div>
    </div>
</div>
|
||||
|
||||
<!-- Application script; the URL is produced by url_for(). It is loaded at the end of <body>, after all the markup above. -->
<script src="{{ url_for('static', filename='app.js') }}"></script>
|
||||
</body>
|
||||
</html>
|
||||
|
||||
|
||||
Reference in New Issue
Block a user