Files
hbr-crawler/templates/index.html
DonaldFang 方士碩 f524713cb6 Initial commit: HBR 文章爬蟲專案
- Scrapy 爬蟲框架,爬取 HBR 繁體中文文章
- Flask Web 應用程式,提供文章查詢介面
- SQL Server 資料庫整合
- 自動化排程與郵件通知功能

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-12-03 17:19:56 +08:00

236 lines
11 KiB
HTML
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<!DOCTYPE html>
<html lang="zh-TW">
<head>
  <!-- The character encoding is declared first so the rest of the head is decoded correctly. -->
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>HBR 爬蟲系統 - 文章查詢與統計</title>
  <!-- The stylesheet URL is produced by the template engine via url_for on the static endpoint. -->
  <link rel="stylesheet" href="{{ url_for('static', filename='style.css') }}">
  <!-- Chart.js 3.9.1 from the jsDelivr CDN. It is a classic blocking script, so it is evaluated before any script that appears later in the document. -->
  <!-- NOTE(review): this third-party script has no integrity/crossorigin attributes; consider adding Subresource Integrity. -->
  <script src="https://cdn.jsdelivr.net/npm/chart.js@3.9.1/dist/chart.min.js"></script>
</head>
<body>
<div class="container">
<!-- Page header: application title plus the two global action buttons (refresh and run crawler). -->
<header>
  <h1>📚 HBR Taiwan 文章爬蟲系統</h1>
  <div class="header-actions">
    <!-- type="button" is explicit because a bare button defaults to submit and would submit any form it is ever placed in. -->
    <!-- Click handling is presumably attached by id from static/app.js; confirm there. -->
    <button id="refreshBtn" class="btn btn-primary" type="button">🔄 重新整理</button>
    <button id="runCrawlerBtn" class="btn btn-success" type="button">🚀 執行爬蟲</button>
  </div>
</header>
<!-- Tab navigation. Each data-tab value is the prefix of a panel id in this file ("dashboard" pairs with "dashboard-tab", "crawler-config" with "crawler-config-tab"). -->
<!-- The "active" class marks the initially selected tab; switching is presumably done by script (confirm in app.js). -->
<div class="tabs">
  <!-- type="button" is explicit so these never behave as submit buttons. -->
  <button class="tab-btn active" data-tab="dashboard" type="button">📊 儀表板</button>
  <button class="tab-btn" data-tab="crawler-config" type="button">⚙️ 爬蟲設定</button>
</div>
<!-- 儀表板頁籤 -->
<div id="dashboard-tab" class="tab-content active">
<!-- Statistics panel: four summary counters followed by two chart canvases. -->
<section class="stats-section">
  <h2>📊 統計資訊</h2>
  <!-- Each counter starts with the "-" placeholder; the real values are presumably written by script (confirm in app.js). -->
  <div class="stats-grid">
    <div class="stat-card">
      <div class="stat-label">文章總數</div>
      <div class="stat-value" id="totalArticles">-</div>
    </div>
    <div class="stat-card">
      <div class="stat-label">付費文章</div>
      <div class="stat-value" id="paywalledArticles">-</div>
    </div>
    <div class="stat-card">
      <div class="stat-label">免費文章</div>
      <div class="stat-value" id="freeArticles">-</div>
    </div>
    <div class="stat-card">
      <div class="stat-label">分類數量</div>
      <div class="stat-value" id="categoryCount">-</div>
    </div>
  </div>
  <!-- A canvas exposes nothing to assistive technology by itself, so each one is given role="img" and is named by its visible heading through aria-labelledby. -->
  <div class="charts-container">
    <div class="chart-box">
      <h3 id="categoryChartHeading">分類分布</h3>
      <canvas id="categoryChart" role="img" aria-labelledby="categoryChartHeading"></canvas>
    </div>
    <div class="chart-box">
      <h3 id="authorChartHeading">作者統計Top 10</h3>
      <canvas id="authorChart" role="img" aria-labelledby="authorChartHeading"></canvas>
    </div>
  </div>
</section>
<!-- Article search panel: keyword, category, tag, date-range and paywall-status filters plus search/reset buttons. -->
<section class="search-section">
  <h2>🔍 文章查詢</h2>
  <div class="search-form">
    <!-- Every label points at its control with for/id so the control has an accessible name and clicking the label focuses it. -->
    <div class="form-row">
      <div class="form-group">
        <label for="keyword">關鍵字</label>
        <input type="text" id="keyword" placeholder="搜尋標題、摘要、內容...">
      </div>
      <div class="form-group">
        <label for="category">分類</label>
        <!-- Only the empty-valued "all" option is static; further options are presumably added by script (confirm in app.js). -->
        <select id="category">
          <option value="">全部</option>
        </select>
      </div>
      <div class="form-group">
        <label for="tag">標籤</label>
        <input type="text" id="tag" placeholder="輸入標籤...">
      </div>
    </div>
    <div class="form-row">
      <div class="form-group">
        <label for="startDate">開始日期</label>
        <input type="date" id="startDate">
      </div>
      <div class="form-group">
        <label for="endDate">結束日期</label>
        <input type="date" id="endDate">
      </div>
      <div class="form-group">
        <label for="isPaywalled">付費狀態</label>
        <!-- Values: empty = all, 0 = free, 1 = paywalled. -->
        <select id="isPaywalled">
          <option value="">全部</option>
          <option value="0">免費</option>
          <option value="1">付費</option>
        </select>
      </div>
      <div class="form-group">
        <!-- type="button" is explicit so these never act as submit buttons. -->
        <button id="searchBtn" class="btn btn-primary" type="button">🔍 搜尋</button>
        <button id="resetBtn" class="btn btn-secondary" type="button">🔄 重置</button>
      </div>
    </div>
  </div>
</section>
<!-- Article list: heading with pagination summary, a loading indicator, the result container and the pager. -->
<section class="articles-section">
  <div class="articles-header">
    <h2>📄 文章列表</h2>
    <div class="pagination-info" id="paginationInfo"></div>
  </div>
  <!-- The three containers below ship empty (apart from the loading text); their content is presumably rendered by script (confirm in app.js). -->
  <div id="loading" class="loading">載入中...</div>
  <div id="articlesList" class="articles-list"></div>
  <div id="pagination" class="pagination"></div>
</section>
</div>
<!-- 爬蟲設定頁籤 -->
<div id="crawler-config-tab" class="tab-content">
<section class="config-section">
<h2>⚙️ 爬蟲設定</h2>
<div class="config-form">
<!-- Start URL settings: a list container plus a button for adding another URL. -->
<div class="config-group">
  <h3>起始 URL 設定</h3>
  <div class="url-list-container">
    <div id="urlList" class="url-list">
      <!-- The URL list is generated dynamically by JavaScript. -->
    </div>
    <!-- type="button" is explicit so this never acts as a submit button. -->
    <button id="addUrlBtn" class="btn btn-secondary" type="button"> 新增 URL</button>
  </div>
</div>
<!-- Crawl settings: download delay, maximum depth and concurrent request count. -->
<div class="config-group">
  <h3>爬取設定</h3>
  <div class="form-row">
    <!-- Each label is bound to its input with for/id so the input has an accessible name and the label is clickable. -->
    <div class="form-group">
      <label for="downloadDelay">下載延遲(秒)</label>
      <!-- Seconds; allowed range 0 to 10 in steps of 0.5, default 1. -->
      <input type="number" id="downloadDelay" min="0" max="10" step="0.5" value="1">
      <small>建議值1-3 秒(保守模式)</small>
    </div>
    <div class="form-group">
      <label for="maxDepth">最大深度</label>
      <!-- Allowed range 1 to 10, default 3. -->
      <input type="number" id="maxDepth" min="1" max="10" value="3">
      <small>爬取的最大深度層級</small>
    </div>
    <div class="form-group">
      <label for="concurrentRequests">並發請求數</label>
      <!-- Allowed range 1 to 32, default 16. -->
      <input type="number" id="concurrentRequests" min="1" max="32" value="16">
      <small>同時進行的請求數量</small>
    </div>
  </div>
</div>
<!-- Content filter settings: three checkboxes, all checked by default. -->
<!-- Each label wraps its checkbox, so the association is implicit and no for/id pairing is needed. -->
<div class="config-group">
  <h3>內容過濾設定</h3>
  <div class="form-row">
    <div class="form-group">
      <label>
        <input type="checkbox" id="skipPaywalled" checked>
        跳過付費文章內容
      </label>
      <small>僅標記為付費,不爬取內容</small>
    </div>
    <div class="form-group">
      <label>
        <input type="checkbox" id="followPagination" checked>
        追蹤分頁連結
      </label>
      <small>自動追蹤「下一頁」連結</small>
    </div>
    <div class="form-group">
      <label>
        <input type="checkbox" id="obeyRobotsTxt" checked>
        遵守 robots.txt
      </label>
      <small>遵守網站的 robots.txt 規則</small>
    </div>
  </div>
</div>
<!-- Advanced settings: CSS selectors for the article list, title, author and content. -->
<!-- Each value attribute holds the default comma-separated selector list; the placeholder shows a shorter example. -->
<div class="config-group">
  <h3>CSS 選擇器設定(進階)</h3>
  <!-- Each label is bound to its input with for/id so the input has an accessible name and the label is clickable. -->
  <div class="form-row">
    <div class="form-group">
      <label for="articleListSelector">文章列表選擇器</label>
      <input type="text" id="articleListSelector" value=".articleItem, article, .article-item, .post-item, .content-item" placeholder=".articleItem, article, .article-item">
      <small>用於識別文章列表的 CSS 選擇器(已優化為 HBR Taiwan 實際結構)</small>
    </div>
    <div class="form-group">
      <label for="titleSelector">標題選擇器</label>
      <input type="text" id="titleSelector" value="h1.articleTitle, h1.article-title, h1, .article-title, .post-title" placeholder="h1.articleTitle, h1, .article-title">
      <small>用於提取文章標題的 CSS 選擇器(已優化為 HBR Taiwan 實際結構)</small>
    </div>
  </div>
  <div class="form-row">
    <div class="form-group">
      <label for="authorSelector">作者選擇器</label>
      <input type="text" id="authorSelector" value=".authorName, .author, .byline, .writer, .author-name" placeholder=".authorName, .author, .byline">
      <small>用於提取作者的 CSS 選擇器(已優化為 HBR Taiwan 實際結構)</small>
    </div>
    <div class="form-group">
      <label for="contentSelector">內容選擇器</label>
      <input type="text" id="contentSelector" value=".articleContent, .article-content, .post-content, .content, .articleText" placeholder=".articleContent, .article-content, .post-content">
      <small>用於提取文章內容的 CSS 選擇器(已優化為 HBR Taiwan 實際結構)</small>
    </div>
  </div>
</div>
<!-- Configuration actions: save, load, reset to defaults and test. -->
<div class="config-actions">
  <!-- type="button" is explicit so none of these ever acts as a submit button. -->
  <button id="saveConfigBtn" class="btn btn-primary" type="button">💾 儲存設定</button>
  <button id="loadConfigBtn" class="btn btn-secondary" type="button">📂 載入設定</button>
  <button id="resetConfigBtn" class="btn btn-secondary" type="button">🔄 重置為預設值</button>
  <button id="testConfigBtn" class="btn btn-success" type="button">🧪 測試設定</button>
</div>
<!-- Empty status container. role="status" makes it a polite live region that already exists in the DOM, so text injected later is announced by screen readers. -->
<!-- Assumes script writes feedback messages into this element; confirm in app.js. -->
<div id="configStatus" class="config-status" role="status"></div>
</div>
</section>
</div>
</div>
<!-- Modal dialog: a close control and an empty container for article detail. -->
<!-- Showing/hiding and filling of the detail container are presumably handled by script (confirm in app.js). -->
<!-- NOTE(review): the close control is a span, so it is not keyboard-focusable; replacing it with a button needs matching changes in style.css and app.js, which are not visible here. -->
<div id="articleModal" class="modal">
  <div class="modal-content">
    <span class="close">&times;</span>
    <div id="articleDetail"></div>
  </div>
</div>
<!-- Application script, resolved through url_for on the static endpoint. It sits at the end of body, after all markup above and after the blocking Chart.js script in head. -->
<!-- NOTE(review): keep this ordering unless app.js is confirmed not to need the DOM or Chart.js at load time. -->
<script src="{{ url_for('static', filename='app.js') }}"></script>
</body>
</html>