Security: 移除硬編碼的資料庫帳密
Some checks failed
weekly-crawl / crawl-and-mail (push) Has been cancelled

- database.py: 改從環境變數讀取 DB 設定,新增必要變數檢查
- settings.py: 改從環境變數讀取 DB 設定
- test_db_connection.py: 改從環境變數讀取 DB 設定

所有機敏資料現在必須透過 .env 檔案或系統環境變數設定,
請參考 .env.example 取得設定範本。

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
2025-12-03 19:03:50 +08:00
parent 5b4041afd5
commit 719ba8c133
3 changed files with 100 additions and 55 deletions

View File

@@ -200,33 +200,64 @@ class DatabaseManager:
def get_database_manager() -> DatabaseManager:
    """
    Build a DatabaseManager from Scrapy settings and environment variables.

    The .env file is loaded first via ``load_dotenv()``. For each value the
    Scrapy project setting is used when it can be read and is non-empty;
    otherwise the environment variable of the same name is used. If the
    Scrapy settings cannot be read at all, only the environment is used.
    No credentials are hard-coded: a missing required value is an error
    rather than a silent fallback.

    Environment variables:
        DB_HOST: database host (required)
        DB_PORT: database port (optional, defaults to 3306)
        DB_USER: database user name (required)
        DB_PASSWORD: database password (required)
        DB_NAME: database name (required)

    Returns:
        DatabaseManager: manager built from the resolved connection values.

    Raises:
        ValueError: if a required value is missing, or if DB_PORT is set to
            something that is not an integer.
    """
    import os

    from dotenv import load_dotenv

    load_dotenv()

    # Order matters: it is the order in which missing names are reported.
    required = ('DB_HOST', 'DB_USER', 'DB_PASSWORD', 'DB_NAME')

    # An empty DB_PORT (e.g. "DB_PORT=" in .env) counts as "not set" instead
    # of crashing inside int('').
    raw_port = os.environ.get('DB_PORT')
    try:
        env_port = int(raw_port) if raw_port else 3306
    except ValueError as exc:
        raise ValueError(
            f"環境變數 DB_PORT 必須是整數,目前的值為: {raw_port!r}"
        ) from exc

    try:
        from scrapy.utils.project import get_project_settings

        settings = get_project_settings()
        overrides = {name: settings.get(name) for name in required}
        port = settings.getint('DB_PORT', env_port)
    except Exception:
        # Scrapy is optional here (missing package, no project, bad setting):
        # fall back to the environment for every value. `Exception` rather
        # than a bare `except` so Ctrl-C / SystemExit still propagate.
        overrides = {}
        port = env_port

    resolved = {
        name: overrides.get(name) or os.environ.get(name)
        for name in required
    }

    # Fail loudly on anything that is unset or empty.
    missing_vars = [name for name in required if not resolved[name]]
    if missing_vars:
        raise ValueError(
            f"缺少必要的環境變數: {', '.join(missing_vars)}\n"
            "請在 .env 檔案或系統環境變數中設定這些值。\n"
            "參考 .env.example 檔案取得設定範本。"
        )

    return DatabaseManager(
        resolved['DB_HOST'],
        port,
        resolved['DB_USER'],
        resolved['DB_PASSWORD'],
        resolved['DB_NAME'],
    )

View File

@@ -60,12 +60,16 @@ ITEM_PIPELINES = {
'hbr_crawler.pipelines.DatabasePipeline': 400,
}
# Database settings are read from the environment so that no credentials are
# kept in source control. load_dotenv() loads a local .env file first; see
# .env.example for a template.
import os
from dotenv import load_dotenv

load_dotenv()

DB_HOST = os.environ.get('DB_HOST')
# An unset or empty DB_PORT (e.g. "DB_PORT=" in .env) falls back to 3306.
DB_PORT = int(os.environ.get('DB_PORT') or 3306)
DB_USER = os.environ.get('DB_USER')
DB_PASSWORD = os.environ.get('DB_PASSWORD')
DB_NAME = os.environ.get('DB_NAME')
# Enable and configure the AutoThrottle extension (disabled by default)
# See https://docs.scrapy.org/en/latest/topics/autothrottle.html