From 719ba8c1333daee6da1e1fc236c51908a76640e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?DonaldFang=20=E6=96=B9=E5=A3=AB=E7=A2=A9?= Date: Wed, 3 Dec 2025 19:03:50 +0800 Subject: [PATCH] =?UTF-8?q?Security:=20=E7=A7=BB=E9=99=A4=E7=A1=AC?= =?UTF-8?q?=E7=B7=A8=E7=A2=BC=E7=9A=84=E8=B3=87=E6=96=99=E5=BA=AB=E5=B8=B3?= =?UTF-8?q?=E5=AF=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - database.py: 改從環境變數讀取 DB 設定,新增必要變數檢查 - settings.py: 改從環境變數讀取 DB 設定 - test_db_connection.py: 改從環境變數讀取 DB 設定 所有機敏資料現在必須透過 .env 檔案設定, 參考 .env.example 取得設定範本。 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- hbr_crawler/hbr_crawler/database.py | 63 ++++++++++++++++++------ hbr_crawler/hbr_crawler/settings.py | 16 +++--- test_db_connection.py | 76 ++++++++++++++++------------- 3 files changed, 100 insertions(+), 55 deletions(-) diff --git a/hbr_crawler/hbr_crawler/database.py b/hbr_crawler/hbr_crawler/database.py index 6cfe15c..6d2b515 100644 --- a/hbr_crawler/hbr_crawler/database.py +++ b/hbr_crawler/hbr_crawler/database.py @@ -200,33 +200,64 @@ class DatabaseManager: def get_database_manager() -> DatabaseManager: """ 從環境變數或設定檔取得資料庫連線資訊,建立 DatabaseManager 實例 - + 優先順序: 1. 環境變數 2. Scrapy settings(如果可用) - 3. 預設值 - + + 環境變數: + DB_HOST: 資料庫主機位址 (必要) + DB_PORT: 資料庫埠號 (預設: 3306) + DB_USER: 資料庫使用者名稱 (必要) + DB_PASSWORD: 資料庫密碼 (必要) + DB_NAME: 資料庫名稱 (必要) + Returns: DatabaseManager: 資料庫管理物件 + + Raises: + ValueError: 當必要的環境變數未設定時 """ import os - + from dotenv import load_dotenv + + # 載入 .env 檔案 + load_dotenv() + # 嘗試從 Scrapy settings 取得設定 try: from scrapy.utils.project import get_project_settings settings = get_project_settings() - host = settings.get('DB_HOST', os.environ.get('DB_HOST', 'mysql.theaken.com')) - port = settings.getint('DB_PORT', int(os.environ.get('DB_PORT', 33306))) - user = settings.get('DB_USER', os.environ.get('DB_USER', 'A101')) - password = settings.get('DB_PASSWORD', os.environ.get('DB_PASSWORD', 'Aa123456')) - database = settings.get('DB_NAME', os.environ.get('DB_NAME', 'db_A101')) + host = settings.get('DB_HOST') or os.environ.get('DB_HOST') + port = settings.getint('DB_PORT', int(os.environ.get('DB_PORT', 3306))) + user = settings.get('DB_USER') or os.environ.get('DB_USER') + password = settings.get('DB_PASSWORD') or os.environ.get('DB_PASSWORD') + database = settings.get('DB_NAME') or os.environ.get('DB_NAME') except: - # 如果無法取得 Scrapy settings,使用環境變數或預設值 - host = os.environ.get('DB_HOST', 'mysql.theaken.com') - port = int(os.environ.get('DB_PORT', 33306)) - user = os.environ.get('DB_USER', 'A101') - password = os.environ.get('DB_PASSWORD', 'Aa123456') - database = os.environ.get('DB_NAME', 'db_A101') - + # 如果無法取得 Scrapy settings,使用環境變數 + host = os.environ.get('DB_HOST') + port = int(os.environ.get('DB_PORT', 3306)) + user = os.environ.get('DB_USER') + password = os.environ.get('DB_PASSWORD') + database = os.environ.get('DB_NAME') + + # 檢查必要的設定 + missing_vars = [] + if not host: + missing_vars.append('DB_HOST') + if not user: + missing_vars.append('DB_USER') + if not password: + missing_vars.append('DB_PASSWORD') + if not database: + missing_vars.append('DB_NAME') + + if missing_vars: + raise ValueError( + f"缺少必要的環境變數: {', '.join(missing_vars)}\n" + f"請在 .env 檔案或系統環境變數中設定這些值。\n" + f"參考 .env.example 檔案取得設定範本。" + ) + return DatabaseManager(host, port, user, password, database) diff --git a/hbr_crawler/hbr_crawler/settings.py b/hbr_crawler/hbr_crawler/settings.py index 1cdd5d1..7c8daf8 100644 --- a/hbr_crawler/hbr_crawler/settings.py +++ b/hbr_crawler/hbr_crawler/settings.py @@ -60,12 +60,16 @@ ITEM_PIPELINES = { 'hbr_crawler.pipelines.DatabasePipeline': 400, } -# 資料庫設定 -DB_HOST = 'mysql.theaken.com' -DB_PORT = 33306 -DB_USER = 'A101' -DB_PASSWORD = 'Aa123456' -DB_NAME = 'db_A101' +# 資料庫設定(從環境變數讀取) +import os +from dotenv import load_dotenv +load_dotenv() + +DB_HOST = os.environ.get('DB_HOST') +DB_PORT = int(os.environ.get('DB_PORT', 3306)) +DB_USER = os.environ.get('DB_USER') +DB_PASSWORD = os.environ.get('DB_PASSWORD') +DB_NAME = os.environ.get('DB_NAME') # Enable and configure the AutoThrottle extension (disabled by default) # See https://docs.scrapy.org/en/latest/topics/autothrottle.html diff --git a/test_db_connection.py b/test_db_connection.py index 98bada9..88110ae 100644 --- a/test_db_connection.py +++ b/test_db_connection.py @@ -20,24 +20,35 @@ logger = logging.getLogger(__name__) project_root = Path(__file__).parent sys.path.insert(0, str(project_root)) +from dotenv import load_dotenv from hbr_crawler.hbr_crawler.database import DatabaseManager, get_database_manager -# 資料庫連線資訊 +# 載入 .env 檔案 +load_dotenv() + +# 資料庫連線資訊(從環境變數讀取) DB_CONFIG = { - 'host': 'mysql.theaken.com', - 'port': 33306, - 'user': 'A101', - 'password': 'Aa123456', - 'database': 'db_A101' + 'host': os.environ.get('DB_HOST'), + 'port': int(os.environ.get('DB_PORT', 3306)), + 'user': os.environ.get('DB_USER'), + 'password': os.environ.get('DB_PASSWORD'), + 'database': os.environ.get('DB_NAME') } +# 檢查必要的環境變數 +missing_vars = [k for k, v in DB_CONFIG.items() if v is None and k != 'port'] +if missing_vars: + print(f"錯誤: 缺少必要的環境變數: {', '.join(['DB_' + k.upper() for k in missing_vars])}") + print("請在 .env 檔案中設定這些值,參考 .env.example") + sys.exit(1) + def test_basic_connection(): """測試基本連線(不指定資料庫)""" print("\n" + "="*50) print("測試 1: 基本資料庫連線(不指定資料庫)") print("="*50) - + db_manager = DatabaseManager( host=DB_CONFIG['host'], port=DB_CONFIG['port'], @@ -45,7 +56,7 @@ def test_basic_connection(): password=DB_CONFIG['password'], database=None ) - + if db_manager.test_connection(): print("✓ 基本連線測試成功") return True @@ -57,9 +68,9 @@ def test_basic_connection(): def create_database(): """建立 HBR_scraper 資料庫(如果需要)""" print("\n" + "="*50) - print("測試 2: 檢查資料庫連線(使用現有資料庫 db_A101)") + print("測試 2: 檢查資料庫連線(使用現有資料庫)") print("="*50) - + db_manager = DatabaseManager( host=DB_CONFIG['host'], port=DB_CONFIG['port'], @@ -67,7 +78,7 @@ def create_database(): password=DB_CONFIG['password'], database=None ) - + # 嘗試建立資料庫(可能需要管理員權限) try: if db_manager.create_database('HBR_scraper'): @@ -85,11 +96,11 @@ def create_database(): def test_database_connection(): - """測試連接到 db_A101 資料庫""" + """測試連接到指定資料庫""" print("\n" + "="*50) - print("測試 3: 連接到 db_A101 資料庫") + print(f"測試 3: 連接到 {DB_CONFIG['database']} 資料庫") print("="*50) - + db_manager = DatabaseManager( host=DB_CONFIG['host'], port=DB_CONFIG['port'], @@ -97,7 +108,7 @@ def test_database_connection(): password=DB_CONFIG['password'], database=DB_CONFIG['database'] ) - + if db_manager.test_connection(DB_CONFIG['database']): print("✓ 資料庫連線測試成功") return True @@ -111,7 +122,7 @@ def create_tables(): print("\n" + "="*50) print("測試 4: 建立資料表") print("="*50) - + db_manager = DatabaseManager( host=DB_CONFIG['host'], port=DB_CONFIG['port'], @@ -119,13 +130,13 @@ def create_tables(): password=DB_CONFIG['password'], database=DB_CONFIG['database'] ) - + sql_file = project_root / 'create_tables.sql' - + if not sql_file.exists(): print(f"✗ SQL 檔案不存在: {sql_file}") return False - + if db_manager.execute_sql_file(str(sql_file), DB_CONFIG['database']): print("✓ 資料表建立成功") return True @@ -139,7 +150,7 @@ def verify_tables(): print("\n" + "="*50) print("測試 5: 驗證資料表") print("="*50) - + db_manager = DatabaseManager( host=DB_CONFIG['host'], port=DB_CONFIG['port'], @@ -147,26 +158,26 @@ def verify_tables(): password=DB_CONFIG['password'], database=DB_CONFIG['database'] ) - + expected_tables = ['articles', 'tags', 'article_tags'] - + try: tables = db_manager.execute_query( "SHOW TABLES", database=DB_CONFIG['database'] ) - + # 取得資料表名稱列表 table_names = [list(table.values())[0] for table in tables] - + print(f"找到 {len(table_names)} 個資料表: {', '.join(table_names)}") - + for table in expected_tables: if table in table_names: print(f"✓ 資料表 {table} 存在") else: print(f"✗ 資料表 {table} 不存在") - + return all(table in table_names for table in expected_tables) except Exception as e: print(f"✗ 驗證資料表失敗: {e}") @@ -178,27 +189,27 @@ def main(): print("\n" + "="*60) print("HBR 爬蟲系統 - 資料庫連線測試") print("="*60) - + results = [] - + # 執行測試 results.append(("基本連線", test_basic_connection())) results.append(("建立資料庫", create_database())) results.append(("資料庫連線", test_database_connection())) results.append(("建立資料表", create_tables())) results.append(("驗證資料表", verify_tables())) - + # 顯示測試結果摘要 print("\n" + "="*60) print("測試結果摘要") print("="*60) - + for test_name, result in results: status = "✓ 通過" if result else "✗ 失敗" print(f"{test_name}: {status}") - + all_passed = all(result for _, result in results) - + if all_passed: print("\n✓ 所有測試通過!資料庫設定完成。") return 0 @@ -209,4 +220,3 @@ def main(): if __name__ == '__main__': sys.exit(main()) -