#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Manual check of the Excel translation fix.

Runs three console-reported checks against a sample workbook:

1. what ``ExcelParser._should_translate`` / ``_has_cjk`` return for a few
   representative cell values,
2. which text segments ``ExcelParser.extract_text_segments`` returns, and
3. whether the first few segments have a Japanese entry in the
   ``dt_translation_cache`` table.

Nothing is asserted; the results are printed for a human to read.
"""

import os
import sys

# Put this script's directory on the import path so that ``app`` resolves
# when the script is executed directly.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

# Emit UTF-8 so the CJK text and check marks printed below do not depend on
# the console's default encoding. Guarded because a replaced stdout
# (e.g. io.StringIO) does not provide ``reconfigure``.
_reconfigure_stdout = getattr(sys.stdout, "reconfigure", None)
if callable(_reconfigure_stdout):
    _reconfigure_stdout(encoding='utf-8')

from pathlib import Path  # noqa: E402  (must follow the sys.path tweak)

from app.services.translation_service import ExcelParser  # noqa: E402

# Workbook used when the caller does not supply one.
_DEFAULT_EXCEL_DIR = Path(
    r"C:\Users\EGG\WORK\data\user_scrip\TOOL\Document_translator_V2\uploads\f0b78200-2c5e-41a4-bac8-1536f92529e9"
)
_DEFAULT_EXCEL_NAME = "original_panjit_f0b78200.xlsx"

# Text of cell A1, which was previously left untranslated.
_A1_CONTENT = "製程"

# Number of leading segments that are listed and looked up in the cache.
_SAMPLE_SIZE = 10


def _preview(text, limit):
    """Return *text* cut to *limit* characters, adding "..." only if cut."""
    if len(text) <= limit:
        return text
    return text[:limit] + "..."


def _report_should_translate(parser):
    """Print the parser's translate/CJK verdict for a fixed set of samples."""
    print("\n1. 測試修正後的should_translate函數")
    print("-" * 60)

    samples = [
        "製程",  # cell A1, previously not translated
        "主要特點",  # cell C1
        "優勢亮點",  # cell D1
        "AB",  # two Latin letters
        "123",  # digits only
        "工藝",  # two Chinese characters
        "Epoxy 膠黏(導電/導熱銀膠)",  # cell B3
    ]

    for text in samples:
        should_translate = parser._should_translate(text, 'auto')
        has_cjk = parser._has_cjk(text)
        print(f"'{text}': should_translate={should_translate}, has_cjk={has_cjk}, len={len(text)}")


def _report_extracted_segments(parser):
    """Print a summary of the extracted segments and return them."""
    print("\n2. 測試提取的文字片段")
    print("-" * 60)

    segments = parser.extract_text_segments()
    print(f"修正後提取到 {len(segments)} 個文字片段")

    # The fix is considered effective when the A1 text is now extracted.
    if _A1_CONTENT in segments:
        print(f"✅ A1內容 '{_A1_CONTENT}' 已被包含在提取列表中")
    else:
        print(f"❌ A1內容 '{_A1_CONTENT}' 仍未被包含在提取列表中")

    print("\n前10個提取片段:")
    for position, segment in enumerate(segments[:_SAMPLE_SIZE], start=1):
        # repr() keeps control characters and surrounding whitespace visible.
        print(f"  {position:2d}. {segment!r}")

    return segments


def _report_cache_lookup(segments, target_language='ja'):
    """Look up the leading segments in the translation cache and print hits.

    For each of the first ``_SAMPLE_SIZE`` segments the newest row (by
    ``created_at``) matching the text and *target_language* is read from
    ``dt_translation_cache``.
    """
    print("\n3. 測試翻譯快取映射邏輯(模擬)")
    print("-" * 60)

    # Imported lazily: building the application is only needed for this step.
    from app import create_app

    app = create_app()

    with app.app_context():
        from sqlalchemy import text as sql_text
        from app import db

        query = sql_text("""
            SELECT translated_text
            FROM dt_translation_cache
            WHERE source_text = :text AND target_language = :lang
            ORDER BY created_at DESC
            LIMIT 1
        """)

        sample = segments[:_SAMPLE_SIZE]
        tmap = {}
        found_count = 0

        print(f"查詢翻譯快取中的 {target_language} 翻譯...")

        for original_text in sample:
            row = db.session.execute(
                query, {'text': original_text, 'lang': target_language}
            ).fetchone()

            if row and row[0]:
                tmap[original_text] = row[0]
                print(f"✅ '{_preview(original_text, 20)}' -> '{_preview(row[0], 20)}'")
                found_count += 1
            else:
                print(f"❌ 未找到翻譯: '{_preview(original_text, 30)}'")

        print(f"\n翻譯映射結果: {found_count}/{len(sample)} 個片段找到翻譯")

        # A1 is the cell the fix targets, so report it explicitly.
        if _A1_CONTENT in tmap:
            print(f"✅ A1內容 '{_A1_CONTENT}' 的翻譯: '{tmap[_A1_CONTENT]}'")
        else:
            print(f"❌ A1內容 '{_A1_CONTENT}' 沒有找到翻譯")


def test_excel_translation_fix(original_file=None):
    """Run the Excel translation fix checks and print the results.

    Args:
        original_file: Path of the workbook to inspect. When omitted, the
            bundled sample path under the uploads directory is used.

    Returns:
        None. If the workbook does not exist, a message is printed and the
        remaining checks are skipped.
    """
    print("=" * 80)
    print("測試Excel翻譯修正效果")
    print("=" * 80)

    if original_file is None:
        original_file = _DEFAULT_EXCEL_DIR / _DEFAULT_EXCEL_NAME
    else:
        original_file = Path(original_file)

    if not original_file.exists():
        print(f"原始文件不存在: {original_file}")
        return

    parser = ExcelParser(str(original_file))

    _report_should_translate(parser)
    segments = _report_extracted_segments(parser)
    _report_cache_lookup(segments)

    print("\n" + "=" * 80)
    print("測試完成!")
    print("=" * 80)


if __name__ == "__main__":
    # An optional first command-line argument overrides the default workbook.
    test_excel_translation_fix(sys.argv[1] if len(sys.argv) > 1 else None)