#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Analyze the latest Excel test result - check whether the fix really took effect.
"""

import contextlib
import sys
import os

# Make the project root importable so that `app` resolves when this file is
# run directly as a script.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

# Set stdout encoding to UTF-8 (the report prints CJK text and emoji).
sys.stdout.reconfigure(encoding='utf-8')

from pathlib import Path
import openpyxl
from app.services.translation_service import ExcelParser

# Identifier of the upload being analyzed and the directory holding its files.
TEST_UUID = "185bb457-b703-4e98-94a2-fde072b895c4"
UPLOADS_DIR = Path(r"C:\Users\EGG\WORK\data\user_scrip\TOOL\Document_translator_V2\uploads")

# Text expected in cell A1 of the original workbook; tracked through every step.
A1_CONTENT = "製程"
# Target language used when querying the translation cache.
TARGET_LANGUAGE = 'ja'
# Only the first N extracted segments are looked up in the cache.
CACHE_SAMPLE_SIZE = 10
# Cells (besides A1) compared between the original and translated workbooks.
IMPORTANT_CELLS = ['B1', 'C1', 'D1', 'A2', 'B2', 'C2']


def _print_section(title):
    """Print a numbered section title followed by a dashed rule."""
    print(f"\n{title}")
    print("-" * 60)


def _report_should_translate(parser):
    """Print what ``parser._should_translate`` decides for a few sample texts."""
    samples = [
        ("製程", "A1儲存格"),
        ("主要特點", "標題文字"),
        ("AB", "2個英文字母"),
        ("123", "純數字"),
        ("工藝", "2個中文字符"),
        ("Epoxy 膠黏(導電/導熱銀膠)", "複合文字"),
    ]
    for text, desc in samples:
        should_translate = parser._should_translate(text, 'auto')
        has_cjk = parser._has_cjk(text)
        # Displayed for reference only: 2 for CJK text, 3 otherwise.
        # NOTE(review): presumably mirrors the threshold used inside
        # ExcelParser._should_translate - confirm against that class.
        min_length = 2 if has_cjk else 3
        print(f" '{text}' ({desc}):")
        print(f" 長度: {len(text)}, CJK: {has_cjk}, 最小長度: {min_length}")
        print(f" 應翻譯: {should_translate}")
        print()


def _report_segments(parser):
    """Print every extracted text segment and return the segment list."""
    segments = parser.extract_text_segments()
    print(f"✅ 總共提取 {len(segments)} 個文字片段")

    # Special check: was the A1 text picked up at all?
    if A1_CONTENT in segments:
        print(f"✅ A1內容 '{A1_CONTENT}' 已被提取")
        index = segments.index(A1_CONTENT)
        print(f" 在列表中的位置: 第{index+1}個")
    else:
        print(f"❌ A1內容 '{A1_CONTENT}' 仍未被提取")

    # Dump all segments; repr() keeps control characters visible.
    print("\n 所有提取的片段:")
    for position, segment in enumerate(segments, start=1):
        print(f" {position:2d}. {segment!r}")
        if segment == A1_CONTENT:
            print(" ⬆️ 這是A1的內容!")
    return segments


def _load_values_workbook(path):
    """Load *path* with ``data_only=True``; return ``None`` if that fails.

    This load is best-effort: the displayed A1 value is optional in the
    report, so a failure is reported and the analysis continues without it.
    """
    try:
        return openpyxl.load_workbook(str(path), data_only=True)
    except Exception as exc:  # best-effort boundary; never abort the report
        print(f" ⚠️ 無法以 data_only=True 載入原始文件: {exc}")
        return None


def _report_a1(wb_orig, wb_trans, wb_orig_vals):
    """Compare cell A1 of the active sheets and print whether it was translated.

    ``wb_orig_vals`` may be ``None``; in that case the displayed value line
    is omitted.
    """
    a1_orig = wb_orig.active['A1'].value
    a1_trans = wb_trans.active['A1'].value

    print(f" A1原始值: {a1_orig!r}")
    if wb_orig_vals is not None:
        print(f" A1顯示值: {wb_orig_vals.active['A1'].value!r}")
    print(f" A1翻譯值: {a1_trans!r}")

    # A translated cell is expected to hold "original + newline + translation".
    if isinstance(a1_trans, str) and '\n' in a1_trans:
        # The string contains a newline, so split() yields at least two parts.
        lines = a1_trans.split('\n')
        print(" ✅ A1已翻譯!格式: 原文+換行+譯文")
        print(f" 原文行: {lines[0]!r}")
        print(f" 譯文行: {lines[1]!r}")
    elif a1_orig == a1_trans:
        print(" ❌ A1未翻譯 - 內容相同")
    else:
        print(" ⚠️ A1內容有變化但格式不明")


def _report_important_cells(wb_orig, wb_trans):
    """Print original vs. translated value for each cell in IMPORTANT_CELLS."""
    for cell_name in IMPORTANT_CELLS:
        orig_val = wb_orig.active[cell_name].value
        trans_val = wb_trans.active[cell_name].value

        # Only cells with (truthy) content in the original are reported.
        if not orig_val:
            continue

        print(f"\n {cell_name}儲存格:")
        print(f" 原始: {orig_val!r}")
        print(f" 翻譯: {trans_val!r}")

        if isinstance(trans_val, str) and '\n' in trans_val:
            lines = trans_val.split('\n')
            print(" 狀態: ✅ 已翻譯 (雙行格式)")
            print(f" 原文: {lines[0]!r}")
            print(f" 譯文: {lines[1]!r}")
        elif orig_val == trans_val:
            print(" 狀態: ❌ 未翻譯")
        else:
            print(" 狀態: ⚠️ 內容有變化")


def _report_translation_cache(segments):
    """Query ``dt_translation_cache`` for the A1 text and the first segments.

    The application imports are deferred to this point, as in the original
    script, so the earlier report sections do not need the Flask app.
    """
    from app import create_app

    app = create_app()
    with app.app_context():
        from sqlalchemy import text as sql_text
        from app import db

        print(f"查詢 '{A1_CONTENT}' 在翻譯快取中的狀況...")

        # Exact-match lookup for the A1 text (three most recent rows).
        result = db.session.execute(sql_text("""
            SELECT source_text, translated_text, created_at
            FROM dt_translation_cache
            WHERE source_text = :text AND target_language = :lang
            ORDER BY created_at DESC
            LIMIT 3
        """), {'text': A1_CONTENT, 'lang': TARGET_LANGUAGE})

        rows = result.fetchall()
        if rows:
            print(f"✅ 找到 {len(rows)} 筆精確匹配的翻譯記錄:")
            for position, (src, trans, created_at) in enumerate(rows, start=1):
                print(f" {position}. 原文: {src!r}")
                print(f" 譯文: {trans!r}")
                print(f" 時間: {created_at}")
        else:
            print("❌ 未找到精確匹配的翻譯記錄")

        # Cache status for the first CACHE_SAMPLE_SIZE extracted segments.
        print("\n檢查所有提取片段的翻譯快取狀況:")
        sample = segments[:CACHE_SAMPLE_SIZE]
        found_count = 0
        for position, segment in enumerate(sample, start=1):
            result = db.session.execute(sql_text("""
                SELECT translated_text
                FROM dt_translation_cache
                WHERE source_text = :text AND target_language = :lang
                ORDER BY created_at DESC
                LIMIT 1
            """), {'text': segment, 'lang': TARGET_LANGUAGE})

            row = result.fetchone()
            if row:
                found_count += 1
                # Guard against a NULL translated_text so the slice cannot fail.
                translated = row[0] or ''
                print(f" ✅ {position:2d}. '{segment[:20]}...' -> '{translated[:20]}...'")
            else:
                print(f" ❌ {position:2d}. '{segment[:20]}...' -> 無翻譯記錄")

        checked = len(sample)
        if checked:
            print(f"\n翻譯快取命中率: {found_count}/{checked} = {found_count/checked*100:.1f}%")
        else:
            # No segments were extracted: avoid dividing by zero.
            print("\n翻譯快取命中率: 0/0 (沒有可檢查的片段)")


def analyze_latest_excel_test():
    """Analyze the latest Excel test result in detail.

    Prints a five-part report to stdout:

    1. what ``ExcelParser._should_translate`` decides for sample texts,
    2. the text segments actually extracted from the original workbook,
    3. cell A1 in the original vs. the translated workbook,
    4. a handful of other cells in both workbooks,
    5. translation-cache rows for the A1 text and the first segments.

    Returns ``None``. Returns early (after printing a message) when either
    the original or the translated workbook file does not exist.
    """
    print("=" * 80)
    print("分析最新Excel測試結果")
    print(f"UUID: {TEST_UUID}")
    print("=" * 80)

    # File paths
    test_dir = UPLOADS_DIR / TEST_UUID
    original_file = test_dir / "original_panjit_185bb457.xlsx"
    translated_file = test_dir / "original_panjit_185bb457_ja_translated.xlsx"

    if not original_file.exists():
        print(f"原始文件不存在: {original_file}")
        return

    if not translated_file.exists():
        print(f"翻譯文件不存在: {translated_file}")
        return

    print("\n✅ 檔案確認:")
    print(f" 原始文件: {original_file.name}")
    print(f" 翻譯文件: {translated_file.name}")

    # 1. Exercise ExcelParser._should_translate
    _print_section("1. 測試ExcelParser的_should_translate函數")
    parser = ExcelParser(str(original_file))
    _report_should_translate(parser)

    # 2. Inspect the segments that are actually extracted
    _print_section("2. 檢查實際提取的文字片段")
    segments = _report_segments(parser)

    # 3. Compare A1 in the original and translated workbooks
    _print_section("3. 檢查A1儲存格內容")

    # ExitStack closes every workbook that was opened, even if a later
    # section (or a later load) raises.
    with contextlib.ExitStack() as stack:
        wb_orig = openpyxl.load_workbook(str(original_file), data_only=False)
        stack.callback(wb_orig.close)
        wb_trans = openpyxl.load_workbook(str(translated_file), data_only=False)
        stack.callback(wb_trans.close)
        wb_orig_vals = _load_values_workbook(original_file)
        if wb_orig_vals is not None:
            stack.callback(wb_orig_vals.close)

        _report_a1(wb_orig, wb_trans, wb_orig_vals)

        # 4. Compare other important cells
        _print_section("4. 檢查其他重要儲存格")
        _report_important_cells(wb_orig, wb_trans)

        # 5. Inspect the translation cache
        _print_section("5. 檢查翻譯快取狀況")
        _report_translation_cache(segments)

    print("\n" + "=" * 80)
    print("分析完成!")
    print("=" * 80)


if __name__ == "__main__":
    analyze_latest_excel_test()