#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Deep-dive diagnosis of the "超温" translation problem.

Checks the complete flow from extraction to insertion and prints what is
found for the term at each stage.
"""

import sys
import os
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

# Set the output encoding. Guarded because a replaced stdout (for example a
# capture buffer) may not provide reconfigure().
if hasattr(sys.stdout, "reconfigure"):
    sys.stdout.reconfigure(encoding='utf-8')

from pathlib import Path
import docx
from docx.table import Table
from app import create_app
from app.services.document_processor import should_translate
from app.services.translation_service import TranslationService

# Upload directory of the job under diagnosis; shared by the extraction and
# insertion steps so both always look at the same job.
UPLOAD_DIR = Path(r"C:\Users\EGG\WORK\data\user_scrip\TOOL\Document_translator_V2\uploads\17e05695-406f-47af-96eb-a0e23843770e")

# Lower-case substrings whose presence in a paragraph is taken as a hint that
# a translation was inserted, keyed by the language label used in the report.
_TRANSLATION_HINTS = {
    "英文": ('over', 'heat', 'temp', 'hot'),
    "越南文": ('quá', 'nóng', 'nhiệt'),
}


def debug_chaoweng_extraction():
    """Check whether "超温" is picked up during document extraction.

    Extracts the segments of the original .docx with ``DocumentProcessor``
    and prints kind, context, text, stripped length and the
    ``should_translate`` verdict for every segment containing "超温".

    Returns:
        A list of ``(index, segment)`` tuples for the matching segments
        (possibly empty), or ``None`` when the original file does not exist.
    """
    print("=" * 80)
    print("診斷步驟1: 檢查文件提取階段")
    print("=" * 80)

    app = create_app()

    with app.app_context():
        from app.services.document_processor import DocumentProcessor

        original_file = UPLOAD_DIR / "original_-OR026_17e05695.docx"

        if not original_file.exists():
            print("❌ 原始檔案不存在")
            return None

        processor = DocumentProcessor()

        # Extract every segment of the document.
        segments = processor.extract_docx_segments(str(original_file))
        print(f"提取到 {len(segments)} 個segments")

        # Collect the segments that contain the term.
        chaoweng_segments = []
        for i, seg in enumerate(segments):
            if "超温" not in seg.text:
                continue

            chaoweng_segments.append((i, seg))
            print(f"\nSegment {i}:")
            print(f" 種類: {seg.kind}")
            print(f" 上下文: {seg.ctx}")
            print(f" 內容: {repr(seg.text)}")
            print(f" 長度: {len(seg.text.strip())}")

            # Report whether the segment would be selected for translation.
            should_trans = should_translate(seg.text, 'zh')
            print(f" should_translate: {should_trans}")

            if seg.kind == "table_cell":
                print(" 🎯 這是表格儲存格segment")
            else:
                print(" ⚠️ 不是表格儲存格類型")

        if not chaoweng_segments:
            print("❌ 沒有找到包含'超温'的segments")
        else:
            print(f"✅ 找到 {len(chaoweng_segments)} 個包含'超温'的segments")

        return chaoweng_segments


def debug_chaoweng_translation(chaoweng_segments):
    """Check whether the "超温" segments are handled by the translation step.

    Each segment is translated from "zh" to "en" with ``TranslationService``:
    ``table_cell`` segments go through ``translate_word_table_cell``, all
    others through ``translate_segment_with_sentences``. Errors raised for
    one segment are printed and do not stop the remaining segments.

    Args:
        chaoweng_segments: ``(index, segment)`` tuples as returned by
            ``debug_chaoweng_extraction``; ``None`` or empty skips the step.
    """
    print("\n" + "=" * 80)
    print("診斷步驟2: 檢查翻譯階段")
    print("=" * 80)

    if not chaoweng_segments:
        print("❌ 沒有segments可以測試翻譯")
        return

    app = create_app()

    with app.app_context():
        service = TranslationService()

        for seg_idx, seg in chaoweng_segments:
            print(f"\n測試 Segment {seg_idx} 的翻譯:")
            print(f"原文: {repr(seg.text)}")

            try:
                if seg.kind == "table_cell":
                    print("使用 translate_word_table_cell() 方法")
                    translated = service.translate_word_table_cell(
                        text=seg.text,
                        source_language="zh",
                        target_language="en",
                        user_id=None
                    )
                else:
                    print("使用 translate_segment_with_sentences() 方法")
                    translated = service.translate_segment_with_sentences(
                        text=seg.text,
                        source_language="zh",
                        target_language="en",
                        user_id=None
                    )

                print(f"翻譯結果: {repr(translated[:100])}...")

                # Classify the outcome of the translation.
                if "【翻譯失敗" in translated:
                    print("❌ 翻譯失敗")
                elif translated == seg.text:
                    print("❌ 翻譯結果與原文相同,可能未翻譯")
                else:
                    print("✅ 翻譯成功")

            except Exception as e:
                print(f"❌ 翻譯過程發生錯誤: {e}")


def debug_chaoweng_cache():
    """Check what the translation cache holds for "超温".

    Runs two queries against ``dt_translation_cache``: an exact match on
    ``source_text`` and a ``LIKE`` match limited to source texts of at most
    10 characters (newest 10 rows), and prints the rows found.
    """
    print("\n" + "=" * 80)
    print("診斷步驟3: 檢查翻譯快取")
    print("=" * 80)

    app = create_app()

    with app.app_context():
        from sqlalchemy import text as sql_text
        from app import db

        # 1. Exact match on "超温".
        print("1. 搜尋精確的'超温'記錄:")
        exact_results = db.session.execute(sql_text(
            " SELECT id, source_text, target_language, translated_text, created_at"
            " FROM dt_translation_cache"
            " WHERE source_text = '超温'"
            " ORDER BY created_at DESC "
        )).fetchall()

        if exact_results:
            for row in exact_results:
                print(f" ROW {row[0]}: '{row[1]}' -> {row[2]} -> '{row[3]}'")
        else:
            print(" ❌ 沒有找到精確的'超温'記錄")

        # 2. Records that contain "超温" but may carry extra characters.
        print("\n2. 搜尋包含'超温'的記錄:")
        like_results = db.session.execute(sql_text(
            " SELECT id, source_text, target_language, translated_text, created_at"
            " FROM dt_translation_cache"
            " WHERE source_text LIKE '%超温%' AND CHAR_LENGTH(source_text) <= 10"
            " ORDER BY created_at DESC LIMIT 10 "
        )).fetchall()

        if like_results:
            for row in like_results:
                # A NULL translated_text must not abort the diagnosis, so
                # slice an empty string in that case.
                preview = (row[3] or "")[:30]
                print(f" ROW {row[0]}: '{row[1]}' -> {row[2]} -> '{preview}...'")
        else:
            print(" ❌ 沒有找到包含'超温'的短記錄")


def debug_chaoweng_insertion():
    """Check whether a translation for "超温" was inserted into the outputs.

    Opens the English and Vietnamese translated .docx files, scans every
    table cell for "超温", prints the paragraphs of each matching cell and
    flags paragraphs containing one of the per-language hint substrings.
    A missing file is reported and skipped; a read failure for one file is
    printed and does not stop the other file from being checked.
    """
    print("\n" + "=" * 80)
    print("診斷步驟4: 檢查已翻譯文件的插入狀況")
    print("=" * 80)

    # Translated files to inspect.
    translated_files = [
        ("英文", UPLOAD_DIR / "translated_original_-OR026_17e05695_en_translat.docx"),
        ("越南文", UPLOAD_DIR / "translated_original_-OR026_17e05695_vi_translat.docx")
    ]

    for lang, file_path in translated_files:
        if not file_path.exists():
            print(f"❌ {lang}翻譯檔案不存在")
            continue

        print(f"\n檢查{lang}翻譯檔案:")

        try:
            doc = docx.Document(str(file_path))
            hints = _TRANSLATION_HINTS.get(lang, ())

            found_chaoweng = False
            found_translation = False

            for table_idx, table in enumerate(doc.tables):
                for row_idx, row in enumerate(table.rows):
                    for cell_idx, cell in enumerate(row.cells):
                        cell_text = cell.text.strip()
                        if "超温" not in cell_text:
                            continue

                        found_chaoweng = True
                        print(f" 🔍 表格{table_idx+1} 行{row_idx+1} 列{cell_idx+1}:")
                        print(f" 內容: {repr(cell_text[:100])}")

                        # Inspect the paragraph structure of the cell.
                        print(f" 段落數: {len(cell.paragraphs)}")
                        for p_idx, para in enumerate(cell.paragraphs):
                            p_text = para.text.strip()
                            if not p_text:
                                continue

                            print(f" 段落{p_idx+1}: {repr(p_text)}")

                            # Look for signs of a translation in the
                            # language of this file.
                            lowered = p_text.lower()
                            if any(word in lowered for word in hints):
                                found_translation = True
                                print(f" 🎯 可能的{lang}翻譯")

            print(f" 原文'超温': {'✅ 找到' if found_chaoweng else '❌ 未找到'}")
            print(f" {lang}翻譯: {'✅ 找到' if found_translation else '❌ 未找到'}")

        except Exception as e:
            print(f"❌ 讀取{lang}翻譯檔案失敗: {e}")


def main():
    """Run the four diagnostic steps in order and print a closing summary.

    Any exception escaping a step is printed together with its traceback
    instead of propagating.
    """
    print("🔍 深度診斷'超温'翻譯問題")
    print("檢查完整的提取->翻譯->插入流程")

    try:
        # Step 1: document extraction.
        chaoweng_segments = debug_chaoweng_extraction()

        # Step 2: translation logic.
        debug_chaoweng_translation(chaoweng_segments)

        # Step 3: translation cache.
        debug_chaoweng_cache()

        # Step 4: insertion result.
        debug_chaoweng_insertion()

        print("\n" + "=" * 80)
        print("診斷完成!")
        print("可能的問題:")
        print("1. 提取階段: segments沒有正確提取'超温'")
        print("2. 翻譯階段: 翻譯邏輯沒有處理該segment")
        print("3. 快取階段: 翻譯沒有正確存儲")
        print("4. 插入階段: 翻譯沒有正確插入到文件")
        print("=" * 80)

    except Exception as e:
        print(f"❌ 診斷過程發生錯誤: {e}")
        import traceback
        print(f"錯誤詳情: {traceback.format_exc()}")


if __name__ == "__main__":
    main()