7th_fix doc

2025-09-03 19:00:29 +08:00
parent 086c4f25b9
commit 82aaa315bb
5 changed files with 629 additions and 12 deletions
--- a/debug_chaoweng_issue.py
+++ b/debug_chaoweng_issue.py
@@ -0,0 +1,263 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+深度診斷"超温"翻譯問題
+檢查從提取到插入的完整流程
+"""
+
+import sys
+import os
+sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
+
+# 設定編碼
+sys.stdout.reconfigure(encoding='utf-8')
+
+from pathlib import Path
+import docx
+from docx.table import Table
+from app import create_app
+from app.services.document_processor import should_translate
+from app.services.translation_service import TranslationService
+
+def debug_chaoweng_extraction():
+    """Diagnostic step 1: check whether "超温" is found during extraction.
+
+    Creates the application, runs ``DocumentProcessor.extract_docx_segments``
+    on the hard-coded original .docx and prints a report (kind, context,
+    text, stripped length and ``should_translate`` verdict) for every
+    segment whose text contains "超温".
+
+    Returns:
+        list: ``(index, segment)`` tuples for the matching segments. The
+        list is empty when nothing matches or when the original file does
+        not exist, so the caller always receives a list.
+    """
+    print("=" * 80)
+    print("診斷步驟1: 檢查文件提取階段")
+    print("=" * 80)
+
+    app = create_app()
+
+    with app.app_context():
+        # Kept as a local import inside the app context, as in the
+        # original script.
+        from app.services.document_processor import DocumentProcessor
+
+        # Upload directory of the job under diagnosis.
+        base_dir = Path(r"C:\Users\EGG\WORK\data\user_scrip\TOOL\Document_translator_V2\uploads\17e05695-406f-47af-96eb-a0e23843770e")
+        original_file = base_dir / "original_-OR026_17e05695.docx"
+
+        if not original_file.exists():
+            print("❌ 原始檔案不存在")
+            # Return an empty list (not None) so the return type matches
+            # the success path; callers only test truthiness.
+            return []
+
+        processor = DocumentProcessor()
+
+        # Extract every segment of the document.
+        segments = processor.extract_docx_segments(str(original_file))
+
+        print(f"提取到 {len(segments)} 個segments")
+
+        # Collect and report the segments that contain "超温".
+        chaoweng_segments = []
+        for i, seg in enumerate(segments):
+            if "超温" not in seg.text:
+                continue
+
+            chaoweng_segments.append((i, seg))
+            print(f"\nSegment {i}:")
+            print(f"  種類: {seg.kind}")
+            print(f"  上下文: {seg.ctx}")
+            print(f"  內容: {repr(seg.text)}")
+            print(f"  長度: {len(seg.text.strip())}")
+
+            # Would the processor consider this text translatable?
+            should_trans = should_translate(seg.text, 'zh')
+            print(f"  should_translate: {should_trans}")
+
+            if seg.kind == "table_cell":
+                print("  🎯 這是表格儲存格segment")
+            else:
+                print("  ⚠️  不是表格儲存格類型")
+
+        if not chaoweng_segments:
+            print("❌ 沒有找到包含'超温'的segments")
+        else:
+            print(f"✅ 找到 {len(chaoweng_segments)} 個包含'超温'的segments")
+
+        return chaoweng_segments
+
+def debug_chaoweng_translation(chaoweng_segments):
+    """Diagnostic step 2: run each "超温" segment through the translator.
+
+    Args:
+        chaoweng_segments: iterable of ``(index, segment)`` pairs as
+            returned by ``debug_chaoweng_extraction``. A falsy value
+            (``None`` or empty) makes the function report and return early.
+
+    For every pair the segment is translated zh -> en, using
+    ``translate_word_table_cell`` for ``table_cell`` segments and
+    ``translate_segment_with_sentences`` otherwise, and the outcome is
+    printed. Any exception raised for one segment is printed and does not
+    stop the remaining segments.
+    """
+    print("\n" + "=" * 80)
+    print("診斷步驟2: 檢查翻譯階段")
+    print("=" * 80)
+
+    if not chaoweng_segments:
+        print("❌ 沒有segments可以測試翻譯")
+        return
+
+    with create_app().app_context():
+        translator = TranslationService()
+
+        for index, segment in chaoweng_segments:
+            print(f"\n測試 Segment {index} 的翻譯:")
+            print(f"原文: {repr(segment.text)}")
+
+            try:
+                is_table_cell = segment.kind == "table_cell"
+
+                # Announce the chosen method before resolving it.
+                if is_table_cell:
+                    print("使用 translate_word_table_cell() 方法")
+                    translate = translator.translate_word_table_cell
+                else:
+                    print("使用 translate_segment_with_sentences() 方法")
+                    translate = translator.translate_segment_with_sentences
+
+                translated = translate(
+                    text=segment.text,
+                    source_language="zh",
+                    target_language="en",
+                    user_id=None,
+                )
+
+                print(f"翻譯結果: {repr(translated[:100])}...")
+
+                # Classify the outcome: failure marker, unchanged, or ok.
+                if "【翻譯失敗" in translated:
+                    verdict = "❌ 翻譯失敗"
+                elif translated == segment.text:
+                    verdict = "❌ 翻譯結果與原文相同，可能未翻譯"
+                else:
+                    verdict = "✅ 翻譯成功"
+                print(verdict)
+
+            except Exception as e:
+                print(f"❌ 翻譯過程發生錯誤: {e}")
+
+def debug_chaoweng_cache():
+    """Diagnostic step 3: look for "超温" in the translation cache table.
+
+    Runs two raw SQL queries against ``dt_translation_cache`` inside an
+    application context and prints the rows found:
+
+    1. rows whose ``source_text`` is exactly "超温";
+    2. up to 10 rows whose ``source_text`` contains "超温" and is at most
+       10 characters long.
+    """
+    print("\n" + "=" * 80)
+    print("診斷步驟3: 檢查翻譯快取")
+    print("=" * 80)
+
+    with create_app().app_context():
+        from sqlalchemy import text as sql_text
+        from app import db
+
+        exact_query = sql_text("""
+            SELECT id, source_text, target_language, translated_text, created_at
+            FROM dt_translation_cache 
+            WHERE source_text = '超温'
+            ORDER BY created_at DESC
+        """)
+        like_query = sql_text("""
+            SELECT id, source_text, target_language, translated_text, created_at
+            FROM dt_translation_cache 
+            WHERE source_text LIKE '%超温%'
+            AND CHAR_LENGTH(source_text) <= 10
+            ORDER BY created_at DESC
+            LIMIT 10
+        """)
+
+        # 1. Exact matches on "超温".
+        print("1. 搜尋精確的'超温'記錄:")
+        exact_rows = db.session.execute(exact_query).fetchall()
+
+        if not exact_rows:
+            print("  ❌ 沒有找到精確的'超温'記錄")
+        else:
+            for record in exact_rows:
+                print(f"  ROW {record[0]}: '{record[1]}' -> {record[2]} -> '{record[3]}'")
+
+        # 2. Short records that contain "超温" plus possible extra characters.
+        print("\n2. 搜尋包含'超温'的記錄:")
+        like_rows = db.session.execute(like_query).fetchall()
+
+        if not like_rows:
+            print("  ❌ 沒有找到包含'超温'的短記錄")
+        else:
+            for record in like_rows:
+                print(f"  ROW {record[0]}: '{record[1]}' -> {record[2]} -> '{record[3][:30]}...'")
+
+def debug_chaoweng_insertion():
+    """Diagnostic step 4: inspect the translated .docx files for "超温".
+
+    For the English and Vietnamese output files (hard-coded paths), every
+    table cell containing "超温" is reported together with its non-empty
+    paragraphs. A paragraph counts as a possible translation when its
+    lower-cased text contains one of a few language-specific keywords.
+    Missing files are reported and skipped; errors while reading one file
+    are printed and do not stop the other file from being checked.
+    """
+    print("\n" + "=" * 80)
+    print("診斷步驟4: 檢查已翻譯文件的插入狀況")
+    print("=" * 80)
+
+    # Translated outputs to inspect.
+    base_dir = Path(r"C:\Users\EGG\WORK\data\user_scrip\TOOL\Document_translator_V2\uploads\17e05695-406f-47af-96eb-a0e23843770e")
+    translated_files = [
+        ("英文", base_dir / "translated_original_-OR026_17e05695_en_translat.docx"),
+        ("越南文", base_dir / "translated_original_-OR026_17e05695_vi_translat.docx")
+    ]
+
+    # Per-language keyword list and the message printed on a keyword hit.
+    translation_hints = {
+        "英文": (['over', 'heat', 'temp', 'hot'], "        🎯 可能的英文翻譯"),
+        "越南文": (['quá', 'nóng', 'nhiệt'], "        🎯 可能的越南文翻譯"),
+    }
+
+    for lang, file_path in translated_files:
+        if not file_path.exists():
+            print(f"❌ {lang}翻譯檔案不存在")
+            continue
+
+        print(f"\n檢查{lang}翻譯檔案:")
+        try:
+            document = docx.Document(str(file_path))
+
+            source_seen = False
+            translation_seen = False
+
+            for table_no, table in enumerate(document.tables, start=1):
+                for row_no, row in enumerate(table.rows, start=1):
+                    for col_no, cell in enumerate(row.cells, start=1):
+                        cell_text = cell.text.strip()
+                        if "超温" not in cell_text:
+                            continue
+
+                        source_seen = True
+                        print(f"  🔍 表格{table_no} 行{row_no} 列{col_no}:")
+                        print(f"    內容: {repr(cell_text[:100])}")
+
+                        # Paragraph structure of the matching cell.
+                        print(f"    段落數: {len(cell.paragraphs)}")
+                        for para_no, para in enumerate(cell.paragraphs, start=1):
+                            para_text = para.text.strip()
+                            if not para_text:
+                                continue
+
+                            print(f"      段落{para_no}: {repr(para_text)}")
+
+                            # Look for signs of a translation in this paragraph.
+                            hint = translation_hints.get(lang)
+                            if hint is None:
+                                continue
+                            keywords, hit_message = hint
+                            lowered = para_text.lower()
+                            if any(word in lowered for word in keywords):
+                                translation_seen = True
+                                print(hit_message)
+
+            source_status = '✅ 找到' if source_seen else '❌ 未找到'
+            translation_status = '✅ 找到' if translation_seen else '❌ 未找到'
+            print(f"  原文'超温': {source_status}")
+            print(f"  {lang}翻譯: {translation_status}")
+
+        except Exception as e:
+            print(f"❌ 讀取{lang}翻譯檔案失敗: {e}")
+
+def main():
+    """Run the four diagnostic steps in order and print a closing summary.
+
+    Order: extraction -> translation -> cache -> insertion. Any exception
+    escaping a step is caught here, printed with its traceback, and ends
+    the run.
+    """
+    print("🔍 深度診斷'超温'翻譯問題")
+    print("檢查完整的提取->翻譯->插入流程")
+
+    try:
+        # Step 1 feeds its matches into step 2; steps 3 and 4 are independent.
+        matches = debug_chaoweng_extraction()
+        debug_chaoweng_translation(matches)
+        debug_chaoweng_cache()
+        debug_chaoweng_insertion()
+
+        closing_lines = (
+            "\n" + "=" * 80,
+            "診斷完成！",
+            "可能的問題:",
+            "1. 提取階段: segments沒有正確提取'超温'",
+            "2. 翻譯階段: 翻譯邏輯沒有處理該segment",
+            "3. 快取階段: 翻譯沒有正確存儲",
+            "4. 插入階段: 翻譯沒有正確插入到文件",
+            "=" * 80,
+        )
+        for line in closing_lines:
+            print(line)
+
+    except Exception as err:
+        print(f"❌ 診斷過程發生錯誤: {err}")
+        import traceback
+        print(f"錯誤詳情: {traceback.format_exc()}")
+
+if __name__ == "__main__":
+    main()