4th_fix time error

2025-09-03 09:05:51 +08:00
parent e6e5332705
commit cce3fd4925
26 changed files with 2551 additions and 82 deletions
--- a/debug_docx_insertion_path.py
+++ b/debug_docx_insertion_path.py
@@ -0,0 +1,153 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+調試DOCX翻譯插入的實際執行路徑
+"""
+
+import sys
+import os
+
+# Fix encoding for Windows console: switch stdout/stderr to UTF-8 unless they already report 'utf-8'
+if sys.stdout.encoding != 'utf-8':
+    sys.stdout.reconfigure(encoding='utf-8')
+if sys.stderr.encoding != 'utf-8':
+    sys.stderr.reconfigure(encoding='utf-8')
+
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'app'))  # put the 'app' directory next to this script first on sys.path
+
+from app import create_app, db
+from app.services.translation_service import DocxParser
+from sqlalchemy import text
+
+def debug_docx_insertion_path():
+    """調試DOCX翻譯插入的實際執行路徑"""
+    
+    app = create_app()
+    
+    with app.app_context():
+        print("=== 調試DOCX翻譯插入的實際執行路徑 ===")
+        
+        # 使用現有的DOCX文件
+        original_path = r"C:\Users\EGG\WORK\data\user_scrip\TOOL\Document_translator_V2\uploads\9c6548ac-2f59-45f4-aade-0a9b3895bbfd\original_-OR026_9c6548ac.docx"
+        
+        # 創建解析器
+        parser = DocxParser(original_path)
+        
+        # 提取段落資訊
+        segments = parser.extract_segments_with_context()
+        
+        print(f"文檔總段落數: {len(segments)}")
+        
+        # 分析段落類型
+        table_segments = 0
+        normal_segments = 0
+        sdt_segments = 0
+        other_segments = 0
+        
+        print(f"\n📊 段落類型分析:")
+        
+        for i, seg in enumerate(segments[:20]):  # 檢查前20個段落
+            if seg.kind == "para":
+                # 檢查是否在表格中
+                from docx.table import _Cell
+                from docx.text.paragraph import Paragraph
+                
+                if isinstance(seg.ref, Paragraph):
+                    p = seg.ref
+                    if isinstance(p._parent, _Cell):
+                        table_segments += 1
+                        segment_type = "🏢 表格段落"
+                    else:
+                        normal_segments += 1
+                        segment_type = "📄 普通段落"
+                elif hasattr(seg.ref, 'tag') and seg.ref.tag.endswith('}sdt'):
+                    sdt_segments += 1
+                    segment_type = "📋 SDT段落"
+                else:
+                    other_segments += 1
+                    segment_type = f"❓ 其他段落 ({type(seg.ref)})"
+            else:
+                other_segments += 1
+                segment_type = f"🔧 非段落 ({seg.kind})"
+            
+            print(f"  段落 {i+1:2d}: {segment_type} - {seg.text[:50]}...")
+        
+        print(f"\n統計結果 (前20個段落):")
+        print(f"  表格段落: {table_segments}")
+        print(f"  普通段落: {normal_segments}")
+        print(f"  SDT段落: {sdt_segments}")
+        print(f"  其他類型: {other_segments}")
+        
+        # 檢查有翻譯的段落會走哪個路徑
+        print(f"\n🔍 檢查有翻譯的段落執行路徑:")
+        
+        path_stats = {
+            "table": 0,
+            "normal": 0,
+            "sdt": 0,
+            "other": 0,
+            "skipped": 0
+        }
+        
+        for i, seg in enumerate(segments[:10]):  # 檢查前10個段落
+            if seg.kind == "para":
+                # 查找翻譯
+                result = db.session.execute(text("""
+                    SELECT translated_text 
+                    FROM dt_translation_cache 
+                    WHERE source_text = :text AND target_language = 'en'
+                    ORDER BY created_at DESC 
+                    LIMIT 1
+                """), {'text': seg.text})
+                
+                row = result.fetchone()
+                has_translation = row and row[0]
+                
+                if has_translation:
+                    # 判斷執行路徑
+                    if isinstance(seg.ref, Paragraph):
+                        p = seg.ref
+                        if isinstance(p._parent, _Cell):
+                            path = "table"
+                            path_name = "🏢 表格路徑"
+                        else:
+                            path = "normal"
+                            path_name = "📄 普通段落路徑"
+                    elif hasattr(seg.ref, 'tag') and seg.ref.tag.endswith('}sdt'):
+                        path = "sdt"
+                        path_name = "📋 SDT路徑"
+                    else:
+                        path = "other"
+                        path_name = "❓ 其他路徑"
+                    
+                    path_stats[path] += 1
+                    
+                    print(f"  段落 {i+1:2d}: {path_name} ✅ 有翻譯")
+                    print(f"      原文: {seg.text[:50]}...")
+                    print(f"      譯文: {row[0][:50]}...")
+                else:
+                    path_stats["skipped"] += 1
+                    print(f"  段落 {i+1:2d}: ❌ 無翻譯 - {seg.text[:30]}...")
+        
+        print(f"\n📈 執行路徑統計:")
+        print(f"  表格路徑: {path_stats['table']} 段落")
+        print(f"  普通段落路徑: {path_stats['normal']} 段落")
+        print(f"  SDT路徑: {path_stats['sdt']} 段落")
+        print(f"  其他路徑: {path_stats['other']} 段落")
+        print(f"  跳過(無翻譯): {path_stats['skipped']} 段落")
+        
+        # 重點分析：大多數段落走的是哪個路徑？
+        total_with_translation = sum(path_stats[k] for k in ['table', 'normal', 'sdt', 'other'])
+        if total_with_translation > 0:
+            print(f"\n💡 關鍵分析:")
+            if path_stats['table'] > path_stats['normal']:
+                print(f"  ⚠️ 大多數段落走表格路徑 ({path_stats['table']}/{total_with_translation})")
+                print(f"  可能問題: 表格插入邏輯有問題")
+            elif path_stats['normal'] > path_stats['table']:
+                print(f"  ✅ 大多數段落走普通段落路徑 ({path_stats['normal']}/{total_with_translation})")
+                print(f"  可能問題: 普通段落插入邏輯有問題")
+            else:
+                print(f"  📊 表格和普通段落路徑數量相當")
+
+if __name__ == "__main__":  # run the diagnostic only when this file is executed as a script
+    debug_docx_insertion_path()