#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Debug the actual execution path taken when inserting DOCX translations.

The script parses a DOCX file with ``DocxParser``, classifies the first
segments by container type (table cell, normal paragraph, SDT, other), and
checks which of them have a cached translation in ``dt_translation_cache``.
"""

import sys
import os

# Fix encoding for Windows console.  Streams that do not expose
# ``reconfigure`` (e.g. replaced/captured streams) are left untouched.
for _stream in (sys.stdout, sys.stderr):
    if _stream.encoding != 'utf-8' and hasattr(_stream, 'reconfigure'):
        _stream.reconfigure(encoding='utf-8')

sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'app'))

from app import create_app, db
from app.services.translation_service import DocxParser
from docx.table import _Cell
from docx.text.paragraph import Paragraph
from sqlalchemy import text

# Document inspected when no explicit path is supplied.
DEFAULT_DOCX_PATH = r"C:\Users\EGG\WORK\data\user_scrip\TOOL\Document_translator_V2\uploads\9c6548ac-2f59-45f4-aade-0a9b3895bbfd\original_-OR026_9c6548ac.docx"

# Display labels for the segment-type report, keyed by classification.
# The "other" label is built inline because it embeds the runtime type.
_TYPE_LABELS = {
    "table": "🏢 表格段落",
    "normal": "📄 普通段落",
    "sdt": "📋 SDT段落",
}

# Display labels for the execution-path report, keyed by classification.
_PATH_LABELS = {
    "table": "🏢 表格路徑",
    "normal": "📄 普通段落路徑",
    "sdt": "📋 SDT路徑",
    "other": "❓ 其他路徑",
}


def _classify_ref(ref):
    """Return ``"table"``, ``"normal"``, ``"sdt"`` or ``"other"`` for *ref*.

    A ``Paragraph`` whose ``_parent`` is a ``_Cell`` is ``"table"``; any other
    ``Paragraph`` is ``"normal"``.  A non-paragraph object whose ``tag`` ends
    with ``}sdt`` is ``"sdt"``.  Everything else is ``"other"``.
    """
    if isinstance(ref, Paragraph):
        return "table" if isinstance(ref._parent, _Cell) else "normal"
    tag = getattr(ref, "tag", None)
    # Only string tags can be suffix-matched; anything else is "other".
    if isinstance(tag, str) and tag.endswith('}sdt'):
        return "sdt"
    return "other"


def debug_docx_insertion_path(original_path=DEFAULT_DOCX_PATH,
                              target_language='en'):
    """Print a report of segment types and translation insertion paths.

    Args:
        original_path: Path of the DOCX file handed to ``DocxParser``.
            Defaults to ``DEFAULT_DOCX_PATH``.
        target_language: Value matched against the ``target_language``
            column when looking up cached translations.  Defaults to ``'en'``.

    Returns:
        None.  All results are written to stdout.
    """
    app = create_app()

    with app.app_context():
        print("=== 調試DOCX翻譯插入的實際執行路徑 ===")

        # Build the parser and extract the segment list.
        parser = DocxParser(original_path)
        segments = parser.extract_segments_with_context()

        print(f"文檔總段落數: {len(segments)}")

        # --- Part 1: classify the first 20 segments by container type ---
        type_counts = {"table": 0, "normal": 0, "sdt": 0, "other": 0}

        print("\n📊 段落類型分析:")

        for index, seg in enumerate(segments[:20], start=1):
            if seg.kind == "para":
                category = _classify_ref(seg.ref)
                if category == "other":
                    segment_type = f"❓ 其他段落 ({type(seg.ref)})"
                else:
                    segment_type = _TYPE_LABELS[category]
            else:
                # Non-paragraph segments are tallied together with "other".
                category = "other"
                segment_type = f"🔧 非段落 ({seg.kind})"

            type_counts[category] += 1
            print(f" 段落 {index:2d}: {segment_type} - {seg.text[:50]}...")

        print("\n統計結果 (前20個段落):")
        print(f" 表格段落: {type_counts['table']}")
        print(f" 普通段落: {type_counts['normal']}")
        print(f" SDT段落: {type_counts['sdt']}")
        print(f" 其他類型: {type_counts['other']}")

        # --- Part 2: for the first 10 segments, find which path the ones
        # with a cached translation would take ---
        print("\n🔍 檢查有翻譯的段落執行路徑:")

        path_stats = {
            "table": 0,
            "normal": 0,
            "sdt": 0,
            "other": 0,
            "skipped": 0,
        }

        # Built once; only the bound parameters change per segment.
        lookup = text("""
            SELECT translated_text
            FROM dt_translation_cache
            WHERE source_text = :text AND target_language = :lang
            ORDER BY created_at DESC
            LIMIT 1
        """)

        for index, seg in enumerate(segments[:10], start=1):
            # Non-paragraph segments are neither counted nor printed here.
            if seg.kind != "para":
                continue

            # Look up the most recent cached translation for this text.
            row = db.session.execute(
                lookup, {'text': seg.text, 'lang': target_language}
            ).fetchone()
            translated = row[0] if row else None

            # A missing row and an empty/NULL translation both count as
            # "no translation".
            if not translated:
                path_stats["skipped"] += 1
                print(f" 段落 {index:2d}: ❌ 無翻譯 - {seg.text[:30]}...")
                continue

            path = _classify_ref(seg.ref)
            path_stats[path] += 1

            print(f" 段落 {index:2d}: {_PATH_LABELS[path]} ✅ 有翻譯")
            print(f" 原文: {seg.text[:50]}...")
            print(f" 譯文: {translated[:50]}...")

        print("\n📈 執行路徑統計:")
        print(f" 表格路徑: {path_stats['table']} 段落")
        print(f" 普通段落路徑: {path_stats['normal']} 段落")
        print(f" SDT路徑: {path_stats['sdt']} 段落")
        print(f" 其他路徑: {path_stats['other']} 段落")
        print(f" 跳過(無翻譯): {path_stats['skipped']} 段落")

        # --- Part 3: key analysis.  The comparison is only between the
        # table count and the normal-paragraph count; SDT/other counts
        # contribute to the denominator but not to the verdict.
        total_with_translation = sum(
            path_stats[key] for key in ('table', 'normal', 'sdt', 'other')
        )

        if total_with_translation > 0:
            print("\n💡 關鍵分析:")
            if path_stats['table'] > path_stats['normal']:
                print(f" ⚠️ 大多數段落走表格路徑 ({path_stats['table']}/{total_with_translation})")
                print(" 可能問題: 表格插入邏輯有問題")
            elif path_stats['normal'] > path_stats['table']:
                print(f" ✅ 大多數段落走普通段落路徑 ({path_stats['normal']}/{total_with_translation})")
                print(" 可能問題: 普通段落插入邏輯有問題")
            else:
                print(" 📊 表格和普通段落路徑數量相當")


if __name__ == "__main__":
    # An optional first command-line argument overrides the default
    # document path; with no arguments the built-in default is used.
    debug_docx_insertion_path(*sys.argv[1:2])