4th_fix time error
This commit is contained in:
153
debug_docx_insertion_path.py
Normal file
153
debug_docx_insertion_path.py
Normal file
@@ -0,0 +1,153 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
調試DOCX翻譯插入的實際執行路徑
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
|
||||
# Force UTF-8 on the standard streams so the CJK/emoji text printed by this
# script does not fail on consoles that use a legacy code page (e.g. Windows).
for _stream in (sys.stdout, sys.stderr):
    # Normalise spelling/case so 'UTF-8', 'utf-8' and 'utf8' all count as UTF-8.
    _encoding = (getattr(_stream, "encoding", None) or "").lower().replace("-", "")
    # A stream may be None or a replacement object (StringIO, capture wrapper)
    # that has no reconfigure(); skip it instead of crashing at import time.
    _reconfigure = getattr(_stream, "reconfigure", None)
    if _encoding != "utf8" and callable(_reconfigure):
        _reconfigure(encoding="utf-8")

# Put the ``app`` directory that sits next to this script at the front of the
# module search path.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'app'))
|
||||
|
||||
from app import create_app, db
|
||||
from app.services.translation_service import DocxParser
|
||||
from sqlalchemy import text
|
||||
|
||||
# Document inspected when the caller does not supply a path (the value the
# script was originally hard-wired to).
_DEFAULT_DOCX_PATH = r"C:\Users\EGG\WORK\data\user_scrip\TOOL\Document_translator_V2\uploads\9c6548ac-2f59-45f4-aade-0a9b3895bbfd\original_-OR026_9c6548ac.docx"


def _classify_segment_ref(ref):
    """Classify the object behind a "para" segment.

    Returns one of ``"table"`` (a python-docx ``Paragraph`` whose parent is a
    table ``_Cell``), ``"normal"`` (any other ``Paragraph``), ``"sdt"`` (an
    object whose ``tag`` ends with ``}sdt``) or ``"other"``.

    NOTE(review): these categories presumably mirror the branches taken by the
    real insertion code; that code is not visible here, so confirm.
    """
    # Imported here so both analysis passes share one import site instead of
    # relying on an import that only ran inside an earlier loop iteration.
    from docx.table import _Cell
    from docx.text.paragraph import Paragraph

    if isinstance(ref, Paragraph):
        return "table" if isinstance(ref._parent, _Cell) else "normal"
    if hasattr(ref, 'tag') and ref.tag.endswith('}sdt'):
        return "sdt"
    return "other"


def debug_docx_insertion_path(original_path=_DEFAULT_DOCX_PATH,
                              target_language="en",
                              type_sample_size=20,
                              path_sample_size=10):
    """Debug which execution path DOCX translation insertion would take.

    Parses a DOCX file with ``DocxParser``, prints a per-segment type
    breakdown for the leading segments, then looks up each leading "para"
    segment in ``dt_translation_cache`` and prints which category the
    segments that have a cached translation fall into.

    Args:
        original_path: DOCX file to analyse. Defaults to the path the script
            was originally written against.
        target_language: Value matched against ``target_language`` in the
            cache lookup. Defaults to ``"en"``.
        type_sample_size: How many leading segments are included in the type
            breakdown. Defaults to 20.
        path_sample_size: How many leading segments are checked for a cached
            translation. Defaults to 10.

    Returns:
        None. All results are printed to stdout.
    """
    app = create_app()

    with app.app_context():
        print("=== 調試DOCX翻譯插入的實際執行路徑 ===")

        # Build the parser and pull out the segments to analyse.
        parser = DocxParser(original_path)
        segments = parser.extract_segments_with_context()

        print(f"文檔總段落數: {len(segments)}")

        # ---- Pass 1: segment type breakdown ----
        type_counts = {"table": 0, "normal": 0, "sdt": 0, "other": 0}
        type_labels = {
            "table": "🏢 表格段落",
            "normal": "📄 普通段落",
            "sdt": "📋 SDT段落",
        }

        print("\n📊 段落類型分析:")

        for i, seg in enumerate(segments[:type_sample_size]):
            if seg.kind == "para":
                category = _classify_segment_ref(seg.ref)
                if category == "other":
                    segment_type = f"❓ 其他段落 ({type(seg.ref)})"
                else:
                    segment_type = type_labels[category]
            else:
                # Non-paragraph segments are counted together with "other".
                category = "other"
                segment_type = f"🔧 非段落 ({seg.kind})"

            type_counts[category] += 1
            print(f" 段落 {i+1:2d}: {segment_type} - {seg.text[:50]}...")

        print(f"\n統計結果 (前{type_sample_size}個段落):")
        print(f" 表格段落: {type_counts['table']}")
        print(f" 普通段落: {type_counts['normal']}")
        print(f" SDT段落: {type_counts['sdt']}")
        print(f" 其他類型: {type_counts['other']}")

        # ---- Pass 2: category of segments that have a cached translation ----
        print("\n🔍 檢查有翻譯的段落執行路徑:")

        path_stats = {
            "table": 0,
            "normal": 0,
            "sdt": 0,
            "other": 0,
            "skipped": 0,
        }
        path_labels = {
            "table": "🏢 表格路徑",
            "normal": "📄 普通段落路徑",
            "sdt": "📋 SDT路徑",
            "other": "❓ 其他路徑",
        }

        # Built once; the most recent cache row for the exact source text wins.
        lookup = text("""
            SELECT translated_text
            FROM dt_translation_cache
            WHERE source_text = :text AND target_language = :lang
            ORDER BY created_at DESC
            LIMIT 1
        """)

        for i, seg in enumerate(segments[:path_sample_size]):
            if seg.kind != "para":
                # Only paragraph segments are looked up; others are not counted.
                continue

            row = db.session.execute(
                lookup, {'text': seg.text, 'lang': target_language}
            ).fetchone()
            translated = row[0] if row else None

            if not translated:
                # No row, or an empty/NULL translation: counted as skipped.
                path_stats["skipped"] += 1
                print(f" 段落 {i+1:2d}: ❌ 無翻譯 - {seg.text[:30]}...")
                continue

            path = _classify_segment_ref(seg.ref)
            path_stats[path] += 1

            print(f" 段落 {i+1:2d}: {path_labels[path]} ✅ 有翻譯")
            print(f" 原文: {seg.text[:50]}...")
            print(f" 譯文: {translated[:50]}...")

        print("\n📈 執行路徑統計:")
        print(f" 表格路徑: {path_stats['table']} 段落")
        print(f" 普通段落路徑: {path_stats['normal']} 段落")
        print(f" SDT路徑: {path_stats['sdt']} 段落")
        print(f" 其他路徑: {path_stats['other']} 段落")
        print(f" 跳過(無翻譯): {path_stats['skipped']} 段落")

        # Key question: which category do most translated segments fall into?
        total_with_translation = sum(
            path_stats[k] for k in ('table', 'normal', 'sdt', 'other')
        )
        if total_with_translation > 0:
            print("\n💡 關鍵分析:")
            if path_stats['table'] > path_stats['normal']:
                print(f" ⚠️ 大多數段落走表格路徑 ({path_stats['table']}/{total_with_translation})")
                print(" 可能問題: 表格插入邏輯有問題")
            elif path_stats['normal'] > path_stats['table']:
                print(f" ✅ 大多數段落走普通段落路徑 ({path_stats['normal']}/{total_with_translation})")
                print(" 可能問題: 普通段落插入邏輯有問題")
            else:
                print(" 📊 表格和普通段落路徑數量相當")
|
||||
# Script entry point: run the diagnostic only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    debug_docx_insertion_path()
|
Reference in New Issue
Block a user