153 lines
6.0 KiB
Python
153 lines
6.0 KiB
Python
#!/usr/bin/env python3
|
|
# -*- coding: utf-8 -*-
|
|
"""
|
|
Debug the actual execution path taken when translations are inserted into a DOCX file.
|
|
"""
|
|
|
|
import sys
|
|
import os
|
|
|
|
# Force UTF-8 on the console streams so the CJK text and emoji printed by this
# script do not fail on Windows consoles that use a legacy code page.
for _stream in (sys.stdout, sys.stderr):
    # A stream may be None (e.g. pythonw) or replaced by an object that has no
    # ``reconfigure`` method; skip those instead of crashing at import time.
    _reconfigure = getattr(_stream, 'reconfigure', None)
    # Compare case-insensitively: 'UTF-8' and 'utf-8' name the same codec.
    _encoding = (getattr(_stream, 'encoding', None) or '').lower()
    if _reconfigure is not None and _encoding != 'utf-8':
        _reconfigure(encoding='utf-8')

# Put the sibling ``app`` directory at the front of the import path.  The path
# is made absolute so the entry stays valid if the working directory changes.
sys.path.insert(
    0,
    os.path.join(os.path.dirname(os.path.abspath(__file__)), 'app'),
)
|
|
|
|
from app import create_app, db
|
|
from app.services.translation_service import DocxParser
|
|
from sqlalchemy import text
|
|
|
|
# Sample document analysed when the caller does not supply a path.  It points
# at a machine-specific upload, so pass ``original_path`` when running elsewhere.
_DEFAULT_DOCX_PATH = r"C:\Users\EGG\WORK\data\user_scrip\TOOL\Document_translator_V2\uploads\9c6548ac-2f59-45f4-aade-0a9b3895bbfd\original_-OR026_9c6548ac.docx"

# Labels printed in the segment-type overview, keyed by category.
_SEGMENT_LABELS = {
    "table": "🏢 表格段落",
    "normal": "📄 普通段落",
    "sdt": "📋 SDT段落",
}

# Labels printed in the execution-path report, keyed by category.
_PATH_LABELS = {
    "table": "🏢 表格路徑",
    "normal": "📄 普通段落路徑",
    "sdt": "📋 SDT路徑",
    "other": "❓ 其他路徑",
}


def _classify_segment_ref(ref, paragraph_cls, cell_cls):
    """Return the category key ('table', 'normal', 'sdt' or 'other') for *ref*.

    A ``paragraph_cls`` instance whose ``_parent`` is a ``cell_cls`` instance
    is 'table'; any other ``paragraph_cls`` instance is 'normal'.  An object
    whose ``tag`` attribute ends with ``'}sdt'`` is 'sdt'.  Everything else is
    'other'.
    """
    if isinstance(ref, paragraph_cls):
        return "table" if isinstance(ref._parent, cell_cls) else "normal"
    if hasattr(ref, 'tag') and ref.tag.endswith('}sdt'):
        return "sdt"
    return "other"


def debug_docx_insertion_path(original_path=None, target_language='en'):
    """Debug the actual execution path of DOCX translation insertion.

    Parses the document with ``DocxParser``, prints a type breakdown of the
    first 20 segments, then looks up a cached translation for each of the
    first 10 segments in ``dt_translation_cache`` and reports which category
    (table / normal paragraph / SDT / other) every translated paragraph falls
    into.  The categories are presumably the branches the real insertion code
    takes -- confirm against the insertion implementation.

    Args:
        original_path: Path of the DOCX file to analyse.  Defaults to the
            sample upload that was previously hard-coded in this script.
        target_language: Language code used when looking up cached
            translations.  Defaults to ``'en'``.

    Raises:
        FileNotFoundError: If ``original_path`` does not point to a file.
    """
    # Imported once up front instead of on every loop iteration.
    from docx.table import _Cell
    from docx.text.paragraph import Paragraph

    if original_path is None:
        original_path = _DEFAULT_DOCX_PATH
    # Fail fast with a clear message before the application is created.
    if not os.path.isfile(original_path):
        raise FileNotFoundError(f"DOCX file not found: {original_path}")

    app = create_app()

    with app.app_context():
        print("=== 調試DOCX翻譯插入的實際執行路徑 ===")

        # Parse the document and extract its segments.
        parser = DocxParser(original_path)
        segments = parser.extract_segments_with_context()

        print(f"文檔總段落數: {len(segments)}")

        # ---- Part 1: type breakdown of the first 20 segments ----
        type_counts = {"table": 0, "normal": 0, "sdt": 0, "other": 0}

        print("\n📊 段落類型分析:")

        for index, seg in enumerate(segments[:20], start=1):
            if seg.kind == "para":
                category = _classify_segment_ref(seg.ref, Paragraph, _Cell)
                if category == "other":
                    segment_type = f"❓ 其他段落 ({type(seg.ref)})"
                else:
                    segment_type = _SEGMENT_LABELS[category]
            else:
                # Non-paragraph segments are counted together with "other".
                category = "other"
                segment_type = f"🔧 非段落 ({seg.kind})"

            type_counts[category] += 1
            print(f" 段落 {index:2d}: {segment_type} - {seg.text[:50]}...")

        print("\n統計結果 (前20個段落):")
        print(f" 表格段落: {type_counts['table']}")
        print(f" 普通段落: {type_counts['normal']}")
        print(f" SDT段落: {type_counts['sdt']}")
        print(f" 其他類型: {type_counts['other']}")

        # ---- Part 2: category of each translated paragraph (first 10) ----
        print("\n🔍 檢查有翻譯的段落執行路徑:")

        path_stats = {
            "table": 0,
            "normal": 0,
            "sdt": 0,
            "other": 0,
            "skipped": 0,
        }

        # Built once; both values are passed as bind parameters.
        translation_query = text("""
            SELECT translated_text
            FROM dt_translation_cache
            WHERE source_text = :text AND target_language = :lang
            ORDER BY created_at DESC
            LIMIT 1
        """)

        for index, seg in enumerate(segments[:10], start=1):
            # Only paragraph segments are looked up; others are ignored here.
            if seg.kind != "para":
                continue

            row = db.session.execute(
                translation_query,
                {'text': seg.text, 'lang': target_language},
            ).fetchone()
            translated = row[0] if row else None

            if not translated:
                path_stats["skipped"] += 1
                print(f" 段落 {index:2d}: ❌ 無翻譯 - {seg.text[:30]}...")
                continue

            path = _classify_segment_ref(seg.ref, Paragraph, _Cell)
            path_stats[path] += 1

            print(f" 段落 {index:2d}: {_PATH_LABELS[path]} ✅ 有翻譯")
            print(f" 原文: {seg.text[:50]}...")
            print(f" 譯文: {translated[:50]}...")

        print("\n📈 執行路徑統計:")
        print(f" 表格路徑: {path_stats['table']} 段落")
        print(f" 普通段落路徑: {path_stats['normal']} 段落")
        print(f" SDT路徑: {path_stats['sdt']} 段落")
        print(f" 其他路徑: {path_stats['other']} 段落")
        print(f" 跳過(無翻譯): {path_stats['skipped']} 段落")

        # ---- Part 3: which category dominates among translated paragraphs ----
        total_with_translation = sum(
            path_stats[key] for key in ('table', 'normal', 'sdt', 'other')
        )
        if total_with_translation > 0:
            print("\n💡 關鍵分析:")
            if path_stats['table'] > path_stats['normal']:
                print(f" ⚠️ 大多數段落走表格路徑 ({path_stats['table']}/{total_with_translation})")
                print(" 可能問題: 表格插入邏輯有問題")
            elif path_stats['normal'] > path_stats['table']:
                print(f" ✅ 大多數段落走普通段落路徑 ({path_stats['normal']}/{total_with_translation})")
                print(" 可能問題: 普通段落插入邏輯有問題")
            else:
                print(" 📊 表格和普通段落路徑數量相當")
|
|
# Script entry point: run the diagnostic only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    debug_docx_insertion_path()