4th_fix time error
This commit is contained in:
213
debug_actual_insertion.py
Normal file
213
debug_actual_insertion.py
Normal file
@@ -0,0 +1,213 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
監控實際的DOCX翻譯插入過程
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
import tempfile
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
|
||||
# Fix encoding for Windows console.
# A stream may be missing (``sys.stdout`` is None under ``pythonw``) or may
# have been replaced by an object without ``reconfigure`` (IDE consoles,
# capture wrappers); such streams are left untouched instead of crashing.
for _stream in (sys.stdout, sys.stderr):
    _reconfigure = getattr(_stream, 'reconfigure', None)
    _encoding = (getattr(_stream, 'encoding', None) or '').lower()
    if _reconfigure is not None and _encoding not in ('utf-8', 'utf8'):
        _reconfigure(encoding='utf-8')
del _stream, _reconfigure, _encoding

# Put the sibling ``app`` directory first on the import path so modules
# inside it can also be imported by their bare name.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'app'))
|
||||
|
||||
from app import create_app, db
|
||||
from app.services.document_processor import DocumentProcessor, _insert_docx_translations
|
||||
from sqlalchemy import text as sql_text
|
||||
|
||||
# Source document used when the caller does not pass ``original_path``.
_DEFAULT_ORIGINAL_PATH = r"C:\Users\EGG\WORK\data\user_scrip\TOOL\Document_translator_V2\uploads\9c6548ac-2f59-45f4-aade-0a9b3895bbfd\original_-OR026_9c6548ac.docx"

# Zero-width space; a paragraph with a run containing it is counted as
# carrying a translation marker.
_TRANSLATION_MARKER = '\u200b'

# Brand name that must not, on its own, make a paragraph count as English.
_IGNORED_BRAND = 'PANJIT'


def _detect_content(para, text):
    """Return ``(has_marker, has_chinese, has_english)`` for one paragraph.

    ``para`` is a python-docx paragraph and ``text`` its stripped text.
    """
    has_marker = any(_TRANSLATION_MARKER in (run.text or '') for run in para.runs)
    has_chinese = any('\u4e00' <= ch <= '\u9fff' for ch in text)
    # Remove the brand as a whole word.  The previous per-character test
    # (``c not in 'PANJIT'``) discarded every P/A/N/J/I/T letter, so words
    # made only of those letters were never recognised as English.
    remainder = text.replace(_IGNORED_BRAND, '')
    has_english = any(ord(ch) < 128 and ch.isalpha() for ch in remainder)
    return has_marker, has_chinese, has_english


def _scan_paragraphs(paragraphs, report=False):
    """Count inserted-looking paragraphs among ``paragraphs``.

    Empty paragraphs are skipped.  A paragraph with a marker run counts as
    "marked"; otherwise a paragraph with English but no Chinese characters
    counts as "English only".

    Args:
        paragraphs: Iterable of python-docx paragraphs to inspect.
        report: When true, print one classification line per paragraph.

    Returns:
        Tuple ``(english_only_count, marked_count)``.
    """
    english_only = 0
    marked = 0
    for index, para in enumerate(paragraphs):
        text = para.text.strip()
        if not text:
            continue

        has_marker, has_chinese, has_english = _detect_content(para, text)

        if has_marker:
            marked += 1
            lang_status = "🏷️ 翻譯標記"
        elif has_english and not has_chinese:
            english_only += 1
            lang_status = "🇺🇸 純英文"
        elif has_chinese and has_english:
            lang_status = "🔄 中英混合"
        elif has_chinese:
            lang_status = "🇨🇳 純中文"
        else:
            lang_status = "❓ 其他"

        if report:
            print(f" 段落 {index+1:2d}: {lang_status} - {text[:60]}...")
    return english_only, marked


def _print_log_summary(insertion_logs):
    """Print counts of SUCCESS/SKIP/ERROR log lines and a few samples."""
    print("\n📝 插入日誌摘要:")
    success_logs = [log for log in insertion_logs if '[SUCCESS]' in log]
    skip_logs = [log for log in insertion_logs if '[SKIP]' in log]
    error_logs = [log for log in insertion_logs if '[ERROR]' in log]

    print(f" 成功日誌: {len(success_logs)}")
    print(f" 跳過日誌: {len(skip_logs)}")
    print(f" 錯誤日誌: {len(error_logs)}")

    if success_logs:
        print(" 前3條成功日誌:")
        for log in success_logs[:3]:
            print(f" {log}")

    if error_logs:
        print(" 錯誤日誌:")
        for log in error_logs:
            print(f" {log}")


def debug_actual_insertion(original_path=None):
    """Trace a real DOCX translation insertion and report what was written.

    The function copies the source document into a temp directory, extracts
    its segments, looks up cached translations for the first five segments
    in ``dt_translation_cache``, runs ``_insert_docx_translations`` on the
    copy, then inspects the first 20 paragraphs both before saving and after
    re-loading the saved file.  All findings are printed; nothing is
    returned.

    Args:
        original_path: Path of the DOCX to debug.  Defaults to the upload
            path this script was originally written for.

    Raises:
        FileNotFoundError: If the source document does not exist.  This is
            checked before the application is created so that a bad path
            fails immediately.
    """
    import traceback

    source_path = Path(original_path or _DEFAULT_ORIGINAL_PATH)
    if not source_path.is_file():
        raise FileNotFoundError(f"Original DOCX not found: {source_path}")

    app = create_app()

    with app.app_context():
        print("=== 監控實際的DOCX翻譯插入過程 ===")

        # Work on a copy so the uploaded original is never modified.
        test_dir = Path(tempfile.gettempdir()) / "debug_insertion"
        test_dir.mkdir(exist_ok=True)
        test_path = test_dir / "debug_original.docx"
        output_path = test_dir / "debug_translated.docx"

        shutil.copy2(source_path, test_path)
        print(f"✅ 創建測試副本: {test_path}")

        processor = DocumentProcessor()

        # Extract the segments from the copied document.
        segments = processor.extract_docx_segments(str(test_path))
        print(f"📄 提取到 {len(segments)} 個段落")

        # Build the translation map for the first five segments only, to
        # keep the trace short enough to read.
        target_language = 'en'
        translation_map = {}
        debug_segments = segments[:5]

        print("\n🔍 構建前5個段落的翻譯映射:")

        lookup = sql_text("""
            SELECT translated_text
            FROM dt_translation_cache
            WHERE source_text = :text AND target_language = :lang
            ORDER BY created_at DESC
            LIMIT 1
        """)

        for i, seg in enumerate(debug_segments):
            result = db.session.execute(
                lookup, {'text': seg.text, 'lang': target_language}
            )
            row = result.fetchone()
            if row and row[0]:
                translation_map[(target_language, seg.text)] = row[0]
                print(f" 段落 {i+1}: ✅ 有翻譯")
                print(f" 原文: {seg.text[:50]}...")
                print(f" 譯文: {row[0][:50]}...")
            else:
                print(f" 段落 {i+1}: ❌ 無翻譯 - {seg.text[:50]}...")

        print(f"\n翻譯映射總數: {len(translation_map)}")

        # Load the document and inspect it before and after insertion.
        try:
            from docx import Document

            doc = Document(str(test_path))

            print("\n📊 插入前文檔狀態:")
            print(f"總段落數: {len(doc.paragraphs)}")

            # Every message the insertion routine emits is echoed and kept
            # for the summary at the end.
            insertion_logs = []

            def detailed_log(msg: str):
                print(f"[LOG] {msg}")
                insertion_logs.append(msg)

            print("\n🔄 開始執行翻譯插入...")

            ok_count, skip_count = _insert_docx_translations(
                doc, debug_segments, translation_map, [target_language], detailed_log
            )

            print(f"\n插入結果: 成功 {ok_count}, 跳過 {skip_count}")

            print("\n📊 插入後文檔狀態:")
            print(f"總段落數: {len(doc.paragraphs)}")

            # Classify the first 20 paragraphs of the in-memory document.
            insertion_found, marker_found = _scan_paragraphs(
                doc.paragraphs[:20], report=True
            )

            print("\n發現的插入內容:")
            print(f" 純英文段落: {insertion_found}")
            print(f" 帶翻譯標記的段落: {marker_found}")

            doc.save(str(output_path))
            print(f"\n✅ 文檔已保存至: {output_path}")

            # Re-load from disk to confirm the insertions survived saving.
            doc2 = Document(str(output_path))
            print("\n📊 保存後重新讀取驗證:")
            print(f"總段落數: {len(doc2.paragraphs)}")

            saved_insertion_found, saved_marker_found = _scan_paragraphs(
                doc2.paragraphs[:20]
            )

            print("保存後發現的插入內容:")
            print(f" 純英文段落: {saved_insertion_found}")
            print(f" 帶翻譯標記的段落: {saved_marker_found}")

            # Diagnosis: compare what the insertion routine reported with
            # what is actually present in the saved file.
            found_after_save = saved_insertion_found > 0 or saved_marker_found > 0
            if ok_count > 0 and not found_after_save:
                print("\n🚨 關鍵問題發現:")
                print(f" - 插入函數報告成功插入 {ok_count} 個翻譯")
                print(" - 但保存後的文檔中沒有發現任何翻譯內容或標記")
                print(" - 問題可能在於:")
                print(" 1. _append_after函數實際沒有插入")
                print(" 2. 插入位置不正確")
                print(" 3. 文檔保存過程有問題")
            elif ok_count > 0 and found_after_save:
                print("\n✅ 插入成功!")
                print(f" - 插入函數報告: {ok_count} 個翻譯")
                print(f" - 保存後確認: {saved_insertion_found + saved_marker_found} 個翻譯段落")
            else:
                print("\n⚠️ 無翻譯插入(可能都被跳過)")

            _print_log_summary(insertion_logs)

        except Exception as e:
            # Best-effort debug script: report the failure instead of
            # crashing, but keep the traceback so the cause can be located.
            print(f"❌ 調試失敗: {e}")
            traceback.print_exc()
|
||||
# Script entry point: run the insertion trace only when this file is
# executed directly, not when it is imported.
if __name__ == "__main__":
    debug_actual_insertion()
|
Reference in New Issue
Block a user