#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Trace the real DOCX translation-insertion flow end to end.

The script copies a known uploaded document to a temp directory, looks up
cached translations for its first few segments, runs the production
``_insert_docx_translations`` routine on the copy, and then inspects the
document both in memory and after a save/reload round trip.
"""

import sys
import os
import tempfile
import shutil
import traceback
from pathlib import Path


def _ensure_utf8(stream):
    """Switch *stream* to UTF-8 when it is not already and supports it.

    The stream may be ``None`` or a replacement object without
    ``reconfigure``; in those cases it is left untouched instead of raising.
    """
    encoding = (getattr(stream, 'encoding', None) or '').lower()
    if encoding not in ('utf-8', 'utf8') and hasattr(stream, 'reconfigure'):
        stream.reconfigure(encoding='utf-8')


# Fix encoding for Windows console
_ensure_utf8(sys.stdout)
_ensure_utf8(sys.stderr)

# The project imports below rely on this path entry, so it must come first.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'app'))

from app import create_app, db
from app.services.document_processor import DocumentProcessor, _insert_docx_translations
from sqlalchemy import text as sql_text

# Zero-width space that this script treats as the "translation marker".
_TRANSLATION_MARKER = '\u200b'

# Latin letters ignored by the English detection below.
# NOTE(review): this skips each of these uppercase letters individually, not
# the word "PANJIT" as a whole -- confirm which of the two was intended.
_IGNORED_LATIN_LETTERS = 'PANJIT'

# How many leading segments / paragraphs the script looks at.
_DEBUG_SEGMENT_LIMIT = 5
_PARAGRAPH_SCAN_LIMIT = 20


def _build_translation_map(segments, target_language):
    """Look up the newest cached translation for each segment and report it.

    Args:
        segments: Segment objects exposing a ``text`` attribute.
        target_language: Language code used to filter the cache table.

    Returns:
        Dict mapping ``(target_language, source_text)`` to the translated
        text, containing only segments with a non-empty cached translation.
    """
    translation_map = {}
    query = sql_text("""
        SELECT translated_text
        FROM dt_translation_cache
        WHERE source_text = :text AND target_language = :lang
        ORDER BY created_at DESC
        LIMIT 1
    """)

    for i, seg in enumerate(segments):
        row = db.session.execute(
            query, {'text': seg.text, 'lang': target_language}
        ).fetchone()

        if row and row[0]:
            translation_map[(target_language, seg.text)] = row[0]
            print(f" 段落 {i+1}: ✅ 有翻譯")
            print(f" 原文: {seg.text[:50]}...")
            print(f" 譯文: {row[0][:50]}...")
        else:
            print(f" 段落 {i+1}: ❌ 無翻譯 - {seg.text[:50]}...")

    return translation_map


def _classify_text(para, text):
    """Return ``(is_marker, is_pure_english, label)`` for a non-empty paragraph.

    A paragraph counts as a marker paragraph when any run contains the
    zero-width-space marker; otherwise it is classified by which scripts
    appear in *text*.
    """
    has_marker = any(_TRANSLATION_MARKER in (r.text or '') for r in para.runs)
    has_chinese = any('\u4e00' <= c <= '\u9fff' for c in text)
    has_english = any(
        ord(c) < 128 and c.isalpha() and c not in _IGNORED_LATIN_LETTERS
        for c in text
    )

    if has_marker:
        return True, False, "🏷️ 翻譯標記"
    if has_english and not has_chinese:
        return False, True, "🇺🇸 純英文"
    if has_chinese and has_english:
        return False, False, "🔄 中英混合"
    if has_chinese:
        return False, False, "🇨🇳 純中文"
    return False, False, "❓ 其他"


def _scan_paragraphs(document, verbose=False):
    """Count pure-English and marker paragraphs among the leading paragraphs.

    Args:
        document: A python-docx document.
        verbose: When true, print one status line per non-empty paragraph.

    Returns:
        Tuple ``(pure_english_count, marker_count)``.
    """
    english_count = 0
    marker_count = 0

    for i, para in enumerate(document.paragraphs[:_PARAGRAPH_SCAN_LIMIT]):
        text = para.text.strip()
        if not text:
            continue

        is_marker, is_english, lang_status = _classify_text(para, text)
        marker_count += is_marker
        english_count += is_english

        if verbose:
            print(f" 段落 {i+1:2d}: {lang_status} - {text[:60]}...")

    return english_count, marker_count


def _print_diagnosis(ok_count, saved_insertion_found, saved_marker_found):
    """Compare the inserter's reported count with what survived the save."""
    found_after_save = saved_insertion_found > 0 or saved_marker_found > 0

    if ok_count > 0 and not found_after_save:
        print("\n🚨 關鍵問題發現:")
        print(f" - 插入函數報告成功插入 {ok_count} 個翻譯")
        print(" - 但保存後的文檔中沒有發現任何翻譯內容或標記")
        print(" - 問題可能在於:")
        print(" 1. _append_after函數實際沒有插入")
        print(" 2. 插入位置不正確")
        print(" 3. 文檔保存過程有問題")
    elif ok_count > 0 and found_after_save:
        print("\n✅ 插入成功!")
        print(f" - 插入函數報告: {ok_count} 個翻譯")
        print(f" - 保存後確認: {saved_insertion_found + saved_marker_found} 個翻譯段落")
    else:
        print("\n⚠️ 無翻譯插入(可能都被跳過)")


def _print_log_summary(insertion_logs):
    """Print counts of collected log lines grouped by their bracketed tag."""
    print("\n📝 插入日誌摘要:")
    success_logs = [log for log in insertion_logs if '[SUCCESS]' in log]
    skip_logs = [log for log in insertion_logs if '[SKIP]' in log]
    error_logs = [log for log in insertion_logs if '[ERROR]' in log]

    print(f" 成功日誌: {len(success_logs)}")
    print(f" 跳過日誌: {len(skip_logs)}")
    print(f" 錯誤日誌: {len(error_logs)}")

    if success_logs:
        print(" 前3條成功日誌:")
        for log in success_logs[:3]:
            print(f" {log}")

    if error_logs:
        print(" 錯誤日誌:")
        for log in error_logs:
            print(f" {log}")


def debug_actual_insertion():
    """Run the DOCX translation insertion on a scratch copy and report on it.

    Steps: copy the hard-coded source document to a temp directory, extract
    its segments, fetch cached translations for the first few segments, run
    ``_insert_docx_translations``, then inspect the document before and after
    saving. Failures in the insertion/inspection phase are reported with a
    full traceback instead of propagating.
    """
    app = create_app()

    with app.app_context():
        print("=== 監控實際的DOCX翻譯插入過程 ===")

        # Source document
        original_path = r"C:\Users\EGG\WORK\data\user_scrip\TOOL\Document_translator_V2\uploads\9c6548ac-2f59-45f4-aade-0a9b3895bbfd\original_-OR026_9c6548ac.docx"

        # Work on a scratch copy so the uploaded file is never modified
        test_dir = Path(tempfile.gettempdir()) / "debug_insertion"
        test_dir.mkdir(exist_ok=True)
        test_path = test_dir / "debug_original.docx"
        output_path = test_dir / "debug_translated.docx"

        shutil.copy2(original_path, test_path)
        print(f"✅ 創建測試副本: {test_path}")

        # Create the processor
        processor = DocumentProcessor()

        # Extract segments
        segments = processor.extract_docx_segments(str(test_path))
        print(f"📄 提取到 {len(segments)} 個段落")

        # Build the translation map (only the first few segments, to keep
        # the debug output readable)
        target_language = 'en'
        debug_segments = segments[:_DEBUG_SEGMENT_LIMIT]

        print("\n🔍 構建前5個段落的翻譯映射:")
        translation_map = _build_translation_map(debug_segments, target_language)
        print(f"\n翻譯映射總數: {len(translation_map)}")

        # Load the document and inspect it before/after insertion
        try:
            from docx import Document
            doc = Document(str(test_path))

            print("\n📊 插入前文檔狀態:")
            print(f"總段落數: {len(doc.paragraphs)}")

            # Logger that echoes each message and keeps it for the summary
            insertion_logs = []

            def detailed_log(msg: str):
                print(f"[LOG] {msg}")
                insertion_logs.append(msg)

            # Run the insertion (only for the selected debug segments)
            print("\n🔄 開始執行翻譯插入...")
            ok_count, skip_count = _insert_docx_translations(
                doc, debug_segments, translation_map, [target_language], detailed_log
            )

            print(f"\n插入結果: 成功 {ok_count}, 跳過 {skip_count}")

            # Inspect the in-memory document after insertion
            print("\n📊 插入後文檔狀態:")
            print(f"總段落數: {len(doc.paragraphs)}")

            insertion_found, marker_found = _scan_paragraphs(doc, verbose=True)

            print("\n發現的插入內容:")
            print(f" 純英文段落: {insertion_found}")
            print(f" 帶翻譯標記的段落: {marker_found}")

            # Save the document
            doc.save(str(output_path))
            print(f"\n✅ 文檔已保存至: {output_path}")

            # Reload from disk to verify the insertions survived the save
            doc2 = Document(str(output_path))
            print("\n📊 保存後重新讀取驗證:")
            print(f"總段落數: {len(doc2.paragraphs)}")

            saved_insertion_found, saved_marker_found = _scan_paragraphs(doc2)

            print("保存後發現的插入內容:")
            print(f" 純英文段落: {saved_insertion_found}")
            print(f" 帶翻譯標記的段落: {saved_marker_found}")

            # Diagnosis
            _print_diagnosis(ok_count, saved_insertion_found, saved_marker_found)

            # Insertion log summary
            _print_log_summary(insertion_logs)

        except Exception as e:
            print(f"❌ 調試失敗: {e}")
            # A debug script is useless without the failing frame, so show it.
            traceback.print_exc()


if __name__ == "__main__":
    debug_actual_insertion()