#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Check how the text segments of a DOCX translation job map to cached translations.

For every non-blank segment extracted from the original document, the script
looks up the most recent English ('en') and Vietnamese ('vi') rows in the
``dt_translation_cache`` table and prints a per-segment status plus overall
coverage statistics.
"""

import sys
import os

# Fix encoding for Windows console
if sys.stdout.encoding != 'utf-8':
    sys.stdout.reconfigure(encoding='utf-8')
if sys.stderr.encoding != 'utf-8':
    sys.stderr.reconfigure(encoding='utf-8')

sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'app'))

from app import create_app, db
from sqlalchemy import text
from app.services.translation_service import DocxParser

# Document inspected when no explicit path is supplied (the original
# hard-coded upload location).
DEFAULT_ORIGINAL_PATH = r"C:\Users\EGG\WORK\data\user_scrip\TOOL\Document_translator_V2\uploads\9c6548ac-2f59-45f4-aade-0a9b3895bbfd\original_-OR026_9c6548ac.docx"

# Newest cached translation for one source text / target language pair.
# Both values are bound parameters, so the same statement serves every language.
_LATEST_TRANSLATION_SQL = """
    SELECT translated_text, created_at
    FROM dt_translation_cache
    WHERE source_text = :text AND target_language = :lang
    ORDER BY created_at DESC
    LIMIT 1
"""


def _fetch_latest_translation(segment_text: str, target_language: str):
    """Return the newest cache row for the segment and language, or None."""
    result = db.session.execute(
        text(_LATEST_TRANSLATION_SQL),
        {'text': segment_text, 'lang': target_language},
    )
    return result.fetchone()


def _contains_chinese(value: str) -> bool:
    """Return True if any character lies in U+4E00..U+9FFF."""
    return any('\u4e00' <= c <= '\u9fff' for c in value)


def _looks_english(value: str) -> bool:
    """Return True if the text contains at least one ASCII letter."""
    return any(ord(c) < 128 and c.isalpha() for c in value)


def _looks_vietnamese(value: str) -> bool:
    """Return True if any character lies in U+00C0..U+1EF9.

    NOTE(review): this range is only a heuristic for accented Latin letters;
    Vietnamese text without diacritics is reported as unknown.
    """
    return any('\u00C0' <= c <= '\u1EF9' for c in value)


def _report_translation(label: str, row, looks_like_target) -> None:
    """Print a preview of a cached translation with a language sanity check.

    Args:
        label: Short language tag printed in front of the preview.
        row: Cache row whose first column is the translated text, or None.
        looks_like_target: Predicate telling whether the text appears to be
            written in the expected target language.
    """
    # A missing row, a NULL column and an empty string all mean there is
    # nothing to show; the truthiness test also avoids len(None).
    translated = row[0] if row else None
    if not translated:
        return

    preview = translated[:60]
    has_chinese = _contains_chinese(translated)
    if looks_like_target(translated) and not has_chinese:
        print(f" {label}: ✅ {preview}...")
    elif has_chinese:
        print(f" {label}: ❌ 仍是中文: {preview}...")
    else:
        print(f" {label}: ❓ 未知: {preview}...")


def check_docx_specific_translations(original_path: str = DEFAULT_ORIGINAL_PATH) -> None:
    """Report which segments of a DOCX file have cached EN/VI translations.

    Args:
        original_path: Path of the original DOCX file to inspect. Defaults to
            the upload this script was originally written for.

    Prints one status line per non-blank segment, a preview of each cached
    translation, and the overall coverage. Returns early, right after the
    segment count, when the document has no non-blank segments.
    """
    app = create_app()

    with app.app_context():
        print("=== 檢查DOCX任務的具體翻譯對應 ===")

        # Extract the segments of the original document, skipping blank ones.
        parser = DocxParser(original_path)
        segments = parser.extract_segments_with_context()
        text_segments = [seg.text for seg in segments if seg.text.strip()]
        total_segments = len(text_segments)

        print(f"原始文檔有 {total_segments} 個文本段落")

        # Nothing to look up; bailing out here also keeps the percentage
        # computation below from dividing by zero.
        if total_segments == 0:
            return

        # Look up the cached translations of each segment.
        print("\n=== 檢查每個段落的翻譯狀況 ===")

        found_en = 0
        found_vi = 0

        for index, segment_text in enumerate(text_segments, start=1):
            en_row = _fetch_latest_translation(segment_text, 'en')
            vi_row = _fetch_latest_translation(segment_text, 'vi')

            # A segment counts as covered as soon as a cache row exists,
            # whatever that row contains.
            status = ""
            if en_row:
                found_en += 1
                status += "EN✅ "
            else:
                status += "EN❌ "

            if vi_row:
                found_vi += 1
                status += "VI✅ "
            else:
                status += "VI❌ "

            print(f"段落 {index:3d}: {status} {segment_text[:50]}...")

            # Show the translated content when there is any.
            _report_translation("EN", en_row, _looks_english)
            _report_translation("VI", vi_row, _looks_vietnamese)

        print("\n📊 統計結果:")
        print(f" 總段落數: {total_segments}")
        print(f" 有英文翻譯: {found_en} ({found_en/total_segments*100:.1f}%)")
        print(f" 有越南文翻譯: {found_vi} ({found_vi/total_segments*100:.1f}%)")

        # Less than half of the segments having an English translation is
        # treated as a sign that the translation pipeline is broken.
        if found_en < total_segments * 0.5:
            print(" ❌ 翻譯覆蓋率太低,可能是翻譯流程有問題")
        else:
            print(" ✅ 翻譯覆蓋率正常")


if __name__ == "__main__":
    # An optional first command-line argument overrides the default document.
    check_docx_specific_translations(*sys.argv[1:2])