#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Manual check of the DOCX translation feature after the fix.

Run as a script. It generates English and Vietnamese translated copies of a
sample DOCX through ``DocxParser.generate_translated_document`` and prints a
short report to stdout.
"""

import os
import sys
import tempfile
from pathlib import Path

# Fix encoding for Windows console. The streams may have been replaced by
# objects that lack ``reconfigure`` (or be None), so probe for the method
# instead of calling it unconditionally.
for _stream in (sys.stdout, sys.stderr):
    _reconfigure = getattr(_stream, "reconfigure", None)
    if _reconfigure is not None and getattr(_stream, "encoding", None) != 'utf-8':
        _reconfigure(encoding='utf-8')

# Must run before the project imports below.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'app'))

from app import create_app, db
from app.services.translation_service import DocxParser

# Sample document used when no other path is supplied.
DEFAULT_ORIGINAL_PATH = r"C:\Users\EGG\WORK\data\user_scrip\TOOL\Document_translator_V2\uploads\9c6548ac-2f59-45f4-aade-0a9b3895bbfd\original_-OR026_9c6548ac.docx"


def _has_chinese(text: str) -> bool:
    """Return True if *text* contains a character in U+4E00..U+9FFF."""
    return any('\u4e00' <= ch <= '\u9fff' for ch in text)


def _has_english(text: str) -> bool:
    """Return True if *text* contains an ASCII letter."""
    return any(ord(ch) < 128 and ch.isalpha() for ch in text)


def _language_label(text: str) -> str:
    """Return the display label describing which scripts *text* contains."""
    chinese = _has_chinese(text)
    english = _has_english(text)
    if chinese and english:
        return "🔄 中英混合"
    if english:
        return "🇺🇸 純英文"
    if chinese:
        return "🇨🇳 純中文"
    return "❓ 未知"


def _report_language_mix(output_file: Path) -> None:
    """Print paragraph statistics and a verdict for the generated DOCX.

    Any failure while opening or reading the document is reported on stdout
    rather than raised, so the remaining checks still run.
    """
    try:
        # Imported lazily so a missing python-docx is reported as a read
        # failure instead of aborting the whole script at import time.
        from docx import Document

        doc = Document(str(output_file))
        paragraphs = [p.text.strip() for p in doc.paragraphs if p.text.strip()]
        print(f"總段落數: {len(paragraphs)}")

        # A paragraph containing both scripts is counted in both totals.
        chinese_count = sum(1 for para in paragraphs if _has_chinese(para))
        english_count = sum(1 for para in paragraphs if _has_english(para))
        print(f"含中文段落: {chinese_count}")
        print(f"含英文段落: {english_count}")

        # Show the first five non-empty paragraphs, truncated to 80 chars.
        print("\n📄 前5段落範例:")
        for index, para in enumerate(paragraphs[:5], start=1):
            print(f" 段落 {index}: {_language_label(para)} - {para[:80]}...")

        # Verdict: compare how many paragraphs contain each script.
        if english_count > chinese_count:
            print("\n✅ 翻譯效果良好 - 英文段落多於中文段落")
        elif english_count > 0:
            print("\n⚠️ 翻譯部分成功 - 有英文內容但仍有很多中文")
        else:
            print("\n❌ 翻譯失敗 - 沒有英文內容")
    except Exception as e:
        print(f"❌ 讀取生成文件失敗: {e}")


def _generate_and_check(parser, translations, lang_code, lang_name, output_dir,
                        analyze=False):
    """Generate one translated document and print what was produced.

    Args:
        parser: Object exposing ``generate_translated_document``.
        translations: Mapping passed through to the parser unchanged.
        lang_code: Target language code handed to the parser (e.g. ``'en'``).
        lang_name: Language name embedded in the printed messages.
        output_dir: Directory handed to the parser for its output.
        analyze: When true, also print the paragraph language statistics.
    """
    print(f"\n🔄 測試{lang_name}翻譯生成...")
    # Broad catch is deliberate: this is a diagnostic script, and a failure
    # for one language should be reported without stopping the next one.
    try:
        output_path = parser.generate_translated_document(
            translations, lang_code, output_dir
        )
        print(f"✅ {lang_name}翻譯文件生成成功: {output_path}")

        output_file = Path(output_path)
        if not output_file.exists():
            print("❌ 生成的文件不存在")
            return
        print(f"文件大小: {output_file.stat().st_size:,} bytes")

        if analyze:
            _report_language_mix(output_file)
    except Exception as e:
        print(f"❌ {lang_name}翻譯生成失敗: {e}")


def test_fixed_docx_translation(original_path: str = DEFAULT_ORIGINAL_PATH) -> None:
    """Exercise DOCX translation output generation and print a report.

    Args:
        original_path: Path of the source DOCX. Defaults to the sample
            document this script was written against.

    Returns:
        None. Results are printed; if the source file is missing, a message
        is printed and the function returns early.
    """
    app = create_app()

    with app.app_context():
        print("=== 測試修復後的DOCX翻譯功能 ===")

        # Use an existing DOCX file for the check.
        if not Path(original_path).exists():
            print(f"原始文件不存在: {original_path}")
            return

        print(f"使用原始文件: {original_path}")

        parser = DocxParser(original_path)

        # Output goes to a fixed sub-directory of the system temp directory.
        output_dir = Path(tempfile.gettempdir()) / "test_docx_translation"
        output_dir.mkdir(exist_ok=True)
        print(f"輸出目錄: {output_dir}")

        # Per the original author's note, an empty translations dict is
        # passed because translations are now read from the cache. It is
        # bound here so both language runs share it independently.
        empty_translations = {}

        # English: generate, then inspect the language mix of the result.
        _generate_and_check(
            parser, empty_translations, 'en', "英文", output_dir, analyze=True
        )

        # Vietnamese: generate and report the file size only.
        _generate_and_check(parser, empty_translations, 'vi', "越南文", output_dir)

        print("\n🏁 測試完成")


if __name__ == "__main__":
    # An optional first CLI argument overrides the default sample document.
    if len(sys.argv) > 1:
        test_fixed_docx_translation(sys.argv[1])
    else:
        test_fixed_docx_translation()