Files
Document_Translator/test_fixed_docx_translation.py
2025-09-03 09:05:51 +08:00

150 lines
5.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
測試修復後的DOCX翻譯功能
"""
import sys
import os
from pathlib import Path
# Fix encoding for Windows console: switch stdout/stderr to UTF-8 whenever
# they report a different encoding, so the non-ASCII output below can be printed.
for _stream in (sys.stdout, sys.stderr):
    # A stream may be None or may have been replaced by an object that has no
    # ``reconfigure`` method or no ``encoding``; skip it instead of raising
    # AttributeError while the module is being imported.
    _reconfigure = getattr(_stream, 'reconfigure', None)
    _encoding = getattr(_stream, 'encoding', None) or ''
    if _reconfigure is not None and _encoding.lower() != 'utf-8':
        _reconfigure(encoding='utf-8')
# Put the ``app`` directory that sits next to this script at the front of the
# module search path.
# NOTE(review): the project imports in this file use the ``app.`` package
# prefix, which presumably resolves from the script's own directory rather
# than from this entry - confirm whether this insertion is still required.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'app'))
from app import create_app, db
from app.services.translation_service import DocxParser
import tempfile
def _contains_chinese(text):
    """Return True if *text* has any character in the range U+4E00..U+9FFF."""
    return any('\u4e00' <= ch <= '\u9fff' for ch in text)


def _contains_ascii_letter(text):
    """Return True if *text* has any alphabetic character below code point 128."""
    return any(ord(ch) < 128 and ch.isalpha() for ch in text)


def _language_label(text):
    """Return the display label describing which scripts occur in *text*."""
    has_chinese = _contains_chinese(text)
    has_english = _contains_ascii_letter(text)
    if has_chinese and has_english:
        return "🔄 中英混合"
    if has_english:
        return "🇺🇸 純英文"
    if has_chinese:
        return "🇨🇳 純中文"
    return "❓ 未知"


def _report_document_languages(docx_path):
    """Print paragraph statistics and a language verdict for a DOCX file.

    Opens *docx_path* with python-docx, counts the non-empty paragraphs that
    contain Chinese characters and those that contain ASCII letters, prints
    the first five paragraphs with a language label, and finally prints a
    verdict based on comparing the two counts.

    Any exception raised while opening or reading the document propagates to
    the caller.
    """
    # Imported lazily so python-docx is only needed once a file was generated.
    from docx import Document

    doc = Document(str(docx_path))
    stripped = (p.text.strip() for p in doc.paragraphs)
    paragraphs = [text for text in stripped if text]
    print(f"總段落數: {len(paragraphs)}")

    # A paragraph containing both scripts is counted in both totals.
    chinese_count = sum(1 for para in paragraphs if _contains_chinese(para))
    english_count = sum(1 for para in paragraphs if _contains_ascii_letter(para))
    print(f"含中文段落: {chinese_count}")
    print(f"含英文段落: {english_count}")

    # Show a few sample paragraphs (at most 80 characters of each).
    print("\n📄 前5段落範例:")
    for number, para in enumerate(paragraphs[:5], start=1):
        print(f" 段落 {number}: {_language_label(para)} - {para[:80]}...")

    # Judge the translation result from the two paragraph counts.
    if english_count > chinese_count:
        print("\n✅ 翻譯效果良好 - 英文段落多於中文段落")
    elif english_count > 0:
        print("\n⚠️ 翻譯部分成功 - 有英文內容但仍有很多中文")
    else:
        print("\n❌ 翻譯失敗 - 沒有英文內容")


def test_fixed_docx_translation():
    """Exercise DOCX translated-document generation and print diagnostics.

    Inside an application context created by ``create_app()``, this builds a
    ``DocxParser`` for a fixed, existing DOCX file and asks it to generate an
    English ('en') and a Vietnamese ('vi') document in a directory under the
    system temp dir. For the English result the generated file is re-opened
    and its paragraphs are analysed; for the Vietnamese result only the file
    size is reported.

    The function returns ``None``. If the source file is missing it prints a
    message and returns early.

    NOTE(review): every failure is only printed, never raised or asserted, so
    when collected by a test runner this function cannot fail - confirm that
    this diagnostic-only behaviour is intended.
    """
    app = create_app()
    with app.app_context():
        print("=== 測試修復後的DOCX翻譯功能 ===")

        # Test against an existing DOCX file (machine-specific absolute path).
        original_path = r"C:\Users\EGG\WORK\data\user_scrip\TOOL\Document_translator_V2\uploads\9c6548ac-2f59-45f4-aade-0a9b3895bbfd\original_-OR026_9c6548ac.docx"
        if not Path(original_path).exists():
            print(f"原始文件不存在: {original_path}")
            return
        print(f"使用原始文件: {original_path}")

        # Create the parser.
        parser = DocxParser(original_path)

        # Output directory for the generated documents.
        output_dir = Path(tempfile.gettempdir()) / "test_docx_translation"
        output_dir.mkdir(exist_ok=True)
        print(f"輸出目錄: {output_dir}")

        # An empty translations dict is passed because, per the original
        # author's note, translations are now read from the cache. It is
        # defined before both steps so the Vietnamese step does not depend on
        # a name bound inside the English step's ``try`` block.
        empty_translations = {}

        # --- English translation generation ---
        print("\n🔄 測試英文翻譯生成...")
        try:
            en_output_path = parser.generate_translated_document(
                empty_translations,
                'en',
                output_dir,
            )
            print(f"✅ 英文翻譯文件生成成功: {en_output_path}")

            # Inspect the generated file.
            output_file = Path(en_output_path)
            if output_file.exists():
                print(f"文件大小: {output_file.stat().st_size:,} bytes")
                # Check the file content; a read failure is reported on its
                # own so it is not mistaken for a generation failure.
                try:
                    _report_document_languages(output_file)
                except Exception as e:
                    print(f"❌ 讀取生成文件失敗: {e}")
            else:
                print("❌ 生成的文件不存在")
        except Exception as e:
            # Deliberately best-effort: report the error and continue so the
            # Vietnamese step below still runs.
            print(f"❌ 英文翻譯生成失敗: {e}")

        # --- Vietnamese translation generation ---
        print("\n🔄 測試越南文翻譯生成...")
        try:
            vi_output_path = parser.generate_translated_document(
                empty_translations,
                'vi',
                output_dir,
            )
            print(f"✅ 越南文翻譯文件生成成功: {vi_output_path}")

            # Only the size of the generated file is checked here.
            output_file = Path(vi_output_path)
            if output_file.exists():
                print(f"文件大小: {output_file.stat().st_size:,} bytes")
            else:
                print("❌ 生成的文件不存在")
        except Exception as e:
            print(f"❌ 越南文翻譯生成失敗: {e}")

        print("\n🏁 測試完成")
if __name__ == "__main__":
    # Executed directly as a script: run the DOCX translation check once.
    test_fixed_docx_translation()