120 lines
3.8 KiB
Python
120 lines
3.8 KiB
Python
#!/usr/bin/env python3
|
||
# -*- coding: utf-8 -*-
|
||
"""
|
||
Test the effect of the Excel translation fix.
|
||
"""
|
||
|
||
import sys
|
||
import os
|
||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||
|
||
# Set the stdout encoding to UTF-8
|
||
sys.stdout.reconfigure(encoding='utf-8')
|
||
|
||
from pathlib import Path
|
||
from app.services.translation_service import ExcelParser
|
||
|
||
def test_excel_translation_fix(original_file=None):
    """Run a print-based diagnostic of the Excel translation fix.

    The diagnostic has three stages, each reported on stdout:

    1. Call ``ExcelParser._should_translate`` and ``ExcelParser._has_cjk``
       on a fixed list of sample strings and print the results.
    2. Call ``ExcelParser.extract_text_segments`` and report whether the
       sample A1 text is among the extracted segments, then list the
       first few segments.
    3. Inside a Flask application context, look up the newest cached
       Japanese translation of each of those first segments in the
       ``dt_translation_cache`` table and print what was found.

    Args:
        original_file: Optional path (``str`` or ``pathlib.Path``) of the
            workbook to inspect. When omitted, the developer's original
            upload location is used, which keeps the zero-argument call
            working exactly as before.

    Returns:
        None. If the workbook does not exist, a message is printed and
        the function returns before the parser or the app is touched.
    """
    print("=" * 80)
    print("測試Excel翻譯修正效果")
    print("=" * 80)

    # Resolve the workbook path; fall back to the original fixed location.
    if original_file is None:
        excel_dir = Path(r"C:\Users\EGG\WORK\data\user_scrip\TOOL\Document_translator_V2\uploads\f0b78200-2c5e-41a4-bac8-1536f92529e9")
        original_file = excel_dir / "original_panjit_f0b78200.xlsx"
    else:
        original_file = Path(original_file)

    if not original_file.exists():
        print(f"原始文件不存在: {original_file}")
        return

    # Number of leading segments that stages 2 and 3 inspect. The headings
    # printed below mention "10" literally, so keep the two in sync.
    sample_size = 10

    # Create the parser instance.
    parser = ExcelParser(str(original_file))

    print("\n1. 測試修正後的should_translate函數")
    print("-" * 60)

    # Key sample strings to probe.
    test_texts = [
        "製程",  # cell A1, previously left untranslated
        "主要特點",  # cell C1
        "優勢亮點",  # cell D1
        "AB",  # two ASCII letters
        "123",  # digits only
        "工藝",  # two Chinese characters
        "Epoxy 膠黏(導電/導熱銀膠)",  # cell B3
    ]

    for text in test_texts:
        should_translate = parser._should_translate(text, 'auto')
        has_cjk = parser._has_cjk(text)
        print(f"'{text}': should_translate={should_translate}, has_cjk={has_cjk}, len={len(text)}")

    print("\n2. 測試提取的文字片段")
    print("-" * 60)

    segments = parser.extract_text_segments()
    print(f"修正後提取到 {len(segments)} 個文字片段")

    # Check whether the A1 text was extracted.
    a1_content = "製程"
    if a1_content in segments:
        print(f"✅ A1內容 '{a1_content}' 已被包含在提取列表中")
    else:
        print(f"❌ A1內容 '{a1_content}' 仍未被包含在提取列表中")

    # Show the first few segments; repr() keeps control characters visible.
    sample = segments[:sample_size]
    print("\n前10個提取片段:")
    for position, segment in enumerate(sample, start=1):
        safe_segment = repr(segment)
        print(f" {position:2d}. {safe_segment}")

    print("\n3. 測試翻譯快取映射邏輯(模擬)")
    print("-" * 60)

    # Simulate the translation-mapping step. The app is imported here, not
    # at module level, so it is only loaded once the workbook was found.
    from app import create_app
    app = create_app()

    with app.app_context():
        from sqlalchemy import text as sql_text
        from app import db

        target_language = 'ja'  # Japanese
        tmap = {}
        found_count = 0

        print(f"查詢翻譯快取中的 {target_language} 翻譯...")

        # The statement is loop-invariant: build it once and bind the
        # parameters for each lookup.
        lookup = sql_text("""
            SELECT translated_text
            FROM dt_translation_cache
            WHERE source_text = :text AND target_language = :lang
            ORDER BY created_at DESC
            LIMIT 1
        """)

        for original_text in sample:  # only the first few segments
            result = db.session.execute(
                lookup, {'text': original_text, 'lang': target_language}
            )
            row = result.fetchone()
            if row and row[0]:
                tmap[original_text] = row[0]
                print(f"✅ '{original_text[:20]}...' -> '{row[0][:20]}...'")
                found_count += 1
            else:
                print(f"❌ 未找到翻譯: '{original_text[:30]}...'")

        # len(sample) equals min(sample_size, len(segments)).
        print(f"\n翻譯映射結果: {found_count}/{len(sample)} 個片段找到翻譯")

        # Check A1 specifically.
        if a1_content in tmap:
            print(f"✅ A1內容 '{a1_content}' 的翻譯: '{tmap[a1_content]}'")
        else:
            print(f"❌ A1內容 '{a1_content}' 沒有找到翻譯")

    print("\n" + "=" * 80)
    print("測試完成!")
    print("=" * 80)
|
||
|
||
if __name__ == "__main__":
    # Executed as a script: run the diagnostic against the default workbook.
    test_excel_translation_fix()