Files
Document_Translator/test_excel_fix.py
2025-09-03 15:07:34 +08:00

120 lines
3.8 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Test the effect of the Excel translation fix.
"""
import sys
import os
# Put this script's own directory at the front of sys.path before the
# `app...` import below runs (presumably so the `app` package next to this
# script resolves regardless of the current working directory — confirm).
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
# Configure the output encoding: the script prints CJK text and emoji.
sys.stdout.reconfigure(encoding='utf-8')
from pathlib import Path
from app.services.translation_service import ExcelParser
def test_excel_translation_fix(original_file=None, target_language='ja'):
    """Print a diagnostic report for the Excel translation fix.

    The report has three parts:

    1. Calls ``parser._should_translate(text, 'auto')`` and
       ``parser._has_cjk(text)`` on a fixed list of sample strings and
       prints the results.
    2. Calls ``parser.extract_text_segments()``, reports how many segments
       came back, whether the A1 cell text is among them, and shows the
       first 10 segments.
    3. Inside a Flask application context, looks up each of the first 10
       segments in the ``dt_translation_cache`` table for
       ``target_language`` and reports which ones have a cached
       translation.

    Args:
        original_file: Path (``str`` or ``Path``) of the workbook to
            inspect. When ``None`` the original hard-coded upload path is
            used, so existing zero-argument callers behave as before.
        target_language: Language code used for the cache lookup.
            Defaults to ``'ja'`` (Japanese), the value that used to be
            hard-coded.

    Returns:
        None. All results are printed. If the workbook does not exist the
        function prints a message and returns before touching the parser
        or the database.
    """
    print("=" * 80)
    print("測試Excel翻譯修正效果")
    print("=" * 80)

    # File path: fall back to the original developer-machine location only
    # when the caller did not supply one.
    if original_file is None:
        excel_dir = Path(r"C:\Users\EGG\WORK\data\user_scrip\TOOL\Document_translator_V2\uploads\f0b78200-2c5e-41a4-bac8-1536f92529e9")
        original_file = excel_dir / "original_panjit_f0b78200.xlsx"
    else:
        original_file = Path(original_file)

    if not original_file.exists():
        print(f"原始文件不存在: {original_file}")
        return

    # Create the parser instance.
    parser = ExcelParser(str(original_file))

    print("\n1. 測試修正後的should_translate函數")
    print("-" * 60)

    # Key sample strings to probe.
    test_texts = [
        "製程",  # cell A1, previously left untranslated
        "主要特點",  # cell C1
        "優勢亮點",  # cell D1
        "AB",  # two Latin letters
        "123",  # digits only
        "工藝",  # two CJK characters
        "Epoxy 膠黏(導電/導熱銀膠)",  # cell B3
    ]
    for text in test_texts:
        should_translate = parser._should_translate(text, 'auto')
        has_cjk = parser._has_cjk(text)
        print(f"'{text}': should_translate={should_translate}, has_cjk={has_cjk}, len={len(text)}")

    print("\n2. 測試提取的文字片段")
    print("-" * 60)
    segments = parser.extract_text_segments()
    print(f"修正後提取到 {len(segments)} 個文字片段")

    # Check whether the A1 text made it into the extracted list.
    a1_content = "製程"
    if a1_content in segments:
        print(f"✅ A1內容 '{a1_content}' 已被包含在提取列表中")
    else:
        print(f"❌ A1內容 '{a1_content}' 仍未被包含在提取列表中")

    # Slice once; the same sample is displayed here and looked up below.
    sample = segments[:10]

    # Show the first 10 segments (repr makes whitespace/newlines visible).
    print("\n前10個提取片段:")
    for i, segment in enumerate(sample, start=1):
        print(f" {i:2d}. {segment!r}")

    print("\n3. 測試翻譯快取映射邏輯(模擬)")
    print("-" * 60)

    # Simulate the translation-mapping step. These imports stay local so
    # the application is only loaded once the workbook is known to exist.
    from app import create_app
    app = create_app()
    with app.app_context():
        from sqlalchemy import text as sql_text
        from app import db

        tmap = {}
        found_count = 0
        print(f"查詢翻譯快取中的 {target_language} 翻譯...")
        for original_text in sample:  # only the first 10 are checked
            # Bound parameters keep the segment text out of the SQL string.
            result = db.session.execute(sql_text("""
                SELECT translated_text
                FROM dt_translation_cache
                WHERE source_text = :text AND target_language = :lang
                ORDER BY created_at DESC
                LIMIT 1
            """), {'text': original_text, 'lang': target_language})
            row = result.fetchone()
            if row and row[0]:
                tmap[original_text] = row[0]
                print(f"'{original_text[:20]}...' -> '{row[0][:20]}...'")
                found_count += 1
            else:
                print(f"❌ 未找到翻譯: '{original_text[:30]}...'")

        print(f"\n翻譯映射結果: {found_count}/{len(sample)} 個片段找到翻譯")

        # Check A1 specifically. It can only be found if it was among the
        # sampled segments, because tmap is filled from that sample.
        if a1_content in tmap:
            print(f"✅ A1內容 '{a1_content}' 的翻譯: '{tmap[a1_content]}'")
        else:
            print(f"❌ A1內容 '{a1_content}' 沒有找到翻譯")

    print("\n" + "=" * 80)
    print("測試完成!")
    print("=" * 80)
# Run the diagnostic only when this file is executed as a script, not when
# it is imported.
if __name__ == "__main__":
    test_excel_translation_fix()