Files
Document_Translator/test_prioritized_mapping.py
2025-09-03 15:07:34 +08:00

150 lines
5.3 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Exercise the optimized translation-mapping lookup, which is expected to
prefer the original DIFY translation.
"""
import sys
import os
# Put this script's own directory at the front of sys.path (presumably so the
# `app` package imported below resolves from here -- confirm).
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
# Force UTF-8 on stdout: this script prints CJK text and emoji.
sys.stdout.reconfigure(encoding='utf-8')
from pathlib import Path
from app import create_app
# Substrings used to recognise where a cached Korean translation came from.
_DIFY_MARKER = "와이어 본딩"
_MANUAL_MARKER = "연결"

# Cache row ids that get an explicit label in the comparison listing.
_DIFY_ROW_ID = 449
_MANUAL_ROW_ID = 514


def _cache_lookup_sql(columns, limit=None):
    """Build the exact-match + normalized-match UNION query over the cache.

    Args:
        columns: Comma-separated column list selected by both halves of the
            UNION; ``match_type`` is always appended as the last column.
        limit: Optional row limit applied after the ``ORDER BY``.

    Returns:
        The SQL text, expecting the bind parameters ``:exact_text``,
        ``:norm_text`` and ``:lang``.
    """
    limit_clause = f"LIMIT {int(limit)}" if limit is not None else ""
    # The '\n' and '\r' below are *not* in a raw string, so Python turns them
    # into real LF / CR characters inside the SQL string literals.
    # NOTE(review): the Python side normalizes with replace-then-strip while
    # this SQL does TRIM-then-REPLACE, so a source_text that starts or ends
    # with a line break may not compare equal. Left unchanged because this
    # script presumably mirrors the production query -- confirm.
    return f"""
        SELECT {columns}, 'exact' as match_type
        FROM dt_translation_cache
        WHERE source_text = :exact_text AND target_language = :lang
        UNION ALL
        SELECT {columns}, 'normalized' as match_type
        FROM dt_translation_cache
        WHERE REPLACE(REPLACE(TRIM(source_text), '\n', ' '), '\r', ' ') = :norm_text
        AND target_language = :lang
        AND source_text != :exact_text
        ORDER BY created_at ASC
        {limit_clause}
    """


def _classify_translation(translated_text):
    """Return ``(message, is_original)`` for a cached translation string.

    ``is_original`` is True only when the text contains the DIFY marker.
    """
    if _DIFY_MARKER in translated_text:
        return " 🎯 這是原始DIFY翻譯(特徵: 와이어 본딩)", True
    if _MANUAL_MARKER in translated_text:
        return " ✋ 這是手動補充翻譯 (特徵: 연결)", False
    return " ❓ 無法判斷翻譯來源", False


def test_prioritized_mapping():
    """Check that the cache lookup prefers the original DIFY translation.

    Steps, each reported on stdout:
      1. Extract text segments from the sample workbook and pick the first
         one containing ``"WB inline"``.
      2. Run the exact/normalized UNION lookup limited to the oldest row and
         classify the returned translation by its marker substring.
      3. List every candidate row from the same lookup for comparison.
      4. Print the overall verdict.

    Returns early (after printing a message) when the sample workbook is
    missing or no matching segment is found. Nothing is returned or asserted.
    """
    print("=" * 80)
    print("測試優化後的翻譯映射邏輯")
    print("預期: 應該優先使用原始DIFY翻譯 (ROW 449)")
    print("=" * 80)

    app = create_app()
    with app.app_context():
        from sqlalchemy import text as sql_text
        from app import db
        from app.services.translation_service import ExcelParser

        # Step 1: fetch the D2 text as extracted from the Excel workbook.
        original_file = Path(r"C:\Users\EGG\WORK\data\user_scrip\TOOL\Document_translator_V2\uploads\98158984-f335-44f5-a0b4-88fb8ccd5d78") / "original_panjit_98158984.xlsx"
        if not original_file.exists():
            print("❌ 測試檔案不存在")
            return

        parser = ExcelParser(str(original_file))
        segments = parser.extract_text_segments()
        # First segment mentioning "WB inline", or None when there is none.
        d2_extracted = next(
            (segment for segment in segments if "WB inline" in segment),
            None,
        )
        if not d2_extracted:
            print("❌ 沒有找到D2相關內容")
            return

        print("1. Excel提取的D2文字:")
        print(f" {repr(d2_extracted)}")

        # Step 2: run the combined (exact + normalized) lookup.
        print("\n2. 測試新的聯合查詢邏輯")
        print("-" * 60)
        target_language = 'ko'
        normalized_text = d2_extracted.replace('\n', ' ').replace('\r', ' ').strip()
        print(f"標準化文字: {repr(normalized_text)}")

        # Both queries below share the same bind parameters.
        params = {
            'exact_text': d2_extracted,
            'norm_text': normalized_text,
            'lang': target_language,
        }

        result = db.session.execute(
            sql_text(_cache_lookup_sql("translated_text, created_at", limit=1)),
            params,
        )
        row = result.fetchone()
        if row is None:
            print("❌ 聯合查詢沒有找到任何翻譯")
            success = False
        else:
            # A NULL translated_text would otherwise break slicing / `in`.
            translated = row[0] or ""
            print("✅ 聯合查詢找到翻譯:")
            print(f" 翻譯內容: {repr(translated[:50])}...")
            print(f" 創建時間: {row[1]}")
            print(f" 匹配類型: {row[2]}")
            # Decide whether this is the original DIFY or the manual translation.
            message, success = _classify_translation(translated)
            print(message)

        # Step 3: list every candidate row so the choice above can be compared.
        print("\n3. 查看所有相關的翻譯記錄 (用於對比)")
        print("-" * 60)
        all_result = db.session.execute(
            sql_text(_cache_lookup_sql("id, translated_text, created_at")),
            params,
        )
        all_rows = all_result.fetchall()
        for i, (row_id, trans, created_at, match_type) in enumerate(all_rows, 1):
            print(f"選項{i}: ROW {row_id} ({match_type}匹配, {created_at})")
            print(f" 翻譯: {repr((trans or '')[:40])}...")
            if row_id == _DIFY_ROW_ID:
                print(" 🎯 這是原始DIFY翻譯")
            elif row_id == _MANUAL_ROW_ID:
                print(" ✋ 這是手動補充翻譯")

        # Step 4: overall verdict.
        print("\n4. 結果評估")
        print("-" * 60)
        if success:
            print("🎉 成功新邏輯正確地優先選擇了原始DIFY翻譯")
            print(" 現在重新生成韓文Excel檔案應該會使用原始翻譯")
        else:
            print("⚠️ 邏輯需要進一步調整")
            print(" 可能需要檢查SQL查詢或排序邏輯")

    print("\n" + "=" * 80)
    print("優化後映射邏輯測試完成!")
    print("=" * 80)
# Allow running this check directly as a script.
if __name__ == "__main__":
    test_prioritized_mapping()