Files
Document_Translator/test_fixed_mapping_logic.py
2025-09-03 15:07:34 +08:00

166 lines
6.0 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Test the translation-mapping logic after the fix.
"""
import sys
import os
# Put this script's own directory first on the import path so that the
# `app` import below resolves relative to where the script lives.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
# Set the output encoding: the script prints CJK text and emoji, so stdout is
# switched to UTF-8 (presumably for consoles whose default encoding cannot
# represent them -- confirm the target environment).
sys.stdout.reconfigure(encoding='utf-8')
from pathlib import Path
from app import create_app
# Cache row id that the normalized lookup is expected to return (the original
# DIFY translation). The printed messages below hard-code the same number.
_EXPECTED_DIFY_ROW_ID = 449

# Exact lookup: newest cache row whose source text equals the extracted text.
_EXACT_MATCH_SQL = """
    SELECT id, translated_text, created_at
    FROM dt_translation_cache
    WHERE source_text = :text AND target_language = :lang
    ORDER BY created_at DESC
    LIMIT 1
"""

# Normalized lookup: oldest cache row whose source text matches once line
# breaks are replaced by spaces. This is deliberately a non-raw string: Python
# expands the escapes, so the SQL literals contain real LF / CR characters.
# NOTE(review): TRIM runs *before* the REPLACE calls here, while
# _normalize_source_text() strips *after* replacing, so cached text that
# starts or ends with a line break may not compare equal. Left unchanged
# because this script presumably mirrors the service's lookup -- confirm
# against the production query before reordering.
_NORMALIZED_MATCH_SQL = """
    SELECT id, translated_text, created_at
    FROM dt_translation_cache
    WHERE REPLACE(REPLACE(TRIM(source_text), '\n', ' '), '\r', ' ') = :text
    AND target_language = :lang
    ORDER BY created_at ASC
    LIMIT 1
"""


def _normalize_source_text(text):
    """Replace line feeds and carriage returns with spaces, then strip the ends."""
    return text.replace('\n', ' ').replace('\r', ' ').strip()


def _preview(text, limit):
    """Return the repr of the first ``limit`` characters; a None value previews as ''."""
    return repr((text or "")[:limit])


def _fetch_cache_row(session, statement, source_text, target_language):
    """Run one cache lookup and return its first row (id, translated_text, created_at) or None."""
    params = {'text': source_text, 'lang': target_language}
    return session.execute(statement, params).fetchone()


def test_fixed_mapping_logic():
    """Exercise the repaired translation-cache lookup against a real upload.

    Extracts the text segments of a fixed Excel file, picks the first segment
    containing "WB inline", and looks it up in ``dt_translation_cache`` for
    Korean ('ko'): first by exact match, then by a line-break-normalized
    match. It then replays the exact-then-normalized fallback, classifies the
    resulting translation by marker phrases, and prints a verdict.

    The function only prints; it returns None and makes no assertions. It
    returns early when the Excel file is missing or no matching segment exists.
    """
    banner = "=" * 80
    rule = "-" * 60

    print(banner)
    print("測試修復後的翻譯映射邏輯")
    print("預期結果: 應該找到原始DIFY翻譯 (ROW 449)")
    print(banner)

    app = create_app()
    with app.app_context():
        # Imported inside the app context, as in the original script.
        from sqlalchemy import text as sql_text
        from app import db
        from app.services.translation_service import ExcelParser

        # 1. Get the D2 text as extracted from the Excel file.
        original_file = Path(r"C:\Users\EGG\WORK\data\user_scrip\TOOL\Document_translator_V2\uploads\98158984-f335-44f5-a0b4-88fb8ccd5d78") / "original_panjit_98158984.xlsx"
        if not original_file.exists():
            print("❌ 測試檔案不存在")
            return

        parser = ExcelParser(str(original_file))
        segments = parser.extract_text_segments()
        # First segment mentioning the marker phrase, or None when absent.
        d2_extracted = next((seg for seg in segments if "WB inline" in seg), None)
        if not d2_extracted:
            print("❌ 沒有找到D2相關內容")
            return

        print("1. Excel提取的D2文字:")
        print(f" {d2_extracted!r}")

        # 2. Test the repaired query logic.
        print("\n2. 測試修復後的查詢邏輯")
        print(rule)
        target_language = 'ko'
        exact_stmt = sql_text(_EXACT_MATCH_SQL)
        normalized_stmt = sql_text(_NORMALIZED_MATCH_SQL)

        # Exact match (expected to find ROW 514).
        print("步驟1: 精確匹配查詢")
        row1 = _fetch_cache_row(db.session, exact_stmt, d2_extracted, target_language)
        if row1:
            print(f" ✅ 精確匹配找到: ROW {row1[0]} (時間: {row1[2]})")
            # _preview tolerates a NULL translated_text instead of raising.
            print(f" 翻譯: {_preview(row1[1], 40)}...")
        else:
            print(" ❌ 精確匹配失敗")

        # Normalized match (expected to find ROW 449).
        print("\n步驟2: 標準化匹配查詢")
        normalized_text = _normalize_source_text(d2_extracted)
        print(f" 標準化文字: {normalized_text!r}")
        row2 = _fetch_cache_row(db.session, normalized_stmt, normalized_text, target_language)
        if row2:
            print(f" ✅ 標準化匹配找到: ROW {row2[0]} (時間: {row2[2]})")
            print(f" 翻譯: {_preview(row2[1], 40)}...")
            if row2[0] == _EXPECTED_DIFY_ROW_ID:
                print(" 🎯 太好了找到原始DIFY翻譯 (ROW 449)")
            else:
                print(" ⚠️ 不是原始DIFY翻譯")
        else:
            print(" ❌ 標準化匹配也失敗")

        # 3. Simulate the complete mapping logic: exact match first, and only
        #    when that finds nothing fall back to the normalized match.
        print("\n3. 模擬完整映射邏輯")
        print(rule)
        row = _fetch_cache_row(db.session, exact_stmt, d2_extracted, target_language)
        if not row:
            row = _fetch_cache_row(db.session, normalized_stmt, normalized_text, target_language)
            print(" 使用標準化匹配")
        else:
            print(" 使用精確匹配")

        translation = row[1] if row else None
        if translation:
            print(f" ✅ 最終找到翻譯: {translation[:50]!r}...")
            # Classify the translation source by the marker phrases it contains.
            if "와이어 본딩" in translation or "처리 속도" in translation:
                print(" 🎯 這是原始DIFY翻譯")
                print(" 特徵: 包含 '와이어 본딩''처리 속도'")
            elif "연결" in translation and "단축" in translation:
                print(" ✋ 這是手動補充翻譯")
                print(" 特徵: 包含 '연결''단축'")
            else:
                print(" ❓ 無法判斷翻譯來源")
        else:
            print(" ❌ 最終也沒找到翻譯")

        # 4. Suggested next step, based on the step-2 normalized lookup.
        print("\n4. 建議下一步")
        print(rule)
        if row2 and row2[0] == _EXPECTED_DIFY_ROW_ID:
            print("✅ 修復成功系統現在能找到原始DIFY翻譯")
            print(" 建議: 重新生成韓文翻譯檔案應該會使用原始DIFY翻譯")
        else:
            print("⚠️ 修復不完全,還需要進一步調整")
            print(" 可能需要檢查SQL語法或邏輯")

        print("\n" + banner)
        print("修復後映射邏輯測試完成!")
        print(banner)
if __name__ == "__main__":
test_fixed_mapping_logic()