Document_Translator/test_prioritized_mapping.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Test the optimized translation-mapping logic: the original DIFY translation
should be preferred over later cache entries.
"""

import sys
import os
# Put this file's directory first on sys.path; presumably so the `app`
# package imported below resolves from here regardless of the current
# working directory.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

# Force UTF-8 on stdout so the non-ASCII text printed by this script
# (CJK characters and emoji) is encoded as UTF-8.
sys.stdout.reconfigure(encoding='utf-8')

from pathlib import Path
from app import create_app

# Substring that identifies the worksheet text (cell D2) under test.
_D2_MARKER = "WB inline"

# Korean substrings used to tell the two known cached translations apart.
_DIFY_FEATURE = "와이어 본딩"
_MANUAL_FEATURE = "연결"

# Cache row ids that get an explicit label in the comparison listing.
_DIFY_ROW_ID = 449
_MANUAL_ROW_ID = 514

# Single source of truth for the exact/normalized cache lookup, so the
# "pick one" query and the "list all candidates" query cannot drift apart.
# This is a non-raw literal on purpose: Python turns '\n' and '\r' into the
# real control characters before the statement is handed to the database.
_CACHE_LOOKUP_SQL = """
    SELECT {columns}, 'exact' as match_type
    FROM dt_translation_cache
    WHERE source_text = :exact_text AND target_language = :lang

    UNION ALL

    SELECT {columns}, 'normalized' as match_type
    FROM dt_translation_cache
    WHERE REPLACE(REPLACE(TRIM(source_text), '\n', ' '), '\r', ' ') = :norm_text
    AND target_language = :lang
    AND source_text != :exact_text

    ORDER BY created_at ASC
    {limit}
"""


def _build_cache_lookup_sql(columns, first_only):
    """Return the UNION lookup selecting *columns*, optionally with LIMIT 1."""
    return _CACHE_LOOKUP_SQL.format(
        columns=columns,
        limit="LIMIT 1" if first_only else "",
    )


def _classify_translation(translated_text):
    """Return ``(message, is_dify)`` based on marker substrings in the text."""
    if _DIFY_FEATURE in translated_text:
        return "   🎯 這是原始DIFY翻譯！(特徵: 와이어 본딩)", True
    if _MANUAL_FEATURE in translated_text:
        return "   ✋ 這是手動補充翻譯 (特徵: 연결)", False
    return "   ❓ 無法判斷翻譯來源", False


def test_prioritized_mapping():
    """Check that the cache lookup prefers the original DIFY translation.

    Steps, each reported on stdout:

    1. Extract the text segment containing ``"WB inline"`` from a fixed
       workbook on disk.
    2. Run the exact/normalized UNION lookup (oldest row first, LIMIT 1)
       for target language ``'ko'`` and classify the returned translation
       by marker substrings.
    3. List every candidate row of the same lookup for comparison.
    4. Print the overall verdict.

    The function only prints; it returns ``None`` in every case, including
    the early exits taken when the workbook or the segment is missing.
    """
    print("=" * 80)
    print("測試優化後的翻譯映射邏輯")
    print("預期: 應該優先使用原始DIFY翻譯 (ROW 449)")
    print("=" * 80)

    app = create_app()

    with app.app_context():
        # Imported here, as in the original script, so they are resolved
        # only once the application has been created.
        from sqlalchemy import text as sql_text
        from app import db
        from app.services.translation_service import ExcelParser

        # 1. Extract the D2 text from the workbook.
        original_file = Path(r"C:\Users\EGG\WORK\data\user_scrip\TOOL\Document_translator_V2\uploads\98158984-f335-44f5-a0b4-88fb8ccd5d78") / "original_panjit_98158984.xlsx"

        if not original_file.exists():
            print("❌ 測試檔案不存在")
            return

        segments = ExcelParser(str(original_file)).extract_text_segments()
        d2_extracted = next(
            (segment for segment in segments if _D2_MARKER in segment),
            None,
        )

        if not d2_extracted:
            print("❌ 沒有找到D2相關內容")
            return

        print("1. Excel提取的D2文字:")
        print(f"   {d2_extracted!r}")

        # 2. Exercise the combined exact/normalized lookup.
        print("\n2. 測試新的聯合查詢邏輯")
        print("-" * 60)

        target_language = 'ko'
        # NOTE(review): Python replaces line breaks and then strips all
        # surrounding whitespace, while the SQL side TRIMs first and replaces
        # afterwards. A cached source_text ending in a line break would
        # normalize to a trailing space there and not match. Left as-is
        # because this script presumably mirrors the production lookup —
        # confirm against translation_service.
        normalized_text = d2_extracted.replace('\n', ' ').replace('\r', ' ').strip()

        print(f"標準化文字: {normalized_text!r}")

        params = {
            'exact_text': d2_extracted,
            'norm_text': normalized_text,
            'lang': target_language,
        }

        row = db.session.execute(
            sql_text(_build_cache_lookup_sql("translated_text, created_at", first_only=True)),
            params,
        ).fetchone()

        if row is not None:
            # A NULL translated_text would otherwise break slicing and the
            # substring checks below.
            translated = row[0] or ""
            print("✅ 聯合查詢找到翻譯:")
            print(f"   翻譯內容: {translated[:50]!r}...")
            print(f"   創建時間: {row[1]}")
            print(f"   匹配類型: {row[2]}")

            # Decide whether this is the original DIFY or the manual entry.
            message, success = _classify_translation(translated)
            print(message)
        else:
            print("❌ 聯合查詢沒有找到任何翻譯")
            success = False

        # 3. List every candidate row of the same lookup for comparison.
        print("\n3. 查看所有相關的翻譯記錄 (用於對比)")
        print("-" * 60)

        all_rows = db.session.execute(
            sql_text(_build_cache_lookup_sql("id, translated_text, created_at", first_only=False)),
            params,
        ).fetchall()

        for i, (row_id, trans, created_at, match_type) in enumerate(all_rows, 1):
            preview = (trans or "")[:40]
            print(f"選項{i}: ROW {row_id} ({match_type}匹配, {created_at})")
            print(f"   翻譯: {preview!r}...")

            if row_id == _DIFY_ROW_ID:
                print("   🎯 這是原始DIFY翻譯")
            elif row_id == _MANUAL_ROW_ID:
                print("   ✋ 這是手動補充翻譯")

        # 4. Overall verdict.
        print("\n4. 結果評估")
        print("-" * 60)

        if success:
            print("🎉 成功！新邏輯正確地優先選擇了原始DIFY翻譯")
            print("   現在重新生成韓文Excel檔案應該會使用原始翻譯")
        else:
            print("⚠️  邏輯需要進一步調整")
            print("   可能需要檢查SQL查詢或排序邏輯")

    print("\n" + "=" * 80)
    print("優化後映射邏輯測試完成！")
    print("=" * 80)

# Run the check when this file is executed directly rather than imported.
if __name__ == "__main__":
    test_prioritized_mapping()