5th_fix excel problem

2025-09-03 15:07:34 +08:00
parent cce3fd4925
commit 5fd0671b4f
28 changed files with 4484 additions and 97 deletions
--- a/test_prioritized_mapping.py
+++ b/test_prioritized_mapping.py
@@ -0,0 +1,150 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Test the optimized translation-mapping logic - prefer the original DIFY translation.
+"""
+
+import sys
+import os
+sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))  # put this script's directory first on sys.path before importing `app`
+
+# Switch stdout to UTF-8 so the CJK text and emoji printed by this script can be encoded.
+sys.stdout.reconfigure(encoding='utf-8')
+
+from pathlib import Path
+from app import create_app
+
+def test_prioritized_mapping():
+    """Report which cached translation the prioritized lookup selects.
+
+    Pulls the first "WB inline" segment out of a fixed sample workbook, runs
+    the exact-match / normalized-match UNION query on ``dt_translation_cache``
+    for target language 'ko', and prints the winning row plus every candidate.
+    Outcomes are printed only; nothing is asserted or returned.
+    """
+    banner = "=" * 80
+    rule = "-" * 60
+
+    print(banner)
+    print("測試優化後的翻譯映射邏輯")
+    print("預期: 應該優先使用原始DIFY翻譯 (ROW 449)")
+    print(banner)
+
+    flask_app = create_app()
+    with flask_app.app_context():
+        from sqlalchemy import text as sql_text
+        from app import db
+        from app.services.translation_service import ExcelParser
+
+        # Step 1: read the sample workbook (machine-specific absolute path).
+        upload_dir = Path(r"C:\Users\EGG\WORK\data\user_scrip\TOOL\Document_translator_V2\uploads\98158984-f335-44f5-a0b4-88fb8ccd5d78")
+        workbook = upload_dir / "original_panjit_98158984.xlsx"
+        if not workbook.exists():
+            print("❌ 測試檔案不存在")
+            return
+
+        extracted = ExcelParser(str(workbook)).extract_text_segments()
+        d2_text = next((seg for seg in extracted if "WB inline" in seg), None)
+        if not d2_text:
+            print("❌ 沒有找到D2相關內容")
+            return
+
+        print("1. Excel提取的D2文字:")
+        print(f"   {d2_text!r}")
+
+        # Step 2: run the combined exact/normalized lookup.
+        print("\n2. 測試新的聯合查詢邏輯")
+        print(rule)
+
+        target_language = 'ko'
+        normalized = d2_text.replace('\n', ' ').replace('\r', ' ').strip()
+        print(f"標準化文字: {normalized!r}")
+
+        query_args = {'exact_text': d2_text, 'norm_text': normalized, 'lang': target_language}
+        # ORDER BY + LIMIT 1 keeps only the oldest row across both branches.
+        best_match_sql = """
+            SELECT translated_text, created_at, 'exact' as match_type
+            FROM dt_translation_cache 
+            WHERE source_text = :exact_text AND target_language = :lang
+            
+            UNION ALL
+            
+            SELECT translated_text, created_at, 'normalized' as match_type
+            FROM dt_translation_cache 
+            WHERE REPLACE(REPLACE(TRIM(source_text), '\n', ' '), '\r', ' ') = :norm_text 
+            AND target_language = :lang
+            AND source_text != :exact_text
+            
+            ORDER BY created_at ASC 
+            LIMIT 1
+        """
+        best = db.session.execute(sql_text(best_match_sql), query_args).fetchone()
+
+        success = False
+        if not best:
+            print("❌ 聯合查詢沒有找到任何翻譯")
+        else:
+            first_text, first_created, first_kind = best
+            print("✅ 聯合查詢找到翻譯:")
+            print(f"   翻譯內容: {first_text[:50]!r}...")
+            print(f"   創建時間: {first_created}")
+            print(f"   匹配類型: {first_kind}")
+            # Tell the DIFY output from the manual entry by marker phrases.
+            if "와이어 본딩" in first_text:
+                print("   🎯 這是原始DIFY翻譯！(特徵: 와이어 본딩)")
+                success = True
+            elif "연결" in first_text:
+                print("   ✋ 這是手動補充翻譯 (特徵: 연결)")
+            else:
+                print("   ❓ 無法判斷翻譯來源")
+
+        # Step 3: list every candidate row for comparison.
+        print("\n3. 查看所有相關的翻譯記錄 (用於對比)")
+        print(rule)
+
+        all_matches_sql = """
+            SELECT id, translated_text, created_at, 'exact' as match_type
+            FROM dt_translation_cache 
+            WHERE source_text = :exact_text AND target_language = :lang
+            
+            UNION ALL
+            
+            SELECT id, translated_text, created_at, 'normalized' as match_type
+            FROM dt_translation_cache 
+            WHERE REPLACE(REPLACE(TRIM(source_text), '\n', ' '), '\r', ' ') = :norm_text 
+            AND target_language = :lang
+            AND source_text != :exact_text
+            
+            ORDER BY created_at ASC
+        """
+        candidates = db.session.execute(sql_text(all_matches_sql), query_args).fetchall()
+
+        row_labels = {
+            449: "   🎯 這是原始DIFY翻譯",
+            514: "   ✋ 這是手動補充翻譯",
+        }
+        for index, (row_id, trans, created_at, match_type) in enumerate(candidates, 1):
+            print(f"選項{index}: ROW {row_id} ({match_type}匹配, {created_at})")
+            print(f"   翻譯: {trans[:40]!r}...")
+            label = row_labels.get(row_id)
+            if label is not None:
+                print(label)
+
+        # Step 4: summarize the outcome.
+        print("\n4. 結果評估")
+        print(rule)
+
+        verdict = (
+            ("🎉 成功！新邏輯正確地優先選擇了原始DIFY翻譯", "   現在重新生成韓文Excel檔案應該會使用原始翻譯")
+            if success
+            else ("⚠️  邏輯需要進一步調整", "   可能需要檢查SQL查詢或排序邏輯")
+        )
+        for line in verdict:
+            print(line)
+
+    print("\n" + banner)
+    print("優化後映射邏輯測試完成！")
+    print(banner)
+
+if __name__ == "__main__":  # run the diagnostic when this file is executed as a script
+    test_prioritized_mapping()