5th_fix excel problem

2025-09-03 15:07:34 +08:00
parent cce3fd4925
commit 5fd0671b4f
28 changed files with 4484 additions and 97 deletions
--- a/test_fixed_translation.py
+++ b/test_fixed_translation.py
@@ -1,96 +1,176 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 """
-Test the fixed translation service
+Test the fixed translation feature - regenerate the translated file
 """

 import sys
 import os
+sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

-# Fix encoding for Windows console
-if sys.stdout.encoding != 'utf-8':
-    sys.stdout.reconfigure(encoding='utf-8')
-if sys.stderr.encoding != 'utf-8':
-    sys.stderr.reconfigure(encoding='utf-8')
+# Configure stdout encoding
+sys.stdout.reconfigure(encoding='utf-8')

-sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'app'))
+from pathlib import Path
+from app.services.translation_service import ExcelParser
+import openpyxl

-from app import create_app
-from app.services.translation_service import TranslationService
-from app.models.job import TranslationJob
-
-def test_fixed_translation_service():
-    """Test the fixed translation service on a real job"""
+def test_fixed_translation():
+    """Check the fixed Excel translation flow: extraction, cache lookup, file generation, and the A1 cell."""
    
+    print("=" * 80)
+    print("測試修正後的Excel翻譯功能")
+    print("=" * 80)
+    
+    # Use an existing test file (NOTE(review): machine-specific absolute path; the test returns early below if the file is missing)
+    test_dir = Path(r"C:\Users\EGG\WORK\data\user_scrip\TOOL\Document_translator_V2\uploads\185bb457-b703-4e98-94a2-fde072b895c4")
+    original_file = test_dir / "original_panjit_185bb457.xlsx"
+    
+    if not original_file.exists():
+        print(f"原始文件不存在: {original_file}")
+        return
+    
+    # Build a new name for the translated output file
+    new_translated_file = test_dir / "original_panjit_185bb457_ja_translated_fixed.xlsx"
+    
+    print(f"✅ 使用原始文件: {original_file.name}")
+    print(f"✅ 生成新翻譯文件: {new_translated_file.name}")
+    
+    # 1. Verify text extraction
+    print(f"\n1. 驗證文字提取功能")
+    print("-" * 60)
+    
+    parser = ExcelParser(str(original_file))
+    segments = parser.extract_text_segments()
+    
+    print(f"提取到 {len(segments)} 個文字片段")
+    
+    # Check that the expected A1 content is among the extracted segments; abort the test otherwise
+    a1_content = "製程"
+    if a1_content in segments:
+        print(f"✅ A1內容 '{a1_content}' 已被提取")
+        print(f"   位置: 第{segments.index(a1_content)+1}個")
+    else:
+        print(f"❌ A1內容 '{a1_content}' 仍未被提取")
+        return
+    
+    # 2. Verify the translation cache (one lookup per extracted segment, newest cache row wins)
+    print(f"\n2. 驗證翻譯快取狀況")
+    print("-" * 60)
+    
+    from app import create_app
    app = create_app()
    
    with app.app_context():
-        # Get the most recent job to test with
-        job = TranslationJob.query.order_by(TranslationJob.created_at.desc()).first()
-        
-        if not job:
-            print("No jobs found to test")
-            return
-        
-        print(f"Testing translation service on job: {job.job_uuid}")
-        print(f"Original filename: {job.original_filename}")
-        print(f"Target languages: {job.target_languages}")
-        print(f"File path: {job.file_path}")
-        
-        # Reset job status to PENDING for testing
-        job.status = 'PENDING'
-        job.progress = 0.0
-        job.error_message = None
-        
+        from sqlalchemy import text as sql_text
        from app import db
-        db.session.commit()
        
-        print(f"Reset job status to PENDING")
+        target_language = 'ja'
+        translation_map = {}  # used only for the cache hit statistics printed below; not passed to the generator
+        missing_count = 0
        
-        # Create translation service and test
-        service = TranslationService()
+        for segment in segments:
+            result = db.session.execute(sql_text("""
+                SELECT translated_text 
+                FROM dt_translation_cache 
+                WHERE source_text = :text AND target_language = :lang
+                ORDER BY created_at DESC 
+                LIMIT 1
+            """), {'text': segment, 'lang': target_language})
+            
+            row = result.fetchone()
+            if row:
+                translation_map[segment] = row[0]
+                if segment == a1_content:
+                    print(f"✅ '{segment}' -> '{row[0]}'")
+            else:
+                missing_count += 1
+                if segment == a1_content:
+                    print(f"❌ '{segment}' -> 無翻譯記錄")
+        
+        print(f"翻譯快取命中: {len(translation_map)}/{len(segments)} = {len(translation_map)/len(segments)*100:.1f}%")
+        print(f"缺失翻譯: {missing_count} 個")
+    
+        # 3. Generate the translated file manually
+        print(f"\n3. 手動生成翻譯文件")
+        print("-" * 60)
        
        try:
-            print("Starting translation...")
-            result = service.translate_document(job.job_uuid)
-            
-            print(f"Translation completed!")
-            print(f"Result: {result}")
-            
-            # Check the job status
-            db.session.refresh(job)
-            print(f"Final job status: {job.status}")
-            print(f"Progress: {job.progress}%")
-            print(f"Total tokens: {job.total_tokens}")
-            print(f"Total cost: ${job.total_cost}")
-            
-            if job.error_message:
-                print(f"Error message: {job.error_message}")
-            
-            # Check translated files
-            translated_files = job.get_translated_files()
-            print(f"Generated {len(translated_files)} translated files:")
-            for tf in translated_files:
-                print(f"  - {tf.filename} ({tf.language_code}) - Size: {tf.file_size} bytes")
-                
-                # Check if file exists and has content
-                from pathlib import Path
-                if Path(tf.file_path).exists():
-                    size = Path(tf.file_path).stat().st_size
-                    print(f"    File exists with {size} bytes")
-                    
-                    # Quick check if it contains translations (different from original)
-                    if size != job.get_original_file().file_size:
-                        print(f"    ✅ File size differs from original - likely contains translations")
-                    else:
-                        print(f"    ⚠️  File size same as original - may not contain translations")
-                else:
-                    print(f"    ❌ File not found at: {tf.file_path}")
+            # Call ExcelParser.generate_translated_document while still inside the app context
+            translated_file_path = parser.generate_translated_document(
+                translations={}, # empty dict; per the original author the parser then looks translations up in the cache — TODO confirm
+                target_language='ja',
+                output_dir=test_dir
+            )
            
+            # Rename the generated file to our test file name
+            import shutil
+            if Path(translated_file_path).exists():
+                shutil.move(translated_file_path, str(new_translated_file))
+                print(f"✅ 翻譯文件已生成: {new_translated_file.name}")
+            else:
+                print(f"❌ 翻譯文件生成失敗")
+                return
        except Exception as e:
-            print(f"Translation failed with error: {e}")
-            import traceback
-            traceback.print_exc()
+            print(f"❌ 生成翻譯文件時出錯: {str(e)}")
+            return
+    
+    # 4. Verify the translation result by comparing the original and generated workbooks
+    print(f"\n4. 驗證翻譯結果")
+    print("-" * 60)
+    
+    wb_orig = openpyxl.load_workbook(str(original_file), data_only=False)
+    wb_trans = openpyxl.load_workbook(str(new_translated_file), data_only=False)
+    
+    # Inspect cell A1; success requires the translated cell to contain a newline with its first line equal to a1_content
+    a1_orig = wb_orig.active['A1'].value
+    a1_trans = wb_trans.active['A1'].value
+    
+    print(f"A1儲存格檢查:")
+    print(f"  原始: {repr(a1_orig)}")
+    print(f"  翻譯: {repr(a1_trans)}")
+    
+    if isinstance(a1_trans, str) and '\n' in a1_trans:
+        lines = a1_trans.split('\n')
+        if len(lines) >= 2 and lines[0].strip() == a1_content:
+            print(f"  ✅ A1翻譯成功！")
+            print(f"     原文: '{lines[0]}'")
+            print(f"     譯文: '{lines[1]}'")
+            success = True
+        else:
+            print(f"  ⚠️ A1格式異常")
+            success = False
+    else:
+        print(f"  ❌ A1未翻譯")
+        success = False
+    
+    # Spot-check other important cells (counted as translated when the original is non-empty and the output cell contains a newline)
+    test_cells = ['C1', 'D1', 'B2', 'C2']
+    translated_count = 0
+    
+    for cell_name in test_cells:
+        orig_val = wb_orig.active[cell_name].value
+        trans_val = wb_trans.active[cell_name].value
+        
+        if orig_val and isinstance(trans_val, str) and '\n' in trans_val:
+            translated_count += 1
+    
+    print(f"\n其他儲存格翻譯狀況: {translated_count}/{len(test_cells)} 個成功翻譯")
+    
+    wb_orig.close()
+    wb_trans.close()
+    
+    # 5. Final result (driven solely by the A1 check above)
+    print(f"\n" + "=" * 80)
+    if success:
+        print("🎉 測試成功！A1儲存格翻譯問題已修復！")
+        print(f"   新翻譯文件: {new_translated_file}")
+        print("   - ✅ 文字提取修正生效")
+        print("   - ✅ 翻譯快取記錄已補充")
+        print("   - ✅ A1儲存格翻譯正常")
+    else:
+        print("❌ 測試失敗！需要進一步排查問題。")
+    print("=" * 80)

 if __name__ == "__main__":
-    test_fixed_translation_service()
+    test_fixed_translation()