5th_fix excel problem

2025-09-03 15:07:34 +08:00
parent cce3fd4925
commit 5fd0671b4f
28 changed files with 4484 additions and 97 deletions
--- a/debug_real_production_issue.py
+++ b/debug_real_production_issue.py
@@ -0,0 +1,179 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Debug a translation problem seen in the real production environment.
+"""
+
+import sys
+import os
+sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))  # put this script's own directory first on the import path
+
+# Force UTF-8 on stdout (this script prints CJK text and emoji)
+sys.stdout.reconfigure(encoding='utf-8')
+
+from pathlib import Path
+import openpyxl
+from app.services.translation_service import ExcelParser
+
+def debug_real_production_issue():
+    """Debug the production translation problem for one uploaded workbook.
+
+    Prints a five-step report: the segments ExcelParser extracts, cell A1 as
+    stored and as loaded with data_only=True, A1 in the translated workbook,
+    the newest translation-cache row for A1's text, and a comparison of
+    cells A1:E2. Returns None; stops early if either workbook is missing.
+    """
+    print("=" * 80)
+    print("調試實際生產環境翻譯問題")
+    print("新上傳UUID: f8b0febc-c0df-4902-8dc3-c90f5634f3b3")
+    print("=" * 80)
+
+    # Real production files; the paths are hard-coded to a single upload.
+    prod_dir = Path(r"C:\Users\EGG\WORK\data\user_scrip\TOOL\Document_translator_V2\uploads\f8b0febc-c0df-4902-8dc3-c90f5634f3b3")
+    original_file = prod_dir / "original_panjit_f8b0febc.xlsx"
+    translated_file = prod_dir / "original_panjit_f8b0febc_ja_translated.xlsx"
+
+    if not original_file.exists():
+        print(f"❌ 原始文件不存在: {original_file}")
+        return
+
+    if not translated_file.exists():
+        print(f"❌ 翻譯文件不存在: {translated_file}")
+        return
+
+    print("✅ 檔案確認:")
+    print(f"   原始文件: {original_file.name}")
+    print(f"   翻譯文件: {translated_file.name}")
+
+    # 1. Check what the ExcelParser in use actually extracts.
+    print("\n1. 檢查實際ExcelParser提取行為")
+    print("-" * 60)
+    parser = ExcelParser(str(original_file))
+    segments = parser.extract_text_segments()
+
+    print(f"實際提取到 {len(segments)} 個文字片段")
+
+    # Check whether the expected A1 text is among the extracted segments.
+    a1_content = "製程"
+    if a1_content in segments:
+        print(f"✅ A1內容 '{a1_content}' 已被提取（位置: {segments.index(a1_content)+1}）")
+    else:
+        print(f"❌ A1內容 '{a1_content}' 仍未被提取")
+        # Show the first 10 extracted segments to help spot what was picked up.
+        print("   實際提取的前10個片段:")
+        for i, seg in enumerate(segments[:10]):
+            print(f"     {i+1:2d}. {repr(seg)}")
+
+    # 2. Inspect the content of cell A1 in the original workbook directly.
+    print("\n2. 檢查A1儲存格原始內容")
+    print("-" * 60)
+    wb_orig = openpyxl.load_workbook(str(original_file), data_only=False)
+    try:
+        wb_orig_vals = openpyxl.load_workbook(str(original_file), data_only=True)
+    except Exception as exc:
+        # Best-effort: the data_only view is optional, so report and carry on.
+        print(f"⚠️ 無法以 data_only=True 載入原始文件: {exc}")
+        wb_orig_vals = None
+
+    a1_raw = wb_orig.active['A1'].value
+    a1_display = wb_orig_vals.active['A1'].value if wb_orig_vals is not None else None
+    print(f"A1原始值: {repr(a1_raw)}")
+    if wb_orig_vals is not None:
+        print(f"A1顯示值: {repr(a1_display)}")
+
+    # Mimics get_display_text_for_translation (defined elsewhere; not verified here).
+    if isinstance(a1_raw, str) and a1_raw.startswith("="):
+        display_text = a1_display if isinstance(a1_display, str) and a1_display.strip() else None
+    elif isinstance(a1_raw, str) and a1_raw.strip():
+        display_text = a1_raw
+    else:
+        display_text = a1_display if wb_orig_vals is not None and isinstance(a1_display, str) and a1_display.strip() else None
+
+    print(f"用於翻譯的文字: {repr(display_text)}")
+
+    if display_text:
+        should_translate = parser._should_translate(display_text, 'auto')
+        has_cjk = parser._has_cjk(display_text)
+        min_length = 2 if has_cjk else 3
+
+        print(f"文字長度: {len(display_text)}")
+        print(f"包含CJK: {has_cjk}")
+        print(f"最小長度要求: {min_length}")
+        print(f"應該翻譯: {should_translate}")
+
+    # 3. Inspect cell A1 of the translated workbook.
+    print("\n3. 檢查翻譯文件A1儲存格")
+    print("-" * 60)
+    wb_trans = openpyxl.load_workbook(str(translated_file), data_only=False)
+    a1_trans = wb_trans.active['A1'].value
+
+    print(f"A1翻譯結果: {repr(a1_trans)}")
+
+    if isinstance(a1_trans, str) and '\n' in a1_trans:
+        lines = a1_trans.split('\n')
+        print("✅ A1已翻譯！格式: 雙行")
+        for i, line in enumerate(lines):
+            print(f"   行{i+1}: {repr(line)}")
+    elif a1_raw == a1_trans:
+        print("❌ A1未翻譯 - 內容完全相同")
+    else:
+        print("⚠️ A1內容有變化但格式不明")
+
+    # 4. Look up the translation cache for the A1 text.
+    print("\n4. 檢查翻譯快取")
+    print("-" * 60)
+    from app import create_app
+    app = create_app()
+
+    with app.app_context():
+        from sqlalchemy import text as sql_text
+        from app import db
+
+        if display_text:
+            result = db.session.execute(sql_text("""
+                SELECT translated_text, created_at
+                FROM dt_translation_cache 
+                WHERE source_text = :text AND target_language = 'ja'
+                ORDER BY created_at DESC 
+                LIMIT 1
+            """), {'text': display_text})
+
+            row = result.fetchone()
+            if row:
+                print(f"✅ 快取中有翻譯: '{display_text}' -> '{row[0]}'")
+                print(f"   創建時間: {row[1]}")
+            else:
+                print(f"❌ 快取中沒有翻譯: '{display_text}'")
+
+    # 5. Systematically compare the first 10 cells (A1:E2) of both workbooks.
+    print("\n5. 系統性檢查前10個儲存格")
+    print("-" * 60)
+    important_cells = ['A1', 'B1', 'C1', 'D1', 'E1', 'A2', 'B2', 'C2', 'D2', 'E2']
+
+    for cell_name in important_cells:
+        orig_val = wb_orig.active[cell_name].value
+        trans_val = wb_trans.active[cell_name].value
+
+        if orig_val:  # only report cells whose original value is truthy
+            print(f"\n{cell_name}:")
+            print(f"  原始: {repr(orig_val)}")
+            print(f"  翻譯: {repr(trans_val)}")
+
+            if isinstance(trans_val, str) and '\n' in trans_val:
+                print("  狀態: ✅ 已翻譯")
+            elif orig_val == trans_val:
+                print("  狀態: ❌ 未翻譯")
+            else:
+                print("  狀態: ⚠️ 內容有變化")
+
+    wb_orig.close()
+    wb_trans.close()
+    if wb_orig_vals is not None:
+        wb_orig_vals.close()
+
+    print("\n" + "=" * 80)
+    print("實際生產環境調試完成！")
+    print("=" * 80)
+
+if __name__ == "__main__":  # run the report only when executed as a script
+    debug_real_production_issue()