# Document_Translator/debug_real_production_issue.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
調試實際生產環境中的翻譯問題
"""

import sys
import os
# Put this script's own directory at the front of sys.path before the
# project-local `app` import below (presumably so `app` resolves relative to
# this file regardless of the current working directory — confirm layout).
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

# Set the stdout encoding to UTF-8; this script prints CJK text and emoji.
sys.stdout.reconfigure(encoding='utf-8')

from pathlib import Path
import openpyxl
# Must stay after the sys.path.insert call above.
from app.services.translation_service import ExcelParser

def _resolve_display_text(raw_value, cached_value):
    """Pick the cell text that would be handed to the translator.

    Mirrors the selection rule this script simulates for
    ``get_display_text_for_translation``:

    * formula cells (raw value starts with ``=``) use the cached result,
    * non-blank string cells use the raw value,
    * anything else falls back to the cached result.

    Args:
        raw_value: Cell value read with ``data_only=False``.
        cached_value: Cell value read with ``data_only=True``, or ``None``
            when that workbook could not be loaded.

    Returns:
        The chosen string, or ``None`` when no non-blank string is available.
    """
    cached_text = (
        cached_value
        if isinstance(cached_value, str) and cached_value.strip()
        else None
    )
    if isinstance(raw_value, str):
        if raw_value.startswith("="):
            return cached_text
        if raw_value.strip():
            return raw_value
    return cached_text


def debug_real_production_issue():
    """Diagnose why cell A1 of one specific uploaded workbook was not translated.

    Works on a hard-coded upload directory and prints a five-step report:

    1. what ``ExcelParser.extract_text_segments`` extracts from the original,
    2. the raw and cached values of A1 and the text chosen for translation,
    3. the content of A1 in the translated workbook,
    4. the newest ``dt_translation_cache`` row for that text (target ``ja``),
    5. an original-vs-translated comparison of cells A1:E2.

    Returns:
        None. Returns early, after printing a message, when either the
        original or the translated file is missing.
    """
    banner = "=" * 80
    rule = "-" * 60

    print(banner)
    print("調試實際生產環境翻譯問題")
    print("新上傳UUID: f8b0febc-c0df-4902-8dc3-c90f5634f3b3")
    print(banner)

    # Hard-coded location of the production upload under investigation.
    prod_dir = Path(r"C:\Users\EGG\WORK\data\user_scrip\TOOL\Document_translator_V2\uploads\f8b0febc-c0df-4902-8dc3-c90f5634f3b3")
    original_file = prod_dir / "original_panjit_f8b0febc.xlsx"
    translated_file = prod_dir / "original_panjit_f8b0febc_ja_translated.xlsx"

    if not original_file.exists():
        print(f"❌ 原始文件不存在: {original_file}")
        return

    if not translated_file.exists():
        print(f"❌ 翻譯文件不存在: {translated_file}")
        return

    print("✅ 檔案確認:")
    print(f"   原始文件: {original_file.name}")
    print(f"   翻譯文件: {translated_file.name}")

    # Step 1: what does the parser actually extract?
    print("\n1. 檢查實際ExcelParser提取行為")
    print(rule)

    parser = ExcelParser(str(original_file))
    segments = parser.extract_text_segments()

    print(f"實際提取到 {len(segments)} 個文字片段")

    # Expected content of A1 for this particular file.
    a1_content = "製程"
    if a1_content in segments:
        print(f"✅ A1內容 '{a1_content}' 已被提取（位置: {segments.index(a1_content)+1}）")
    else:
        print(f"❌ A1內容 '{a1_content}' 仍未被提取")

        # Show the first ten extracted segments to help spot what came out instead.
        print("   實際提取的前10個片段:")
        for position, segment in enumerate(segments[:10], start=1):
            print(f"     {position:2d}. {repr(segment)}")

    # Every workbook opened below is tracked here so the ``finally`` clause can
    # close it even when a later step (e.g. the database query) raises.
    opened_workbooks = []
    try:
        # Step 2: inspect the raw content of A1.
        print("\n2. 檢查A1儲存格原始內容")
        print(rule)

        wb_orig = openpyxl.load_workbook(str(original_file), data_only=False)
        opened_workbooks.append(wb_orig)

        # The cached-value view is best-effort: keep going without it, but
        # report the reason and do not swallow KeyboardInterrupt/SystemExit.
        try:
            wb_orig_vals = openpyxl.load_workbook(str(original_file), data_only=True)
        except Exception as exc:
            print(f"⚠️ 無法以 data_only=True 載入原始文件: {exc}")
            wb_orig_vals = None
        else:
            opened_workbooks.append(wb_orig_vals)

        a1_raw = wb_orig.active['A1'].value
        a1_display = wb_orig_vals.active['A1'].value if wb_orig_vals is not None else None

        print(f"A1原始值: {repr(a1_raw)}")
        if wb_orig_vals is not None:
            print(f"A1顯示值: {repr(a1_display)}")

        # Simulate the get_display_text_for_translation selection logic.
        display_text = _resolve_display_text(a1_raw, a1_display)

        print(f"用於翻譯的文字: {repr(display_text)}")

        if display_text:
            should_translate = parser._should_translate(display_text, 'auto')
            has_cjk = parser._has_cjk(display_text)
            min_length = 2 if has_cjk else 3

            print(f"文字長度: {len(display_text)}")
            print(f"包含CJK: {has_cjk}")
            print(f"最小長度要求: {min_length}")
            print(f"應該翻譯: {should_translate}")

        # Step 3: inspect A1 in the translated workbook.
        print("\n3. 檢查翻譯文件A1儲存格")
        print(rule)

        wb_trans = openpyxl.load_workbook(str(translated_file), data_only=False)
        opened_workbooks.append(wb_trans)
        a1_trans = wb_trans.active['A1'].value

        print(f"A1翻譯結果: {repr(a1_trans)}")

        # A newline inside the cell is treated as the marker of a translated cell.
        if isinstance(a1_trans, str) and '\n' in a1_trans:
            print("✅ A1已翻譯！格式: 雙行")
            for line_no, line in enumerate(a1_trans.split('\n'), start=1):
                print(f"   行{line_no}: {repr(line)}")
        elif a1_raw == a1_trans:
            print("❌ A1未翻譯 - 內容完全相同")
        else:
            print("⚠️ A1內容有變化但格式不明")

        # Step 4: look the text up in the translation cache.
        print("\n4. 檢查翻譯快取")
        print(rule)

        from app import create_app
        app = create_app()

        with app.app_context():
            from sqlalchemy import text as sql_text
            from app import db

            if display_text:
                # Bound parameter (:text) keeps the cell content out of the SQL string.
                result = db.session.execute(sql_text("""
                SELECT translated_text, created_at
                FROM dt_translation_cache
                WHERE source_text = :text AND target_language = 'ja'
                ORDER BY created_at DESC
                LIMIT 1
            """), {'text': display_text})

                row = result.fetchone()
                if row:
                    print(f"✅ 快取中有翻譯: '{display_text}' -> '{row[0]}'")
                    print(f"   創建時間: {row[1]}")
                else:
                    print(f"❌ 快取中沒有翻譯: '{display_text}'")

        # Step 5: compare the first ten cells side by side.
        print("\n5. 系統性檢查前10個儲存格")
        print(rule)

        important_cells = ['A1', 'B1', 'C1', 'D1', 'E1', 'A2', 'B2', 'C2', 'D2', 'E2']

        for cell_name in important_cells:
            orig_val = wb_orig.active[cell_name].value
            trans_val = wb_trans.active[cell_name].value

            # Only cells with (truthy) content in the original are reported.
            if not orig_val:
                continue

            print(f"\n{cell_name}:")
            print(f"  原始: {repr(orig_val)}")
            print(f"  翻譯: {repr(trans_val)}")

            if isinstance(trans_val, str) and '\n' in trans_val:
                print("  狀態: ✅ 已翻譯")
            elif orig_val == trans_val:
                print("  狀態: ❌ 未翻譯")
            else:
                print("  狀態: ⚠️ 內容有變化")
    finally:
        for workbook in opened_workbooks:
            workbook.close()

    print("\n" + banner)
    print("實際生產環境調試完成！")
    print(banner)

# Run the diagnostic only when this file is executed as a script, not on import.
if __name__ == "__main__":
    debug_real_production_issue()