# Document_Translator/debug_table_translation.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Diagnose table-translation problems in a DOCX file.

Focuses on whether the text "超温" was translated.
"""

import sys
import os
# Put this script's own directory first on the import path; presumably so the
# project-local `app` package imported below resolves when the script is run
# directly -- confirm against the repository layout.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

# Force UTF-8 on stdout: the script prints CJK text and emoji.
sys.stdout.reconfigure(encoding='utf-8')

from pathlib import Path
import docx
from docx.table import Table
from app import create_app

def analyze_docx_table_translation(base_dir=None):
    """Inspect the tables of one translation task's DOCX files for "超温".

    The original document's tables are dumped cell by cell, and every cell
    containing the source term "超温" is reported together with its
    paragraphs.  The English and Vietnamese outputs are then scanned for
    cells that still contain the source term (probably untranslated) or that
    contain one of the expected renderings.  All findings are printed to
    stdout.

    Args:
        base_dir: Directory (``str`` or ``Path``) that holds the task's
            original and translated DOCX files.  When ``None`` the
            hard-coded Windows upload directory of task
            17e05695-406f-47af-96eb-a0e23843770e is used, which keeps the
            zero-argument call working exactly as before.

    Returns:
        None.  The function returns early (after printing a message) when
        the original file is missing or cannot be read.
    """
    source_term = "超温"
    # Only the leading part of the failure marker is matched.  The previous
    # literal "【翻譯失敗」" mixed two bracket styles and therefore could not
    # match a "【...】"-style marker; the prefix matches either form.
    # NOTE(review): confirm the exact marker emitted by the translator.
    failure_marker = "【翻譯失敗"
    translated_keywords = ('overheating', 'over-heating', 'quá nóng')

    print("=" * 80)
    print("檢查docx表格翻譯問題")
    print("任務ID: 17e05695-406f-47af-96eb-a0e23843770e")
    print("=" * 80)

    if base_dir is None:
        base_dir = Path(r"C:\Users\EGG\WORK\data\user_scrip\TOOL\Document_translator_V2\uploads\17e05695-406f-47af-96eb-a0e23843770e")
    else:
        base_dir = Path(base_dir)
    original_file = base_dir / "original_-OR026_17e05695.docx"
    translated_en = base_dir / "translated_original_-OR026_17e05695_en_translat.docx"
    translated_vi = base_dir / "translated_original_-OR026_17e05695_vi_translat.docx"

    if not original_file.exists():
        print(f"❌ 原始檔案不存在: {original_file}")
        return

    print(f"✅ 原始檔案: {original_file.name}")

    # 1. Dump every non-empty table cell of the original and flag the ones
    #    that contain the source term.
    print("\n1. 分析原始文件表格內容")
    print("-" * 60)

    try:
        doc = docx.Document(str(original_file))
        tables_found = 0
        target_text_found = False

        for table_idx, table in enumerate(doc.tables, start=1):
            tables_found += 1
            print(f"表格 {table_idx}:")

            for row_idx, row in enumerate(table.rows, start=1):
                for cell_idx, cell in enumerate(row.cells, start=1):
                    cell_text = cell.text.strip()
                    if not cell_text:
                        continue
                    print(f"  行{row_idx} 列{cell_idx}: {cell_text!r}")

                    if source_term not in cell_text:
                        continue
                    print("    🎯 找到目標文字 '超温'")
                    target_text_found = True

                    # Show the paragraph structure of the matching cell.
                    print(f"    儲存格段落數: {len(cell.paragraphs)}")
                    for p_idx, para in enumerate(cell.paragraphs, start=1):
                        print(f"      段落{p_idx}: {para.text!r}")

        print(f"\n總表格數: {tables_found}")
        print(f"是否找到'超温': {'✅' if target_text_found else '❌'}")

    except Exception as e:
        # Best-effort diagnostic: report the failure and stop, since the
        # translated files cannot be judged without the original.
        print(f"❌ 讀取原始文件失敗: {e}")
        return

    # 2. Look at the corresponding content in each translated version.
    for lang, trans_file in [("英文", translated_en), ("越南文", translated_vi)]:
        if not trans_file.exists():
            print(f"\n❌ {lang}翻譯檔案不存在")
            continue

        print(f"\n2. 檢查{lang}翻譯結果")
        print("-" * 60)

        try:
            trans_doc = docx.Document(str(trans_file))
            translation_found = False

            for table_idx, table in enumerate(trans_doc.tables, start=1):
                print(f"{lang}表格 {table_idx}:")

                for row_idx, row in enumerate(table.rows, start=1):
                    for cell_idx, cell in enumerate(row.cells, start=1):
                        cell_text = cell.text.strip()
                        if not cell_text:
                            continue

                        if source_term in cell_text:
                            # The source term is still present, so the cell
                            # may not have been translated.
                            print(f"  行{row_idx} 列{cell_idx}: {cell_text!r}")
                            print("    ⚠️  仍包含原文'超温'，可能未翻譯")

                            print(f"    儲存格段落數: {len(cell.paragraphs)}")
                            for p_idx, para in enumerate(cell.paragraphs, start=1):
                                p_text = para.text.strip()
                                print(f"      段落{p_idx}: {p_text!r}")

                                # Look for markers left behind by the
                                # translation step.
                                if failure_marker in p_text or "translation:" in p_text.lower():
                                    print("        🔍 發現翻譯標記")
                                elif "\u200b" in p_text:  # zero-width space marker
                                    print("        🔍 發現翻譯插入標記")

                        elif any(keyword in cell_text.lower() for keyword in translated_keywords):
                            # One of the expected renderings is present.
                            print(f"  行{row_idx} 列{cell_idx}: {cell_text!r}")
                            print("    ✅ 可能的翻譯結果")
                            translation_found = True

            print(f"{lang}翻譯狀態: {'✅ 找到翻譯' if translation_found else '❌ 未找到翻譯'}")

        except Exception as e:
            # Keep going with the next language if one file is unreadable.
            print(f"❌ 讀取{lang}翻譯檔案失敗: {e}")

def check_translation_cache():
    """Print translation-cache rows for "超温" and the latest en/vi rows.

    Creates the application via ``create_app()`` and, inside its app
    context, runs two raw SQL queries against ``dt_translation_cache``:

    1. up to 10 rows whose ``source_text`` contains "超温", newest first;
    2. up to 20 rows created within the last day whose target language is
       ``en`` or ``vi``, newest first.

    Both queries select ``id, source_text, target_language,
    translated_text, created_at``; each row is unpacked in that order so
    every value is printed under the label that matches its column.

    Returns:
        None.  Results are printed to stdout.
    """

    print("\n" + "=" * 80)
    print("檢查翻譯快取")
    print("=" * 80)

    app = create_app()

    with app.app_context():
        from sqlalchemy import text as sql_text
        from app import db

        print("\n1. 搜尋'超温'相關的快取記錄")
        print("-" * 60)

        # Cache rows whose source text contains the term under investigation.
        cache_results = db.session.execute(sql_text("""
            SELECT id, source_text, target_language, translated_text, created_at
            FROM dt_translation_cache
            WHERE source_text LIKE '%超温%'
            ORDER BY created_at DESC
            LIMIT 10
        """)).fetchall()

        if cache_results:
            print(f"找到 {len(cache_results)} 條相關記錄:")
            # Unpack by SELECT order instead of indexing: the previous code
            # printed source_text as the target language and a slice of
            # target_language as the translation.
            for (record_id, source_text, target_language,
                 translated_text, created_at) in cache_results:
                print(f"ROW {record_id}: {source_text} -> {target_language}")
                print(f"   目標語言: {target_language}")
                # `or ''` keeps a NULL translation from breaking the slice.
                print(f"   翻譯結果: {repr((translated_text or '')[:50])}...")
                print(f"   時間: {created_at}")
                print()
        else:
            print("❌ 未找到包含'超温'的快取記錄")

        # Most recent English / Vietnamese cache rows.
        print("\n2. 檢查近期的翻譯記錄")
        print("-" * 60)

        # NOTE(review): DATE_SUB(NOW(), INTERVAL 1 DAY) is MySQL-style date
        # arithmetic -- presumably the cache lives in MySQL/MariaDB; confirm.
        recent_results = db.session.execute(sql_text("""
            SELECT id, source_text, target_language, translated_text, created_at
            FROM dt_translation_cache
            WHERE created_at >= DATE_SUB(NOW(), INTERVAL 1 DAY)
            AND (target_language = 'en' OR target_language = 'vi')
            ORDER BY created_at DESC
            LIMIT 20
        """)).fetchall()

        print(f"近24小時內的英文/越南文翻譯記錄 (共{len(recent_results)}條):")
        for (record_id, source_text, target_language,
             translated_text, _created_at) in recent_results:
            print(
                f"ROW {record_id}: {repr((source_text or '')[:20])}... "
                f"-> {target_language} -> {repr((translated_text or '')[:30])}..."
            )

def main():
    """Run the complete "超温" diagnosis and print a triage checklist.

    Runs the DOCX table analysis, then the translation-cache check, and
    finally prints a four-item list of possible problem categories.  Any
    exception raised by these steps is caught and printed together with its
    traceback instead of propagating to the caller.
    """

    print("🔍 診斷docx表格翻譯問題")
    print("重點檢查: '超温' 文字翻譯狀況")

    rule = "=" * 80
    closing_report = (
        "\n" + rule,
        "診斷總結",
        rule,
        "請根據以上結果判斷問題類型:",
        "1. 解析問題: 原始文件中找不到'超温'",
        "2. 翻譯問題: 快取中沒有'超温'的翻譯記錄",
        "3. 插入問題: 有翻譯記錄但未插入到文件中",
        "4. 版面問題: 翻譯已插入但格式或位置導致看不到",
        rule,
    )

    try:
        # Step 1: inspect the document tables.
        analyze_docx_table_translation()

        # Step 2: inspect the translation cache.
        check_translation_cache()

        # The checklist is printed only when both steps completed.
        for report_line in closing_report:
            print(report_line)

    except Exception as error:
        import traceback
        print(f"❌ 診斷過程發生錯誤: {error}")
        print(f"錯誤詳情: {traceback.format_exc()}")

# Run the diagnosis only when this file is executed as a script.
if __name__ == "__main__":
    main()