Document_Translator/debug_chaoweng_issue.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
深度診斷"超温"翻譯問題
檢查從提取到插入的完整流程
"""

import sys
import os
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

# 設定編碼
sys.stdout.reconfigure(encoding='utf-8')

from pathlib import Path
import docx
from docx.table import Table
from app import create_app
from app.services.document_processor import should_translate
from app.services.translation_service import TranslationService

def debug_chaoweng_extraction():
    """Step 1: check whether "超温" is picked up by DOCX segment extraction.

    Creates the application, extracts every segment from the hard-coded
    original document with ``DocumentProcessor.extract_docx_segments`` and
    prints the kind, context, text, stripped length and ``should_translate``
    verdict of each segment whose text contains "超温".

    Returns:
        list: ``(index, segment)`` tuples, one per matching segment. The list
        is empty when the original file is missing or when nothing matches,
        so callers always receive a list.
    """

    print("=" * 80)
    print("診斷步驟1: 檢查文件提取階段")
    print("=" * 80)

    app = create_app()

    with app.app_context():
        # Imported inside the app context, as in the original script.
        from app.services.document_processor import DocumentProcessor

        # Upload directory of the specific job under investigation.
        base_dir = Path(r"C:\Users\EGG\WORK\data\user_scrip\TOOL\Document_translator_V2\uploads\17e05695-406f-47af-96eb-a0e23843770e")
        original_file = base_dir / "original_-OR026_17e05695.docx"

        if not original_file.exists():
            print("❌ 原始檔案不存在")
            # Return an empty list (not None) so the return type is the same
            # on every path; it is still falsy for callers that test it.
            return []

        processor = DocumentProcessor()

        # Extract every segment of the document.
        segments = processor.extract_docx_segments(str(original_file))

        print(f"提取到 {len(segments)} 個segments")

        # Collect the segments whose text contains "超温".
        chaoweng_segments = []
        for i, seg in enumerate(segments):
            if "超温" in seg.text:
                chaoweng_segments.append((i, seg))
                print(f"\nSegment {i}:")
                print(f"  種類: {seg.kind}")
                print(f"  上下文: {seg.ctx}")
                print(f"  內容: {repr(seg.text)}")
                print(f"  長度: {len(seg.text.strip())}")

                # Report whether the segment is considered translatable
                # when the source language is 'zh'.
                should_trans = should_translate(seg.text, 'zh')
                print(f"  should_translate: {should_trans}")

                if seg.kind == "table_cell":
                    print("  🎯 這是表格儲存格segment")
                else:
                    print("  ⚠️  不是表格儲存格類型")

        if not chaoweng_segments:
            print("❌ 沒有找到包含'超温'的segments")
        else:
            print(f"✅ 找到 {len(chaoweng_segments)} 個包含'超温'的segments")

        return chaoweng_segments

def debug_chaoweng_translation(chaoweng_segments):
    """Step 2: run each "超温" segment through the translation service.

    Args:
        chaoweng_segments: ``(index, segment)`` pairs as produced by the
            extraction step. When it is empty or ``None`` a notice is printed
            and nothing is translated.

    Each segment is translated from "zh" to "en": segments whose ``kind`` is
    "table_cell" use ``translate_word_table_cell``, all others use
    ``translate_segment_with_sentences``. The first 100 characters of the
    result and a success/failure verdict are printed. An exception raised
    while handling one segment is printed and the loop moves on to the next.
    """
    banner = "=" * 80
    print("\n" + banner)
    print("診斷步驟2: 檢查翻譯階段")
    print(banner)

    if not chaoweng_segments:
        print("❌ 沒有segments可以測試翻譯")
        return

    flask_app = create_app()

    with flask_app.app_context():
        translator = TranslationService()

        for position, segment in chaoweng_segments:
            print(f"\n測試 Segment {position} 的翻譯:")
            print(f"原文: {segment.text!r}")

            try:
                # Announce the method first, then bind it, so the output
                # order matches the call that follows.
                if segment.kind == "table_cell":
                    print("使用 translate_word_table_cell() 方法")
                    translate = translator.translate_word_table_cell
                else:
                    print("使用 translate_segment_with_sentences() 方法")
                    translate = translator.translate_segment_with_sentences

                outcome = translate(
                    text=segment.text,
                    source_language="zh",
                    target_language="en",
                    user_id=None,
                )

                print(f"翻譯結果: {outcome[:100]!r}...")

                # Classify the result: explicit failure marker, unchanged
                # text, or an actual translation.
                if "【翻譯失敗" in outcome:
                    verdict = "❌ 翻譯失敗"
                elif outcome == segment.text:
                    verdict = "❌ 翻譯結果與原文相同，可能未翻譯"
                else:
                    verdict = "✅ 翻譯成功"
                print(verdict)

            except Exception as exc:
                print(f"❌ 翻譯過程發生錯誤: {exc}")

def debug_chaoweng_cache():
    """Step 3: inspect the translation cache table for "超温" entries.

    Runs two raw SQL queries against ``dt_translation_cache`` inside an
    application context and prints the rows found:

    1. rows whose ``source_text`` is exactly "超温";
    2. up to 10 rows whose ``source_text`` contains "超温" and is at most
       10 characters long, newest first.

    Returns:
        None. All findings are printed.
    """

    print("\n" + "=" * 80)
    print("診斷步驟3: 檢查翻譯快取")
    print("=" * 80)

    app = create_app()

    with app.app_context():
        from sqlalchemy import text as sql_text
        from app import db

        # 1. Exact match on "超温".
        print("1. 搜尋精確的'超温'記錄:")
        exact_results = db.session.execute(sql_text("""
            SELECT id, source_text, target_language, translated_text, created_at
            FROM dt_translation_cache
            WHERE source_text = '超温'
            ORDER BY created_at DESC
        """)).fetchall()

        if exact_results:
            for row in exact_results:
                print(f"  ROW {row[0]}: '{row[1]}' -> {row[2]} -> '{row[3]}'")
        else:
            print("  ❌ 沒有找到精確的'超温'記錄")

        # 2. Short records that contain "超温" plus possible extra characters.
        # NOTE(review): CHAR_LENGTH is dialect-specific; confirm the
        # configured database supports it.
        print("\n2. 搜尋包含'超温'的記錄:")
        like_results = db.session.execute(sql_text("""
            SELECT id, source_text, target_language, translated_text, created_at
            FROM dt_translation_cache
            WHERE source_text LIKE '%超温%'
            AND CHAR_LENGTH(source_text) <= 10
            ORDER BY created_at DESC
            LIMIT 10
        """)).fetchall()

        if like_results:
            for row in like_results:
                translated = row[3]
                # A NULL translated_text arrives as None; slicing it would
                # raise TypeError and abort the diagnostics, so only slice
                # real strings and print None as-is (as query 1 does).
                preview = translated[:30] if translated is not None else translated
                print(f"  ROW {row[0]}: '{row[1]}' -> {row[2]} -> '{preview}...'")
        else:
            print("  ❌ 沒有找到包含'超温'的短記錄")

def debug_chaoweng_insertion():
    """Step 4: look for "超温" and its translation in the translated DOCX files.

    For the English and Vietnamese output files (hard-coded paths) this opens
    the document, walks every table cell, and for each cell containing "超温"
    prints its position, the first 100 characters of its text and its
    non-empty paragraphs. A paragraph containing one of the language's hint
    substrings is reported as a possible translation. Missing files and read
    errors are printed and the next file is processed.

    Returns:
        None. All findings are printed.
    """
    print("\n" + "=" * 80)
    print("診斷步驟4: 檢查已翻譯文件的插入狀況")
    print("=" * 80)

    # Translated outputs of the job under investigation.
    upload_dir = Path(r"C:\Users\EGG\WORK\data\user_scrip\TOOL\Document_translator_V2\uploads\17e05695-406f-47af-96eb-a0e23843770e")
    candidates = [
        ("英文", upload_dir / "translated_original_-OR026_17e05695_en_translat.docx"),
        ("越南文", upload_dir / "translated_original_-OR026_17e05695_vi_translat.docx")
    ]

    # Per language label: hint substrings (matched against lower-cased
    # paragraph text) and the message printed when one of them is found.
    hints = {
        "英文": (('over', 'heat', 'temp', 'hot'), "        🎯 可能的英文翻譯"),
        "越南文": (('quá', 'nóng', 'nhiệt'), "        🎯 可能的越南文翻譯"),
    }

    for lang, file_path in candidates:
        if not file_path.exists():
            print(f"❌ {lang}翻譯檔案不存在")
            continue

        print(f"\n檢查{lang}翻譯檔案:")
        try:
            document = docx.Document(str(file_path))
            keywords, hit_message = hints[lang]

            source_seen = False
            translation_seen = False

            for table_no, table in enumerate(document.tables, start=1):
                for row_no, row in enumerate(table.rows, start=1):
                    for col_no, cell in enumerate(row.cells, start=1):
                        cell_text = cell.text.strip()
                        if "超温" not in cell_text:
                            continue

                        source_seen = True
                        print(f"  🔍 表格{table_no} 行{row_no} 列{col_no}:")
                        print(f"    內容: {cell_text[:100]!r}")

                        # Show the paragraph structure of the matching cell.
                        print(f"    段落數: {len(cell.paragraphs)}")
                        for para_no, para in enumerate(cell.paragraphs, start=1):
                            p_text = para.text.strip()
                            if not p_text:
                                continue

                            print(f"      段落{para_no}: {p_text!r}")

                            # Heuristic: any hint substring suggests that a
                            # translation was inserted into this cell.
                            lowered = p_text.lower()
                            if any(word in lowered for word in keywords):
                                translation_seen = True
                                print(hit_message)

            source_status = '✅ 找到' if source_seen else '❌ 未找到'
            translation_status = '✅ 找到' if translation_seen else '❌ 未找到'
            print(f"  原文'超温': {source_status}")
            print(f"  {lang}翻譯: {translation_status}")

        except Exception as exc:
            print(f"❌ 讀取{lang}翻譯檔案失敗: {exc}")

def main():
    """Run the four diagnostic steps in order and print a closing summary.

    The steps are extraction, translation (fed with the segments found by
    extraction), cache inspection and insertion inspection. Any exception
    raised by a step is caught here; its message and full traceback are
    printed, and the remaining steps and the summary are skipped.
    """
    print("🔍 深度診斷'超温'翻譯問題")
    print("檢查完整的提取->翻譯->插入流程")

    try:
        # Step 1 feeds step 2; steps 3 and 4 take no input.
        found_segments = debug_chaoweng_extraction()
        debug_chaoweng_translation(found_segments)
        debug_chaoweng_cache()
        debug_chaoweng_insertion()

        divider = "=" * 80
        summary_lines = (
            "\n" + divider,
            "診斷完成！",
            "可能的問題:",
            "1. 提取階段: segments沒有正確提取'超温'",
            "2. 翻譯階段: 翻譯邏輯沒有處理該segment",
            "3. 快取階段: 翻譯沒有正確存儲",
            "4. 插入階段: 翻譯沒有正確插入到文件",
            divider,
        )
        for line in summary_lines:
            print(line)

    except Exception as err:
        print(f"❌ 診斷過程發生錯誤: {err}")
        import traceback
        details = traceback.format_exc()
        print(f"錯誤詳情: {details}")

# Run the full diagnosis only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()