Files
Document_Translator/debug_real_production_issue.py
2025-09-03 15:07:34 +08:00

179 lines
6.0 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Debug a translation problem observed in the real production environment.
"""
import sys
import os
# Put this script's own directory at the front of sys.path.
# NOTE(review): presumably this is what lets the `app` package imported below
# resolve when the script is run directly -- confirm against the repo layout.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
# Set the output encoding: the script prints CJK text and emoji status
# markers, so stdout is switched to UTF-8 before anything is printed.
sys.stdout.reconfigure(encoding='utf-8')
from pathlib import Path
import openpyxl
from app.services.translation_service import ExcelParser
def _resolve_display_text(raw_value, cached_value):
    """Return the text that would be submitted for translation for one cell.

    Args:
        raw_value: Cell value read with ``data_only=False``; a formula shows
            up here as a string starting with ``=``.
        cached_value: Cell value read with ``data_only=True``, or ``None``
            when that workbook could not be loaded.

    Returns:
        ``raw_value`` when it is a non-blank string that is not a formula;
        otherwise ``cached_value`` when that is a non-blank string;
        otherwise ``None``.
    """
    if (
        isinstance(raw_value, str)
        and not raw_value.startswith("=")
        and raw_value.strip()
    ):
        return raw_value
    if isinstance(cached_value, str) and cached_value.strip():
        return cached_value
    return None


def debug_real_production_issue():
    """Print a step-by-step diagnosis of one production translation job.

    The job's upload directory and file names are hard-coded. The function
    only prints its findings; it returns ``None`` in every case, including
    the early exits taken when either workbook file is missing.

    Steps, in order:
      1. Run ``ExcelParser.extract_text_segments`` on the original workbook
         and report whether the expected A1 text was extracted.
      2. Read A1 of the original workbook both as stored (formula) and as
         cached value, derive the text used for translation, and print what
         the parser's ``_should_translate`` / ``_has_cjk`` say about it.
      3. Read A1 of the translated workbook; a string containing a newline
         is reported as translated.
      4. Look up the newest Japanese entry for that text in
         ``dt_translation_cache``.
      5. Compare cells A1..E1 and A2..E2 between the two workbooks.

    Every workbook that was opened is closed even if a step raises.
    """
    print("=" * 80)
    print("調試實際生產環境翻譯問題")
    print("新上傳UUID: f8b0febc-c0df-4902-8dc3-c90f5634f3b3")
    print("=" * 80)

    # Upload directory and file names of the job under investigation.
    prod_dir = Path(r"C:\Users\EGG\WORK\data\user_scrip\TOOL\Document_translator_V2\uploads\f8b0febc-c0df-4902-8dc3-c90f5634f3b3")
    original_file = prod_dir / "original_panjit_f8b0febc.xlsx"
    translated_file = prod_dir / "original_panjit_f8b0febc_ja_translated.xlsx"

    # Nothing can be diagnosed without both files, so bail out early.
    if not original_file.exists():
        print(f"❌ 原始文件不存在: {original_file}")
        return
    if not translated_file.exists():
        print(f"❌ 翻譯文件不存在: {translated_file}")
        return

    print("✅ 檔案確認:")
    print(f" 原始文件: {original_file.name}")
    print(f" 翻譯文件: {translated_file.name}")

    # 1. Check what the ExcelParser actually extracts.
    print("\n1. 檢查實際ExcelParser提取行為")
    print("-" * 60)
    parser = ExcelParser(str(original_file))
    segments = parser.extract_text_segments()
    print(f"實際提取到 {len(segments)} 個文字片段")

    # Check whether the expected A1 text is among the extracted segments.
    a1_content = "製程"
    if a1_content in segments:
        print(f"✅ A1內容 '{a1_content}' 已被提取(位置: {segments.index(a1_content)+1})")
    else:
        print(f"❌ A1內容 '{a1_content}' 仍未被提取")
        # Show the first 10 extracted segments to help spot what came out instead.
        print(" 實際提取的前10個片段:")
        for position, segment in enumerate(segments[:10], start=1):
            print(f" {position:2d}. {segment!r}")

    # Track every workbook so the ``finally`` below can close whichever
    # ones were actually opened, even when a later step raises.
    wb_orig = None
    wb_orig_vals = None
    wb_trans = None
    try:
        # 2. Inspect the raw content of cell A1 in the original workbook.
        print("\n2. 檢查A1儲存格原始內容")
        print("-" * 60)
        wb_orig = openpyxl.load_workbook(str(original_file), data_only=False)
        try:
            wb_orig_vals = openpyxl.load_workbook(str(original_file), data_only=True)
        except Exception as exc:
            # Best effort: the cached-value view is optional, so report the
            # failure and carry on with formulas only.
            print(f"⚠️ 無法載入顯示值活頁簿: {exc}")
            wb_orig_vals = None

        sheet_orig = wb_orig.active
        a1_raw = sheet_orig['A1'].value
        a1_display = wb_orig_vals.active['A1'].value if wb_orig_vals is not None else None
        print(f"A1原始值: {a1_raw!r}")
        if wb_orig_vals is not None:
            print(f"A1顯示值: {a1_display!r}")

        # Emulate the get_display_text_for_translation logic.
        display_text = _resolve_display_text(a1_raw, a1_display)
        print(f"用於翻譯的文字: {display_text!r}")

        if display_text:
            should_translate = parser._should_translate(display_text, 'auto')
            has_cjk = parser._has_cjk(display_text)
            # NOTE(review): 2 for CJK text, 3 otherwise; presumably mirrors
            # the parser's own minimum-length rule -- confirm.
            min_length = 2 if has_cjk else 3
            print(f"文字長度: {len(display_text)}")
            print(f"包含CJK: {has_cjk}")
            print(f"最小長度要求: {min_length}")
            print(f"應該翻譯: {should_translate}")

        # 3. Inspect cell A1 of the translated workbook.
        print("\n3. 檢查翻譯文件A1儲存格")
        print("-" * 60)
        wb_trans = openpyxl.load_workbook(str(translated_file), data_only=False)
        sheet_trans = wb_trans.active
        a1_trans = sheet_trans['A1'].value
        print(f"A1翻譯結果: {a1_trans!r}")
        if isinstance(a1_trans, str) and '\n' in a1_trans:
            # A newline in the cell is taken as the two-line translated format.
            print("✅ A1已翻譯格式: 雙行")
            for line_no, line in enumerate(a1_trans.split('\n'), start=1):
                print(f"{line_no}: {line!r}")
        elif a1_raw == a1_trans:
            print("❌ A1未翻譯 - 內容完全相同")
        else:
            print("⚠️ A1內容有變化但格式不明")

        # 4. Inspect the translation cache.
        print("\n4. 檢查翻譯快取")
        print("-" * 60)
        # Imported here, as in the original script, so the Flask app is only
        # created once the file checks above have passed.
        from app import create_app

        app = create_app()
        with app.app_context():
            from sqlalchemy import text as sql_text
            from app import db

            if display_text:
                # Bound parameter: the cell text never gets spliced into SQL.
                result = db.session.execute(sql_text("""
                    SELECT translated_text, created_at
                    FROM dt_translation_cache
                    WHERE source_text = :text AND target_language = 'ja'
                    ORDER BY created_at DESC
                    LIMIT 1
                """), {'text': display_text})
                row = result.fetchone()
                if row:
                    print(f"✅ 快取中有翻譯: '{display_text}' -> '{row[0]}'")
                    print(f" 創建時間: {row[1]}")
                else:
                    print(f"❌ 快取中沒有翻譯: '{display_text}'")

        # 5. Systematically compare the first 10 cells.
        print("\n5. 系統性檢查前10個儲存格")
        print("-" * 60)
        important_cells = ['A1', 'B1', 'C1', 'D1', 'E1', 'A2', 'B2', 'C2', 'D2', 'E2']
        for cell_name in important_cells:
            orig_val = sheet_orig[cell_name].value
            trans_val = sheet_trans[cell_name].value
            if not orig_val:
                # Only cells that have content are checked.
                continue
            print(f"\n{cell_name}:")
            print(f" 原始: {orig_val!r}")
            print(f" 翻譯: {trans_val!r}")
            if isinstance(trans_val, str) and '\n' in trans_val:
                print(" 狀態: ✅ 已翻譯")
            elif orig_val == trans_val:
                print(" 狀態: ❌ 未翻譯")
            else:
                print(" 狀態: ⚠️ 內容有變化")
    finally:
        for workbook in (wb_orig, wb_trans, wb_orig_vals):
            if workbook is not None:
                workbook.close()

    print("\n" + "=" * 80)
    print("實際生產環境調試完成!")
    print("=" * 80)
# Run the diagnostic only when this file is executed as a script, not when it
# is imported.
if __name__ == "__main__":
    debug_real_production_issue()