Files
Document_Translator/debug_writeback_issue.py
2025-09-03 15:07:34 +08:00

220 lines
8.4 KiB
Python

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Debug the write-back issue: why cells D2-D8 have cached translations that were never written back to the Excel file.
"""
import sys
import os
# Put this script's own directory first on sys.path; presumably this is what lets the
# project-local `app` package imported below resolve when the script is run directly.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
# Set the output encoding: the script prints CJK text and emoji, so force UTF-8 on stdout.
sys.stdout.reconfigure(encoding='utf-8')
from pathlib import Path
import openpyxl
from app.services.translation_service import ExcelParser
def _format_ratio(hits, total):
    """Return ``"hits/total = pct%"``; report 0.0% instead of dividing by zero."""
    pct = hits / total * 100 if total else 0.0
    return f"{hits}/{total} = {pct:.1f}%"


def _load_values_workbook(path):
    """Best-effort load of *path* with ``data_only=True`` (cached formula results).

    Returns the workbook, or ``None`` when it cannot be loaded; the diagnostic
    then continues without displayed values instead of aborting.
    """
    try:
        return openpyxl.load_workbook(str(path), data_only=True)
    except Exception as exc:  # deliberate best-effort, but no longer silent
        print(f"⚠️ 無法以 data_only=True 載入工作簿: {exc}")
        return None


def _pick_display_text(raw_value, cached_value):
    """Choose the text that would be sent for translation for one cell.

    A non-blank, non-formula string in the cell itself wins; otherwise the
    cached (``data_only``) value is used when it is a non-blank string.
    Returns ``None`` when neither applies.
    """
    if isinstance(raw_value, str) and not raw_value.startswith("=") and raw_value.strip():
        return raw_value
    if isinstance(cached_value, str) and cached_value.strip():
        return cached_value
    return None


def _fetch_cached_translation(session, source_text, target_language='ja'):
    """Return the newest ``(id, translated_text, created_at)`` cache row, or ``None``.

    Looks up ``dt_translation_cache`` for *source_text* / *target_language*
    using bound parameters only.
    """
    from sqlalchemy import text as sql_text

    result = session.execute(sql_text("""
        SELECT id, translated_text, created_at
        FROM dt_translation_cache
        WHERE source_text = :text AND target_language = :lang
        ORDER BY created_at DESC
        LIMIT 1
    """), {'text': source_text, 'lang': target_language})
    return result.fetchone()


def debug_writeback_issue():
    """Print a step-by-step diagnosis of why cached translations were not written back.

    For a fixed upload directory and a fixed list of suspect cells, the
    function:

    1. prints the original, displayed and translated value of each cell and
       the text that would be submitted for translation;
    2. checks whether that text appears in ``ExcelParser.extract_text_segments()``;
    3. looks each text up in the ``dt_translation_cache`` table (target ``'ja'``);
    4. rebuilds the text -> translation mapping for every extracted segment;
    5. simulates the per-cell write-back and prints what would be written.

    Nothing is saved to any workbook; all results go to stdout.  Returns
    ``None``, returning early (after printing a message) when either input
    file is missing.
    """
    from contextlib import ExitStack

    print("=" * 80)
    print("調試回寫問題 - D2-D8有快取但沒有回寫")
    print("使用上傳UUID: f8b0febc-c0df-4902-8dc3-c90f5634f3b3 (有日文翻譯)")
    print("=" * 80)

    # Upload directory that contains the Japanese translation output.
    prod_dir = Path(r"C:\Users\EGG\WORK\data\user_scrip\TOOL\Document_translator_V2\uploads\f8b0febc-c0df-4902-8dc3-c90f5634f3b3")
    original_file = prod_dir / "original_panjit_f8b0febc.xlsx"
    translated_file = prod_dir / "original_panjit_f8b0febc_ja_translated.xlsx"

    if not original_file.exists():
        print(f"❌ 原始文件不存在: {original_file}")
        return
    if not translated_file.exists():
        print(f"❌ 翻譯文件不存在: {translated_file}")
        return

    print("✅ 檔案確認:")
    print(f" 原始: {original_file.name}")
    print(f" 翻譯: {translated_file.name}")

    # ExitStack closes every workbook opened below, even if a later step raises.
    with ExitStack() as cleanup:
        # 1. Inspect the concrete contents of the suspect cells.
        print("\n1. 檢查問題儲存格內容")
        print("-" * 60)
        problem_cells = ['D2', 'D3', 'D4', 'D5', 'D6', 'D7', 'D8', 'F2', 'F3', 'F4', 'F5', 'F6']

        wb_orig = openpyxl.load_workbook(str(original_file), data_only=False)
        cleanup.callback(wb_orig.close)
        wb_orig_vals = _load_values_workbook(original_file)
        if wb_orig_vals is not None:
            cleanup.callback(wb_orig_vals.close)
        wb_trans = openpyxl.load_workbook(str(translated_file), data_only=False)
        cleanup.callback(wb_trans.close)

        # Created once, up front: later steps need it even when every cell is empty.
        parser = ExcelParser(str(original_file))

        cell_contents = {}
        for cell_name in problem_cells:
            orig_val = wb_orig.active[cell_name].value
            orig_display = wb_orig_vals.active[cell_name].value if wb_orig_vals is not None else None
            trans_val = wb_trans.active[cell_name].value

            if not orig_val:  # only cells that actually hold something are examined
                continue

            print(f"\n{cell_name}:")
            print(f" 原始值: {repr(orig_val)}")
            if wb_orig_vals is not None and orig_display != orig_val:
                print(f" 顯示值: {repr(orig_display)}")
            print(f" 翻譯值: {repr(trans_val)}")

            # Decide which text would be used for translation.
            display_text = _pick_display_text(orig_val, orig_display)
            print(f" 用於翻譯: {repr(display_text)}")

            if display_text:
                should_translate = parser._should_translate(display_text, 'auto')
                print(f" 應該翻譯: {should_translate}")
                cell_contents[cell_name] = display_text
            else:
                print(" ❌ 沒有可翻譯文字")

        # 2. Check whether those texts are part of the extracted segment list.
        print("\n2. 檢查文字提取狀況")
        print("-" * 60)
        segments = parser.extract_text_segments()
        print(f"總共提取 {len(segments)} 個片段")
        for cell_name, text in cell_contents.items():
            if text in segments:
                print(f"{cell_name}='{text}' 已被提取 (位置: {segments.index(text)+1})")
            else:
                print(f"{cell_name}='{text}' 未被提取")

        # 3. Look the texts up in the MySQL translation cache.
        print("\n3. 檢查MySQL快取中的翻譯")
        print("-" * 60)
        from app import create_app
        app = create_app()
        # The app context stays active for the remaining steps and is popped by the stack.
        cleanup.enter_context(app.app_context())
        from app import db

        translation_map = {}
        cached_cells = 0  # counted per cell so the ratio matches its per-cell denominator
        for cell_name, text in cell_contents.items():
            row = _fetch_cached_translation(db.session, text)
            if row:
                translation_map[text] = row[1]
                cached_cells += 1
                print(f"{cell_name}='{text}' -> '{row[1]}' (ID:{row[0]}, 時間:{row[2]})")
            else:
                print(f"{cell_name}='{text}' -> 快取中無翻譯")
        print(f"\n快取命中率: {_format_ratio(cached_cells, len(cell_contents))}")

        # 4. Simulate how generate_translated_document builds its mapping.
        print("\n4. 模擬翻譯映射建立過程")
        print("-" * 60)
        mapping_result = {}
        for original_text in segments:
            cache_row = _fetch_cached_translation(db.session, original_text)
            # Rows with an empty translation are treated as cache misses.
            if cache_row and cache_row[1]:
                mapping_result[original_text] = cache_row[1]
        print(f"映射建立完成: {_format_ratio(len(mapping_result), len(segments))}")

        # Report the mapping status of the suspect cells.
        print("\n映射檢查:")
        for cell_name, text in cell_contents.items():
            if text in mapping_result:
                print(f"{cell_name}='{text}' 在映射中: '{mapping_result[text]}'")
            else:
                print(f"{cell_name}='{text}' 不在映射中")

        # 5. Simulate the actual per-cell write-back logic.
        print("\n5. 模擬儲存格翻譯寫入邏輯")
        print("-" * 60)
        # Reload the workbook for the simulation, as the original script did.
        wb_test = openpyxl.load_workbook(str(original_file), data_only=False)
        cleanup.callback(wb_test.close)
        wb_test_vals = _load_values_workbook(original_file)
        if wb_test_vals is not None:
            cleanup.callback(wb_test_vals.close)
        ws = wb_test.active
        ws_vals = wb_test_vals.active if wb_test_vals is not None else None

        # cell_contents preserves the problem_cells order, restricted to cells with text.
        for cell_name, text in cell_contents.items():
            cell = ws[cell_name]
            r, c = cell.row, cell.column
            src_text = parser._get_display_text_for_translation(ws, ws_vals, r, c)

            print(f"\n{cell_name} 寫入模擬:")
            print(f" 提取文字: {repr(src_text)}")
            print(f" 預期文字: {repr(text)}")
            print(f" 文字一致: {src_text == text}")

            if src_text and parser._should_translate(src_text, 'auto'):
                if src_text in mapping_result:
                    translated = mapping_result[src_text]
                    new_value = f"{src_text}\n{translated}"
                    print(f" ✅ 應該寫入: {repr(new_value)}")
                else:
                    print(f" ❌ 映射中找不到: '{src_text}'")
                    # Look for keys that differ only by surrounding whitespace.
                    similar_keys = [key for key in mapping_result if key.strip() == src_text.strip()]
                    if similar_keys:
                        print(f" 相似鍵: {similar_keys}")
            else:
                print(" ❌ 不應翻譯或無文字")

    print("\n" + "=" * 80)
    print("回寫問題調試完成!")
    print("請檢查上述輸出找出問題原因。")
    print("=" * 80)
# Run the diagnostic only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    debug_writeback_issue()