179 lines
6.0 KiB
Python
179 lines
6.0 KiB
Python
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Debug a translation problem observed in the real production environment.
"""

import sys
import os

# Put this script's own directory at the front of the import path
# (presumably so the project-local `app` package imported below resolves
# when the script is run directly -- confirm against the repo layout).
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

# Force UTF-8 output so the CJK text and emoji printed by this script do not
# fail on consoles with a narrower default encoding. The guard keeps the
# script importable when stdout has been replaced by an object that lacks
# `reconfigure` (e.g. a capture buffer).
if hasattr(sys.stdout, "reconfigure"):
    sys.stdout.reconfigure(encoding='utf-8')

from pathlib import Path

import openpyxl

from app.services.translation_service import ExcelParser
|
||
def debug_real_production_issue():
    """Diagnose why cell A1 of one production upload was not translated.

    The function works on a hard-coded upload directory and prints a report
    in five steps:

    1. Run ``ExcelParser.extract_text_segments()`` on the original workbook
       and check whether the expected A1 text is among the segments.
    2. Read A1 directly with openpyxl (formula view and cached-value view)
       and reproduce the "text used for translation" selection.
    3. Inspect A1 of the translated workbook.
    4. Look up the A1 text in the ``dt_translation_cache`` table.
    5. Compare the first ten cells (A1-E2) of both workbooks.

    Returns:
        None. All results are printed to stdout. The function returns early
        (after printing a message) when either workbook file is missing.
    """
    print("=" * 80)
    print("調試實際生產環境翻譯問題")
    print("新上傳UUID: f8b0febc-c0df-4902-8dc3-c90f5634f3b3")
    print("=" * 80)

    # Actual production file paths.
    prod_dir = Path(r"C:\Users\EGG\WORK\data\user_scrip\TOOL\Document_translator_V2\uploads\f8b0febc-c0df-4902-8dc3-c90f5634f3b3")
    original_file = prod_dir / "original_panjit_f8b0febc.xlsx"
    translated_file = prod_dir / "original_panjit_f8b0febc_ja_translated.xlsx"

    if not original_file.exists():
        print(f"❌ 原始文件不存在: {original_file}")
        return

    if not translated_file.exists():
        print(f"❌ 翻譯文件不存在: {translated_file}")
        return

    print("✅ 檔案確認:")
    print(f" 原始文件: {original_file.name}")
    print(f" 翻譯文件: {translated_file.name}")

    # 1. Check what the ExcelParser actually extracts.
    print("\n1. 檢查實際ExcelParser提取行為")
    print("-" * 60)

    parser = ExcelParser(str(original_file))
    segments = parser.extract_text_segments()

    print(f"實際提取到 {len(segments)} 個文字片段")

    # Was the expected A1 text extracted?
    a1_content = "製程"
    if a1_content in segments:
        print(f"✅ A1內容 '{a1_content}' 已被提取(位置: {segments.index(a1_content)+1})")
    else:
        print(f"❌ A1內容 '{a1_content}' 仍未被提取")

    # Show the first 10 extracted segments.
    print(" 實際提取的前10個片段:")
    for position, segment in enumerate(segments[:10], start=1):
        print(f" {position:2d}. {segment!r}")

    # Workbooks are tracked here so the `finally` below can close whichever
    # ones were opened, even if a later step raises.
    wb_orig = None
    wb_orig_vals = None
    wb_trans = None
    try:
        # 2. Inspect the raw content of cell A1.
        print("\n2. 檢查A1儲存格原始內容")
        print("-" * 60)

        # data_only=False keeps formulas as written in the file.
        wb_orig = openpyxl.load_workbook(str(original_file), data_only=False)
        try:
            # data_only=True exposes cached values instead of formulas.
            # This second view is best-effort: carry on without it on failure.
            wb_orig_vals = openpyxl.load_workbook(str(original_file), data_only=True)
        except Exception as exc:
            print(f"⚠️ 無法以 data_only 模式載入原始文件: {exc}")
            wb_orig_vals = None

        sheet_orig = wb_orig.active
        a1_raw = sheet_orig['A1'].value
        a1_display = wb_orig_vals.active['A1'].value if wb_orig_vals is not None else None

        print(f"A1原始值: {a1_raw!r}")
        if wb_orig_vals is not None:
            print(f"A1顯示值: {a1_display!r}")

        # Mimic the get_display_text_for_translation logic: a non-blank,
        # non-formula string is used as-is; a formula or a non-string value
        # falls back to the cached display value when that is a non-blank
        # string.
        if isinstance(a1_display, str) and a1_display.strip():
            cached_text = a1_display
        else:
            cached_text = None

        raw_is_text = isinstance(a1_raw, str)
        if raw_is_text and not a1_raw.startswith("=") and a1_raw.strip():
            display_text = a1_raw
        else:
            display_text = cached_text

        print(f"用於翻譯的文字: {display_text!r}")

        if display_text:
            should_translate = parser._should_translate(display_text, 'auto')
            has_cjk = parser._has_cjk(display_text)
            min_length = 2 if has_cjk else 3

            print(f"文字長度: {len(display_text)}")
            print(f"包含CJK: {has_cjk}")
            print(f"最小長度要求: {min_length}")
            print(f"應該翻譯: {should_translate}")

        # 3. Inspect A1 in the translated workbook.
        print("\n3. 檢查翻譯文件A1儲存格")
        print("-" * 60)

        wb_trans = openpyxl.load_workbook(str(translated_file), data_only=False)
        sheet_trans = wb_trans.active
        a1_trans = sheet_trans['A1'].value

        print(f"A1翻譯結果: {a1_trans!r}")

        # A newline inside the cell is treated as the "original + translation"
        # two-line format, i.e. the cell was translated.
        if isinstance(a1_trans, str) and '\n' in a1_trans:
            print("✅ A1已翻譯!格式: 雙行")
            for line_no, line in enumerate(a1_trans.split('\n'), start=1):
                print(f" 行{line_no}: {line!r}")
        elif a1_raw == a1_trans:
            print("❌ A1未翻譯 - 內容完全相同")
        else:
            print("⚠️ A1內容有變化但格式不明")

        # 4. Check the translation cache.
        print("\n4. 檢查翻譯快取")
        print("-" * 60)

        # Imported here rather than at module top so the application is only
        # loaded once the file checks above have passed.
        from app import create_app
        app = create_app()

        with app.app_context():
            from sqlalchemy import text as sql_text
            from app import db

            if display_text:
                # Bound parameter (:text) -- the cell content is never
                # interpolated into the SQL string.
                result = db.session.execute(sql_text("""
                    SELECT translated_text, created_at
                    FROM dt_translation_cache
                    WHERE source_text = :text AND target_language = 'ja'
                    ORDER BY created_at DESC
                    LIMIT 1
                """), {'text': display_text})

                row = result.fetchone()
                if row:
                    print(f"✅ 快取中有翻譯: '{display_text}' -> '{row[0]}'")
                    print(f" 創建時間: {row[1]}")
                else:
                    print(f"❌ 快取中沒有翻譯: '{display_text}'")

        # 5. Systematically compare the first 10 cells.
        print("\n5. 系統性檢查前10個儲存格")
        print("-" * 60)

        important_cells = ['A1', 'B1', 'C1', 'D1', 'E1', 'A2', 'B2', 'C2', 'D2', 'E2']

        for cell_name in important_cells:
            orig_val = sheet_orig[cell_name].value
            trans_val = sheet_trans[cell_name].value

            # Only report cells that have content in the original.
            if not orig_val:
                continue

            print(f"\n{cell_name}:")
            print(f" 原始: {orig_val!r}")
            print(f" 翻譯: {trans_val!r}")

            if isinstance(trans_val, str) and '\n' in trans_val:
                print(" 狀態: ✅ 已翻譯")
            elif orig_val == trans_val:
                print(" 狀態: ❌ 未翻譯")
            else:
                print(" 狀態: ⚠️ 內容有變化")
    finally:
        # Release every workbook that was opened, on success or failure.
        for workbook in (wb_orig, wb_trans, wb_orig_vals):
            if workbook is not None:
                workbook.close()

    print("\n" + "=" * 80)
    print("實際生產環境調試完成!")
    print("=" * 80)
|
||
# Run the diagnostic only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    debug_real_production_issue()