5th_fix excel problem
This commit is contained in:
146
debug_translation_mapping.py
Normal file
146
debug_translation_mapping.py
Normal file
@@ -0,0 +1,146 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Debug the translation-mapping process: why cell A1 was not translated.
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
|
||||
# Set the stdout encoding to UTF-8
|
||||
sys.stdout.reconfigure(encoding='utf-8')
|
||||
|
||||
from pathlib import Path
|
||||
from app.services.translation_service import ExcelParser
|
||||
|
||||
def debug_translation_mapping(original_file=None, target_language='ja'):
    """Trace why cell A1 of a workbook is not being translated.

    The diagnostic runs four steps and prints the outcome of each:

    1. Extract the text segments with ``ExcelParser`` and check that the
       expected A1 content is among them (stops early if it is not).
    2. Build the source-text -> translation map from the
       ``dt_translation_cache`` table, one lookup per segment.
    3. Read A1 from the workbook through the parser's own helpers and report
       whether it would be translated, and if not, why.
    4. Query the cache once more for the A1 content and compare the stored
       source text with the lookup key (byte length and equality).

    Args:
        original_file: Path of the workbook to inspect. When ``None`` the
            original hard-coded production upload is used.
        target_language: Language code used for the cache lookups.

    Returns:
        None. All results are written to stdout.
    """
    print("=" * 80)
    print("調試翻譯映射過程 - 為什麼A1沒有被翻譯")
    print("=" * 80)

    # Default to the production file the script was originally written for.
    if original_file is None:
        prod_dir = Path(r"C:\Users\EGG\WORK\data\user_scrip\TOOL\Document_translator_V2\uploads\f8b0febc-c0df-4902-8dc3-c90f5634f3b3")
        original_file = prod_dir / "original_panjit_f8b0febc.xlsx"
    workbook_path = str(original_file)

    parser = ExcelParser(workbook_path)

    # 1. Check the extracted text segments.
    print("1. 檢查文字片段提取")
    print("-" * 60)

    segments = parser.extract_text_segments()
    print(f"提取到 {len(segments)} 個片段")

    a1_content = "製程"
    if a1_content in segments:
        print(f"✅ '{a1_content}' 在提取列表中")
    else:
        print(f"❌ '{a1_content}' 不在提取列表中")
        # Nothing further to diagnose if the segment was never extracted.
        return

    # 2. Simulate the mapping step of generate_translated_document.
    print("\n2. 模擬翻譯映射過程")
    print("-" * 60)

    from app import create_app
    app = create_app()

    with app.app_context():
        from sqlalchemy import text as sql_text
        from app import db

        # Build the statement once; it is reused for every segment and for
        # the final exact-match check in step 4.
        cache_lookup = sql_text("""
            SELECT source_text, translated_text
            FROM dt_translation_cache
            WHERE source_text = :text AND target_language = :lang
            ORDER BY created_at DESC
            LIMIT 1
        """)

        tmap = {}

        print("建立翻譯映射...")

        for original_text in segments:
            row = db.session.execute(
                cache_lookup,
                {'text': original_text, 'lang': target_language},
            ).fetchone()

            if row and row[1]:
                tmap[original_text] = row[1]
                if original_text == a1_content:
                    print(f"✅ A1映射成功: '{original_text}' -> '{row[1]}'")
            elif original_text == a1_content:
                print(f"❌ A1映射失敗: '{original_text}' -> 無翻譯")

        print(f"翻譯映射建立完成: {len(tmap)}/{len(segments)}")

        # 3. Simulate the per-cell translation step.
        print("\n3. 模擬儲存格翻譯過程")
        print("-" * 60)

        import openpyxl
        wb = openpyxl.load_workbook(workbook_path, data_only=False)
        wb_vals = None
        try:
            try:
                wb_vals = openpyxl.load_workbook(workbook_path, data_only=True)
            except Exception as exc:
                # Best effort: the values-only workbook is optional, so keep
                # going without it, but do not hide the failure.
                print(f"⚠️ 無法以 data_only=True 載入活頁簿: {exc}")
                wb_vals = None

            ws = wb.active
            ws_vals = wb_vals.active if wb_vals is not None else None

            # Inspect the translation logic for cell A1.
            r, c = 1, 1  # A1
            src_text = parser._get_display_text_for_translation(ws, ws_vals, r, c)
            needs_translation = (
                parser._should_translate(src_text, 'auto') if src_text else False
            )

            print("A1儲存格:")
            print(f" 提取的文字: {repr(src_text)}")
            print(f" 是否需要翻譯: {needs_translation}")

            if src_text:
                if not needs_translation:
                    print(" ❌ 跳過原因: should_translate返回False")
                elif src_text not in tmap:
                    print(" ❌ 跳過原因: 翻譯映射中沒有找到")
                    print(" 映射鍵列表中是否包含:")
                    # Show a small sample of keys to compare against by eye.
                    for key in list(tmap)[:5]:
                        print(f" {repr(key)}")
                    if len(tmap) > 5:
                        print(f" ... 還有{len(tmap)-5}個")
                else:
                    print(f" ✅ 應該翻譯: '{src_text}' -> '{tmap[src_text]}'")
        finally:
            # Always release the workbooks, even if the inspection raised.
            wb.close()
            if wb_vals is not None:
                wb_vals.close()

        # 4. Look for other problems: re-check the cache record for A1.
        print("\n4. 檢查是否有其他問題")
        print("-" * 60)

        match_row = db.session.execute(
            cache_lookup,
            {'text': a1_content, 'lang': target_language},
        ).fetchone()

        if match_row:
            print(f"✅ 快取精確匹配: '{match_row[0]}' -> '{match_row[1]}'")
            print(f" 原文字節數: {len(match_row[0].encode('utf-8'))}")
            print(f" 查找字節數: {len(a1_content.encode('utf-8'))}")
            print(f" 字符完全相等: {match_row[0] == a1_content}")
        else:
            print("❌ 沒有找到精確匹配")

    print("\n" + "=" * 80)
    print("翻譯映射調試完成!")
    print("=" * 80)
|
||||
|
||||
# Run the diagnostic only when this file is executed as a script.
if __name__ == "__main__":
    debug_translation_mapping()
|
Reference in New Issue
Block a user