5th_fix excel problem

This commit is contained in:
beabigegg
2025-09-03 15:07:34 +08:00
parent cce3fd4925
commit 5fd0671b4f
28 changed files with 4484 additions and 97 deletions

View File

@@ -0,0 +1,179 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Debug a translation problem observed with a real production upload.
"""
import sys
import os
# Put this script's own directory first on sys.path before the `app` import
# below (presumably so the project's `app` package resolves when the script is
# run directly from the project root — confirm against the repo layout).
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
# Force UTF-8 on stdout: everything this script prints contains CJK text and emoji.
sys.stdout.reconfigure(encoding='utf-8')
from pathlib import Path
import openpyxl
from app.services.translation_service import ExcelParser
def _text_for_translation(raw_value, cached_value):
    """Return the cell text that would be considered for translation, or None.

    A non-blank literal string (one that is not a formula) is used as-is.
    A formula (string starting with "="), a blank string, or a non-string raw
    value falls back to the cached display value, but only when that value is
    itself a non-blank string.

    NOTE(review): the original comment says this imitates
    ``get_display_text_for_translation``; that function is not visible here,
    so keep the two in sync manually.
    """
    if isinstance(raw_value, str) and not raw_value.startswith("=") and raw_value.strip():
        return raw_value
    if isinstance(cached_value, str) and cached_value.strip():
        return cached_value
    return None


def debug_real_production_issue(upload_dir: "str | Path | None" = None) -> None:
    """Print a step-by-step diagnosis of why cell A1 was (not) translated.

    The report covers five steps:
      1. what ``ExcelParser.extract_text_segments()`` returns for the original file,
      2. the raw and cached values of A1 in the original workbook,
      3. the value of A1 in the translated workbook,
      4. whether the translation cache table has a Japanese entry for the A1 text,
      5. a side-by-side comparison of cells A1:E2 in both workbooks.

    Args:
        upload_dir: Directory holding ``original_panjit_f8b0febc.xlsx`` and its
            ``_ja_translated`` counterpart. Defaults to the hard-coded
            production upload folder, so calling with no argument behaves as
            before. Useful for running the diagnosis against a copy of the
            upload on another machine.

    Returns:
        None. All results are printed. The function returns early (after
        printing a message) when either workbook file is missing.
    """
    banner = "=" * 80
    rule = "-" * 60

    print(banner)
    print("調試實際生產環境翻譯問題")
    print("新上傳UUID: f8b0febc-c0df-4902-8dc3-c90f5634f3b3")
    print(banner)

    # Location of the real production files (overridable via `upload_dir`).
    if upload_dir is None:
        prod_dir = Path(r"C:\Users\EGG\WORK\data\user_scrip\TOOL\Document_translator_V2\uploads\f8b0febc-c0df-4902-8dc3-c90f5634f3b3")
    else:
        prod_dir = Path(upload_dir)
    original_file = prod_dir / "original_panjit_f8b0febc.xlsx"
    translated_file = prod_dir / "original_panjit_f8b0febc_ja_translated.xlsx"

    # Guard clauses: nothing to diagnose without both workbooks.
    if not original_file.exists():
        print(f"❌ 原始文件不存在: {original_file}")
        return
    if not translated_file.exists():
        print(f"❌ 翻譯文件不存在: {translated_file}")
        return

    print("✅ 檔案確認:")
    print(f" 原始文件: {original_file.name}")
    print(f" 翻譯文件: {translated_file.name}")

    # 1. What does the ExcelParser actually extract?
    print("\n1. 檢查實際ExcelParser提取行為")
    print(rule)
    parser = ExcelParser(str(original_file))
    segments = parser.extract_text_segments()
    print(f"實際提取到 {len(segments)} 個文字片段")

    # Was the expected A1 content picked up?
    a1_content = "製程"
    if a1_content in segments:
        print(f"✅ A1內容 '{a1_content}' 已被提取(位置: {segments.index(a1_content)+1})")
    else:
        print(f"❌ A1內容 '{a1_content}' 仍未被提取")
        # Show the first 10 extracted segments to help spot what was picked up instead.
        # NOTE(review): placement under this else-branch is reconstructed — confirm.
        print(" 實際提取的前10個片段:")
        for i, seg in enumerate(segments[:10]):
            print(f" {i+1:2d}. {repr(seg)}")

    # Workbooks are opened below; track them so the `finally` can close whatever
    # was opened even if a later step raises.
    wb_orig = None
    wb_orig_vals = None
    wb_trans = None
    try:
        # 2. Raw content of A1 in the original workbook.
        print("\n2. 檢查A1儲存格原始內容")
        print(rule)
        wb_orig = openpyxl.load_workbook(str(original_file), data_only=False)
        try:
            # Second load with data_only=True exposes cached formula results.
            wb_orig_vals = openpyxl.load_workbook(str(original_file), data_only=True)
        except Exception as exc:
            # Best effort: continue without cached values, but say why instead
            # of swallowing the error (and never trap KeyboardInterrupt/SystemExit).
            print(f"⚠️ 無法以 data_only=True 載入原始文件: {exc}")
            wb_orig_vals = None

        a1_raw = wb_orig.active['A1'].value
        a1_display = wb_orig_vals.active['A1'].value if wb_orig_vals is not None else None
        print(f"A1原始值: {repr(a1_raw)}")
        if wb_orig_vals is not None:
            print(f"A1顯示值: {repr(a1_display)}")

        display_text = _text_for_translation(a1_raw, a1_display)
        print(f"用於翻譯的文字: {repr(display_text)}")
        if display_text:
            should_translate = parser._should_translate(display_text, 'auto')
            has_cjk = parser._has_cjk(display_text)
            min_length = 2 if has_cjk else 3
            print(f"文字長度: {len(display_text)}")
            print(f"包含CJK: {has_cjk}")
            print(f"最小長度要求: {min_length}")
            print(f"應該翻譯: {should_translate}")

        # 3. A1 in the translated workbook.
        print("\n3. 檢查翻譯文件A1儲存格")
        print(rule)
        wb_trans = openpyxl.load_workbook(str(translated_file), data_only=False)
        a1_trans = wb_trans.active['A1'].value
        print(f"A1翻譯結果: {repr(a1_trans)}")
        # A newline in the cell is treated as the "original + translation" two-line format.
        if isinstance(a1_trans, str) and '\n' in a1_trans:
            print("✅ A1已翻譯格式: 雙行")
            for i, line in enumerate(a1_trans.split('\n')):
                print(f"{i+1}: {repr(line)}")
        elif a1_raw == a1_trans:
            print("❌ A1未翻譯 - 內容完全相同")
        else:
            print("⚠️ A1內容有變化但格式不明")

        # 4. Translation cache lookup (needs an application context for the DB session).
        print("\n4. 檢查翻譯快取")
        print(rule)
        from app import create_app
        app = create_app()
        with app.app_context():
            from sqlalchemy import text as sql_text
            from app import db
            if display_text:
                # Bound parameter keeps the cell text out of the SQL string.
                result = db.session.execute(sql_text("""
                    SELECT translated_text, created_at
                    FROM dt_translation_cache
                    WHERE source_text = :text AND target_language = 'ja'
                    ORDER BY created_at DESC
                    LIMIT 1
                """), {'text': display_text})
                row = result.fetchone()
                if row:
                    print(f"✅ 快取中有翻譯: '{display_text}' -> '{row[0]}'")
                    print(f" 創建時間: {row[1]}")
                else:
                    print(f"❌ 快取中沒有翻譯: '{display_text}'")

        # 5. Systematic comparison of the first ten cells (A1:E2).
        print("\n5. 系統性檢查前10個儲存格")
        print(rule)
        important_cells = ['A1', 'B1', 'C1', 'D1', 'E1', 'A2', 'B2', 'C2', 'D2', 'E2']
        for cell_name in important_cells:
            orig_val = wb_orig.active[cell_name].value
            trans_val = wb_trans.active[cell_name].value
            if not orig_val:
                # Only cells that have content are reported.
                continue
            print(f"\n{cell_name}:")
            print(f" 原始: {repr(orig_val)}")
            print(f" 翻譯: {repr(trans_val)}")
            if isinstance(trans_val, str) and '\n' in trans_val:
                print(" 狀態: ✅ 已翻譯")
            elif orig_val == trans_val:
                print(" 狀態: ❌ 未翻譯")
            else:
                print(" 狀態: ⚠️ 內容有變化")
    finally:
        # Release file handles regardless of how the diagnosis ended.
        for workbook in (wb_orig, wb_trans, wb_orig_vals):
            if workbook is not None:
                workbook.close()

    print("\n" + banner)
    print("實際生產環境調試完成!")
    print(banner)
# Run the diagnosis only when this file is executed as a script, not on import.
if __name__ == "__main__":
    debug_real_production_issue()