#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Repair missing Excel translation-cache entries.

Extracts source/translation pairs from an already-translated Excel file and
writes them back into the translation cache.
"""

import sys
import os
from contextlib import closing

# Make the directory containing this script importable before the ``app``
# import below (presumably so the sibling ``app`` package resolves -- confirm
# against the project layout).
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

# Force UTF-8 on stdout; the status lines below contain CJK text and emoji.
sys.stdout.reconfigure(encoding='utf-8')

from pathlib import Path
import openpyxl
from app import create_app

# Size of the window scanned on the active sheet (rows 1..50, columns 1..20).
_MAX_SCAN_ROWS = 50
_MAX_SCAN_COLS = 20


def _split_translated_cell(original_value, translated_value):
    """Split a translated cell of the form ``<source>\\n<translation>``.

    Args:
        original_value: Value of the cell in the original workbook.
        translated_value: Value of the same cell in the translated workbook.

    Returns:
        A ``(source_text, translated_text)`` tuple of stripped strings, or
        ``None`` when either value is not a string, the translated value has
        no newline, its first line does not equal the stripped original text,
        or either resulting part is empty.
    """
    if not isinstance(original_value, str) or not isinstance(translated_value, str):
        return None

    first_line, newline, remainder = translated_value.partition('\n')
    if not newline:
        return None

    source_text = first_line.strip()
    translated_text = remainder.strip()

    # The first line must reproduce the original cell text exactly.
    if original_value.strip() != source_text:
        return None

    # An empty source or translation is useless as a cache entry: an empty
    # cached value fails the truthiness checks used later for lookup and
    # verification, so it would be re-added on every run.
    if not source_text or not translated_text:
        return None

    return source_text, translated_text


def _collect_translation_pairs(original_file, translated_file):
    """Scan both workbooks' active sheets and collect translation pairs.

    Args:
        original_file: Path of the untranslated workbook.
        translated_file: Path of the translated workbook.

    Returns:
        A list of dicts with keys ``cell``, ``source_text`` and
        ``translated_text``, one per cell inside the scan window whose
        translated value matches the ``<source>\\n<translation>`` format.
    """
    translation_pairs = []

    # ``closing`` guarantees both workbooks are closed even if the scan raises.
    with closing(openpyxl.load_workbook(str(original_file), data_only=False)) as wb_orig, \
            closing(openpyxl.load_workbook(str(translated_file), data_only=False)) as wb_trans:
        sheet_orig = wb_orig.active
        sheet_trans = wb_trans.active

        for row in range(1, _MAX_SCAN_ROWS + 1):
            for col in range(1, _MAX_SCAN_COLS + 1):
                orig_cell = sheet_orig.cell(row=row, column=col)
                trans_cell = sheet_trans.cell(row=row, column=col)

                orig_val = orig_cell.value
                trans_val = trans_cell.value

                if not orig_val or not trans_val:
                    continue

                parts = _split_translated_cell(orig_val, trans_val)
                if parts is None:
                    continue

                original_text, translated_text = parts
                # ``coordinate`` yields the A1-style name and stays correct
                # beyond column Z, unlike ``chr(64 + col)``.
                cell_name = orig_cell.coordinate
                translation_pairs.append({
                    'cell': cell_name,
                    'source_text': original_text,
                    'translated_text': translated_text
                })
                print(f"✅ {cell_name}: '{original_text[:30]}...' -> '{translated_text[:30]}...'")

    return translation_pairs


def _fill_cache(cache_model, translation_pairs, source_language, target_language):
    """Write the collected pairs into the translation cache.

    Args:
        cache_model: Object exposing ``get_translation`` and
            ``save_translation`` (the project's ``TranslationCache``).
        translation_pairs: Pairs produced by ``_collect_translation_pairs``.
        source_language: Source language code passed to the cache.
        target_language: Target language code passed to the cache.

    Returns:
        A ``(added, updated, skipped)`` tuple of counters.
    """
    added_count = 0
    updated_count = 0
    skipped_count = 0

    for pair in translation_pairs:
        source_text = pair['source_text']
        translated_text = pair['translated_text']

        # NOTE(review): get_translation is assumed to return the cached string
        # or a falsy value when nothing is cached -- confirm against the model.
        existing = cache_model.get_translation(source_text, source_language, target_language)

        if existing:
            if existing.strip() == translated_text.strip():
                print(f"⚠️  {pair['cell']}: 快取已存在且相同")
                skipped_count += 1
            else:
                print(f"🔄 {pair['cell']}: 更新快取翻譯")
                cache_model.save_translation(source_text, source_language, target_language, translated_text)
                updated_count += 1
        else:
            print(f"✅ {pair['cell']}: 新增快取翻譯")
            cache_model.save_translation(source_text, source_language, target_language, translated_text)
            added_count += 1

    return added_count, updated_count, skipped_count


def _verify_cache(cache_model, translation_pairs, source_language, target_language):
    """Re-read every pair from the cache and report mismatches.

    Args:
        cache_model: Object exposing ``get_translation``.
        translation_pairs: Pairs produced by ``_collect_translation_pairs``.
        source_language: Source language code passed to the cache.
        target_language: Target language code passed to the cache.

    Returns:
        The number of pairs that are missing from the cache or whose cached
        text differs from the expected translation.
    """
    verification_failed = 0

    for pair in translation_pairs:
        cached_translation = cache_model.get_translation(
            pair['source_text'], source_language, target_language)

        if cached_translation:
            if cached_translation.strip() == pair['translated_text'].strip():
                print(f"✅ {pair['cell']}: 驗證成功")
            else:
                print(f"⚠️  {pair['cell']}: 驗證失敗 - 內容不一致")
                verification_failed += 1
        else:
            print(f"❌ {pair['cell']}: 驗證失敗 - 快取中沒有")
            verification_failed += 1

    return verification_failed


def _report_mapping_coverage(db, original_file, target_language):
    """Print how many segments of the original file have a cached translation.

    Args:
        db: The project's database handle; only ``db.session.execute`` is used.
        original_file: Path of the untranslated workbook.
        target_language: Target language code used to filter cache rows.
    """
    from app.services.translation_service import ExcelParser
    from sqlalchemy import text as sql_text

    parser = ExcelParser(str(original_file))
    # NOTE(review): segments are assumed to be plain strings, since each one
    # is bound directly as the ``:text`` query parameter -- confirm.
    segments = parser.extract_text_segments()
    print(f"文字片段提取: {len(segments)} 個")

    # The statement is loop-invariant, so build it once.
    lookup = sql_text("""
        SELECT translated_text
        FROM dt_translation_cache
        WHERE source_text = :text AND target_language = :lang
        ORDER BY created_at DESC
        LIMIT 1
    """)

    mapping_count = 0
    for segment in segments:
        result = db.session.execute(lookup, {'text': segment, 'lang': target_language})
        if result.fetchone():
            mapping_count += 1

    # Guard against division by zero when no segments were extracted.
    mapping_rate = mapping_count / len(segments) * 100 if segments else 0
    print(f"翻譯映射覆蓋率: {mapping_count}/{len(segments)} = {mapping_rate:.1f}%")

    if mapping_rate >= 80:
        print("✅ 映射覆蓋率良好,翻譯功能應該正常工作")
    else:
        print("⚠️  映射覆蓋率不佳,可能仍有部分文字無法翻譯")


def extract_translations_from_excel():
    """Extract translation pairs from a translated Excel file and refill the cache.

    Runs four steps, printing progress for each: collect pairs from the
    original/translated workbooks, write them to the translation cache,
    verify the cache contents, and report the cache coverage of the original
    file's text segments. Returns ``None``; returns early if either workbook
    file does not exist.
    """
    print("=" * 80)
    print("修復Excel翻譯快取缺失問題")
    print("從已翻譯檔案提取翻譯對照並補充快取")
    print("=" * 80)

    # Location of the already-translated upload this script repairs.
    prod_dir = Path(r"C:\Users\EGG\WORK\data\user_scrip\TOOL\Document_translator_V2\uploads\f8b0febc-c0df-4902-8dc3-c90f5634f3b3")

    original_file = prod_dir / "original_panjit_f8b0febc.xlsx"
    translated_file = prod_dir / "original_panjit_f8b0febc_ja_translated.xlsx"

    if not original_file.exists() or not translated_file.exists():
        print("❌ 需要的檔案不存在")
        return

    target_language = 'ja'
    source_language = 'zh'

    # 1. Collect translation pairs
    print("\n1. 提取翻譯對照")
    print("-" * 60)

    translation_pairs = _collect_translation_pairs(original_file, translated_file)

    print(f"\n找到 {len(translation_pairs)} 個翻譯對照")

    # 2. Write the pairs into the cache
    print("\n2. 補充翻譯快取")
    print("-" * 60)

    app = create_app()

    with app.app_context():
        # Imported here, inside the application context, as in the original.
        from app.models.cache import TranslationCache
        from app import db

        added_count, updated_count, skipped_count = _fill_cache(
            TranslationCache, translation_pairs, source_language, target_language)

        print("\n快取補充結果:")
        print(f"  新增: {added_count}")
        print(f"  更新: {updated_count}")
        print(f"  跳過: {skipped_count}")
        print(f"  總計: {added_count + updated_count + skipped_count}")

        # 3. Verify what was written
        print("\n3. 驗證補充結果")
        print("-" * 60)

        verification_failed = _verify_cache(
            TranslationCache, translation_pairs, source_language, target_language)

        print(f"\n驗證結果: {len(translation_pairs) - verification_failed}/{len(translation_pairs)} 成功")

        # 4. Check cache coverage of the original file's text segments
        print("\n4. 測試翻譯映射邏輯")
        print("-" * 60)

        _report_mapping_coverage(db, original_file, target_language)

    print("\n" + "=" * 80)
    print("Excel翻譯快取修復完成!")
    print("建議: 重新上傳檔案測試翻譯功能")
    print("=" * 80)


if __name__ == "__main__":
    extract_translations_from_excel()