#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Repair the Korean translation cache - Korean translations for cells D2-D8.
"""

import sys
import os

# Make the project root importable so that `app` resolves when this script is
# run directly; this must happen before the `from app import ...` line below.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

# Force UTF-8 output so the CJK / emoji status messages print on any console.
sys.stdout.reconfigure(encoding='utf-8')

from pathlib import Path
import openpyxl
from app import create_app

# Upload directory inspected when the caller does not supply one.
DEFAULT_UPLOAD_DIR = Path(
    r"C:\Users\EGG\WORK\data\user_scrip\TOOL\Document_translator_V2\uploads\98158984-f335-44f5-a0b4-88fb8ccd5d78"
)

# Cells whose translations are compared between the two workbooks.
PROBLEM_CELLS = ('D2', 'D3', 'D4', 'D5', 'D6', 'D7', 'D8',
                 'F2', 'F3', 'F4', 'F5', 'F6')


def _collect_cell_translations(original_file, korean_file, cell_names):
    """Return the source/translation pairs found in the translated workbook.

    For every cell in *cell_names* that has a truthy value in the original
    workbook's active sheet, the same cell of the translated workbook is
    expected to hold ``"<source text>\\n<translation>"``.  A pair is kept only
    when the first line of the translated cell matches the stripped original
    value.  A per-cell status line is printed along the way.

    Returns a list of dicts with the keys ``cell``, ``source_text`` and
    ``translated_text``.
    """
    translations = []

    wb_orig = openpyxl.load_workbook(str(original_file), data_only=False)
    try:
        wb_korean = openpyxl.load_workbook(str(korean_file), data_only=False)
        try:
            sheet_orig = wb_orig.active
            sheet_korean = wb_korean.active

            for cell_name in cell_names:
                orig_val = sheet_orig[cell_name].value
                korean_val = sheet_korean[cell_name].value

                if not orig_val:
                    continue

                print(f"\n{cell_name}:")
                print(f" 原文: {orig_val!r}")
                print(f" 韓文: {korean_val!r}")

                # Translated cells use the layout "<source>\n<translation>".
                if not (isinstance(korean_val, str) and '\n' in korean_val):
                    if orig_val == korean_val:
                        print(" ❌ 未翻譯")
                    else:
                        print(" ⚠️ 格式不明")
                    continue

                # Split at the first newline only: the translation itself may
                # span several lines.
                head, _, tail = korean_val.partition('\n')
                original_text = head.strip()
                translated_text = tail.strip()

                # Only trust the pair when the embedded source text matches
                # the original cell.
                if isinstance(orig_val, str) and orig_val.strip() == original_text:
                    translations.append({
                        'cell': cell_name,
                        'source_text': original_text,
                        'translated_text': translated_text,
                    })
                    print(f" ✅ 已翻譯: '{translated_text[:30]}...'")
                else:
                    print(" ❌ 原文不一致")
        finally:
            wb_korean.close()
    finally:
        # Close the workbooks even when reading a cell raises.
        wb_orig.close()

    return translations


def fix_korean_translation_cache(upload_dir=None):
    """Check the Korean translation cache and back-fill missing entries.

    Steps:
      1. Read the original and the Korean-translated workbook and extract the
         source/translation pairs from the cells listed in ``PROBLEM_CELLS``.
      2. Look each source text up in ``dt_translation_cache`` for language
         ``ko``.
      3. Store every pair that has no cache row via
         ``TranslationCache.save_translation``.
      4. Extract all text segments of the original workbook with
         ``ExcelParser`` and report how many have a Korean cache row; when
         coverage is below 80 %, list up to ten uncovered segments.

    Args:
        upload_dir: Directory containing ``original_panjit_98158984.xlsx`` and
            ``original_panjit_98158984_ko_translated.xlsx``.  Defaults to
            ``DEFAULT_UPLOAD_DIR`` when ``None``.

    Returns:
        None.  Progress is reported on stdout; the function returns early when
        either workbook is missing.
    """
    print("=" * 80)
    print("修復韓文翻譯快取問題")
    print("目標語言: 韓文 (ko)")
    print("=" * 80)

    # Locate the workbooks to compare.
    prod_dir = Path(upload_dir) if upload_dir is not None else DEFAULT_UPLOAD_DIR
    original_file = prod_dir / "original_panjit_98158984.xlsx"
    korean_file = prod_dir / "original_panjit_98158984_ko_translated.xlsx"

    if not original_file.exists():
        print(f"❌ 原始文件不存在: {original_file}")
        return

    if not korean_file.exists():
        print(f"❌ 韓文翻譯文件不存在: {korean_file}")
        return

    print("✅ 檔案確認:")
    print(f" 原始: {original_file.name}")
    print(f" 韓文: {korean_file.name}")

    # 1. Inspect the content of the Korean-translated workbook.
    print("\n1. 檢查韓文翻譯檔案內容")
    print("-" * 60)

    korean_translations = _collect_cell_translations(
        original_file, korean_file, PROBLEM_CELLS
    )

    print(f"\n找到 {len(korean_translations)} 個韓文翻譯對照")

    # 2. Inspect the existing Korean cache.
    print("\n2. 檢查現有韓文快取")
    print("-" * 60)

    app = create_app()

    with app.app_context():
        from sqlalchemy import text as sql_text
        from app import db

        target_language = 'ko'
        source_language = 'zh'

        latest_cache_sql = sql_text("""
            SELECT translated_text, created_at
            FROM dt_translation_cache
            WHERE source_text = :text AND target_language = :lang
            ORDER BY created_at DESC
            LIMIT 1
        """)

        def latest_cache_row(source_text):
            """Return the newest cache row for *source_text*, or None."""
            return db.session.execute(
                latest_cache_sql,
                {'text': source_text, 'lang': target_language},
            ).fetchone()

        # Total number of Korean cache rows.
        korean_cache_count = db.session.execute(sql_text("""
            SELECT COUNT(*) FROM dt_translation_cache
            WHERE target_language = :lang
        """), {'lang': target_language}).fetchone()[0]

        print(f"韓文快取總數: {korean_cache_count}")

        # Find the extracted pairs that have no Korean cache row yet.
        missing_korean_cache = []

        for trans in korean_translations:
            row = latest_cache_row(trans['source_text'])
            if row:
                print(f"✅ {trans['cell']}: 韓文快取已存在 (時間: {row[1]})")
            else:
                print(f"❌ {trans['cell']}: 韓文快取不存在")
                missing_korean_cache.append(trans)

        # 3. Back-fill the missing Korean cache rows.
        if missing_korean_cache:
            print("\n3. 補充缺失的韓文快取")
            print("-" * 60)

            from app.models.cache import TranslationCache

            for trans in missing_korean_cache:
                source_text = trans['source_text']
                translated_text = trans['translated_text']

                print(f"✅ 新增 {trans['cell']}: '{source_text[:30]}...' -> '{translated_text[:30]}...'")

                # NOTE(review): presumably save_translation persists/commits
                # the row itself - confirm against app.models.cache.
                TranslationCache.save_translation(
                    source_text, source_language, target_language, translated_text
                )

            added_count = len(missing_korean_cache)
            print(f"\n韓文快取補充完成: 新增 {added_count} 筆")

        # 4. Measure how many extracted segments have a Korean cache row.
        print("\n4. 測試韓文翻譯映射")
        print("-" * 60)

        from app.services.translation_service import ExcelParser

        parser = ExcelParser(str(original_file))
        segments = parser.extract_text_segments()

        print(f"提取文字片段: {len(segments)} 個")

        # One lookup per segment; remember the misses so the report below does
        # not have to query the database a second time.
        missing_segments = [
            segment for segment in segments if not latest_cache_row(segment)
        ]
        korean_mapping_count = len(segments) - len(missing_segments)

        korean_mapping_rate = korean_mapping_count / len(segments) * 100 if segments else 0
        print(f"韓文映射覆蓋率: {korean_mapping_count}/{len(segments)} = {korean_mapping_rate:.1f}%")

        if korean_mapping_rate >= 80:
            print("✅ 韓文映射覆蓋率良好")
        else:
            print("⚠️ 韓文映射覆蓋率待改善")

            # List the uncovered segments (first 10 only).
            print("\n缺失韓文翻譯的片段:")
            for segment in missing_segments[:10]:
                print(f" ❌ '{segment[:40]}...'")

    print("\n" + "=" * 80)
    print("韓文翻譯快取檢查完成!")
    print("如果映射覆蓋率不足,請重新執行翻譯任務或手動補充快取")
    print("=" * 80)


if __name__ == "__main__":
    fix_korean_translation_cache()