Files
Document_Translator/fix_korean_translation_cache.py
2025-09-03 15:07:34 +08:00

214 lines
7.7 KiB
Python

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Repair the Korean translation cache - Korean translations for cells D2-D8.
"""
import sys
import os
# Put this script's own directory first on the import path; presumably this is
# what lets the project-local `app` package imported below resolve when the
# script is run directly (verify against the project layout).
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
# Set the stdout encoding: the script prints CJK text and emoji.
sys.stdout.reconfigure(encoding='utf-8')
from pathlib import Path
import openpyxl
from app import create_app
# Upload directory of the job this script was originally written to repair.
_DEFAULT_PROD_DIR = r"C:\Users\EGG\WORK\data\user_scrip\TOOL\Document_translator_V2\uploads\98158984-f335-44f5-a0b4-88fb8ccd5d78"

# Cells whose Korean output is inspected: D2-D8 and F2-F6.
_PROBLEM_CELLS = (
    'D2', 'D3', 'D4', 'D5', 'D6', 'D7', 'D8',
    'F2', 'F3', 'F4', 'F5', 'F6',
)

# Most recent cache row for one (source_text, target_language) pair.
_CACHE_LOOKUP_SQL = """
SELECT translated_text, created_at
FROM dt_translation_cache
WHERE source_text = :text AND target_language = :lang
ORDER BY created_at DESC
LIMIT 1
"""

# Console message printed for every non-"ok" cell status.
_STATUS_MESSAGES = {
    'mismatch': " ❌ 原文不一致",
    'malformed': " ❌ 格式異常",
    'untranslated': " ❌ 未翻譯",
    'unknown': " ⚠️ 格式不明",
}


def _split_translated_cell(orig_val, korean_val):
    """Classify a translated cell and split it into source and translation.

    A translated cell is expected to hold ``<source>\\n<translation>``. The
    source may itself span several lines, so the number of leading lines
    compared against the original is derived from the original text instead
    of always being one.

    Args:
        orig_val: Cell value from the original workbook.
        korean_val: Value of the same cell in the translated workbook.

    Returns:
        A ``(status, source_text, translated_text)`` tuple. ``status`` is one
        of ``"ok"``, ``"mismatch"`` (leading lines differ from the original),
        ``"malformed"`` (nothing follows the source text), ``"untranslated"``
        (both cells are equal) or ``"unknown"`` (not a multi-line string).
        The two text fields are ``None`` unless ``status`` is ``"ok"``.
    """
    if orig_val == korean_val:
        return ('untranslated', None, None)
    if not (isinstance(korean_val, str) and '\n' in korean_val):
        return ('unknown', None, None)
    if not isinstance(orig_val, str):
        return ('mismatch', None, None)

    source_text = orig_val.strip()
    lines = korean_val.split('\n')
    # Compare as many leading lines as the source itself occupies.
    source_line_count = source_text.count('\n') + 1
    if '\n'.join(lines[:source_line_count]).strip() != source_text:
        return ('mismatch', None, None)

    translated_text = '\n'.join(lines[source_line_count:]).strip()
    if not translated_text:
        # Never report (and later cache) an empty translation.
        return ('malformed', None, None)
    return ('ok', source_text, translated_text)


def _collect_korean_translations(original_file, korean_file):
    """Print a per-cell report and return the verified translation pairs.

    Args:
        original_file: Path of the original workbook.
        korean_file: Path of the Korean-translated workbook.

    Returns:
        A list of dicts with the keys ``cell``, ``source_text`` and
        ``translated_text``, one per inspected cell whose translation could
        be verified against the original text.
    """
    translations = []
    wb_orig = openpyxl.load_workbook(str(original_file), data_only=False)
    try:
        wb_korean = openpyxl.load_workbook(str(korean_file), data_only=False)
        try:
            sheet_orig = wb_orig.active
            sheet_korean = wb_korean.active
            for cell_name in _PROBLEM_CELLS:
                orig_val = sheet_orig[cell_name].value
                if not orig_val:
                    continue
                korean_val = sheet_korean[cell_name].value
                print(f"\n{cell_name}:")
                print(f" 原文: {repr(orig_val)}")
                print(f" 韓文: {repr(korean_val)}")

                status, source_text, translated_text = _split_translated_cell(
                    orig_val, korean_val
                )
                if status == 'ok':
                    translations.append({
                        'cell': cell_name,
                        'source_text': source_text,
                        'translated_text': translated_text,
                    })
                    print(f" ✅ 已翻譯: '{translated_text[:30]}...'")
                else:
                    print(_STATUS_MESSAGES[status])
        finally:
            wb_korean.close()
    finally:
        # Close the workbooks even when inspecting a cell raises.
        wb_orig.close()
    return translations


def fix_korean_translation_cache(prod_dir=None):
    """Check the Korean (ko) translation cache and backfill missing entries.

    Steps:
        1. Compare selected cells of the original workbook with the
           Korean-translated workbook and collect verified translations.
        2. Look each collected source text up in ``dt_translation_cache``.
        3. Save the translations that have no cache row via
           ``TranslationCache.save_translation``.
        4. Extract all text segments of the original workbook and report how
           many of them have a Korean cache row.

    Progress is reported on stdout. The function returns early (after
    printing a message) when either workbook is missing.

    Args:
        prod_dir: Directory containing ``original_panjit_98158984.xlsx`` and
            its ``_ko_translated`` counterpart. Defaults to the upload
            directory this script was originally written for.

    Returns:
        None.
    """
    print("=" * 80)
    print("修復韓文翻譯快取問題")
    print("目標語言: 韓文 (ko)")
    print("=" * 80)

    # Locate the original and the Korean-translated workbook.
    prod_dir = Path(_DEFAULT_PROD_DIR if prod_dir is None else prod_dir)
    original_file = prod_dir / "original_panjit_98158984.xlsx"
    korean_file = prod_dir / "original_panjit_98158984_ko_translated.xlsx"
    if not original_file.exists():
        print(f"❌ 原始文件不存在: {original_file}")
        return
    if not korean_file.exists():
        print(f"❌ 韓文翻譯文件不存在: {korean_file}")
        return
    print("✅ 檔案確認:")
    print(f" 原始: {original_file.name}")
    print(f" 韓文: {korean_file.name}")

    # 1. Inspect the translated workbook.
    print("\n1. 檢查韓文翻譯檔案內容")
    print("-" * 60)
    korean_translations = _collect_korean_translations(original_file, korean_file)
    print(f"\n找到 {len(korean_translations)} 個韓文翻譯對照")

    # 2. Inspect the existing Korean cache.
    print("\n2. 檢查現有韓文快取")
    print("-" * 60)
    app = create_app()
    with app.app_context():
        from sqlalchemy import text as sql_text
        from app import db

        target_language = 'ko'
        source_language = 'zh'

        def cached_row(source_text):
            """Return the newest cache row for *source_text*, or None."""
            return db.session.execute(
                sql_text(_CACHE_LOOKUP_SQL),
                {'text': source_text, 'lang': target_language},
            ).fetchone()

        korean_cache_count = db.session.execute(sql_text("""
            SELECT COUNT(*) FROM dt_translation_cache
            WHERE target_language = :lang
        """), {'lang': target_language}).fetchone()[0]
        print(f"韓文快取總數: {korean_cache_count}")

        missing_korean_cache = []
        for trans in korean_translations:
            row = cached_row(trans['source_text'])
            if row:
                print(f"{trans['cell']}: 韓文快取已存在 (時間: {row[1]})")
            else:
                print(f"{trans['cell']}: 韓文快取不存在")
                missing_korean_cache.append(trans)

        # 3. Backfill the missing cache entries.
        if missing_korean_cache:
            print("\n3. 補充缺失的韓文快取")
            print("-" * 60)
            from app.models.cache import TranslationCache

            added_count = 0
            for trans in missing_korean_cache:
                source_text = trans['source_text']
                translated_text = trans['translated_text']
                TranslationCache.save_translation(
                    source_text, source_language, target_language, translated_text
                )
                # Report only after the save call has returned.
                print(f"✅ 新增 {trans['cell']}: '{source_text[:30]}...' -> '{translated_text[:30]}...'")
                added_count += 1
            print(f"\n韓文快取補充完成: 新增 {added_count}")

        # 4. Measure how many extracted segments have a Korean cache row.
        print("\n4. 測試韓文翻譯映射")
        print("-" * 60)
        from app.services.translation_service import ExcelParser

        parser = ExcelParser(str(original_file))
        segments = parser.extract_text_segments()
        print(f"提取文字片段: {len(segments)}")

        # One lookup per segment; the misses are reused for the listing below.
        missing_segments = [
            segment for segment in segments if not cached_row(segment)
        ]
        korean_mapping_count = len(segments) - len(missing_segments)
        korean_mapping_rate = korean_mapping_count / len(segments) * 100 if segments else 0
        print(f"韓文映射覆蓋率: {korean_mapping_count}/{len(segments)} = {korean_mapping_rate:.1f}%")

        if korean_mapping_rate >= 80:
            print("✅ 韓文映射覆蓋率良好")
        else:
            print("⚠️ 韓文映射覆蓋率待改善")
            print("\n缺失韓文翻譯的片段:")
            # Show only the first 10 uncached segments.
            for segment in missing_segments[:10]:
                print(f"'{segment[:40]}...'")

    print("\n" + "=" * 80)
    print("韓文翻譯快取檢查完成!")
    print("如果映射覆蓋率不足,請重新執行翻譯任務或手動補充快取")
    print("=" * 80)
# Run the cache repair only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    fix_korean_translation_cache()