214 lines
7.7 KiB
Python
214 lines
7.7 KiB
Python
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Repair the Korean translation cache - Korean translations of cells D2-D8.
"""

import os
import sys
from pathlib import Path

# Put this script's own directory first on the import path. This happens
# before the `app` import below, presumably so the project package that
# sits next to this file can be resolved -- confirm against the repo layout.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

# Force UTF-8 output (the script prints CJK text and emoji). The call is
# guarded because stdout may have been replaced by a stream that has no
# `reconfigure` method, or may be None; in that case the setting is skipped
# instead of aborting the script with AttributeError.
if hasattr(sys.stdout, "reconfigure"):
    sys.stdout.reconfigure(encoding='utf-8')

import openpyxl  # noqa: E402  (must follow the sys.path setup above)

from app import create_app  # noqa: E402
|
|
|
|
def _collect_cell_translations(original_file, korean_file, cell_names):
    """Pair source text with its Korean translation for the given cells.

    Both workbooks are opened with openpyxl and the active sheet of each is
    read.  A translated cell is expected to hold ``<source>\\n<translation>``;
    a pair is kept only when the stripped first line equals the stripped text
    of the same cell in the original workbook.  A per-cell report is printed
    along the way; cells whose original value is falsy are skipped silently.

    Args:
        original_file: path of the untranslated workbook.
        korean_file: path of the translated workbook.
        cell_names: iterable of cell references such as ``'D2'``.

    Returns:
        A list of dicts with the keys ``cell``, ``source_text`` and
        ``translated_text``.
    """
    from contextlib import closing

    def _open(path):
        # closing() guarantees close() is called even if a cell read raises.
        return closing(openpyxl.load_workbook(str(path), data_only=False))

    pairs = []
    with _open(original_file) as wb_orig, _open(korean_file) as wb_korean:
        sheet_orig = wb_orig.active
        sheet_korean = wb_korean.active

        for cell_name in cell_names:
            orig_val = sheet_orig[cell_name].value
            korean_val = sheet_korean[cell_name].value

            if not orig_val:
                continue

            print(f"\n{cell_name}:")
            print(f" 原文: {orig_val!r}")
            print(f" 韓文: {korean_val!r}")

            # Anything that is not "<source>\n<translation>" is reported and
            # skipped.
            if not (isinstance(korean_val, str) and '\n' in korean_val):
                if orig_val == korean_val:
                    print(" ❌ 未翻譯")
                else:
                    print(" ⚠️ 格式不明")
                continue

            # A string containing '\n' always splits into at least two parts,
            # so no "too few lines" case exists; everything after the first
            # line break is treated as the translation.
            first_line, _, remainder = korean_val.partition('\n')
            original_text = first_line.strip()
            translated_text = remainder.strip()

            # Only trust the pair when the first line matches the original.
            if isinstance(orig_val, str) and orig_val.strip() == original_text:
                pairs.append({
                    'cell': cell_name,
                    'source_text': original_text,
                    'translated_text': translated_text,
                })
                print(f" ✅ 已翻譯: '{translated_text[:30]}...'")
            else:
                print(" ❌ 原文不一致")

    return pairs


def _latest_cache_row(session, source_text, target_language):
    """Return the newest ``dt_translation_cache`` row for a text, or None.

    The row carries ``(translated_text, created_at)`` for the given source
    text and target language, newest ``created_at`` first.
    """
    from sqlalchemy import text as sql_text

    result = session.execute(sql_text("""
        SELECT translated_text, created_at
        FROM dt_translation_cache
        WHERE source_text = :text AND target_language = :lang
        ORDER BY created_at DESC
        LIMIT 1
    """), {'text': source_text, 'lang': target_language})
    return result.fetchone()


def fix_korean_translation_cache(upload_dir=None):
    """Check and repair the Korean ('ko') translation cache.

    Steps, each reported on stdout:

    1. Read cells D2-D8 and F2-F6 from the original and the Korean-translated
       workbook and collect the source/translation pairs found there.
    2. Look each pair up in ``dt_translation_cache``.
    3. Store the pairs that have no cache row via
       ``TranslationCache.save_translation``.
    4. Extract the text segments of the original workbook with
       ``ExcelParser`` and report how many of them have a Korean cache row.

    Args:
        upload_dir: folder holding ``original_panjit_98158984.xlsx`` and
            ``original_panjit_98158984_ko_translated.xlsx``.  Defaults to the
            production upload folder that used to be hard-coded, so calling
            the function without arguments behaves as before.

    Returns:
        None.  The function returns early, after printing a message, when
        either workbook is missing.
    """
    print("=" * 80)
    print("修復韓文翻譯快取問題")
    print("目標語言: 韓文 (ko)")
    print("=" * 80)

    # Locate the two workbooks.
    if upload_dir is None:
        upload_dir = (
            r"C:\Users\EGG\WORK\data\user_scrip\TOOL\Document_translator_V2"
            r"\uploads\98158984-f335-44f5-a0b4-88fb8ccd5d78"
        )
    prod_dir = Path(upload_dir)
    original_file = prod_dir / "original_panjit_98158984.xlsx"
    korean_file = prod_dir / "original_panjit_98158984_ko_translated.xlsx"

    if not original_file.exists():
        print(f"❌ 原始文件不存在: {original_file}")
        return

    if not korean_file.exists():
        print(f"❌ 韓文翻譯文件不存在: {korean_file}")
        return

    print("✅ 檔案確認:")
    print(f" 原始: {original_file.name}")
    print(f" 韓文: {korean_file.name}")

    # 1. Inspect the translated workbook (cells D2-D8 and F2-F6).
    print("\n1. 檢查韓文翻譯檔案內容")
    print("-" * 60)

    problem_cells = ['D2', 'D3', 'D4', 'D5', 'D6', 'D7', 'D8',
                     'F2', 'F3', 'F4', 'F5', 'F6']
    korean_translations = _collect_cell_translations(
        original_file, korean_file, problem_cells)

    print(f"\n找到 {len(korean_translations)} 個韓文翻譯對照")

    # 2. Inspect the existing Korean cache.
    print("\n2. 檢查現有韓文快取")
    print("-" * 60)

    app = create_app()

    with app.app_context():
        # Project/database imports stay inside the application context, as
        # in the original script.
        from sqlalchemy import text as sql_text
        from app import db

        target_language = 'ko'
        source_language = 'zh'

        # Total number of Korean cache rows.
        korean_cache_count = db.session.execute(sql_text("""
            SELECT COUNT(*) FROM dt_translation_cache
            WHERE target_language = :lang
        """), {'lang': target_language}).fetchone()[0]

        print(f"韓文快取總數: {korean_cache_count}")

        # Which of the collected pairs are not cached yet?
        missing_korean_cache = []
        for trans in korean_translations:
            row = _latest_cache_row(
                db.session, trans['source_text'], target_language)
            if row is not None:
                print(f"✅ {trans['cell']}: 韓文快取已存在 (時間: {row[1]})")
            else:
                print(f"❌ {trans['cell']}: 韓文快取不存在")
                missing_korean_cache.append(trans)

        # 3. Add the missing cache entries.
        if missing_korean_cache:
            print("\n3. 補充缺失的韓文快取")
            print("-" * 60)

            from app.models.cache import TranslationCache

            added_count = 0
            for trans in missing_korean_cache:
                source_text = trans['source_text']
                translated_text = trans['translated_text']

                print(f"✅ 新增 {trans['cell']}: '{source_text[:30]}...' -> '{translated_text[:30]}...'")

                # NOTE(review): whether save_translation commits the session
                # is not visible from this file -- confirm in the model.
                TranslationCache.save_translation(
                    source_text, source_language, target_language,
                    translated_text)
                added_count += 1

            print(f"\n韓文快取補充完成: 新增 {added_count} 筆")

        # 4. Measure how many extracted segments map to a Korean cache row.
        print("\n4. 測試韓文翻譯映射")
        print("-" * 60)

        from app.services.translation_service import ExcelParser

        parser = ExcelParser(str(original_file))
        segments = parser.extract_text_segments()

        print(f"提取文字片段: {len(segments)} 個")

        # One lookup per segment; the uncached segments are remembered so
        # they can be listed below without querying the database again.
        missing_segments = [
            segment for segment in segments
            if _latest_cache_row(db.session, segment, target_language) is None
        ]
        korean_mapping_count = len(segments) - len(missing_segments)

        korean_mapping_rate = korean_mapping_count / len(segments) * 100 if segments else 0
        print(f"韓文映射覆蓋率: {korean_mapping_count}/{len(segments)} = {korean_mapping_rate:.1f}%")

        if korean_mapping_rate >= 80:
            print("✅ 韓文映射覆蓋率良好")
        else:
            print("⚠️ 韓文映射覆蓋率待改善")

            # NOTE(review): the source this was recovered from had lost its
            # indentation; the listing is assumed to belong to this
            # low-coverage branch -- confirm.
            # Show at most the first 10 segments without a Korean cache row.
            print("\n缺失韓文翻譯的片段:")
            for segment in missing_segments[:10]:
                print(f" ❌ '{segment[:40]}...'")

    print("\n" + "=" * 80)
    print("韓文翻譯快取檢查完成!")
    print("如果映射覆蓋率不足,請重新執行翻譯任務或手動補充快取")
    print("=" * 80)
|
|
|
|
if __name__ == "__main__":
    # Run the cache repair only when this file is executed as a script,
    # not when it is imported.
    fix_korean_translation_cache()