5th_fix excel problem
This commit is contained in:
214
fix_korean_translation_cache.py
Normal file
214
fix_korean_translation_cache.py
Normal file
@@ -0,0 +1,214 @@
|
||||
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Repair the Korean translation cache - Korean translations for cells D2-D8.
"""

import sys
import os

# Put this script's own directory first on sys.path so that the ``app``
# package imported below is resolved relative to the script
# (presumably the project root - confirm against the repository layout).
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

# Force UTF-8 output so the CJK text and emoji printed by this script do not
# fail on consoles with a legacy code page. Guarded because stdout may have
# been replaced by a stream object that has no ``reconfigure`` method.
if hasattr(sys.stdout, "reconfigure"):
    sys.stdout.reconfigure(encoding='utf-8')

# These imports intentionally come after the sys.path adjustment above.
from pathlib import Path

import openpyxl

from app import create_app
|
||||
|
||||
def fix_korean_translation_cache():
    """Check and back-fill the Korean (``ko``) translation cache for one workbook.

    The function works on a fixed pair of files (the original workbook and
    its Korean-translated copy) and performs four steps, printing a report
    for each one:

    1. Compare cells D2-D8 and F2-F6 of both workbooks and collect the cells
       whose translated value consists of the original text on the first
       line followed by a non-empty translation on the following line(s).
    2. Look each collected source text up in ``dt_translation_cache``.
    3. Store the pairs that have no cache row through
       ``TranslationCache.save_translation``.
    4. Extract the text segments of the original workbook with
       ``ExcelParser`` and report the share that has a Korean cache row,
       listing up to 10 uncached segments when coverage is below 80%.

    Returns:
        None. The function returns early (after printing a message) when
        either workbook file does not exist.
    """
    banner = "=" * 80
    rule = "-" * 60

    print(banner)
    print("修復韓文翻譯快取問題")
    print("目標語言: 韓文 (ko)")
    print(banner)

    # Locate the original workbook and its Korean-translated copy.
    prod_dir = Path(r"C:\Users\EGG\WORK\data\user_scrip\TOOL\Document_translator_V2\uploads\98158984-f335-44f5-a0b4-88fb8ccd5d78")
    original_file = prod_dir / "original_panjit_98158984.xlsx"
    korean_file = prod_dir / "original_panjit_98158984_ko_translated.xlsx"

    if not original_file.exists():
        print(f"❌ 原始文件不存在: {original_file}")
        return

    if not korean_file.exists():
        print(f"❌ 韓文翻譯文件不存在: {korean_file}")
        return

    print("✅ 檔案確認:")
    print(f" 原始: {original_file.name}")
    print(f" 韓文: {korean_file.name}")

    # 1. Inspect the content of the Korean-translated workbook.
    print("\n1. 檢查韓文翻譯檔案內容")
    print(rule)

    # Cells D2-D8 and F2-F6 are the ones being checked.
    problem_cells = ['D2', 'D3', 'D4', 'D5', 'D6', 'D7', 'D8', 'F2', 'F3', 'F4', 'F5', 'F6']
    korean_translations = []

    wb_orig = openpyxl.load_workbook(str(original_file), data_only=False)
    try:
        wb_korean = openpyxl.load_workbook(str(korean_file), data_only=False)
        try:
            sheet_orig = wb_orig.active
            sheet_korean = wb_korean.active

            for cell_name in problem_cells:
                orig_val = sheet_orig[cell_name].value
                korean_val = sheet_korean[cell_name].value

                if not orig_val:
                    continue

                print(f"\n{cell_name}:")
                print(f" 原文: {repr(orig_val)}")
                print(f" 韓文: {repr(korean_val)}")

                # A translated cell holds "<original>" on its first line and
                # the translation on the remaining line(s).
                if isinstance(korean_val, str) and '\n' in korean_val:
                    first_line, _, remainder = korean_val.partition('\n')
                    original_text = first_line.strip()
                    translated_text = remainder.strip()

                    if not translated_text:
                        # Nothing after the line break: treat as malformed
                        # rather than caching an empty translation.
                        print(" ❌ 格式異常")
                    elif isinstance(orig_val, str) and orig_val.strip() == original_text:
                        # The first line matches the original cell text.
                        korean_translations.append({
                            'cell': cell_name,
                            'source_text': original_text,
                            'translated_text': translated_text,
                        })
                        print(f" ✅ 已翻譯: '{translated_text[:30]}...'")
                    else:
                        print(" ❌ 原文不一致")
                elif orig_val == korean_val:
                    print(" ❌ 未翻譯")
                else:
                    print(" ⚠️ 格式不明")
        finally:
            # Close the workbooks even if reading a cell raised.
            wb_korean.close()
    finally:
        wb_orig.close()

    print(f"\n找到 {len(korean_translations)} 個韓文翻譯對照")

    # 2. Inspect the existing Korean cache.
    print("\n2. 檢查現有韓文快取")
    print(rule)

    app = create_app()

    with app.app_context():
        from sqlalchemy import text as sql_text
        from app import db

        target_language = 'ko'
        source_language = 'zh'

        # Total number of Korean cache rows.
        korean_cache_count = db.session.execute(sql_text("""
            SELECT COUNT(*) FROM dt_translation_cache
            WHERE target_language = :lang
        """), {'lang': target_language}).fetchone()[0]

        print(f"韓文快取總數: {korean_cache_count}")

        # One statement shared by every lookup below: the newest cache row
        # for a given source text in the target language.
        lookup_stmt = sql_text("""
            SELECT translated_text, created_at
            FROM dt_translation_cache
            WHERE source_text = :text AND target_language = :lang
            ORDER BY created_at DESC
            LIMIT 1
        """)

        def find_cached(text):
            """Return the newest cache row for *text*, or None if absent."""
            return db.session.execute(
                lookup_stmt, {'text': text, 'lang': target_language}
            ).fetchone()

        # Check whether the collected cells already have a Korean cache row.
        missing_korean_cache = []

        for trans in korean_translations:
            row = find_cached(trans['source_text'])
            if row is not None:
                print(f"✅ {trans['cell']}: 韓文快取已存在 (時間: {row[1]})")
            else:
                print(f"❌ {trans['cell']}: 韓文快取不存在")
                missing_korean_cache.append(trans)

        # 3. Add the missing Korean cache entries.
        if missing_korean_cache:
            print("\n3. 補充缺失的韓文快取")
            print(rule)

            from app.models.cache import TranslationCache

            added_count = 0

            for trans in missing_korean_cache:
                source_text = trans['source_text']
                translated_text = trans['translated_text']

                print(f"✅ 新增 {trans['cell']}: '{source_text[:30]}...' -> '{translated_text[:30]}...'")

                TranslationCache.save_translation(source_text, source_language, target_language, translated_text)
                added_count += 1

            print(f"\n韓文快取補充完成: 新增 {added_count} 筆")

        # 4. Test the Korean translation mapping.
        print("\n4. 測試韓文翻譯映射")
        print(rule)

        from app.services.translation_service import ExcelParser

        parser = ExcelParser(str(original_file))
        segments = parser.extract_text_segments()

        print(f"提取文字片段: {len(segments)} 個")

        # Single pass over the segments: remember the uncached ones so the
        # report below does not have to query the database a second time.
        uncached_segments = [
            segment for segment in segments if find_cached(segment) is None
        ]
        korean_mapping_count = len(segments) - len(uncached_segments)

        korean_mapping_rate = korean_mapping_count / len(segments) * 100 if segments else 0
        print(f"韓文映射覆蓋率: {korean_mapping_count}/{len(segments)} = {korean_mapping_rate:.1f}%")

        if korean_mapping_rate >= 80:
            print("✅ 韓文映射覆蓋率良好")
        else:
            print("⚠️ 韓文映射覆蓋率待改善")

            # Show the segments without a translation (first 10 only).
            print("\n缺失韓文翻譯的片段:")
            for segment in uncached_segments[:10]:
                print(f" ❌ '{segment[:40]}...'")

    print("\n" + banner)
    print("韓文翻譯快取檢查完成!")
    print("如果映射覆蓋率不足,請重新執行翻譯任務或手動補充快取")
    print(banner)


if __name__ == "__main__":
    fix_korean_translation_cache()
|
Reference in New Issue
Block a user