Files
Document_Translator/fix_missing_excel_cache.py
2025-09-03 15:07:34 +08:00

184 lines
6.8 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Repair missing Excel translation cache entries: extract translations from an
already-translated Excel file and add them to the translation cache.
"""
import sys
import os
# Put this script's own directory first on sys.path — presumably so the
# project-local `app` package imported below resolves when the script is
# started from another working directory (confirm against the repo layout).
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
# Set the output encoding: the script prints Chinese text and emoji, so stdout
# is switched to UTF-8 before anything is printed.
sys.stdout.reconfigure(encoding='utf-8')
from pathlib import Path
import openpyxl
from app import create_app
# Defaults reproduce the production upload this repair script was written for.
_DEFAULT_UPLOAD_DIR = r"C:\Users\EGG\WORK\data\user_scrip\TOOL\Document_translator_V2\uploads\f8b0febc-c0df-4902-8dc3-c90f5634f3b3"
_DEFAULT_ORIGINAL_NAME = "original_panjit_f8b0febc.xlsx"
_DEFAULT_TRANSLATED_NAME = "original_panjit_f8b0febc_ja_translated.xlsx"


def _split_bilingual_cell(orig_val, trans_val):
    """Return ``(source, translation)`` if *trans_val* is ``source\\ntranslation``, else ``None``."""
    if not orig_val or not trans_val:
        return None
    if not isinstance(orig_val, str) or not isinstance(trans_val, str):
        return None

    first_line, newline, remainder = trans_val.partition('\n')
    if not newline:
        return None

    source_text = first_line.strip()
    # Only trust the pair when the first line really is the original cell text.
    if orig_val.strip() != source_text:
        return None

    translated_text = remainder.strip()
    # An empty translation is useless in the cache and would always be
    # reported as "missing" by the verification step, so skip it.
    if not translated_text:
        return None
    return source_text, translated_text


def _collect_translation_pairs(original_file, translated_file, max_rows, max_cols):
    """Scan the active sheets of both workbooks and return the matched translation pairs."""
    wb_orig = wb_trans = None
    pairs = []
    try:
        wb_orig = openpyxl.load_workbook(str(original_file), data_only=False)
        wb_trans = openpyxl.load_workbook(str(translated_file), data_only=False)
        ws_orig = wb_orig.active
        ws_trans = wb_trans.active

        # Inclusive bounds: rows 1..max_rows and columns 1..max_cols.
        for row in range(1, max_rows + 1):
            for col in range(1, max_cols + 1):
                orig_cell = ws_orig.cell(row=row, column=col)
                trans_cell = ws_trans.cell(row=row, column=col)

                split = _split_bilingual_cell(orig_cell.value, trans_cell.value)
                if split is None:
                    continue
                original_text, translated_text = split

                # `coordinate` stays correct past column Z, unlike chr(64 + col).
                cell_name = orig_cell.coordinate
                pairs.append({
                    'cell': cell_name,
                    'source_text': original_text,
                    'translated_text': translated_text
                })
                print(f"{cell_name}: '{original_text[:30]}...' -> '{translated_text[:30]}...'")
    finally:
        # Close whatever was opened, even if loading or scanning failed.
        for workbook in (wb_orig, wb_trans):
            if workbook is not None:
                workbook.close()
    return pairs


def _fill_cache(cache_cls, pairs, source_language, target_language):
    """Add or update a cache entry for every pair and print the add/update/skip counts."""
    added_count = 0
    updated_count = 0
    skipped_count = 0

    for pair in pairs:
        source_text = pair['source_text']
        translated_text = pair['translated_text']

        existing = cache_cls.get_translation(source_text, source_language, target_language)
        if existing and existing.strip() == translated_text.strip():
            print(f"⚠️ {pair['cell']}: 快取已存在且相同")
            skipped_count += 1
            continue

        if existing:
            print(f"🔄 {pair['cell']}: 更新快取翻譯")
            updated_count += 1
        else:
            print(f"{pair['cell']}: 新增快取翻譯")
            added_count += 1
        cache_cls.save_translation(source_text, source_language, target_language, translated_text)

    print("\n快取補充結果:")
    print(f" 新增: {added_count}")
    print(f" 更新: {updated_count}")
    print(f" 跳過: {skipped_count}")
    print(f" 總計: {added_count + updated_count + skipped_count}")


def _verify_cache(cache_cls, pairs, source_language, target_language):
    """Re-read every pair from the cache and print how many match what was extracted."""
    verification_failed = 0
    for pair in pairs:
        cached_translation = cache_cls.get_translation(
            pair['source_text'], source_language, target_language
        )
        if not cached_translation:
            print(f"{pair['cell']}: 驗證失敗 - 快取中沒有")
            verification_failed += 1
        elif cached_translation.strip() == pair['translated_text'].strip():
            print(f"{pair['cell']}: 驗證成功")
        else:
            print(f"⚠️ {pair['cell']}: 驗證失敗 - 內容不一致")
            verification_failed += 1

    print(f"\n驗證結果: {len(pairs) - verification_failed}/{len(pairs)} 成功")


def _report_mapping_coverage(db, sql_text, segments, target_language):
    """Print the share of *segments* that have a row in ``dt_translation_cache``."""
    lookup = sql_text("""
SELECT translated_text
FROM dt_translation_cache
WHERE source_text = :text AND target_language = :lang
ORDER BY created_at DESC
LIMIT 1
""")
    mapping_count = 0
    for segment in segments:
        result = db.session.execute(lookup, {'text': segment, 'lang': target_language})
        if result.fetchone():
            mapping_count += 1

    # Guard against an empty segment list to avoid dividing by zero.
    mapping_rate = mapping_count / len(segments) * 100 if segments else 0
    print(f"翻譯映射覆蓋率: {mapping_count}/{len(segments)} = {mapping_rate:.1f}%")
    if mapping_rate >= 80:
        print("✅ 映射覆蓋率良好,翻譯功能應該正常工作")
    else:
        print("⚠️ 映射覆蓋率不佳,可能仍有部分文字無法翻譯")


def extract_translations_from_excel(
    original_file=None,
    translated_file=None,
    source_language='zh',
    target_language='ja',
    max_rows=50,
    max_cols=20,
):
    """Extract translation pairs from a translated Excel file and add them to the cache.

    A cell in the translated workbook counts as a translation when its value
    is ``<original text>\\n<translation>`` and the first line equals the value
    of the same cell in the original workbook. Only the active sheet of each
    workbook is scanned.

    The function then runs four printed phases: extract the pairs, add or
    update them through ``TranslationCache``, verify them by reading them
    back, and report how many segments returned by ``ExcelParser`` have a
    cached translation.

    Args:
        original_file: Path to the untranslated workbook. Defaults to the
            production upload this script was written for.
        translated_file: Path to the translated workbook. Defaults to the
            matching ``*_ja_translated.xlsx`` file.
        source_language: Language code passed to the cache for the source text.
        target_language: Language code passed to the cache for the translation.
        max_rows: Number of rows to scan, counted from row 1 (inclusive).
        max_cols: Number of columns to scan, counted from column 1 (inclusive).

    Returns:
        None. Progress and results are printed to stdout. If either file is
        missing, a message is printed and the function returns early.
    """
    print("=" * 80)
    print("修復Excel翻譯快取缺失問題")
    print("從已翻譯檔案提取翻譯對照並補充快取")
    print("=" * 80)

    # Fall back to the known production files when no paths are given.
    default_dir = Path(_DEFAULT_UPLOAD_DIR)
    if original_file is None:
        original_file = default_dir / _DEFAULT_ORIGINAL_NAME
    else:
        original_file = Path(original_file)
    if translated_file is None:
        translated_file = default_dir / _DEFAULT_TRANSLATED_NAME
    else:
        translated_file = Path(translated_file)

    if not original_file.exists() or not translated_file.exists():
        print("❌ 需要的檔案不存在")
        return

    # 1. Extract translation pairs
    print("\n1. 提取翻譯對照")
    print("-" * 60)
    translation_pairs = _collect_translation_pairs(
        original_file, translated_file, max_rows, max_cols
    )
    print(f"\n找到 {len(translation_pairs)} 個翻譯對照")

    # 2. Add the pairs to the cache
    print("\n2. 補充翻譯快取")
    print("-" * 60)
    app = create_app()
    with app.app_context():
        # Project imports stay local, as in the original script, so they are
        # only resolved once the application has been created.
        from app.models.cache import TranslationCache
        from app import db

        _fill_cache(TranslationCache, translation_pairs, source_language, target_language)

        # 3. Verify what was written
        print("\n3. 驗證補充結果")
        print("-" * 60)
        _verify_cache(TranslationCache, translation_pairs, source_language, target_language)

        # 4. Check cache coverage for the segments the parser extracts
        print("\n4. 測試翻譯映射邏輯")
        print("-" * 60)
        from app.services.translation_service import ExcelParser
        from sqlalchemy import text as sql_text

        parser = ExcelParser(str(original_file))
        segments = parser.extract_text_segments()
        print(f"文字片段提取: {len(segments)}")
        _report_mapping_coverage(db, sql_text, segments, target_language)

    print("\n" + "=" * 80)
    print("Excel翻譯快取修復完成")
    print("建議: 重新上傳檔案測試翻譯功能")
    print("=" * 80)
if __name__ == "__main__":
    # Run the cache repair only when executed as a script, not when imported.
    extract_translations_from_excel()