5th_fix excel problem
This commit is contained in:
184
fix_missing_excel_cache.py
Normal file
184
fix_missing_excel_cache.py
Normal file
@@ -0,0 +1,184 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Repair missing Excel translation cache entries.

Extracts source/translation pairs from an already-translated Excel file and
adds them to the translation cache.
"""

import os
import sys
from pathlib import Path

# Put the directory containing this script first on the import path; this must
# happen before the `app` import below (presumably so the local `app` package
# resolves when the script is launched from another working directory).
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

# Force UTF-8 output so the CJK text and emoji printed by this script do not
# fail on consoles with a legacy code page. Guarded because stdout may have
# been replaced by an object that has no `reconfigure` method.
if hasattr(sys.stdout, "reconfigure"):
    sys.stdout.reconfigure(encoding='utf-8')

import openpyxl
from app import create_app

def _split_bilingual_cell(orig_val, trans_val):
    """Split a translated cell of the form ``source\\ntranslation``.

    Args:
        orig_val: Value of the cell in the original workbook.
        trans_val: Value of the same cell in the translated workbook.

    Returns:
        A ``(source_text, translated_text)`` tuple when ``trans_val`` starts
        with a first line equal (after stripping) to ``orig_val`` and is
        followed by a non-empty translation; otherwise ``None``.
    """
    if not isinstance(orig_val, str) or not isinstance(trans_val, str):
        return None

    first_line, newline, remainder = trans_val.partition('\n')
    if not newline:
        return None

    source_text = first_line.strip()
    translated_text = remainder.strip()

    # An empty source or an empty translation would pollute the cache with
    # useless entries, so such cells are ignored.
    if not source_text or not translated_text:
        return None

    # Only trust the pair when the first line really is the original text.
    if orig_val.strip() != source_text:
        return None

    return source_text, translated_text


def _collect_translation_pairs(original_file, translated_file):
    """Compare both workbooks cell by cell and return the translation pairs.

    Every cell of the translated workbook's active sheet is inspected (not just
    a fixed window), and both workbooks are closed even if scanning fails.

    Args:
        original_file: Path of the untranslated workbook.
        translated_file: Path of the translated workbook.

    Returns:
        A list of dicts with the keys ``cell``, ``source_text`` and
        ``translated_text``.
    """
    pairs = []

    wb_orig = openpyxl.load_workbook(str(original_file), data_only=False)
    try:
        wb_trans = openpyxl.load_workbook(str(translated_file), data_only=False)
        try:
            ws_orig = wb_orig.active
            ws_trans = wb_trans.active

            for trans_row in ws_trans.iter_rows():
                for trans_cell in trans_row:
                    trans_val = trans_cell.value
                    if not trans_val:
                        continue

                    orig_val = ws_orig.cell(
                        row=trans_cell.row, column=trans_cell.column
                    ).value
                    split = _split_bilingual_cell(orig_val, trans_val)
                    if split is None:
                        continue

                    original_text, translated_text = split
                    # The cell's own coordinate is correct for any column,
                    # including those past "Z".
                    cell_name = trans_cell.coordinate
                    pairs.append({
                        'cell': cell_name,
                        'source_text': original_text,
                        'translated_text': translated_text
                    })
                    print(f"✅ {cell_name}: '{original_text[:30]}...' -> '{translated_text[:30]}...'")
        finally:
            wb_trans.close()
    finally:
        wb_orig.close()

    return pairs


def _fill_cache(translation_cache, translation_pairs, source_language, target_language):
    """Add or update a cache entry for every pair and print a summary.

    ``translation_cache`` is the project's ``TranslationCache`` class; only its
    ``get_translation`` and ``save_translation`` callables are used here.
    """
    added_count = 0
    updated_count = 0
    skipped_count = 0

    for pair in translation_pairs:
        source_text = pair['source_text']
        translated_text = pair['translated_text']

        existing = translation_cache.get_translation(source_text, source_language, target_language)

        if not existing:
            print(f"✅ {pair['cell']}: 新增快取翻譯")
            translation_cache.save_translation(source_text, source_language, target_language, translated_text)
            added_count += 1
        elif existing.strip() == translated_text.strip():
            print(f"⚠️ {pair['cell']}: 快取已存在且相同")
            skipped_count += 1
        else:
            print(f"🔄 {pair['cell']}: 更新快取翻譯")
            translation_cache.save_translation(source_text, source_language, target_language, translated_text)
            updated_count += 1

    print("\n快取補充結果:")
    print(f" 新增: {added_count}")
    print(f" 更新: {updated_count}")
    print(f" 跳過: {skipped_count}")
    print(f" 總計: {added_count + updated_count + skipped_count}")


def _verify_cache(translation_cache, translation_pairs, source_language, target_language):
    """Read every pair back from the cache and report how many match."""
    verification_failed = 0

    for pair in translation_pairs:
        cached_translation = translation_cache.get_translation(
            pair['source_text'], source_language, target_language
        )

        if not cached_translation:
            print(f"❌ {pair['cell']}: 驗證失敗 - 快取中沒有")
            verification_failed += 1
        elif cached_translation.strip() == pair['translated_text'].strip():
            print(f"✅ {pair['cell']}: 驗證成功")
        else:
            print(f"⚠️ {pair['cell']}: 驗證失敗 - 內容不一致")
            verification_failed += 1

    print(f"\n驗證結果: {len(translation_pairs) - verification_failed}/{len(translation_pairs)} 成功")


def _report_mapping_coverage(db, original_file, target_language):
    """Report how many segments of the original file have a cached translation.

    Must be called inside an application context because it queries through
    ``db.session``.
    """
    from app.services.translation_service import ExcelParser
    from sqlalchemy import text as sql_text

    parser = ExcelParser(str(original_file))
    segments = parser.extract_text_segments()

    print(f"文字片段提取: {len(segments)} 個")

    # The statement is invariant, so build it once outside the loop.
    lookup = sql_text("""
        SELECT translated_text
        FROM dt_translation_cache
        WHERE source_text = :text AND target_language = :lang
        ORDER BY created_at DESC
        LIMIT 1
    """)

    mapping_count = 0
    for segment in segments:
        result = db.session.execute(lookup, {'text': segment, 'lang': target_language})
        if result.fetchone() is not None:
            mapping_count += 1

    mapping_rate = mapping_count / len(segments) * 100 if segments else 0
    print(f"翻譯映射覆蓋率: {mapping_count}/{len(segments)} = {mapping_rate:.1f}%")

    if mapping_rate >= 80:
        print("✅ 映射覆蓋率良好,翻譯功能應該正常工作")
    else:
        print("⚠️ 映射覆蓋率不佳,可能仍有部分文字無法翻譯")


def extract_translations_from_excel():
    """Extract translation pairs from a translated Excel file into the cache.

    Steps:
        1. Compare the original and the translated workbook and collect every
           cell whose translated value is ``source\\ntranslation``.
        2. Add or update the corresponding translation cache entries.
        3. Read the entries back to verify them.
        4. Report how many text segments of the original file can now be
           resolved from the cache.

    Returns:
        None. Returns early (after printing a message) when either input file
        does not exist.
    """
    print("=" * 80)
    print("修復Excel翻譯快取缺失問題")
    print("從已翻譯檔案提取翻譯對照並補充快取")
    print("=" * 80)

    # Already-translated upload that serves as the source of the pairs.
    prod_dir = Path(r"C:\Users\EGG\WORK\data\user_scrip\TOOL\Document_translator_V2\uploads\f8b0febc-c0df-4902-8dc3-c90f5634f3b3")
    original_file = prod_dir / "original_panjit_f8b0febc.xlsx"
    translated_file = prod_dir / "original_panjit_f8b0febc_ja_translated.xlsx"

    if not original_file.exists() or not translated_file.exists():
        print("❌ 需要的檔案不存在")
        return

    target_language = 'ja'
    source_language = 'zh'

    # 1. Extract the translation pairs.
    print("\n1. 提取翻譯對照")
    print("-" * 60)

    translation_pairs = _collect_translation_pairs(original_file, translated_file)

    print(f"\n找到 {len(translation_pairs)} 個翻譯對照")

    # 2. Add the pairs to the cache.
    print("\n2. 補充翻譯快取")
    print("-" * 60)

    app = create_app()

    with app.app_context():
        # Imported here, after the application object has been created.
        from app.models.cache import TranslationCache
        from app import db

        _fill_cache(TranslationCache, translation_pairs, source_language, target_language)

        # 3. Verify what was written.
        print("\n3. 驗證補充結果")
        print("-" * 60)
        _verify_cache(TranslationCache, translation_pairs, source_language, target_language)

        # 4. Check the translation mapping logic against the cache.
        print("\n4. 測試翻譯映射邏輯")
        print("-" * 60)
        _report_mapping_coverage(db, original_file, target_language)

    print("\n" + "=" * 80)
    print("Excel翻譯快取修復完成!")
    print("建議: 重新上傳檔案測試翻譯功能")
    print("=" * 80)


if __name__ == "__main__":
    extract_translations_from_excel()
Reference in New Issue
Block a user