5th_fix excel problem
This commit is contained in:
@@ -1,96 +1,176 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Test the fixed translation service
|
||||
測試修正後的翻譯功能 - 重新生成翻譯文件
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
|
||||
# Fix encoding for Windows console
|
||||
if sys.stdout.encoding != 'utf-8':
|
||||
sys.stdout.reconfigure(encoding='utf-8')
|
||||
if sys.stderr.encoding != 'utf-8':
|
||||
sys.stderr.reconfigure(encoding='utf-8')
|
||||
# 設定編碼
|
||||
sys.stdout.reconfigure(encoding='utf-8')
|
||||
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'app'))
|
||||
from pathlib import Path
|
||||
from app.services.translation_service import ExcelParser
|
||||
import openpyxl
|
||||
|
||||
from app import create_app
|
||||
from app.services.translation_service import TranslationService
|
||||
from app.models.job import TranslationJob
|
||||
|
||||
def test_fixed_translation_service():
|
||||
"""Test the fixed translation service on a real job"""
|
||||
def test_fixed_translation():
|
||||
"""測試修正後的翻譯功能"""
|
||||
|
||||
print("=" * 80)
|
||||
print("測試修正後的Excel翻譯功能")
|
||||
print("=" * 80)
|
||||
|
||||
# 使用現有的測試文件
|
||||
test_dir = Path(r"C:\Users\EGG\WORK\data\user_scrip\TOOL\Document_translator_V2\uploads\185bb457-b703-4e98-94a2-fde072b895c4")
|
||||
original_file = test_dir / "original_panjit_185bb457.xlsx"
|
||||
|
||||
if not original_file.exists():
|
||||
print(f"原始文件不存在: {original_file}")
|
||||
return
|
||||
|
||||
# 創建一個新的翻譯文件名稱
|
||||
new_translated_file = test_dir / "original_panjit_185bb457_ja_translated_fixed.xlsx"
|
||||
|
||||
print(f"✅ 使用原始文件: {original_file.name}")
|
||||
print(f"✅ 生成新翻譯文件: {new_translated_file.name}")
|
||||
|
||||
# 1. 驗證提取功能
|
||||
print(f"\n1. 驗證文字提取功能")
|
||||
print("-" * 60)
|
||||
|
||||
parser = ExcelParser(str(original_file))
|
||||
segments = parser.extract_text_segments()
|
||||
|
||||
print(f"提取到 {len(segments)} 個文字片段")
|
||||
|
||||
# 檢查A1是否在其中
|
||||
a1_content = "製程"
|
||||
if a1_content in segments:
|
||||
print(f"✅ A1內容 '{a1_content}' 已被提取")
|
||||
print(f" 位置: 第{segments.index(a1_content)+1}個")
|
||||
else:
|
||||
print(f"❌ A1內容 '{a1_content}' 仍未被提取")
|
||||
return
|
||||
|
||||
# 2. 驗證翻譯快取
|
||||
print(f"\n2. 驗證翻譯快取狀況")
|
||||
print("-" * 60)
|
||||
|
||||
from app import create_app
|
||||
app = create_app()
|
||||
|
||||
with app.app_context():
|
||||
# Get the most recent job to test with
|
||||
job = TranslationJob.query.order_by(TranslationJob.created_at.desc()).first()
|
||||
|
||||
if not job:
|
||||
print("No jobs found to test")
|
||||
return
|
||||
|
||||
print(f"Testing translation service on job: {job.job_uuid}")
|
||||
print(f"Original filename: {job.original_filename}")
|
||||
print(f"Target languages: {job.target_languages}")
|
||||
print(f"File path: {job.file_path}")
|
||||
|
||||
# Reset job status to PENDING for testing
|
||||
job.status = 'PENDING'
|
||||
job.progress = 0.0
|
||||
job.error_message = None
|
||||
|
||||
from sqlalchemy import text as sql_text
|
||||
from app import db
|
||||
db.session.commit()
|
||||
|
||||
print(f"Reset job status to PENDING")
|
||||
target_language = 'ja'
|
||||
translation_map = {}
|
||||
missing_count = 0
|
||||
|
||||
# Create translation service and test
|
||||
service = TranslationService()
|
||||
for segment in segments:
|
||||
result = db.session.execute(sql_text("""
|
||||
SELECT translated_text
|
||||
FROM dt_translation_cache
|
||||
WHERE source_text = :text AND target_language = :lang
|
||||
ORDER BY created_at DESC
|
||||
LIMIT 1
|
||||
"""), {'text': segment, 'lang': target_language})
|
||||
|
||||
row = result.fetchone()
|
||||
if row:
|
||||
translation_map[segment] = row[0]
|
||||
if segment == a1_content:
|
||||
print(f"✅ '{segment}' -> '{row[0]}'")
|
||||
else:
|
||||
missing_count += 1
|
||||
if segment == a1_content:
|
||||
print(f"❌ '{segment}' -> 無翻譯記錄")
|
||||
|
||||
print(f"翻譯快取命中: {len(translation_map)}/{len(segments)} = {len(translation_map)/len(segments)*100:.1f}%")
|
||||
print(f"缺失翻譯: {missing_count} 個")
|
||||
|
||||
# 3. 手動生成翻譯文件
|
||||
print(f"\n3. 手動生成翻譯文件")
|
||||
print("-" * 60)
|
||||
|
||||
try:
|
||||
print("Starting translation...")
|
||||
result = service.translate_document(job.job_uuid)
|
||||
|
||||
print(f"Translation completed!")
|
||||
print(f"Result: {result}")
|
||||
|
||||
# Check the job status
|
||||
db.session.refresh(job)
|
||||
print(f"Final job status: {job.status}")
|
||||
print(f"Progress: {job.progress}%")
|
||||
print(f"Total tokens: {job.total_tokens}")
|
||||
print(f"Total cost: ${job.total_cost}")
|
||||
|
||||
if job.error_message:
|
||||
print(f"Error message: {job.error_message}")
|
||||
|
||||
# Check translated files
|
||||
translated_files = job.get_translated_files()
|
||||
print(f"Generated {len(translated_files)} translated files:")
|
||||
for tf in translated_files:
|
||||
print(f" - {tf.filename} ({tf.language_code}) - Size: {tf.file_size} bytes")
|
||||
|
||||
# Check if file exists and has content
|
||||
from pathlib import Path
|
||||
if Path(tf.file_path).exists():
|
||||
size = Path(tf.file_path).stat().st_size
|
||||
print(f" File exists with {size} bytes")
|
||||
|
||||
# Quick check if it contains translations (different from original)
|
||||
if size != job.get_original_file().file_size:
|
||||
print(f" ✅ File size differs from original - likely contains translations")
|
||||
else:
|
||||
print(f" ⚠️ File size same as original - may not contain translations")
|
||||
else:
|
||||
print(f" ❌ File not found at: {tf.file_path}")
|
||||
# 在app context內使用ExcelParser的generate_translated_document方法
|
||||
translated_file_path = parser.generate_translated_document(
|
||||
translations={}, # 空字典,會使用快取查詢
|
||||
target_language='ja',
|
||||
output_dir=test_dir
|
||||
)
|
||||
|
||||
# 重新命名為我們的測試檔名
|
||||
import shutil
|
||||
if Path(translated_file_path).exists():
|
||||
shutil.move(translated_file_path, str(new_translated_file))
|
||||
print(f"✅ 翻譯文件已生成: {new_translated_file.name}")
|
||||
else:
|
||||
print(f"❌ 翻譯文件生成失敗")
|
||||
return
|
||||
except Exception as e:
|
||||
print(f"Translation failed with error: {e}")
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
print(f"❌ 生成翻譯文件時出錯: {str(e)}")
|
||||
return
|
||||
|
||||
# 4. 驗證翻譯結果
|
||||
print(f"\n4. 驗證翻譯結果")
|
||||
print("-" * 60)
|
||||
|
||||
wb_orig = openpyxl.load_workbook(str(original_file), data_only=False)
|
||||
wb_trans = openpyxl.load_workbook(str(new_translated_file), data_only=False)
|
||||
|
||||
# 檢查A1儲存格
|
||||
a1_orig = wb_orig.active['A1'].value
|
||||
a1_trans = wb_trans.active['A1'].value
|
||||
|
||||
print(f"A1儲存格檢查:")
|
||||
print(f" 原始: {repr(a1_orig)}")
|
||||
print(f" 翻譯: {repr(a1_trans)}")
|
||||
|
||||
if isinstance(a1_trans, str) and '\n' in a1_trans:
|
||||
lines = a1_trans.split('\n')
|
||||
if len(lines) >= 2 and lines[0].strip() == a1_content:
|
||||
print(f" ✅ A1翻譯成功!")
|
||||
print(f" 原文: '{lines[0]}'")
|
||||
print(f" 譯文: '{lines[1]}'")
|
||||
success = True
|
||||
else:
|
||||
print(f" ⚠️ A1格式異常")
|
||||
success = False
|
||||
else:
|
||||
print(f" ❌ A1未翻譯")
|
||||
success = False
|
||||
|
||||
# 檢查其他重要儲存格
|
||||
test_cells = ['C1', 'D1', 'B2', 'C2']
|
||||
translated_count = 0
|
||||
|
||||
for cell_name in test_cells:
|
||||
orig_val = wb_orig.active[cell_name].value
|
||||
trans_val = wb_trans.active[cell_name].value
|
||||
|
||||
if orig_val and isinstance(trans_val, str) and '\n' in trans_val:
|
||||
translated_count += 1
|
||||
|
||||
print(f"\n其他儲存格翻譯狀況: {translated_count}/{len(test_cells)} 個成功翻譯")
|
||||
|
||||
wb_orig.close()
|
||||
wb_trans.close()
|
||||
|
||||
# 5. 最終結果
|
||||
print(f"\n" + "=" * 80)
|
||||
if success:
|
||||
print("🎉 測試成功!A1儲存格翻譯問題已修復!")
|
||||
print(f" 新翻譯文件: {new_translated_file}")
|
||||
print(" - ✅ 文字提取修正生效")
|
||||
print(" - ✅ 翻譯快取記錄已補充")
|
||||
print(" - ✅ A1儲存格翻譯正常")
|
||||
else:
|
||||
print("❌ 測試失敗!需要進一步排查問題。")
|
||||
print("=" * 80)
|
||||
|
||||
if __name__ == "__main__":
|
||||
test_fixed_translation_service()
|
||||
test_fixed_translation()
|
Reference in New Issue
Block a user