5th_fix excel problem

This commit is contained in:
beabigegg
2025-09-03 15:07:34 +08:00
parent cce3fd4925
commit 5fd0671b4f
28 changed files with 4484 additions and 97 deletions

View File

@@ -1,96 +1,176 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Test the fixed translation service
測試修正後的翻譯功能 - 重新生成翻譯文件
"""
# Second docstring line, in English: "Test the fixed translation feature -
# regenerate the translated file".
# NOTE(review): this listing comes from a commit diff view (hunk header
# "@@ -1,96 +1,176 @@"); removed and added lines appear interleaved without
# +/- markers and indentation has been lost, so some statements below are
# presumably duplicates from the old and new versions — confirm against the
# repository before running.
import sys
import os
# Put the directory containing this script at the front of the import path.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
# Fix encoding for Windows console
if sys.stdout.encoding != 'utf-8':
sys.stdout.reconfigure(encoding='utf-8')
if sys.stderr.encoding != 'utf-8':
sys.stderr.reconfigure(encoding='utf-8')
# Set the output encoding
# (unconditional variant of the stdout check above; stderr is not touched here)
sys.stdout.reconfigure(encoding='utf-8')
# Also put the "app" subdirectory next to this script on the import path.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'app'))
from pathlib import Path
from app.services.translation_service import ExcelParser
import openpyxl
from app import create_app
from app.services.translation_service import TranslationService
from app.models.job import TranslationJob
# NOTE(review): this region is a flattened commit diff (see the
# "@@ -1,96 +1,176 @@" hunk header): two function definitions and their bodies
# are interleaved without +/- markers and all indentation is missing.
# Presumably test_fixed_translation_service is the removed version and
# test_fixed_translation the added one — confirm against the repository.
def test_fixed_translation_service():
"""Test the fixed translation service on a real job"""
def test_fixed_translation():
"""測試修正後的翻譯功能"""
# Docstring above, in English: "Test the fixed translation feature".
print("=" * 80)
print("測試修正後的Excel翻譯功能")
print("=" * 80)
# Use an existing test file
# NOTE(review): hard-coded absolute Windows path to a single upload directory;
# the script returns early just below when the workbook is not present.
test_dir = Path(r"C:\Users\EGG\WORK\data\user_scrip\TOOL\Document_translator_V2\uploads\185bb457-b703-4e98-94a2-fde072b895c4")
original_file = test_dir / "original_panjit_185bb457.xlsx"
if not original_file.exists():
print(f"原始文件不存在: {original_file}")
return
# Build a new name for the translated output file
new_translated_file = test_dir / "original_panjit_185bb457_ja_translated_fixed.xlsx"
print(f"✅ 使用原始文件: {original_file.name}")
print(f"✅ 生成新翻譯文件: {new_translated_file.name}")
# 1. Verify text extraction
print(f"\n1. 驗證文字提取功能")
print("-" * 60)
# segments is used below with `in`, `.index()` and `len()`, so it is
# presumably a list of strings — confirm against ExcelParser.
parser = ExcelParser(str(original_file))
segments = parser.extract_text_segments()
print(f"提取到 {len(segments)} 個文字片段")
# Check whether the A1 content is among the extracted segments
a1_content = "製程"
if a1_content in segments:
print(f"✅ A1內容 '{a1_content}' 已被提取")
print(f" 位置: 第{segments.index(a1_content)+1}")
else:
print(f"❌ A1內容 '{a1_content}' 仍未被提取")
return
# 2. Verify the translation cache
print(f"\n2. 驗證翻譯快取狀況")
print("-" * 60)
# create_app is also imported at the top of this listing; the database work
# below runs inside the application context.
from app import create_app
app = create_app()
with app.app_context():
# Get the most recent job to test with
job = TranslationJob.query.order_by(TranslationJob.created_at.desc()).first()
if not job:
print("No jobs found to test")
return
print(f"Testing translation service on job: {job.job_uuid}")
print(f"Original filename: {job.original_filename}")
print(f"Target languages: {job.target_languages}")
print(f"File path: {job.file_path}")
# Reset job status to PENDING for testing
# NOTE(review): this mutates and commits a real job row.
job.status = 'PENDING'
job.progress = 0.0
job.error_message = None
from sqlalchemy import text as sql_text
from app import db
db.session.commit()
print(f"Reset job status to PENDING")
# Cache lookup state: source segment -> cached translation, plus a miss counter.
target_language = 'ja'
translation_map = {}
missing_count = 0
# Create translation service and test
service = TranslationService()
# Look up the newest cached translation for each segment; the values are passed
# as bound parameters (:text, :lang) rather than formatted into the SQL string.
for segment in segments:
result = db.session.execute(sql_text("""
SELECT translated_text
FROM dt_translation_cache
WHERE source_text = :text AND target_language = :lang
ORDER BY created_at DESC
LIMIT 1
"""), {'text': segment, 'lang': target_language})
row = result.fetchone()
if row:
translation_map[segment] = row[0]
if segment == a1_content:
print(f"'{segment}' -> '{row[0]}'")
else:
missing_count += 1
if segment == a1_content:
print(f"'{segment}' -> 無翻譯記錄")
# NOTE(review): the hit rate divides by len(segments); the early return after
# the A1 check above appears to guarantee a non-empty list — confirm once the
# indentation is restored. In the visible lines translation_map is only read
# through len().
print(f"翻譯快取命中: {len(translation_map)}/{len(segments)} = {len(translation_map)/len(segments)*100:.1f}%")
print(f"缺失翻譯: {missing_count}")
# 3. Manually generate the translated file
print(f"\n3. 手動生成翻譯文件")
print("-" * 60)
try:
print("Starting translation...")
result = service.translate_document(job.job_uuid)
print(f"Translation completed!")
print(f"Result: {result}")
# Check the job status
# Reload the job from the session so the printed fields reflect the run.
db.session.refresh(job)
print(f"Final job status: {job.status}")
print(f"Progress: {job.progress}%")
print(f"Total tokens: {job.total_tokens}")
print(f"Total cost: ${job.total_cost}")
if job.error_message:
print(f"Error message: {job.error_message}")
# Check translated files
translated_files = job.get_translated_files()
print(f"Generated {len(translated_files)} translated files:")
for tf in translated_files:
print(f" - {tf.filename} ({tf.language_code}) - Size: {tf.file_size} bytes")
# Check if file exists and has content
# (Path is also imported at the top of this listing.)
from pathlib import Path
if Path(tf.file_path).exists():
size = Path(tf.file_path).stat().st_size
print(f" File exists with {size} bytes")
# Quick check if it contains translations (different from original)
# Heuristic only: compares the on-disk size with the original file's recorded size.
if size != job.get_original_file().file_size:
print(f" ✅ File size differs from original - likely contains translations")
else:
print(f" ⚠️ File size same as original - may not contain translations")
else:
print(f" ❌ File not found at: {tf.file_path}")
# Call ExcelParser.generate_translated_document inside the app context
translated_file_path = parser.generate_translated_document(
translations={}, # empty dict; the cache lookup will be used
target_language='ja',
output_dir=test_dir
)
# Rename to our test file name
import shutil
if Path(translated_file_path).exists():
shutil.move(translated_file_path, str(new_translated_file))
print(f"✅ 翻譯文件已生成: {new_translated_file.name}")
else:
print(f"❌ 翻譯文件生成失敗")
return
# Any failure is reported with a traceback and ends the test early.
except Exception as e:
print(f"Translation failed with error: {e}")
import traceback
traceback.print_exc()
print(f"❌ 生成翻譯文件時出錯: {str(e)}")
return
# 4. Verify the translation result
print(f"\n4. 驗證翻譯結果")
print("-" * 60)
# Open both workbooks with data_only=False and compare cells on the active sheet.
wb_orig = openpyxl.load_workbook(str(original_file), data_only=False)
wb_trans = openpyxl.load_workbook(str(new_translated_file), data_only=False)
# Check cell A1
a1_orig = wb_orig.active['A1'].value
a1_trans = wb_trans.active['A1'].value
print(f"A1儲存格檢查:")
print(f" 原始: {repr(a1_orig)}")
print(f" 翻譯: {repr(a1_trans)}")
# A translated cell is expected to hold the original text on its first line and
# the translation on the second, separated by a newline.
if isinstance(a1_trans, str) and '\n' in a1_trans:
lines = a1_trans.split('\n')
if len(lines) >= 2 and lines[0].strip() == a1_content:
print(f" ✅ A1翻譯成功")
print(f" 原文: '{lines[0]}'")
print(f" 譯文: '{lines[1]}'")
success = True
else:
print(f" ⚠️ A1格式異常")
success = False
else:
print(f" ❌ A1未翻譯")
success = False
# Check other important cells
# A cell counts as translated when the original is non-empty and the translated
# value is a string containing a newline.
test_cells = ['C1', 'D1', 'B2', 'C2']
translated_count = 0
for cell_name in test_cells:
orig_val = wb_orig.active[cell_name].value
trans_val = wb_trans.active[cell_name].value
if orig_val and isinstance(trans_val, str) and '\n' in trans_val:
translated_count += 1
print(f"\n其他儲存格翻譯狀況: {translated_count}/{len(test_cells)} 個成功翻譯")
# NOTE(review): the workbooks are closed only on this straight-line path; an
# exception raised during the cell checks above would skip these calls.
wb_orig.close()
wb_trans.close()
# 5. Final result
# The verdict depends solely on the A1 check (success); translated_count is
# only reported above.
print(f"\n" + "=" * 80)
if success:
print("🎉 測試成功A1儲存格翻譯問題已修復")
print(f" 新翻譯文件: {new_translated_file}")
print(" - ✅ 文字提取修正生效")
print(" - ✅ 翻譯快取記錄已補充")
print(" - ✅ A1儲存格翻譯正常")
else:
print("❌ 測試失敗!需要進一步排查問題。")
print("=" * 80)
# Script entry point: run the test when the file is executed directly.
# NOTE(review): two calls are listed because this text is a flattened diff with
# removed and added lines side by side; presumably only one of them exists in
# either real version of the file — confirm against the repository.
if __name__ == "__main__":
test_fixed_translation_service()
test_fixed_translation()