4th_fix time error
This commit is contained in:
220
test_xlsx_translation_format.py
Normal file
220
test_xlsx_translation_format.py
Normal file
@@ -0,0 +1,220 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
驗證XLSX翻譯格式 - 檢查翻譯文件內容
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
# Fix encoding for Windows console.
# Both standard streams are switched to UTF-8 so the non-ASCII status messages
# printed by this script do not raise UnicodeEncodeError. The stream may have
# been replaced by an object without ``reconfigure`` (or without an
# ``encoding``), so both attributes are looked up defensively instead of
# crashing at import time.
for _stream in (sys.stdout, sys.stderr):
    _stream_encoding = (getattr(_stream, 'encoding', None) or '').lower()
    _reconfigure = getattr(_stream, 'reconfigure', None)
    if _stream_encoding != 'utf-8' and callable(_reconfigure):
        _reconfigure(encoding='utf-8')

# Put the sibling ``app`` directory at the front of the module search path.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'app'))
|
||||
|
||||
from app import create_app, db
|
||||
from app.services.translation_service import ExcelParser
|
||||
from sqlalchemy import text as sql_text
|
||||
|
||||
def _find_xlsx_files(uploads_dir):
    """Return the Excel files located one level below *uploads_dir*.

    Each sub-directory of *uploads_dir* is scanned (non-recursively) for files
    whose suffix is ``.xlsx`` or ``.xls``. The result is sorted so that the
    file picked by the caller does not depend on directory iteration order.
    An empty list is returned when *uploads_dir* does not exist.
    """
    if not uploads_dir.exists():
        return []
    return sorted(
        file_path
        for job_dir in uploads_dir.iterdir()
        if job_dir.is_dir()
        for file_path in job_dir.iterdir()
        if file_path.suffix.lower() in ('.xlsx', '.xls')
    )


def _classify_cell_text(cell_text):
    """Classify *cell_text* by script and return ``(category, label)``.

    ``category`` is one of ``'mixed'``, ``'english'``, ``'chinese'`` or
    ``'other'``; ``label`` is the status string shown in the report.
    """
    has_chinese = any('\u4e00' <= c <= '\u9fff' for c in cell_text)
    # The literal term "PANJIT" is ignored as a whole word. The previous
    # per-character test (``c not in 'PANJIT'``) dropped every P/A/N/J/I/T,
    # so all-caps words such as "TIP" were never detected as English.
    # NOTE(review): "PANJIT" looks like a proper noun that is expected to stay
    # untranslated - confirm.
    latin_text = cell_text.replace('PANJIT', '')
    has_english = any(c.isascii() and c.isalpha() for c in latin_text)

    if has_chinese and has_english:
        return 'mixed', "🔄 中英混合"
    if has_english:
        return 'english', "🇺🇸 純英文"
    if has_chinese:
        return 'chinese', "🇨🇳 純中文"
    return 'other', "❓ 其他"


def _print_cache_coverage(text_segments, languages):
    """Print, per language, how many segments have a cached translation.

    Only segments longer than two characters after stripping are considered.
    For each of them the newest ``dt_translation_cache`` row matching the
    source text and target language is looked up.
    """
    # Built once: the statement is identical for every segment and language.
    query = sql_text("""
        SELECT translated_text
        FROM dt_translation_cache
        WHERE source_text = :text AND target_language = :lang
        ORDER BY created_at DESC
        LIMIT 1
    """)
    candidates = [text for text in text_segments if len(text.strip()) > 2]
    total_count = len(candidates)

    for lang in languages:
        translated_count = 0
        for text in candidates:
            row = db.session.execute(query, {'text': text, 'lang': lang}).fetchone()
            if row and row[0]:
                translated_count += 1

        coverage = (translated_count / total_count * 100) if total_count > 0 else 0
        print(f" {lang.upper()}翻譯覆蓋率: {coverage:.1f}% ({translated_count}/{total_count})")


def _analyze_sheet(ws, sheet_name):
    """Print language statistics for the top-left 20x5 region of *ws*."""
    print(f"\n📄 工作表: {sheet_name}")
    print(f" 最大行數: {ws.max_row}")
    print(f" 最大列數: {ws.max_column}")

    counts = {'chinese': 0, 'english': 0, 'mixed': 0, 'other': 0}
    empty_cells = 0
    sample_data = []

    # Inspect at most the first 20 rows and the first 5 columns.
    for row in range(1, min(21, ws.max_row + 1)):
        for col in range(1, min(6, ws.max_column + 1)):
            cell = ws.cell(row, col)
            # ``is None`` rather than truthiness: a cell holding 0 or False
            # has content and must not be counted as empty.
            cell_text = '' if cell.value is None else str(cell.value).strip()
            if not cell_text:
                empty_cells += 1
                continue

            category, lang_status = _classify_cell_text(cell_text)
            counts[category] += 1

            # Keep the first 10 non-empty cells as samples for the report.
            if len(sample_data) < 10:
                sample_data.append({
                    'position': cell.coordinate,
                    'status': lang_status,
                    'content': cell_text[:50],
                })

    chinese_cells = counts['chinese']
    english_cells = counts['english']
    mixed_cells = counts['mixed']

    print(" 內容統計:")
    print(f" 純中文儲存格: {chinese_cells}")
    print(f" 純英文儲存格: {english_cells}")
    print(f" 中英混合儲存格: {mixed_cells}")
    print(f" 空儲存格: {empty_cells}")

    if sample_data:
        print(" 前10個內容樣本:")
        for sample in sample_data:
            print(f" {sample['position']}: {sample['status']} - {sample['content']}...")

    # Verdict for this sheet, based on the counters collected above.
    total_content_cells = chinese_cells + english_cells + mixed_cells
    if total_content_cells == 0:
        print("\n❌ 沒有發現任何內容,可能翻譯失敗")
    elif english_cells > chinese_cells * 0.5:
        print("\n✅ XLSX翻譯格式良好")
        print(f" - 英文內容比例: {english_cells / total_content_cells * 100:.1f}%")
    elif mixed_cells > chinese_cells * 0.3:
        print("\n⚠️ XLSX翻譯採用混合格式")
        print(f" - 混合內容比例: {mixed_cells / total_content_cells * 100:.1f}%")
    else:
        print("\n🔍 XLSX翻譯可能使用原始格式(主要為中文)")
        print(f" - 中文內容比例: {chinese_cells / total_content_cells * 100:.1f}%")


def _analyze_translated_workbook(output_file):
    """Open *output_file* with openpyxl and report on its first three sheets."""
    # Imported lazily so a missing openpyxl is reported by the caller's
    # exception handler instead of breaking the whole script at import time.
    import openpyxl

    print(f"檔案大小: {output_file.stat().st_size:,} bytes")

    wb = openpyxl.load_workbook(str(output_file))
    try:
        print("\n📊 Excel文件分析:")
        print(f"工作表數量: {len(wb.sheetnames)}")
        for sheet_name in wb.sheetnames[:3]:
            _analyze_sheet(wb[sheet_name], sheet_name)
    finally:
        # Always release the workbook, even when the analysis fails midway.
        wb.close()


def test_xlsx_translation_format():
    """Verify the format of translated XLSX output.

    Picks the first Excel file found under ``uploads/<job>/``, reports how
    many of its text segments have cached English and Vietnamese translations,
    generates both translated workbooks into a directory under the system
    temp directory, and prints a per-sheet language breakdown of the English
    output.

    This is a diagnostic script: every failure is caught and printed rather
    than raised, and the function returns ``None`` in all cases.
    """
    app = create_app()

    with app.app_context():
        print("=== 驗證XLSX翻譯格式 ===")

        # Look for an existing XLSX file to test with.
        xlsx_files = _find_xlsx_files(Path("uploads"))
        if not xlsx_files:
            print("❌ 沒有找到XLSX測試文件")
            return

        # Use the first file found.
        test_file = xlsx_files[0]
        print(f"✅ 使用測試文件: {test_file}")

        # Output directory for the generated workbooks; it is left in place
        # so the generated files can be inspected afterwards.
        test_dir = Path(tempfile.gettempdir()) / "xlsx_format_test"
        test_dir.mkdir(exist_ok=True)

        try:
            parser = ExcelParser(str(test_file))

            # Extract the text segments.
            text_segments = parser.extract_text_segments()
            print("\n📄 文件分析:")
            print(f"提取的文字段落數: {len(text_segments)}")

            # Check translation coverage in the cache.
            _print_cache_coverage(text_segments, ['en', 'vi'])

            # Generate the English translation.
            print("\n🔄 生成英文翻譯XLSX文件...")
            try:
                en_output_path = parser.generate_translated_document(
                    {},  # empty mapping: translations are read from the cache
                    'en',
                    test_dir,
                )
                print(f"✅ 英文翻譯文件生成: {en_output_path}")

                # Inspect the content of the generated file.
                try:
                    output_file = Path(en_output_path)
                    if output_file.exists():
                        _analyze_translated_workbook(output_file)
                    else:
                        print("❌ 生成的檔案不存在")
                except Exception as e:
                    print(f"❌ 分析Excel檔案失敗: {e}")

            except Exception as e:
                print(f"❌ 生成英文翻譯失敗: {e}")

            # Quick check of the Vietnamese translation.
            print("\n🔄 生成越南文翻譯XLSX文件...")
            try:
                vi_output_path = parser.generate_translated_document(
                    {},
                    'vi',
                    test_dir,
                )
                print(f"✅ 越南文翻譯文件生成: {vi_output_path}")

                # Only confirm that a file was produced.
                vi_file = Path(vi_output_path)
                if vi_file.exists():
                    print(f" 檔案大小: {vi_file.stat().st_size:,} bytes")
                else:
                    print(" ❌ 越南文文件不存在")

            except Exception as e:
                print(f"❌ 生成越南文翻譯失敗: {e}")

        except Exception as e:
            print(f"❌ XLSX格式驗證失敗: {e}")
|
||||
|
||||
# Run the verification when the file is executed directly as a script.
if __name__ == "__main__":
    test_xlsx_translation_format()
|
Reference in New Issue
Block a user