116 lines
4.1 KiB
Python
116 lines
4.1 KiB
Python
#!/usr/bin/env python3
|
|
# -*- coding: utf-8 -*-
|
|
"""
|
|
檢查翻譯快取資料
|
|
"""
|
|
|
|
import sys
|
|
import os
|
|
|
|
def _ensure_utf8(stream):
    """Switch *stream* to UTF-8 output if it can be reconfigured in place.

    Does nothing when *stream* is ``None``, when it already reports a UTF-8
    encoding (compared case-insensitively), or when it has no callable
    ``reconfigure`` attribute (for example an ``io.StringIO`` replacement).
    """
    if stream is None:
        return
    encoding = str(getattr(stream, "encoding", None) or "").lower()
    if encoding in ("utf-8", "utf8"):
        return
    reconfigure = getattr(stream, "reconfigure", None)
    if callable(reconfigure):
        reconfigure(encoding="utf-8")


# Fix encoding for Windows console: the report below prints CJK text and
# emoji, which a non-UTF-8 console encoding cannot represent.
for _stream in (sys.stdout, sys.stderr):
    _ensure_utf8(_stream)

# Put the sibling "app" directory at the front of the import search path.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'app'))
|
|
|
|
from app import create_app, db
|
|
from sqlalchemy import text
|
|
|
|
def check_translation_cache():
|
|
"""檢查翻譯快取資料"""
|
|
|
|
app = create_app()
|
|
|
|
with app.app_context():
|
|
print("=== 檢查翻譯快取資料 ===")
|
|
|
|
# 總記錄數
|
|
total_result = db.session.execute(text("SELECT COUNT(*) FROM dt_translation_cache"))
|
|
total_count = total_result.scalar()
|
|
print(f"翻譯快取總記錄數: {total_count:,}")
|
|
|
|
# 按語言分組統計
|
|
lang_result = db.session.execute(text("""
|
|
SELECT target_language, COUNT(*)
|
|
FROM dt_translation_cache
|
|
GROUP BY target_language
|
|
ORDER BY COUNT(*) DESC
|
|
"""))
|
|
|
|
print(f"\n按語言分組:")
|
|
for row in lang_result.fetchall():
|
|
print(f" {row[0]}: {row[1]:,} 條")
|
|
|
|
# 最近的翻譯記錄
|
|
recent_result = db.session.execute(text("""
|
|
SELECT source_text, target_language, translated_text, created_at
|
|
FROM dt_translation_cache
|
|
ORDER BY created_at DESC
|
|
LIMIT 10
|
|
"""))
|
|
|
|
print(f"\n最近的10條翻譯記錄:")
|
|
for row in recent_result.fetchall():
|
|
source = row[0][:50] + "..." if len(row[0]) > 50 else row[0]
|
|
target = row[2][:50] + "..." if len(row[2]) > 50 else row[2]
|
|
print(f" [{row[1]}] {source} -> {target} ({row[3]})")
|
|
|
|
# 搜尋包含DOCX任務相關的翻譯
|
|
print(f"\n=== 搜尋DOCX任務相關翻譯 ===")
|
|
|
|
# 搜尋常見的中文詞彙
|
|
keywords = ["目的", "适用范围", "定义", "烤箱设备", "维护保养"]
|
|
|
|
for keyword in keywords:
|
|
search_result = db.session.execute(text("""
|
|
SELECT source_text, target_language, translated_text
|
|
FROM dt_translation_cache
|
|
WHERE source_text LIKE :keyword
|
|
ORDER BY created_at DESC
|
|
LIMIT 3
|
|
"""), {'keyword': f'%{keyword}%'})
|
|
|
|
results = search_result.fetchall()
|
|
if results:
|
|
print(f"\n包含'{keyword}'的翻譯:")
|
|
for row in results:
|
|
source = row[0][:60] + "..." if len(row[0]) > 60 else row[0]
|
|
target = row[2][:60] + "..." if len(row[2]) > 60 else row[2]
|
|
print(f" [{row[1]}] {source}")
|
|
print(f" -> {target}")
|
|
|
|
# 檢查英文翻譯品質
|
|
print(f"\n=== 檢查翻譯品質 ===")
|
|
|
|
en_sample_result = db.session.execute(text("""
|
|
SELECT source_text, translated_text
|
|
FROM dt_translation_cache
|
|
WHERE target_language = 'en'
|
|
AND CHAR_LENGTH(source_text) > 10
|
|
ORDER BY created_at DESC
|
|
LIMIT 5
|
|
"""))
|
|
|
|
print(f"英文翻譯範例:")
|
|
for row in en_sample_result.fetchall():
|
|
print(f" 原文: {row[0]}")
|
|
print(f" 譯文: {row[1]}")
|
|
|
|
# 檢查翻譯是否正確
|
|
has_chinese = any('\u4e00' <= c <= '\u9fff' for c in row[1])
|
|
has_english = any(ord(c) < 128 and c.isalpha() for c in row[1])
|
|
|
|
if has_chinese and not has_english:
|
|
print(f" ❌ 翻譯失敗 - 譯文仍是中文")
|
|
elif has_english and not has_chinese:
|
|
print(f" ✅ 翻譯成功 - 譯文是英文")
|
|
elif has_chinese and has_english:
|
|
print(f" ⚠️ 混合語言 - 可能是交錯格式")
|
|
else:
|
|
print(f" ❓ 未知狀態")
|
|
print()
|
|
|
|
# Script entry point: run the report only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    check_translation_cache()