Files
Document_Translator/check_translation_cache.py
2025-09-03 09:05:51 +08:00

116 lines
4.1 KiB
Python

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
檢查翻譯快取資料
"""
import sys
import os
# Force UTF-8 on the console streams: this script prints CJK text and emoji,
# which a Windows console using a legacy code page cannot encode.
for _stream in (sys.stdout, sys.stderr):
    # A stream may be None (e.g. pythonw) or may have been replaced by an
    # object without ``reconfigure`` (IDE consoles, io.StringIO); skip those
    # instead of crashing before the report even starts.
    _encoding = (getattr(_stream, "encoding", None) or "").lower()
    if _encoding not in ("utf-8", "utf8") and hasattr(_stream, "reconfigure"):
        _stream.reconfigure(encoding="utf-8")
del _stream, _encoding

# Put the ``app`` directory that sits next to this script first on the import
# path, ahead of the project import that follows.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'app'))
from app import create_app, db
from sqlalchemy import text
def _truncate(value, limit):
    """Return *value* cut to *limit* characters, with "..." appended when cut.

    ``None`` (a NULL column) is rendered as an empty string so that a single
    incomplete cache row cannot abort the whole report.
    """
    if value is None:
        return ""
    return value[:limit] + "..." if len(value) > limit else value


def _classify_translation(translated):
    """Return the verdict line for a translation whose target language is English.

    The verdict is based only on which scripts appear in *translated*:
    characters in the CJK Unified Ideographs range U+4E00..U+9FFF count as
    Chinese, ASCII letters count as English. ``None`` is treated as empty
    text and therefore yields the "unknown" verdict.
    """
    translated = translated or ""
    has_chinese = any('\u4e00' <= ch <= '\u9fff' for ch in translated)
    has_english = any(ch.isascii() and ch.isalpha() for ch in translated)
    if has_chinese and has_english:
        return " ⚠️ 混合語言 - 可能是交錯格式"
    if has_chinese:
        return " ❌ 翻譯失敗 - 譯文仍是中文"
    if has_english:
        return " ✅ 翻譯成功 - 譯文是英文"
    return " ❓ 未知狀態"


def check_translation_cache():
    """Print a diagnostic report about the ``dt_translation_cache`` table.

    Creates the application, enters its app context and prints, in order:

    1. the total number of cached rows,
    2. the row count per ``target_language``,
    3. the 10 most recently created rows (texts truncated to 50 characters),
    4. up to 3 recent rows per hard-coded keyword whose ``source_text``
       contains that keyword (texts truncated to 60 characters),
    5. the 5 most recent rows with ``target_language = 'en'`` and a source
       longer than 10 characters, each followed by a script-based verdict.

    Returns:
        None. All output goes to stdout.
    """
    app = create_app()
    with app.app_context():
        print("=== 檢查翻譯快取資料 ===")

        # Total number of cached rows.
        total_count = db.session.execute(
            text("SELECT COUNT(*) FROM dt_translation_cache")
        ).scalar()
        print(f"翻譯快取總記錄數: {total_count:,}")

        # Row count per target language, largest group first.
        lang_result = db.session.execute(text("""
            SELECT target_language, COUNT(*)
            FROM dt_translation_cache
            GROUP BY target_language
            ORDER BY COUNT(*) DESC
        """))
        print("\n按語言分組:")
        for language, count in lang_result:
            print(f" {language}: {count:,}")

        # Most recently created rows.
        recent_result = db.session.execute(text("""
            SELECT source_text, target_language, translated_text, created_at
            FROM dt_translation_cache
            ORDER BY created_at DESC
            LIMIT 10
        """))
        print("\n最近的10條翻譯記錄:")
        for source_text, language, translated_text, created_at in recent_result:
            source = _truncate(source_text, 50)
            target = _truncate(translated_text, 50)
            print(f" [{language}] {source} -> {target} ({created_at})")

        # Look for rows that appear to belong to the DOCX task by searching
        # the source text for a few common Chinese terms.
        print("\n=== 搜尋DOCX任務相關翻譯 ===")
        keywords = ["目的", "适用范围", "定义", "烤箱设备", "维护保养"]
        keyword_query = text("""
            SELECT source_text, target_language, translated_text
            FROM dt_translation_cache
            WHERE source_text LIKE :keyword
            ORDER BY created_at DESC
            LIMIT 3
        """)
        for keyword in keywords:
            # The keyword is passed as a bound parameter; only the LIKE
            # wildcards are added here.
            matches = db.session.execute(
                keyword_query, {'keyword': f'%{keyword}%'}
            ).fetchall()
            if not matches:
                continue
            print(f"\n包含'{keyword}'的翻譯:")
            for source_text, language, translated_text in matches:
                print(f" [{language}] {_truncate(source_text, 60)}")
                print(f" -> {_truncate(translated_text, 60)}")

        # Spot-check the quality of recent English translations.
        # NOTE(review): CHAR_LENGTH is not available in every SQL dialect
        # (SQLite, for example, uses LENGTH) - confirm the target database.
        print("\n=== 檢查翻譯品質 ===")
        en_sample_result = db.session.execute(text("""
            SELECT source_text, translated_text
            FROM dt_translation_cache
            WHERE target_language = 'en'
            AND CHAR_LENGTH(source_text) > 10
            ORDER BY created_at DESC
            LIMIT 5
        """))
        print("英文翻譯範例:")
        for source_text, translated_text in en_sample_result:
            print(f" 原文: {source_text}")
            print(f" 譯文: {translated_text}")
            # Verdict: does the "English" translation actually contain English?
            print(_classify_translation(translated_text))
            print()
# Run the report only when this file is executed as a script, not on import.
if __name__ == "__main__":
    check_translation_cache()