Files
Document_Translator/debug_translation_success.py
2025-09-03 15:07:34 +08:00

128 lines
4.9 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Debug the translation success-rate problem: why whole-paragraph cache entries are not being stored.
"""
import sys
import os
# Put this script's own directory at the front of sys.path; this must run
# before the `app` import below (presumably so the `app` package located
# next to this script is the one that gets imported -- confirm layout).
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
# Set the output encoding: the script prints CJK text and emoji, so stdout is
# switched to UTF-8 regardless of the console's default encoding.
sys.stdout.reconfigure(encoding='utf-8')
from app import create_app
# Newest cache entry for one exact (source_text, target_language) pair.
_LATEST_CACHE_SQL = """
    SELECT translated_text, created_at
    FROM dt_translation_cache
    WHERE source_text = :text AND target_language = :lang
    ORDER BY created_at DESC
    LIMIT 1
"""


def _fetch_latest_cache_row(session, statement, source_text, target_language):
    """Return the newest cache row for *source_text*, or None if there is none.

    Args:
        session: Database session exposing ``execute(statement, params)``.
        statement: Executable statement built from ``_LATEST_CACHE_SQL``.
        source_text: Exact source string to look up (bound to ``:text``).
        target_language: Target language code (bound to ``:lang``).

    Returns:
        The first result row ``(translated_text, created_at)`` or ``None``.
    """
    params = {'text': source_text, 'lang': target_language}
    return session.execute(statement, params).fetchone()


def _report_sentence_caches(session, statement, sentences, target_language):
    """Print, for each non-blank sentence, whether a cache entry exists."""
    for k, sentence in enumerate(sentences, 1):
        sentence = sentence.strip()
        if not sentence:
            continue

        cached = _fetch_latest_cache_row(
            session, statement, sentence, target_language
        )
        if cached is not None:
            print(f" ✅ 句子{k}: '{sentence[:20]}...' -> 有快取")
        else:
            print(f" ❌ 句子{k}: '{sentence[:20]}...' -> 無快取")


def debug_translation_success():
    """Report which cache entries exist for two known multi-line test texts.

    For every test text the function:

    1. splits it on newlines and looks up each non-blank line in
       ``dt_translation_cache`` for target language ``'ja'``;
    2. for a line without a cache entry, splits the line into sentences with
       ``DocumentProcessor.split_text_into_sentences(line, 'zh')`` and, when
       that yields more than one sentence, looks up each sentence;
    3. looks up the whole, unsplit text and, when it is missing, prints a
       probable cause based on whether every line was cached.

    Everything is written to stdout; nothing is returned and the database is
    only read.

    NOTE(review): the source this was restored from had lost its indentation.
    The nesting of step 2 under "line not cached" and of the cause analysis
    under "whole-paragraph cache missing" is reconstructed -- confirm.
    """
    print("=" * 80)
    print("調試翻譯成功率問題 - 為什麼整段落快取沒有儲存")
    print("=" * 80)

    app = create_app()

    with app.app_context():
        # Imported here, as in the original, so they are only loaded once the
        # application has been created.
        from sqlalchemy import text as sql_text
        from app import db

        # Multi-line texts known to exhibit the problem.
        test_texts = [
            "與 WB inline 串線DB→WB、時效快支援 Sn/Au 晶片\n最小可支援9mil晶粒\n支援EAP管控",
            "空洞表現穩定、尺寸/厚度範圍廣\n最小可支援9mil晶粒\n支援EAP管控"
        ]
        target_language = 'ja'

        # Build the statement once; it is reused for every lookup below.
        statement = sql_text(_LATEST_CACHE_SQL)
        # Created on first use so a fully cached run never imports it, and an
        # uncached run constructs it once instead of once per line.
        processor = None

        print("檢查多行文字的句子級快取狀況...")
        print("-" * 60)

        for i, text in enumerate(test_texts, 1):
            print(f"\n測試文字 {i}: {repr(text[:50])}...")

            lines = text.split('\n')
            print(f" 分解為 {len(lines)} 行:")

            all_lines_cached = True

            for j, line in enumerate(lines, 1):
                line = line.strip()
                if not line:
                    continue

                print(f"\n{j}: {repr(line)}")

                # Is there a cache entry for this exact line?
                row = _fetch_latest_cache_row(
                    db.session, statement, line, target_language
                )
                if row is not None:
                    # Guard against a NULL translated_text so the preview
                    # slice cannot raise TypeError.
                    preview = (row[0] or '')[:30]
                    print(f" ✅ 句子快取存在: '{preview}...' ({row[1]})")
                    continue

                print(" ❌ 句子快取不存在")
                all_lines_cached = False

                # Further check: see whether the line's individual sentences
                # were cached instead.
                if processor is None:
                    from app.services.document_processor import DocumentProcessor
                    processor = DocumentProcessor()
                sentences = processor.split_text_into_sentences(line, 'zh')

                if len(sentences) > 1:
                    print(f" 📝 分句結果: {len(sentences)} 個句子")
                    _report_sentence_caches(
                        db.session, statement, sentences, target_language
                    )

            overall = '✅ 完整' if all_lines_cached else '❌ 不完整'
            print(f"\n 整體快取狀況: {overall}")

            # Is there a cache entry for the whole, unsplit paragraph?
            whole_row = _fetch_latest_cache_row(
                db.session, statement, text, target_language
            )
            if whole_row is not None:
                print(f" ✅ 整段落快取存在: 時間 {whole_row[1]}")
            else:
                print(" ❌ 整段落快取不存在")

                # Probable cause, based on the per-line results above.
                if not all_lines_cached:
                    print(" 原因: 某些句子翻譯失敗all_successful=False")
                else:
                    print(" 原因: 可能是其他錯誤或邏輯問題")

    print("\n" + "=" * 80)
    print("翻譯成功率調試完成!")
    print("建議: 檢查 translate_segment_with_sentences 中的錯誤處理邏輯")
    print("=" * 80)
# Run the diagnostic only when this file is executed directly as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    debug_translation_success()