#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Debug the translation success-rate problem: why the whole-paragraph cache
entry is not being stored.
"""
|
||
|
||
import sys
import os

# Put this script's own directory at the front of the import path.
# NOTE(review): presumably so the ``app`` package imported below is found
# next to this file -- confirm against the project layout.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

# Set the output encoding: the report prints CJK text and emoji.
# ``reconfigure`` is only provided by ``io.TextIOWrapper``; when stdout has
# been replaced by another file-like object, skip the call instead of
# failing with AttributeError before the script has done anything.
if hasattr(sys.stdout, "reconfigure"):
    sys.stdout.reconfigure(encoding='utf-8')

from app import create_app
|
||
|
||
def debug_translation_success():
    """Report which translation-cache entries exist for two sample paragraphs.

    Inside an application context created by ``create_app()``, each
    hard-coded multi-line sample text is checked against the
    ``dt_translation_cache`` table for target language ``'ja'``:

    1. every non-empty, stripped line of the paragraph;
    2. for a line without a cache row, each sentence produced by
       ``DocumentProcessor.split_text_into_sentences(line, 'zh')`` when the
       split yields more than one sentence;
    3. the whole, unsplit paragraph.

    Findings are printed to stdout; nothing is written to the database.

    Returns:
        None.
    """
    print("=" * 80)
    print("調試翻譯成功率問題 - 為什麼整段落快取沒有儲存")
    print("=" * 80)

    application = create_app()

    with application.app_context():
        from sqlalchemy import text as sql_text
        from app import db

        # One statement shared by the line-, sentence- and paragraph-level
        # lookups: the newest cache row for a (source_text, language) pair.
        lookup_stmt = sql_text("""
            SELECT translated_text, created_at
            FROM dt_translation_cache
            WHERE source_text = :text AND target_language = :lang
            ORDER BY created_at DESC
            LIMIT 1
        """)

        # Multi-line texts known to be problematic.
        test_texts = [
            "與 WB inline 串線(DB→WB)、時效快;支援 Sn/Au 晶片\n最小可支援9mil晶粒\n支援EAP管控",
            "空洞表現穩定、尺寸/厚度範圍廣\n最小可支援9mil晶粒\n支援EAP管控",
        ]

        target_language = 'ja'

        print("檢查多行文字的句子級快取狀況...")
        print("-" * 60)

        # Built on first use and then reused, rather than re-imported and
        # re-instantiated for every uncached line.
        processor = None

        for i, paragraph in enumerate(test_texts, 1):
            print(f"\n測試文字 {i}: {paragraph[:50]!r}...")

            lines = paragraph.split('\n')
            print(f" 分解為 {len(lines)} 行:")

            all_lines_cached = True

            # ``j`` keeps the position in the original paragraph, so blank
            # lines are skipped without renumbering the ones that follow.
            for j, raw_line in enumerate(lines, 1):
                line = raw_line.strip()
                if not line:
                    continue

                print(f"\n 行 {j}: {line!r}")

                # Does this exact line have a cache row?
                row = _latest_cache_row(
                    db.session, lookup_stmt, line, target_language
                )
                if row is not None:
                    # Guard against a NULL translated_text, which would make
                    # the slice below raise TypeError.
                    translated = row[0] or ""
                    print(f" ✅ 句子快取存在: '{translated[:30]}...' ({row[1]})")
                    continue

                print(" ❌ 句子快取不存在")
                all_lines_cached = False

                # Further check: sentence-level entries for the uncached line.
                # NOTE(review): the flattened source did not preserve nesting;
                # this check is assumed to belong to the "not cached" branch
                # -- confirm against the original file.
                if processor is None:
                    from app.services.document_processor import DocumentProcessor
                    processor = DocumentProcessor()

                sentences = processor.split_text_into_sentences(line, 'zh')
                if len(sentences) <= 1:
                    continue

                print(f" 📝 分句結果: {len(sentences)} 個句子")

                for k, raw_sentence in enumerate(sentences, 1):
                    sentence = raw_sentence.strip()
                    if not sentence:
                        continue

                    sentence_row = _latest_cache_row(
                        db.session, lookup_stmt, sentence, target_language
                    )
                    if sentence_row is not None:
                        print(f" ✅ 句子{k}: '{sentence[:20]}...' -> 有快取")
                    else:
                        print(f" ❌ 句子{k}: '{sentence[:20]}...' -> 無快取")
                        all_lines_cached = False

            print(f"\n 整體快取狀況: {'✅ 完整' if all_lines_cached else '❌ 不完整'}")

            # Check the cache entry for the whole, unsplit paragraph.
            whole_row = _latest_cache_row(
                db.session, lookup_stmt, paragraph, target_language
            )
            if whole_row is not None:
                print(f" ✅ 整段落快取存在: 時間 {whole_row[1]}")
            else:
                print(" ❌ 整段落快取不存在")

                # Possible cause, based on the per-line findings above.
                # NOTE(review): assumed to apply only when the paragraph
                # entry is missing -- confirm against the original file.
                if not all_lines_cached:
                    print(" 原因: 某些句子翻譯失敗,all_successful=False")
                else:
                    print(" 原因: 可能是其他錯誤或邏輯問題")

    print("\n" + "=" * 80)
    print("翻譯成功率調試完成!")
    print("建議: 檢查 translate_segment_with_sentences 中的錯誤處理邏輯")
    print("=" * 80)


def _latest_cache_row(session, statement, source_text, target_language):
    """Run the cache lookup for one source text and return its first row.

    Args:
        session: Object whose ``execute(statement, params)`` runs the query.
        statement: Prepared lookup statement with ``:text`` and ``:lang``
            bind parameters.
        source_text: Exact source string to match.
        target_language: Target language code to match.

    Returns:
        The row returned by ``fetchone()``, or ``None`` when nothing matches.
    """
    result = session.execute(
        statement, {'text': source_text, 'lang': target_language}
    )
    return result.fetchone()
|
||
|
||
# Run the diagnostic only when this file is executed as a script.
if __name__ == "__main__":
    debug_translation_success()