263 lines
9.8 KiB
Python
263 lines
9.8 KiB
Python
#!/usr/bin/env python3
|
|
# -*- coding: utf-8 -*-
|
|
"""
|
|
深度診斷"超温"翻譯問題
|
|
檢查從提取到插入的完整流程
|
|
"""
|
|
|
|
import sys
|
|
import os
|
|
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
|
|
|
# 設定編碼
|
|
sys.stdout.reconfigure(encoding='utf-8')
|
|
|
|
from pathlib import Path
|
|
import docx
|
|
from docx.table import Table
|
|
from app import create_app
|
|
from app.services.document_processor import should_translate
|
|
from app.services.translation_service import TranslationService
|
|
|
|
def debug_chaoweng_extraction():
    """Diagnostic step 1: check whether "超温" is found during extraction.

    Creates the application with ``create_app()`` and, inside its application
    context, extracts every segment of the hard-coded original .docx via
    ``DocumentProcessor.extract_docx_segments``. For each segment whose text
    contains "超温" it prints the kind, context, text, stripped length, the
    ``should_translate(text, 'zh')`` verdict and whether it is a table cell.

    Returns:
        A list of ``(index, segment)`` tuples for the matching segments, or
        ``None`` when the original file does not exist.
    """
    rule = "=" * 80
    print(rule)
    print("診斷步驟1: 檢查文件提取階段")
    print(rule)

    with create_app().app_context():
        from app.services.document_processor import DocumentProcessor

        # Hard-coded location of the uploaded document under investigation.
        upload_dir = Path(r"C:\Users\EGG\WORK\data\user_scrip\TOOL\Document_translator_V2\uploads\17e05695-406f-47af-96eb-a0e23843770e")
        source_docx = upload_dir / "original_-OR026_17e05695.docx"

        if not source_docx.exists():
            print("❌ 原始檔案不存在")
            return

        all_segments = DocumentProcessor().extract_docx_segments(str(source_docx))
        print(f"提取到 {len(all_segments)} 個segments")

        # Collect (index, segment) pairs whose text mentions the target term.
        matches = []
        for position, segment in enumerate(all_segments):
            if "超温" not in segment.text:
                continue

            matches.append((position, segment))
            print(f"\nSegment {position}:")
            print(f" 種類: {segment.kind}")
            print(f" 上下文: {segment.ctx}")
            print(f" 內容: {segment.text!r}")
            print(f" 長度: {len(segment.text.strip())}")

            # Ask the processor's filter whether this text would be translated.
            verdict = should_translate(segment.text, 'zh')
            print(f" should_translate: {verdict}")

            kind_note = (
                " 🎯 這是表格儲存格segment"
                if segment.kind == "table_cell"
                else " ⚠️ 不是表格儲存格類型"
            )
            print(kind_note)

        if matches:
            print(f"✅ 找到 {len(matches)} 個包含'超温'的segments")
        else:
            print("❌ 沒有找到包含'超温'的segments")

        return matches
|
|
|
|
def debug_chaoweng_translation(chaoweng_segments):
    """Diagnostic step 2: try translating each segment that contains "超温".

    Args:
        chaoweng_segments: Iterable of ``(index, segment)`` pairs as returned
            by ``debug_chaoweng_extraction``. A falsy value (``None`` or an
            empty list) makes the function print a notice and return early.

    For every pair the segment is translated zh -> en inside an application
    context: table cells go through ``translate_word_table_cell`` and all
    other kinds through ``translate_segment_with_sentences``. The outcome is
    printed; any exception raised for one segment is reported and the loop
    continues with the next one.
    """
    print("\n" + "=" * 80)
    print("診斷步驟2: 檢查翻譯階段")
    print("=" * 80)

    if not chaoweng_segments:
        print("❌ 沒有segments可以測試翻譯")
        return

    with create_app().app_context():
        translator = TranslationService()

        for position, segment in chaoweng_segments:
            print(f"\n測試 Segment {position} 的翻譯:")
            print(f"原文: {segment.text!r}")

            try:
                # Pick the translation entry point that matches the segment kind.
                is_cell = segment.kind == "table_cell"
                if is_cell:
                    print("使用 translate_word_table_cell() 方法")
                    translate = translator.translate_word_table_cell
                else:
                    print("使用 translate_segment_with_sentences() 方法")
                    translate = translator.translate_segment_with_sentences

                translated = translate(
                    text=segment.text,
                    source_language="zh",
                    target_language="en",
                    user_id=None,
                )

                print(f"翻譯結果: {translated[:100]!r}...")

                # Classify the result: failure marker, unchanged text, or success.
                if "【翻譯失敗" in translated:
                    outcome = "❌ 翻譯失敗"
                elif translated == segment.text:
                    outcome = "❌ 翻譯結果與原文相同,可能未翻譯"
                else:
                    outcome = "✅ 翻譯成功"
                print(outcome)

            except Exception as e:
                print(f"❌ 翻譯過程發生錯誤: {e}")
|
|
|
|
def debug_chaoweng_cache():
    """Diagnostic step 3: inspect the translation cache for "超温" entries.

    Inside an application context, runs two SELECT queries against the
    ``dt_translation_cache`` table and prints the rows found:

    1. rows whose ``source_text`` is exactly "超温";
    2. up to 10 rows whose ``source_text`` contains "超温" and is at most
       10 characters long, newest first.

    Returns:
        None. Results are printed to stdout.
    """
    print("\n" + "=" * 80)
    print("診斷步驟3: 檢查翻譯快取")
    print("=" * 80)

    app = create_app()

    with app.app_context():
        from sqlalchemy import text as sql_text
        from app import db

        # 1. Exact match on the source text.
        print("1. 搜尋精確的'超温'記錄:")
        exact_results = db.session.execute(sql_text("""
            SELECT id, source_text, target_language, translated_text, created_at
            FROM dt_translation_cache
            WHERE source_text = '超温'
            ORDER BY created_at DESC
        """)).fetchall()

        if exact_results:
            for row in exact_results:
                print(f" ROW {row[0]}: '{row[1]}' -> {row[2]} -> '{row[3]}'")
        else:
            print(" ❌ 沒有找到精確的'超温'記錄")

        # 2. Short records that contain the term, possibly with extra characters.
        print("\n2. 搜尋包含'超温'的記錄:")
        like_results = db.session.execute(sql_text("""
            SELECT id, source_text, target_language, translated_text, created_at
            FROM dt_translation_cache
            WHERE source_text LIKE '%超温%'
            AND CHAR_LENGTH(source_text) <= 10
            ORDER BY created_at DESC
            LIMIT 10
        """)).fetchall()

        if like_results:
            for row in like_results:
                # A NULL translated_text arrives as None; slicing it would raise
                # TypeError and abort the remaining diagnostics, so fall back to
                # an empty preview. (Column nullability is not visible here.)
                preview = (row[3] or "")[:30]
                print(f" ROW {row[0]}: '{row[1]}' -> {row[2]} -> '{preview}...'")
        else:
            print(" ❌ 沒有找到包含'超温'的短記錄")
|
|
|
|
def debug_chaoweng_insertion():
    """Diagnostic step 4: look for "超温" and its translation in the outputs.

    For each hard-coded translated .docx (English and Vietnamese) that exists,
    walks every table cell. Cells whose stripped text contains "超温" are
    printed together with their non-empty paragraphs, and a paragraph that
    contains one of a few language-specific keywords is flagged as a possible
    translation. A per-file summary is printed at the end; a missing file or a
    read error is reported and the next file is processed.
    """
    print("\n" + "=" * 80)
    print("診斷步驟4: 檢查已翻譯文件的插入狀況")
    print("=" * 80)

    # Translated output files to inspect.
    upload_dir = Path(r"C:\Users\EGG\WORK\data\user_scrip\TOOL\Document_translator_V2\uploads\17e05695-406f-47af-96eb-a0e23843770e")
    targets = [
        ("英文", upload_dir / "translated_original_-OR026_17e05695_en_translat.docx"),
        ("越南文", upload_dir / "translated_original_-OR026_17e05695_vi_translat.docx"),
    ]

    # Substrings treated as evidence of a translation, plus the note to print.
    hints = {
        "英文": (('over', 'heat', 'temp', 'hot'), " 🎯 可能的英文翻譯"),
        "越南文": (('quá', 'nóng', 'nhiệt'), " 🎯 可能的越南文翻譯"),
    }
    status = {True: "✅ 找到", False: "❌ 未找到"}

    for language, docx_path in targets:
        if not docx_path.exists():
            print(f"❌ {language}翻譯檔案不存在")
            continue

        print(f"\n檢查{language}翻譯檔案:")
        try:
            document = docx.Document(str(docx_path))

            saw_source = False
            saw_translation = False
            keywords, note = hints.get(language, ((), ""))

            for table_no, table in enumerate(document.tables, start=1):
                for row_no, row in enumerate(table.rows, start=1):
                    for col_no, cell in enumerate(row.cells, start=1):
                        content = cell.text.strip()
                        if "超温" not in content:
                            continue

                        saw_source = True
                        print(f" 🔍 表格{table_no} 行{row_no} 列{col_no}:")
                        print(f" 內容: {content[:100]!r}")

                        # Show the paragraph structure of the matching cell.
                        paragraphs = cell.paragraphs
                        print(f" 段落數: {len(paragraphs)}")
                        for para_no, paragraph in enumerate(paragraphs, start=1):
                            stripped = paragraph.text.strip()
                            if not stripped:
                                continue
                            print(f" 段落{para_no}: {stripped!r}")

                            # Keyword match on the lower-cased paragraph text.
                            lowered = stripped.lower()
                            if any(keyword in lowered for keyword in keywords):
                                saw_translation = True
                                print(note)

            print(f" 原文'超温': {status[saw_source]}")
            print(f" {language}翻譯: {status[saw_translation]}")

        except Exception as e:
            print(f"❌ 讀取{language}翻譯檔案失敗: {e}")
|
|
|
|
def main():
    """Run the four diagnostic steps in order and print a closing summary.

    The segments returned by the extraction step are passed to the
    translation step; the cache and insertion steps take no input. Any
    exception raised by a step is caught here, and its message and traceback
    are printed instead of propagating.
    """
    print("🔍 深度診斷'超温'翻譯問題")
    print("檢查完整的提取->翻譯->插入流程")

    try:
        # Step 1 -> 2: extraction feeds the translation check.
        matching_segments = debug_chaoweng_extraction()
        debug_chaoweng_translation(matching_segments)

        # Steps 3 and 4: cache lookup, then inspection of the output files.
        debug_chaoweng_cache()
        debug_chaoweng_insertion()

        rule = "=" * 80
        closing_lines = (
            "\n" + rule,
            "診斷完成!",
            "可能的問題:",
            "1. 提取階段: segments沒有正確提取'超温'",
            "2. 翻譯階段: 翻譯邏輯沒有處理該segment",
            "3. 快取階段: 翻譯沒有正確存儲",
            "4. 插入階段: 翻譯沒有正確插入到文件",
            rule,
        )
        for line in closing_lines:
            print(line)

    except Exception as e:
        print(f"❌ 診斷過程發生錯誤: {e}")
        import traceback
        print(f"錯誤詳情: {traceback.format_exc()}")
|
|
|
|
# Run the diagnostics only when this file is executed as a script.
if __name__ == "__main__":
    main()