204 lines
8.0 KiB
Python
204 lines
8.0 KiB
Python
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Check table-translation problems in a docx document.

Specifically analyses how the text "超温" was translated.
"""

import sys
import os
# Put this script's own directory first on sys.path; presumably so that the
# `app` package imported below is resolved next to this script -- TODO confirm.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

# Set the output encoding: the script prints CJK text and emoji to stdout.
sys.stdout.reconfigure(encoding='utf-8')

from pathlib import Path
import docx
from docx.table import Table
from app import create_app
|
|
|
|
def analyze_docx_table_translation(base_dir=None):
    """Print a diagnostic report on how "超温" fared in the docx table translation.

    The tables of the original document are dumped cell by cell and every
    cell containing "超温" is flagged.  The English and Vietnamese translated
    documents are then scanned for cells that still contain "超温" or that
    contain one of the expected translations.  All results are printed to
    stdout.

    Args:
        base_dir: Directory holding the original and translated documents
            (``str`` or ``Path``).  Defaults to the hard-coded upload
            directory of the task being diagnosed.

    Returns:
        None.  Returns early (after printing a message) when the original
        file does not exist or cannot be read.
    """
    print("=" * 80)
    print("檢查docx表格翻譯問題")
    print("任務ID: 17e05695-406f-47af-96eb-a0e23843770e")
    print("=" * 80)

    if base_dir is None:
        base_dir = Path(r"C:\Users\EGG\WORK\data\user_scrip\TOOL\Document_translator_V2\uploads\17e05695-406f-47af-96eb-a0e23843770e")
    else:
        base_dir = Path(base_dir)

    original_file = base_dir / "original_-OR026_17e05695.docx"
    translated_en = base_dir / "translated_original_-OR026_17e05695_en_translat.docx"
    translated_vi = base_dir / "translated_original_-OR026_17e05695_vi_translat.docx"

    if not original_file.exists():
        print(f"❌ 原始檔案不存在: {original_file}")
        return

    print(f"✅ 原始檔案: {original_file.name}")

    # 1. Look for "超温" in the original document.
    print("\n1. 分析原始文件表格內容")
    print("-" * 60)

    try:
        doc = docx.Document(str(original_file))
        tables_found, target_text_found = _scan_original_tables(doc)
        print(f"\n總表格數: {tables_found}")
        print(f"是否找到'超温': {'✅' if target_text_found else '❌'}")
    except Exception as e:
        # Diagnostic script: report the failure and stop instead of crashing.
        print(f"❌ 讀取原始文件失敗: {e}")
        return

    # 2. Inspect the corresponding content of each translated version.
    for lang, trans_file in [("英文", translated_en), ("越南文", translated_vi)]:
        if not trans_file.exists():
            print(f"\n❌ {lang}翻譯檔案不存在")
            continue

        print(f"\n2. 檢查{lang}翻譯結果")
        print("-" * 60)

        try:
            trans_doc = docx.Document(str(trans_file))
            translation_found = _scan_translated_tables(lang, trans_doc)
            print(f"{lang}翻譯狀態: {'✅ 找到翻譯' if translation_found else '❌ 未找到翻譯'}")
        except Exception as e:
            # A broken translated file must not prevent checking the next one.
            print(f"❌ 讀取{lang}翻譯檔案失敗: {e}")


# Opening part of the translation-failure marker.  The check used to look for
# the literal "【翻譯失敗」", whose closing bracket (」) does not pair with the
# opening one (【), so a well-formed "【...】" marker could never match.
# Matching on the opening prefix works however the marker is closed.
# NOTE(review): confirm the exact marker text emitted by the translator.
_TRANSLATION_FAILED_PREFIX = "【翻譯失敗"

# Lower-case substrings treated as a plausible translation of "超温".
_EXPECTED_TRANSLATIONS = ('overheating', 'over-heating', 'quá nóng')


def _scan_original_tables(doc):
    """Dump every non-empty table cell of *doc*; return (table count, whether "超温" was seen)."""
    tables_found = 0
    target_text_found = False

    for table_idx, table in enumerate(doc.tables):
        tables_found += 1
        print(f"表格 {table_idx + 1}:")

        for row_idx, row in enumerate(table.rows):
            for cell_idx, cell in enumerate(row.cells):
                cell_text = cell.text.strip()
                if not cell_text:
                    continue

                print(f" 行{row_idx+1} 列{cell_idx+1}: {repr(cell_text)}")
                if "超温" not in cell_text:
                    continue

                print(" 🎯 找到目標文字 '超温'")
                target_text_found = True

                # Show the paragraph structure of the matching cell.
                print(f" 儲存格段落數: {len(cell.paragraphs)}")
                for p_idx, para in enumerate(cell.paragraphs):
                    print(f" 段落{p_idx+1}: {repr(para.text)}")

    return tables_found, target_text_found


def _scan_translated_tables(lang, trans_doc):
    """Report cells of *trans_doc* that keep "超温" or hold an expected translation; return whether a translation was seen."""
    translation_found = False

    for table_idx, table in enumerate(trans_doc.tables):
        print(f"{lang}表格 {table_idx + 1}:")

        for row_idx, row in enumerate(table.rows):
            for cell_idx, cell in enumerate(row.cells):
                cell_text = cell.text.strip()
                if not cell_text:
                    continue

                if "超温" in cell_text:
                    # The source text is still present in the translated file.
                    print(f" 行{row_idx+1} 列{cell_idx+1}: {repr(cell_text)}")
                    print(" ⚠️ 仍包含原文'超温',可能未翻譯")

                    # Analyse the cell paragraph by paragraph.
                    print(f" 儲存格段落數: {len(cell.paragraphs)}")
                    for p_idx, para in enumerate(cell.paragraphs):
                        p_text = para.text.strip()
                        print(f" 段落{p_idx+1}: {repr(p_text)}")

                        # Look for markers left behind by the translator.
                        if _TRANSLATION_FAILED_PREFIX in p_text or "translation:" in p_text.lower():
                            print(" 🔍 發現翻譯標記")
                        elif "\u200b" in p_text:  # zero-width-space marker
                            print(" 🔍 發現翻譯插入標記")
                    continue

                # No source text left: check for a plausible translation.
                lowered = cell_text.lower()
                if any(keyword in lowered for keyword in _EXPECTED_TRANSLATIONS):
                    print(f" 行{row_idx+1} 列{cell_idx+1}: {repr(cell_text)}")
                    print(" ✅ 可能的翻譯結果")
                    translation_found = True

    return translation_found
|
|
|
|
def check_translation_cache():
    """Print translation-cache records related to "超温" and recent en/vi entries.

    Creates the application via ``create_app()``, and inside its application
    context runs two queries against ``dt_translation_cache``:

    1. the 10 newest rows whose ``source_text`` contains "超温";
    2. the 20 newest rows created within the last day whose target language
       is ``en`` or ``vi``.

    Results are printed to stdout; nothing is returned.  Exceptions from
    app creation or the queries propagate to the caller.
    """
    print("\n" + "=" * 80)
    print("檢查翻譯快取")
    print("=" * 80)

    def preview(value, limit):
        """Return repr() of the first *limit* characters; a NULL column is shown as ''."""
        return repr((value or "")[:limit])

    app = create_app()

    with app.app_context():
        from sqlalchemy import text as sql_text
        from app import db

        print("\n1. 搜尋'超温'相關的快取記錄")
        print("-" * 60)

        # Search the cache for records whose source text contains "超温".
        cache_results = db.session.execute(sql_text("""
            SELECT id, source_text, target_language, translated_text, created_at
            FROM dt_translation_cache
            WHERE source_text LIKE '%超温%'
            ORDER BY created_at DESC
            LIMIT 10
        """)).fetchall()

        if cache_results:
            print(f"找到 {len(cache_results)} 條相關記錄:")
            for row in cache_results:
                # Unpack in SELECT order so every label prints its own column.
                # The positional indexes used before were off: the
                # target-language line showed source_text and the
                # translation line showed target_language.
                record_id, source_text, target_language, translated_text, created_at = row
                print(f"ROW {record_id}: {source_text!r} -> {target_language}")
                print(f" 目標語言: {target_language}")
                print(f" 翻譯結果: {preview(translated_text, 50)}...")
                print(f" 時間: {created_at}")
                print()
        else:
            print("❌ 未找到包含'超温'的快取記錄")

        # Look at the most recent translation records.
        print("\n2. 檢查近期的翻譯記錄")
        print("-" * 60)

        recent_results = db.session.execute(sql_text("""
            SELECT id, source_text, target_language, translated_text, created_at
            FROM dt_translation_cache
            WHERE created_at >= DATE_SUB(NOW(), INTERVAL 1 DAY)
            AND (target_language = 'en' OR target_language = 'vi')
            ORDER BY created_at DESC
            LIMIT 20
        """)).fetchall()

        print(f"近24小時內的英文/越南文翻譯記錄 (共{len(recent_results)}條):")
        for row in recent_results:
            record_id, source_text, target_language, translated_text, _created_at = row
            print(f"ROW {record_id}: {preview(source_text, 20)}... -> {target_language} -> {preview(translated_text, 30)}...")
|
|
|
|
def main():
    """Run the document analysis and the cache check, then print the summary.

    Any exception raised by either step is caught here and reported on
    stdout together with its traceback, so the script never exits with an
    unhandled error.
    """
    print("🔍 診斷docx表格翻譯問題")
    print("重點檢查: '超温' 文字翻譯狀況")

    separator = "=" * 80
    try:
        # Step 1: analyse the tables of the documents.
        analyze_docx_table_translation()

        # Step 2: inspect the translation cache.
        check_translation_cache()

        # Closing checklist that maps the findings to a problem category.
        summary_lines = (
            "\n" + separator,
            "診斷總結",
            separator,
            "請根據以上結果判斷問題類型:",
            "1. 解析問題: 原始文件中找不到'超温'",
            "2. 翻譯問題: 快取中沒有'超温'的翻譯記錄",
            "3. 插入問題: 有翻譯記錄但未插入到文件中",
            "4. 版面問題: 翻譯已插入但格式或位置導致看不到",
            separator,
        )
        for summary_line in summary_lines:
            print(summary_line)

    except Exception as e:
        print(f"❌ 診斷過程發生錯誤: {e}")
        import traceback
        details = traceback.format_exc()
        print(f"錯誤詳情: {details}")
|
|
|
|
# Run the diagnostics only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()