Files
Document_Translator/check_translation_issues.py
2025-09-03 15:07:34 +08:00

180 lines
5.8 KiB
Python

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
檢查文件翻譯問題
"""
import sys
import os
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
import openpyxl
from docx import Document
import pymysql
from pathlib import Path
# Set the stdout encoding to UTF-8 (this script prints Chinese text).
sys.stdout.reconfigure(encoding='utf-8')
def check_excel_translation(
    file_path,
    original_name="original_panjit_f0b78200.xlsx",
    translated_name="original_panjit_f0b78200_ja_translated.xlsx",
    max_rows=10,
    max_cols=10,
):
    """Compare an original Excel workbook with its translated copy and print the differences.

    Only the active sheet of each workbook is inspected. Cell A1 is always
    printed; after that, every cell in the top-left ``max_rows`` x ``max_cols``
    window whose original value is truthy and differs from the translated
    value is printed.

    Args:
        file_path: Directory that contains both workbooks.
        original_name: File name of the source workbook inside ``file_path``.
        translated_name: File name of the translated workbook inside
            ``file_path``.
        max_rows: Number of leading rows to compare.
        max_cols: Number of leading columns to compare.

    Returns:
        None. All results are written to stdout. If either file is missing a
        message is printed and the function returns early.
    """
    from contextlib import closing

    print("\n" + "="*60)
    print("檢查 Excel 文件翻譯")
    print("="*60)

    # Source workbook and its translated counterpart.
    original_file = Path(file_path) / original_name
    translated_file = Path(file_path) / translated_name

    if not original_file.exists():
        print(f"原始文件不存在: {original_file}")
        return
    if not translated_file.exists():
        print(f"翻譯文件不存在: {translated_file}")
        return

    # closing() guarantees close() is called on both workbooks even when the
    # second load or the comparison below raises.
    with closing(openpyxl.load_workbook(original_file)) as wb_original, \
            closing(openpyxl.load_workbook(translated_file)) as wb_translated:
        ws_original = wb_original.active
        ws_translated = wb_translated.active

        print(f"\n原始文件: {original_file.name}")
        print(f"翻譯文件: {translated_file.name}")

        # Cell A1 is always shown, whether or not it differs.
        print(f"\nA1 儲存格:")
        print(f" 原始: '{ws_original['A1'].value}'")
        print(f" 翻譯: '{ws_translated['A1'].value}'")

        print(f"\n前{max_rows}行{max_cols}列的對比:")
        # The window is bounded by the size of the ORIGINAL sheet.
        last_row = min(max_rows, ws_original.max_row)
        last_col = min(max_cols, ws_original.max_column)
        for row in range(1, last_row + 1):
            for col in range(1, last_col + 1):
                cell_original = ws_original.cell(row=row, column=col)
                cell_translated = ws_translated.cell(row=row, column=col)
                # Empty (falsy) source cells are skipped; only cells whose
                # content changed are reported.
                if cell_original.value and cell_original.value != cell_translated.value:
                    print(f"\n [{openpyxl.utils.get_column_letter(col)}{row}]")
                    print(f" 原始: '{cell_original.value}'")
                    print(f" 翻譯: '{cell_translated.value}'")
def check_docx_translation(
    file_path,
    original_name="original_-OR026_49e95f53.docx",
    translated_name="translated_original_-OR026_49e95f53_en_translat.docx",
    target_strings=("超温", "存放", "工务部"),
):
    """Search an original DOCX and its translation for specific source-language strings.

    For the original document, every paragraph containing at least one target
    string is printed once, followed by the targets found in it. For the
    translated document, each (paragraph, target) hit is printed as text that
    is still untranslated.

    Only ``Document.paragraphs`` is iterated. NOTE(review): text inside tables
    is presumably not covered by that list, so hits inside table cells would
    not be reported -- confirm against python-docx.

    Args:
        file_path: Directory that contains both documents.
        original_name: File name of the source document inside ``file_path``.
        translated_name: File name of the translated document inside
            ``file_path``.
        target_strings: Source-language substrings to look for.

    Returns:
        None. All results are written to stdout. If either file is missing a
        message is printed and the function returns early.
    """
    print("\n" + "="*60)
    print("檢查 DOCX 文件翻譯")
    print("="*60)

    # Source document and its translated counterpart.
    original_file = Path(file_path) / original_name
    translated_file = Path(file_path) / translated_name

    if not original_file.exists():
        print(f"原始文件不存在: {original_file}")
        return
    if not translated_file.exists():
        print(f"翻譯文件不存在: {translated_file}")
        return

    doc_original = Document(original_file)
    doc_translated = Document(translated_file)

    print(f"\n原始文件: {original_file.name}")
    print(f"翻譯文件: {translated_file.name}")

    print("\n搜尋目標字串在原始文件中:")
    for para_idx, para in enumerate(doc_original.paragraphs):
        text = para.text
        found = [target for target in target_strings if target in text]
        if found:
            # Paragraph preview is printed once, then each matched target.
            print(f"\n段落 {para_idx}: {text[:100]}...")
            for target in found:
                print(f" 找到 '{target}'")

    print("\n搜尋目標字串在翻譯文件中:")
    for para_idx, para in enumerate(doc_translated.paragraphs):
        text = para.text
        for target in target_strings:
            if target in text:
                # A source-language string surviving in the translated file
                # means that piece of text was not translated.
                print(f"\n段落 {para_idx}: {text[:100]}...")
                print(f" 仍包含未翻譯的 '{target}'")
def check_translation_cache(job_uuid, target_strings):
    """Look up cached translations in MySQL whose source text contains the given strings.

    For every entry of ``target_strings`` the ``dt_translation_cache`` table is
    queried with ``source_text LIKE '%<target>%'`` and each matching row is
    printed (first 100 characters of source and translation, plus the language
    pair).

    Args:
        job_uuid: Identifier of the translation job. It is only printed for
            reference; the query does not filter by it.
        target_strings: Iterable of substrings to search for in
            ``source_text``.

    Returns:
        None. All results are written to stdout.

    Raises:
        Whatever ``pymysql`` raises on connection or query failure; the cursor
        and connection are closed before the exception propagates.
    """
    print("\n" + "="*60)
    print("檢查 MySQL 翻譯快取")
    print("="*60)

    # NOTE(security): the fallback values below are live credentials committed
    # to source control. They are kept only so existing usage keeps working;
    # set the DT_MYSQL_* environment variables instead, and rotate/remove the
    # embedded password.
    conn = pymysql.connect(
        host=os.environ.get('DT_MYSQL_HOST', 'mysql.theaken.com'),
        port=int(os.environ.get('DT_MYSQL_PORT', '33306')),
        user=os.environ.get('DT_MYSQL_USER', 'A060'),
        password=os.environ.get('DT_MYSQL_PASSWORD', 'WLeSCi0yhtc7'),
        database=os.environ.get('DT_MYSQL_DATABASE', 'db_A060'),
        charset='utf8mb4'
    )

    # Loop-invariant, parameterized query: the search pattern is passed as a
    # bound argument, never interpolated into the SQL text.
    sql = """
        SELECT source_text, translated_text, source_language, target_language
        FROM dt_translation_cache
        WHERE source_text LIKE %s
    """

    try:
        cursor = conn.cursor()
        try:
            print(f"\n任務UUID: {job_uuid}")
            print(f"搜尋字串: {target_strings}")

            for target in target_strings:
                cursor.execute(sql, (f'%{target}%',))
                results = cursor.fetchall()

                if results:
                    print(f"\n找到包含 '{target}' 的翻譯記錄:")
                    for source, translated, src_lang, tgt_lang in results:
                        # A NULL column arrives as None; fall back to '' so
                        # the preview slice cannot raise TypeError.
                        print(f" 原文: {(source or '')[:100]}...")
                        print(f" 譯文: {(translated or '')[:100]}...")
                        print(f" 語言: {src_lang} -> {tgt_lang}")
                else:
                    print(f"\n未找到包含 '{target}' 的翻譯記錄")
        finally:
            cursor.close()
    finally:
        # Always release the connection, including when a query fails.
        conn.close()
def main():
    """Run the Excel check, the DOCX check, and then the cache lookups for both jobs."""
    separator = "\n" + "=" * 60

    # Upload folders of the two jobs under inspection (DOCX job, Excel job).
    docx_dir = r"C:\Users\EGG\WORK\data\user_scrip\TOOL\Document_translator_V2\uploads\49e95f53-5092-47c0-8275-e19c8c99e5ac"
    excel_dir = r"C:\Users\EGG\WORK\data\user_scrip\TOOL\Document_translator_V2\uploads\f0b78200-2c5e-41a4-bac8-1536f92529e9"

    # File-level comparisons: Excel first, then DOCX.
    check_excel_translation(excel_dir)
    check_docx_translation(docx_dir)

    # Cache lookups as (heading, job UUID, search strings), DOCX job first.
    cache_lookups = (
        ("查詢 DOCX 翻譯快取", "49e95f53-5092-47c0-8275-e19c8c99e5ac", ["超温", "存放", "工务部"]),
        ("查詢 Excel 翻譯快取", "f0b78200-2c5e-41a4-bac8-1536f92529e9", ["产品型号"]),
    )
    for heading, job_uuid, needles in cache_lookups:
        print(separator)
        print(heading)
        check_translation_cache(job_uuid, needles)


# Script entry point: run the checks only when executed directly.
if __name__ == "__main__":
    main()