#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Check translated documents for translation problems.

The script compares an original/translated Excel pair cell by cell, searches
an original/translated DOCX pair for specific source-language strings, and
looks the same strings up in the MySQL table ``dt_translation_cache``.
All results are printed to stdout.
"""

import os
import sys
from pathlib import Path

# Make modules that sit next to this script importable before anything else.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

import openpyxl
import pymysql
from docx import Document

# Force UTF-8 output so the CJK messages print correctly.  Guarded because a
# replaced stdout object may not provide ``reconfigure``.
_reconfigure_stdout = getattr(sys.stdout, "reconfigure", None)
if _reconfigure_stdout is not None:
    _reconfigure_stdout(encoding='utf-8')

# Number of leading rows / columns compared in the Excel check.
_EXCEL_PREVIEW_LIMIT = 10

# Maximum number of characters of a text shown in the report.
_TEXT_PREVIEW_LENGTH = 100

_CACHE_LOOKUP_SQL = """
            SELECT source_text, translated_text, source_language, target_language
            FROM dt_translation_cache
            WHERE source_text LIKE %s
        """


def _print_banner(title):
    """Print a section title framed by two 60-character rules."""
    print("\n" + "="*60)
    print(title)
    print("="*60)


def _both_files_exist(original_file, translated_file):
    """Return True if both files exist; otherwise report the first missing one."""
    if not original_file.exists():
        print(f"原始文件不存在: {original_file}")
        return False
    if not translated_file.exists():
        print(f"翻譯文件不存在: {translated_file}")
        return False
    return True


def _preview(text, limit=_TEXT_PREVIEW_LENGTH):
    """Return the first *limit* characters of *text*, or '' when it is None."""
    if text is None:
        # A NULL database column must not crash the report.
        return ""
    return text[:limit]


def _db_settings():
    """Build the keyword arguments for ``pymysql.connect``.

    Each value can be overridden through a ``TRANSLATION_DB_*`` environment
    variable; the literals below are the script's original settings and are
    used as fallbacks so existing invocations keep working.
    """
    env = os.environ
    return {
        'host': env.get('TRANSLATION_DB_HOST', 'mysql.theaken.com'),
        'port': int(env.get('TRANSLATION_DB_PORT', '33306')),
        'user': env.get('TRANSLATION_DB_USER', 'A060'),
        # NOTE(security): a password committed to source control should be
        # rotated and this fallback removed; supply TRANSLATION_DB_PASSWORD.
        'password': env.get('TRANSLATION_DB_PASSWORD', 'WLeSCi0yhtc7'),
        'database': env.get('TRANSLATION_DB_NAME', 'db_A060'),
        'charset': 'utf8mb4',
    }


def _report_excel_differences(ws_original, ws_translated):
    """Print cell A1 of both sheets and every differing cell in the top-left area."""
    # Cell A1
    print("\nA1 儲存格:")
    print(f"  原始: '{ws_original['A1'].value}'")
    print(f"  翻譯: '{ws_translated['A1'].value}'")

    # First 10 rows x 10 columns, bounded by the original sheet's dimensions.
    print("\n前10行10列的對比:")
    last_row = min(_EXCEL_PREVIEW_LIMIT + 1, ws_original.max_row + 1)
    last_col = min(_EXCEL_PREVIEW_LIMIT + 1, ws_original.max_column + 1)
    for row in range(1, last_row):
        for col in range(1, last_col):
            cell_original = ws_original.cell(row=row, column=col)
            cell_translated = ws_translated.cell(row=row, column=col)

            # Cells whose original value is empty/falsy are skipped.
            if cell_original.value and cell_original.value != cell_translated.value:
                print(f"\n  [{openpyxl.utils.get_column_letter(col)}{row}]")
                print(f"    原始: '{cell_original.value}'")
                print(f"    翻譯: '{cell_translated.value}'")


def check_excel_translation(file_path):
    """Compare the original Excel workbook with its Japanese translation.

    Args:
        file_path: Directory containing ``original_panjit_f0b78200.xlsx`` and
            ``original_panjit_f0b78200_ja_translated.xlsx``.

    Returns:
        None.  Prints the A1 cell of both active sheets and every cell in the
        first 10 rows x 10 columns whose value differs.  If either file is
        missing, a message is printed and nothing else is done.
    """
    _print_banner("檢查 Excel 文件翻譯")

    # Original file
    original_file = Path(file_path) / "original_panjit_f0b78200.xlsx"
    # Translated file (Japanese)
    translated_file = Path(file_path) / "original_panjit_f0b78200_ja_translated.xlsx"

    if not _both_files_exist(original_file, translated_file):
        return

    # try/finally guarantees both workbooks are closed even if loading the
    # second one or the comparison itself raises.
    wb_original = openpyxl.load_workbook(original_file)
    try:
        wb_translated = openpyxl.load_workbook(translated_file)
        try:
            print(f"\n原始文件: {original_file.name}")
            print(f"翻譯文件: {translated_file.name}")
            _report_excel_differences(wb_original.active, wb_translated.active)
        finally:
            wb_translated.close()
    finally:
        wb_original.close()


def check_docx_translation(file_path):
    """Search the original and translated DOCX files for fixed target strings.

    Only ``Document.paragraphs`` is scanned.

    Args:
        file_path: Directory containing ``original_-OR026_49e95f53.docx`` and
            ``translated_original_-OR026_49e95f53_en_translat.docx``.

    Returns:
        None.  Prints each original paragraph containing a target string and
        each translated paragraph that still contains one.  If either file is
        missing, a message is printed and nothing else is done.
    """
    _print_banner("檢查 DOCX 文件翻譯")

    # Original file
    original_file = Path(file_path) / "original_-OR026_49e95f53.docx"
    # Translated file (English)
    translated_file = Path(file_path) / "translated_original_-OR026_49e95f53_en_translat.docx"

    if not _both_files_exist(original_file, translated_file):
        return

    doc_original = Document(original_file)
    doc_translated = Document(translated_file)

    print(f"\n原始文件: {original_file.name}")
    print(f"翻譯文件: {translated_file.name}")

    # Source-language strings to look for
    target_strings = ["超温", "存放", "工务部"]

    print("\n搜尋目標字串在原始文件中:")
    for para_idx, para in enumerate(doc_original.paragraphs):
        found = [target for target in target_strings if target in para.text]
        if found:
            # The paragraph is printed once, followed by every target it holds.
            print(f"\n段落 {para_idx}: {para.text[:100]}...")
            for target in found:
                print(f"  找到 '{target}'")

    print("\n搜尋目標字串在翻譯文件中:")
    for para_idx, para in enumerate(doc_translated.paragraphs):
        for target in target_strings:
            if target in para.text:
                # Here the paragraph is printed once per remaining target.
                print(f"\n段落 {para_idx}: {para.text[:100]}...")
                print(f"  仍包含未翻譯的 '{target}'")


def check_translation_cache(job_uuid, target_strings):
    """Look up target strings in the MySQL translation cache and print matches.

    Args:
        job_uuid: Job identifier.  It is only printed; the query does not
            filter by it.
        target_strings: Iterable of strings.  Each one is matched as a
            substring of ``source_text`` via ``LIKE '%target%'``, so ``%`` and
            ``_`` inside a target act as SQL wildcards.

    Returns:
        None.  Prints the matching rows (source text, translated text and
        language pair) or a "not found" message per target.
    """
    _print_banner("檢查 MySQL 翻譯快取")

    # Connect to the database; closed in ``finally`` even if a query fails.
    conn = pymysql.connect(**_db_settings())
    try:
        cursor = conn.cursor()
        try:
            print(f"\n任務UUID: {job_uuid}")
            print(f"搜尋字串: {target_strings}")

            # Query the translation cache once per target string.
            for target in target_strings:
                cursor.execute(_CACHE_LOOKUP_SQL, (f'%{target}%',))
                results = cursor.fetchall()

                if results:
                    print(f"\n找到包含 '{target}' 的翻譯記錄:")
                    for source, translated, src_lang, tgt_lang in results:
                        print(f"  原文: {_preview(source)}...")
                        print(f"  譯文: {_preview(translated)}...")
                        print(f"  語言: {src_lang} -> {tgt_lang}")
                else:
                    print(f"\n未找到包含 '{target}' 的翻譯記錄")
        finally:
            cursor.close()
    finally:
        conn.close()


def main():
    """Run the Excel, DOCX and translation-cache checks on the fixed sample jobs."""
    # Excel upload directory
    excel_path = r"C:\Users\EGG\WORK\data\user_scrip\TOOL\Document_translator_V2\uploads\f0b78200-2c5e-41a4-bac8-1536f92529e9"

    # DOCX upload directory
    docx_path = r"C:\Users\EGG\WORK\data\user_scrip\TOOL\Document_translator_V2\uploads\49e95f53-5092-47c0-8275-e19c8c99e5ac"

    # Check the Excel pair
    check_excel_translation(excel_path)

    # Check the DOCX pair
    check_docx_translation(docx_path)

    # Translation cache entries for the DOCX job
    print("\n" + "="*60)
    print("查詢 DOCX 翻譯快取")
    check_translation_cache("49e95f53-5092-47c0-8275-e19c8c99e5ac", ["超温", "存放", "工务部"])

    # Translation cache entries for the Excel job
    print("\n" + "="*60)
    print("查詢 Excel 翻譯快取")
    check_translation_cache("f0b78200-2c5e-41a4-bac8-1536f92529e9", ["产品型号"])


if __name__ == "__main__":
    main()