180 lines
5.8 KiB
Python
180 lines
5.8 KiB
Python
#!/usr/bin/env python3
|
|
# -*- coding: utf-8 -*-
|
|
"""
|
|
檢查文件翻譯問題
|
|
"""
|
|
|
|
import sys
|
|
import os
|
|
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
|
|
|
import openpyxl
|
|
from docx import Document
|
|
import pymysql
|
|
from pathlib import Path
|
|
|
|
# Switch stdout to UTF-8 so the report text below is emitted as UTF-8.
# Guarded because a replaced stdout (for example an io.StringIO) has no
# reconfigure() method and would raise AttributeError on import.
if hasattr(sys.stdout, "reconfigure"):
    sys.stdout.reconfigure(encoding='utf-8')
|
|
|
|
def check_excel_translation(
    file_path,
    original_name="original_panjit_f0b78200.xlsx",
    translated_name="original_panjit_f0b78200_ja_translated.xlsx",
    max_rows=10,
    max_cols=10,
):
    """Print a cell-by-cell comparison of a workbook and its translated copy.

    Both workbooks are looked up inside ``file_path``. If either file is
    missing, a message is printed and the function returns without opening
    anything. Otherwise the active sheet of each workbook is compared: cell
    A1 is always shown, then every cell in the top-left ``max_rows`` x
    ``max_cols`` window whose original value is truthy and differs from the
    translated value is printed.

    Args:
        file_path: Directory (str or path-like) containing both workbooks.
        original_name: File name of the source workbook.
        translated_name: File name of the translated workbook.
        max_rows: Number of leading rows to compare.
        max_cols: Number of leading columns to compare.

    Returns:
        None. All results are written to stdout.
    """
    # Local import: keeps this function self-contained without touching the
    # file-level import block.
    from contextlib import closing

    print("\n" + "="*60)
    print("檢查 Excel 文件翻譯")
    print("="*60)

    base_dir = Path(file_path)
    original_file = base_dir / original_name
    translated_file = base_dir / translated_name

    if not original_file.exists():
        print(f"原始文件不存在: {original_file}")
        return

    if not translated_file.exists():
        print(f"翻譯文件不存在: {translated_file}")
        return

    # closing() guarantees both workbooks are closed even if loading the
    # second one, or the comparison itself, raises.
    with closing(openpyxl.load_workbook(original_file)) as wb_original, \
            closing(openpyxl.load_workbook(translated_file)) as wb_translated:
        ws_original = wb_original.active
        ws_translated = wb_translated.active

        print(f"\n原始文件: {original_file.name}")
        print(f"翻譯文件: {translated_file.name}")

        print(f"\nA1 儲存格:")
        print(f" 原始: '{ws_original['A1'].value}'")
        print(f" 翻譯: '{ws_translated['A1'].value}'")

        print(f"\n前{max_rows}行{max_cols}列的對比:")
        # The window is bounded by the ORIGINAL sheet's used range.
        last_row = min(max_rows, ws_original.max_row)
        last_col = min(max_cols, ws_original.max_column)
        for row in range(1, last_row + 1):
            for col in range(1, last_col + 1):
                original_value = ws_original.cell(row=row, column=col).value
                translated_value = ws_translated.cell(row=row, column=col).value

                # Empty/falsy source cells are skipped; only changed cells
                # are reported.
                if original_value and original_value != translated_value:
                    print(f"\n [{openpyxl.utils.get_column_letter(col)}{row}]")
                    print(f" 原始: '{original_value}'")
                    print(f" 翻譯: '{translated_value}'")
|
|
|
|
def check_docx_translation(
    file_path,
    target_strings=None,
    original_name="original_-OR026_49e95f53.docx",
    translated_name="translated_original_-OR026_49e95f53_en_translat.docx",
):
    """Report where target strings occur in a DOCX file and its translation.

    Both documents are looked up inside ``file_path``. If either file is
    missing, a message is printed and the function returns. Otherwise the
    body paragraphs of the original are scanned and every paragraph that
    contains at least one target string is printed; then the translated
    document is scanned and every remaining occurrence of a target string is
    reported as still untranslated. Only ``Document.paragraphs`` is
    inspected.

    Args:
        file_path: Directory (str or path-like) containing both documents.
        target_strings: Source-language strings to search for. Defaults to
            ``["超温", "存放", "工务部"]``.
        original_name: File name of the source document.
        translated_name: File name of the translated document.

    Returns:
        None. All results are written to stdout.
    """
    if target_strings is None:
        target_strings = ["超温", "存放", "工务部"]

    print("\n" + "="*60)
    print("檢查 DOCX 文件翻譯")
    print("="*60)

    base_dir = Path(file_path)
    original_file = base_dir / original_name
    translated_file = base_dir / translated_name

    if not original_file.exists():
        print(f"原始文件不存在: {original_file}")
        return

    if not translated_file.exists():
        print(f"翻譯文件不存在: {translated_file}")
        return

    doc_original = Document(original_file)
    doc_translated = Document(translated_file)

    print(f"\n原始文件: {original_file.name}")
    print(f"翻譯文件: {translated_file.name}")

    print("\n搜尋目標字串在原始文件中:")
    for para_idx, para in enumerate(doc_original.paragraphs):
        # Read the paragraph text once instead of once per target string.
        text = para.text
        found = [target for target in target_strings if target in text]
        if found:
            print(f"\n段落 {para_idx}: {text[:100]}...")
            for target in found:
                print(f" 找到 '{target}'")

    print("\n搜尋目標字串在翻譯文件中:")
    for para_idx, para in enumerate(doc_translated.paragraphs):
        text = para.text
        for target in target_strings:
            # A source-language string surviving in the translated document
            # is reported as untranslated text.
            if target in text:
                print(f"\n段落 {para_idx}: {text[:100]}...")
                print(f" 仍包含未翻譯的 '{target}'")
|
|
|
|
def check_translation_cache(job_uuid, target_strings):
    """Print translation-cache rows whose source text contains a target string.

    Connects to the MySQL database, and for each entry of ``target_strings``
    runs a parameterized ``LIKE '%target%'`` query against
    ``dt_translation_cache``, printing the first 100 characters of the
    source text and of the translation together with the language pair.

    Args:
        job_uuid: Job identifier. It is only echoed in the report; the query
            does not filter on it.
        target_strings: Iterable of substrings to look up in ``source_text``.

    Returns:
        None. All results are written to stdout.
    """
    print("\n" + "="*60)
    print("檢查 MySQL 翻譯快取")
    print("="*60)

    # NOTE(review): the database credentials are hard-coded in source.
    # They should be moved to configuration/environment and the password
    # rotated; left unchanged here so existing behavior is preserved.
    conn = pymysql.connect(
        host='mysql.theaken.com',
        port=33306,
        user='A060',
        password='WLeSCi0yhtc7',
        database='db_A060',
        charset='utf8mb4'
    )

    # Loop-invariant, so built once. The search term is passed as a bound
    # parameter rather than interpolated into the statement.
    sql = """
        SELECT source_text, translated_text, source_language, target_language
        FROM dt_translation_cache
        WHERE source_text LIKE %s
    """

    # try/finally and the cursor context manager release the cursor and the
    # connection even when a query or a print raises.
    try:
        with conn.cursor() as cursor:
            print(f"\n任務UUID: {job_uuid}")
            print(f"搜尋字串: {target_strings}")

            for target in target_strings:
                cursor.execute(sql, (f'%{target}%',))
                results = cursor.fetchall()

                if results:
                    print(f"\n找到包含 '{target}' 的翻譯記錄:")
                    for source, translated, src_lang, tgt_lang in results:
                        # A NULL column comes back as None; slicing None
                        # would raise TypeError, so fall back to ''.
                        print(f" 原文: {(source or '')[:100]}...")
                        print(f" 譯文: {(translated or '')[:100]}...")
                        print(f" 語言: {src_lang} -> {tgt_lang}")
                else:
                    print(f"\n未找到包含 '{target}' 的翻譯記錄")
    finally:
        conn.close()
|
|
|
|
def main():
    """Run the Excel and DOCX file checks, then query the cache for each job."""
    # Upload directories of the two jobs being inspected.
    excel_dir = r"C:\Users\EGG\WORK\data\user_scrip\TOOL\Document_translator_V2\uploads\f0b78200-2c5e-41a4-bac8-1536f92529e9"
    docx_dir = r"C:\Users\EGG\WORK\data\user_scrip\TOOL\Document_translator_V2\uploads\49e95f53-5092-47c0-8275-e19c8c99e5ac"

    # File-level comparisons first: Excel, then DOCX.
    check_excel_translation(excel_dir)
    check_docx_translation(docx_dir)

    # Cache lookups, DOCX job first and Excel job second:
    # (section title, job UUID, strings to search for).
    cache_lookups = (
        (
            "查詢 DOCX 翻譯快取",
            "49e95f53-5092-47c0-8275-e19c8c99e5ac",
            ["超温", "存放", "工务部"],
        ),
        (
            "查詢 Excel 翻譯快取",
            "f0b78200-2c5e-41a4-bac8-1536f92529e9",
            ["产品型号"],
        ),
    )
    for title, job_uuid, targets in cache_lookups:
        print("\n" + "="*60)
        print(title)
        check_translation_cache(job_uuid, targets)
|
|
|
|
# Run the checks only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()