#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Inspect the actual content of the translated files produced by a job.

Run as a script. An optional first command-line argument overrides the job
UUID that is looked up; without it, ``DEFAULT_JOB_UUID`` is used.
"""

import sys
import os
from pathlib import Path


def _force_utf8(stream) -> None:
    """Switch *stream* to UTF-8 output if it supports reconfiguration.

    Streams that are missing (``None``) or that lack a ``reconfigure``
    method (e.g. replaced by a capture object) are left untouched instead
    of raising ``AttributeError``.
    """
    reconfigure = getattr(stream, "reconfigure", None)
    if reconfigure is None:
        return
    encoding = (getattr(stream, "encoding", None) or "").lower()
    if encoding not in ("utf-8", "utf8"):
        reconfigure(encoding="utf-8")


# Fix encoding for Windows console
_force_utf8(sys.stdout)
_force_utf8(sys.stderr)

# Must run before the project imports below.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'app'))

from app import create_app
from app.models.job import TranslationJob

# Job inspected when no UUID is supplied by the caller.
DEFAULT_JOB_UUID = '485e0fdc-75fb-4b5a-b44b-3531951200a1'


def _inspect_docx(file_path: Path) -> None:
    """Print paragraph statistics and short samples for one DOCX file.

    Any failure (python-docx missing, unreadable or corrupt file, ...) is
    reported on stdout and swallowed so the remaining files still get
    checked; this is a best-effort diagnostic.
    """
    try:
        # Imported lazily so a missing python-docx is reported like any
        # other read error rather than aborting the whole script.
        from docx import Document

        doc = Document(str(file_path))
        # Keep only non-blank paragraphs, stripped of surrounding whitespace.
        paragraphs = [p.text.strip() for p in doc.paragraphs if p.text.strip()]
        print(f"段落數: {len(paragraphs)}")

        if not paragraphs:
            return

        print(f"第一段內容: {paragraphs[0][:150]}...")

        # Show the first few paragraphs.
        for index, para in enumerate(paragraphs[:3], start=1):
            print(f"段落 {index}: {para[:100]}...")

            # A multi-line paragraph may be the interleaved format
            # (original text followed by its translation).
            lines = para.split('\n')
            if len(lines) > 1:
                print(f" -> 多行內容,可能是交錯格式: {len(lines)} 行")
                # Only show the first two lines.
                for line_no, line in enumerate(lines[:2], start=1):
                    print(f" 行{line_no}: {line[:80]}...")

        # Rough check on the first five paragraphs: any ASCII letter
        # suggests English/Vietnamese (i.e. translated) content.
        all_text = ' '.join(paragraphs[:5])
        has_latin = any(c.isascii() and c.isalpha() for c in all_text)
        print(f"包含拉丁字符(可能是翻譯): {has_latin}")
    except Exception as e:  # deliberate best-effort: report and continue
        print(f"讀取DOCX錯誤: {e}")


def check_translation_content(job_uuid: str = DEFAULT_JOB_UUID) -> None:
    """Print a report on the translated files attached to one job.

    Args:
        job_uuid: ``job_uuid`` of the ``TranslationJob`` to inspect.
            Defaults to ``DEFAULT_JOB_UUID``.

    The report goes to stdout: job status, number of translated files, and
    for each file its name, language code, path, whether it exists on disk
    and its size. Files whose name ends in ``.docx`` (case-insensitive)
    are additionally opened and sampled.
    """
    app = create_app()

    with app.app_context():
        print("=== 檢查翻譯文件內容 ===")

        job = TranslationJob.query.filter_by(job_uuid=job_uuid).first()
        if not job:
            print("任務不存在")
            return

        print(f"任務狀態: {job.status}")

        translated_files = job.get_translated_files()
        print(f"翻譯檔案數: {len(translated_files)}")

        for tf in translated_files:
            file_path = Path(tf.file_path)
            exists = file_path.exists()

            print(f"\n【檔案】 {tf.filename}")
            print(f"語言: {tf.language_code}")
            print(f"路徑: {tf.file_path}")
            print(f"存在: {exists}")

            if exists:
                print(f"大小: {file_path.stat().st_size:,} bytes")

                # For DOCX files, look inside the document as well.
                if tf.filename.lower().endswith('.docx'):
                    _inspect_docx(file_path)

            # Separator between files.
            print("-" * 50)


if __name__ == "__main__":
    # Optional first CLI argument overrides the default job UUID.
    check_translation_content(sys.argv[1] if len(sys.argv) > 1 else DEFAULT_JOB_UUID)