Files
Document_Translator/check_translation_content.py
2025-09-02 16:47:16 +08:00

86 lines
3.5 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
檢查翻譯文件的實際內容
"""
import sys
import os
from pathlib import Path
def _ensure_utf8(stream):
    """Switch a text stream to UTF-8 if it supports in-place reconfiguration.

    Args:
        stream: Normally ``sys.stdout`` or ``sys.stderr``. ``None`` (both are
            ``None`` under ``pythonw``) and replacement streams that lack a
            ``reconfigure()`` method (e.g. ``io.StringIO``) are left untouched
            instead of raising ``AttributeError``.
    """
    reconfigure = getattr(stream, 'reconfigure', None)
    if reconfigure is None:
        return
    # ``encoding`` may be missing or None on custom streams; compare
    # case-insensitively so 'UTF-8' is recognised as already correct.
    encoding = getattr(stream, 'encoding', None) or ''
    if encoding.lower() not in ('utf-8', 'utf8'):
        reconfigure(encoding='utf-8')


# Fix encoding for Windows console
_ensure_utf8(sys.stdout)
_ensure_utf8(sys.stderr)

# Put the ``app`` directory that sits next to this script at the front of the
# module search path before the project imports below run.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'app'))
from app import create_app
from app.models.job import TranslationJob
# Job inspected when the caller passes no UUID; this is the value the script
# originally had hard-coded.
_DEFAULT_JOB_UUID = '485e0fdc-75fb-4b5a-b44b-3531951200a1'


def _has_ascii_letters(text):
    """Return True if *text* contains at least one ASCII letter (a-z, A-Z)."""
    return any(ch.isascii() and ch.isalpha() for ch in text)


def _print_paragraph_samples(paragraphs, sample_count=3):
    """Print a preview of the first *sample_count* paragraphs.

    Args:
        paragraphs: List of paragraph strings.
        sample_count: Maximum number of leading paragraphs to preview.
    """
    for number, para in enumerate(paragraphs[:sample_count], start=1):
        print(f"段落 {number}: {para[:100]}...")

        # Check for the interleaved layout (source text + translation):
        # a paragraph holding more than one line is reported as a candidate.
        lines = para.split('\n')
        if len(lines) > 1:
            print(f" -> 多行內容,可能是交錯格式: {len(lines)}")
            # Only show the first two lines.
            for line_no, line in enumerate(lines[:2], start=1):
                print(f"{line_no}: {line[:80]}...")


def _report_docx_content(file_path):
    """Print a summary of the paragraph text found in a DOCX file.

    Any failure while importing python-docx or reading the document is
    printed rather than raised, so one bad file does not abort the checks of
    the remaining files.

    Args:
        file_path: Path of the DOCX file to open.
    """
    try:
        # Imported inside the try so that a missing python-docx package is
        # reported by the handler below like any other read failure.
        from docx import Document

        doc = Document(str(file_path))
        # Strip each paragraph once and drop the ones that end up empty.
        stripped = (p.text.strip() for p in doc.paragraphs)
        paragraphs = [text for text in stripped if text]
        print(f"段落數: {len(paragraphs)}")

        if paragraphs:
            print(f"第一段內容: {paragraphs[0][:150]}...")
            # Preview the first few paragraphs.
            _print_paragraph_samples(paragraphs)

        # Rough check for English or Vietnamese content: look for ASCII
        # letters in the first five paragraphs.
        all_text = ' '.join(paragraphs[:5])
        has_latin = _has_ascii_letters(all_text)
        print(f"包含拉丁字符(可能是翻譯): {has_latin}")
    except Exception as e:  # best-effort diagnostic: report and carry on
        print(f"讀取DOCX錯誤: {e}")


def _report_translated_file(tf):
    """Print metadata for one translated-file record and inspect DOCX content.

    Args:
        tf: Record exposing ``file_path``, ``filename`` and ``language_code``
            (as returned by ``job.get_translated_files()``).
    """
    file_path = Path(tf.file_path)
    print(f"\n【檔案】 {tf.filename}")
    print(f"語言: {tf.language_code}")
    print(f"路徑: {tf.file_path}")
    exists = file_path.exists()
    print(f"存在: {exists}")
    if not exists:
        return

    print(f"大小: {file_path.stat().st_size:,} bytes")
    # If it is a DOCX, inspect its content. The extension is compared
    # case-insensitively so names such as "REPORT.DOCX" are not skipped.
    if tf.filename.lower().endswith('.docx'):
        _report_docx_content(file_path)


def check_translation_content(job_uuid=_DEFAULT_JOB_UUID):
    """Check the actual content of a job's translated files.

    Looks up the translation job by UUID inside an application context and
    prints, for each translated file, its metadata, whether it exists on
    disk, its size and (for DOCX files) a sample of its paragraph text.

    Args:
        job_uuid: UUID of the job to inspect. Defaults to the job this script
            was originally written to check.

    Returns:
        None. All results are printed to stdout.
    """
    app = create_app()
    with app.app_context():
        print("=== 檢查翻譯文件內容 ===")

        job = TranslationJob.query.filter_by(job_uuid=job_uuid).first()
        if not job:
            print("任務不存在")
            return

        print(f"任務狀態: {job.status}")
        translated_files = job.get_translated_files()
        print(f"翻譯檔案數: {len(translated_files)}")

        for tf in translated_files:
            _report_translated_file(tf)
            print("-" * 50)
# Script entry point: run the check only when this file is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    check_translation_content()