86 lines
3.5 KiB
Python
86 lines
3.5 KiB
Python
#!/usr/bin/env python3
|
||
# -*- coding: utf-8 -*-
|
||
"""
|
||
檢查翻譯文件的實際內容
|
||
"""
|
||
|
||
import sys
|
||
import os
|
||
from pathlib import Path
|
||
|
||
# Fix encoding for Windows console.
# A stream that is missing (None) or that has no reconfigure() method -- for
# example an io.StringIO installed by a test harness -- is left untouched
# instead of raising AttributeError while this script is being loaded.
for _stream in (sys.stdout, sys.stderr):
    if getattr(_stream, 'encoding', None) != 'utf-8' and hasattr(_stream, 'reconfigure'):
        _stream.reconfigure(encoding='utf-8')

# Put the 'app' directory located next to this script at the front of the
# module search path before the project imports below are executed.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'app'))
|
||
|
||
from app import create_app
|
||
from app.models.job import TranslationJob
|
||
|
||
def check_translation_content(job_uuid='485e0fdc-75fb-4b5a-b44b-3531951200a1'):
    """Print a diagnostic report about the translated files of one job.

    Creates the application, enters its app context and looks up the
    ``TranslationJob`` whose ``job_uuid`` matches. For every entry returned
    by ``job.get_translated_files()`` the file name, language code, path,
    existence and size are printed; files whose name ends in ``.docx`` are
    additionally opened and a short preview of their paragraphs is printed.

    Args:
        job_uuid: UUID of the job to inspect. The default is the job this
            script was originally hard-wired to, so calling the function
            without arguments behaves as before.

    Returns:
        None. The report is written to stdout. If no job matches, a single
        "not found" line is printed and the function returns early.
    """
    app = create_app()

    with app.app_context():
        print("=== 檢查翻譯文件內容 ===")

        # Look up the job to inspect.
        job = TranslationJob.query.filter_by(job_uuid=job_uuid).first()
        if not job:
            print("任務不存在")
            return

        print(f"任務狀態: {job.status}")
        translated_files = job.get_translated_files()
        print(f"翻譯檔案數: {len(translated_files)}")

        for tf in translated_files:
            _report_translated_file(tf)
            print("-" * 50)


def _report_translated_file(tf):
    """Print name, language, path, existence and size of one translated file."""
    file_path = Path(tf.file_path)
    # Query the filesystem once so the printed flag and the branch below
    # cannot disagree with each other.
    exists = file_path.exists()

    print(f"\n【檔案】 {tf.filename}")
    print(f"語言: {tf.language_code}")
    print(f"路徑: {tf.file_path}")
    print(f"存在: {exists}")

    if not exists:
        return

    print(f"大小: {file_path.stat().st_size:,} bytes")

    # For DOCX files, also inspect the content. The suffix is compared
    # case-insensitively so that names such as "REPORT.DOCX" are included.
    if tf.filename.lower().endswith('.docx'):
        _report_docx(file_path)


def _report_docx(file_path):
    """Print a preview of the non-empty paragraphs of the DOCX at *file_path*."""
    # Best-effort diagnostics: any failure (python-docx not installed,
    # unreadable or corrupt file, ...) is reported and the caller carries on
    # with the next file.
    try:
        from docx import Document

        doc = Document(str(file_path))
        stripped = (p.text.strip() for p in doc.paragraphs)
        paragraphs = [text for text in stripped if text]
        print(f"段落數: {len(paragraphs)}")

        if not paragraphs:
            return

        print(f"第一段內容: {paragraphs[0][:150]}...")

        # Show the first few paragraphs (at most three).
        for number, para in enumerate(paragraphs[:3], start=1):
            print(f"段落 {number}: {para[:100]}...")

            # A paragraph holding several lines may be in the interleaved
            # (source text + translation) format.
            lines = para.split('\n')
            if len(lines) > 1:
                print(f" -> 多行內容,可能是交錯格式: {len(lines)} 行")
                # Only the first two lines are shown.
                for line_no, line in enumerate(lines[:2], start=1):
                    print(f" 行{line_no}: {line[:80]}...")

        # Rough check for English or Vietnamese content: is there any ASCII
        # letter within the first five paragraphs?
        all_text = ' '.join(paragraphs[:5])
        has_latin = any(ord(c) < 128 and c.isalpha() for c in all_text)
        print(f"包含拉丁字符(可能是翻譯): {has_latin}")

    except Exception as e:
        print(f"讀取DOCX錯誤: {e}")
|
||
|
||
# Run the check only when this file is executed as a script, not on import.
if __name__ == "__main__":
    check_translation_content()