4th_fix time error

This commit is contained in:
beabigegg
2025-09-03 09:05:51 +08:00
parent e6e5332705
commit cce3fd4925
26 changed files with 2551 additions and 82 deletions

View File

@@ -0,0 +1,178 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
使用乾淨的DOCX文件測試翻譯插入
"""
import sys
import os
import tempfile
import shutil
from pathlib import Path
# Force UTF-8 on the standard streams so the CJK/emoji text printed by this
# script does not raise UnicodeEncodeError on consoles that use a legacy code
# page (e.g. the Windows console).
for _stream in (sys.stdout, sys.stderr):
    # A stream can be None (no console attached) or a replacement object
    # without ``reconfigure`` (e.g. io.StringIO); skip those instead of
    # crashing with AttributeError at import time.
    _reconfigure = getattr(_stream, 'reconfigure', None)
    _encoding = (getattr(_stream, 'encoding', None) or '').lower()
    if _reconfigure is not None and _encoding != 'utf-8':
        _reconfigure(encoding='utf-8')

# Prepend "<directory of this file>/app" to the module search path; this must
# happen before the project imports that follow.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'app'))
from app import create_app, db
from app.services.translation_service import DocxParser
from sqlalchemy import text
def _has_cjk(text: str) -> bool:
    """Return True if *text* contains a character in the range U+4E00-U+9FFF."""
    return any('\u4e00' <= ch <= '\u9fff' for ch in text)


def _has_english(text: str, ignored_token: str = 'PANJIT') -> bool:
    """Return True if *text* contains an ASCII letter outside *ignored_token*.

    Whole occurrences of *ignored_token* are removed before scanning, so the
    token itself never makes a paragraph count as English.

    NOTE(review): the token is presumably a proper noun that stays untranslated
    in the documents - confirm. The previous per-character test
    (``c not in 'PANJIT'``) discarded every uppercase P/A/N/J/I/T, so words
    such as "PIN" or "TAP" were not recognised as English.
    """
    remainder = text.replace(ignored_token, '') if ignored_token else text
    return any(ord(ch) < 128 and ch.isalpha() for ch in remainder)


def _has_marker(paragraph) -> bool:
    """Return True if any run of *paragraph* contains a zero-width space (U+200B).

    The script treats that character as the "translation inserted" marker.
    """
    return any('\u200b' in (run.text or '') for run in paragraph.runs)


def _print_source_state(doc) -> None:
    """Print the paragraph count and a preview of the first ten paragraphs of *doc*."""
    print("\n📄 乾淨文檔當前狀態:")
    print(f"總段落數: {len(doc.paragraphs)}")
    for number, para in enumerate(doc.paragraphs[:10], start=1):
        snippet = para.text.strip()
        if not snippet:
            continue
        print(f" 段落 {number}: {snippet[:60]}...")
        # Warn when the supposedly clean copy already carries the marker.
        if _has_marker(para):
            print(" ⚠️ 此段落已包含翻譯插入標記")


def _print_output_analysis(doc) -> None:
    """Print language statistics and interleaving checks for the generated *doc*.

    Only non-blank paragraphs are considered. The per-paragraph listing and the
    counters cover the first 20 of them; the alternating-pair scan covers all.
    """
    paragraphs = [p for p in doc.paragraphs if p.text.strip()]
    texts = [p.text.strip() for p in paragraphs]
    # Classify each paragraph once; both passes below reuse the result.
    languages = [(_has_cjk(t), _has_english(t)) for t in texts]

    print("\n📄 生成文件詳細分析:")
    print(f"總段落數: {len(paragraphs)}")

    chinese_count = 0
    english_count = 0
    mixed_count = 0
    marker_count = 0

    print("\n前20段落詳情:")
    head = zip(paragraphs[:20], texts, languages)
    for number, (para, text, (has_chinese, has_english)) in enumerate(head, start=1):
        has_marker = _has_marker(para)
        if has_marker:
            marker_count += 1

        if has_chinese and has_english:
            mixed_count += 1
            lang_status = "🔄 中英混合"
        elif has_english:
            english_count += 1
            lang_status = "🇺🇸 純英文"
        elif has_chinese:
            chinese_count += 1
            lang_status = "🇨🇳 純中文"
        else:
            lang_status = "❓ 其他"

        marker_status = " 🏷️" if has_marker else ""
        print(f" 段落 {number:2d}: {lang_status}{marker_status} - {text[:70]}...")

    print("\n📊 統計結果:")
    print(f" 純中文段落: {chinese_count}")
    print(f" 純英文段落: {english_count}")
    print(f" 中英混合段落: {mixed_count}")
    print(f" 帶翻譯標記的段落: {marker_count}")

    # Rough verdict based on the counters gathered above.
    if english_count > 10:
        print(f"\n✅ 翻譯效果優秀 - 有 {english_count} 個純英文段落")
    elif english_count > 0:
        print(f"\n⚠️ 翻譯部分成功 - 有 {english_count} 個純英文段落")
    elif marker_count > 10:
        print(f"\n🔍 翻譯可能成功但格式問題 - 有 {marker_count} 個帶標記的段落")
    else:
        print("\n❌ 翻譯可能失敗 - 沒有明顯的英文內容")

    # Count adjacent pairs where a Chinese-only paragraph is followed by an
    # English-only one (the interleaved layout), showing the first three.
    rows = list(zip(texts, languages))
    alternating_pairs = 0
    for (current, (cur_zh, cur_en)), (following, (next_zh, next_en)) in zip(rows, rows[1:]):
        if cur_zh and not cur_en and next_en and not next_zh:
            alternating_pairs += 1
            if alternating_pairs <= 3:
                print(f"\n 交錯對 {alternating_pairs}:")
                print(f" 中文: {current[:50]}...")
                print(f" 英文: {following[:50]}...")

    if alternating_pairs > 0:
        print(f"\n✅ 發現交錯翻譯格式!共 {alternating_pairs} 對")
    else:
        print("\n❌ 沒有發現交錯翻譯格式")


def test_clean_docx_translation(original_path=None):
    """Generate an English translation from a fresh copy of a DOCX and report on it.

    Steps, all inside a Flask application context created via ``create_app()``:

    1. Copy the source document to ``<tempdir>/clean_docx_test/clean_original.docx``.
    2. Print a preview of the copy and warn about paragraphs that already hold
       the zero-width-space marker.
    3. Call ``DocxParser.generate_translated_document`` with an empty
       translations mapping, target language ``'en'`` and the temp directory.
    4. Print language statistics for the generated file.

    Every failure is reported with ``print``; nothing is raised for a missing
    source file or for errors inside the inspection/generation steps.

    Args:
        original_path: Path of the source ``.docx``. When ``None`` the
            machine-specific path that was previously hard-coded is used.

    Returns:
        None. Results are written to stdout; the copy and the generated file
        are left in the temp directory.
    """
    if original_path is None:
        original_path = r"C:\Users\EGG\WORK\data\user_scrip\TOOL\Document_translator_V2\uploads\9c6548ac-2f59-45f4-aade-0a9b3895bbfd\original_-OR026_9c6548ac.docx"

    app = create_app()

    with app.app_context():
        print("=== 使用乾淨的DOCX文件測試翻譯插入 ===")

        # Fail with a readable message instead of an uncaught
        # FileNotFoundError from shutil.copy2 when the source is absent.
        source = Path(original_path)
        if not source.is_file():
            print(f"❌ 原始文件不存在: {source}")
            return

        # Work on a copy so the uploaded original is never modified.
        clean_copy_dir = Path(tempfile.gettempdir()) / "clean_docx_test"
        clean_copy_dir.mkdir(exist_ok=True)
        clean_copy_path = clean_copy_dir / "clean_original.docx"
        shutil.copy2(source, clean_copy_path)
        print(f"✅ 創建乾淨副本: {clean_copy_path}")

        parser = DocxParser(str(clean_copy_path))

        # Inspect the copy before generating anything.
        try:
            # Imported lazily so a missing python-docx is reported like any
            # other inspection failure.
            from docx import Document

            _print_source_state(Document(str(clean_copy_path)))
        except Exception as e:
            print(f"❌ 檢查文檔狀態失敗: {e}")
            return

        print("\n🔄 測試翻譯生成...")
        try:
            # The mapping is intentionally empty; per the original author's
            # note the translations are read from a cache instead.
            empty_translations = {}
            en_output_path = parser.generate_translated_document(
                empty_translations,
                'en',
                clean_copy_dir,
            )
            print(f"✅ 翻譯文件生成成功: {en_output_path}")

            output_file = Path(en_output_path)
            if not output_file.exists():
                print("❌ 生成的文件不存在")
                return

            print(f"文件大小: {output_file.stat().st_size:,} bytes")
            try:
                _print_output_analysis(Document(str(output_file)))
            except Exception as e:
                print(f"❌ 分析生成文件失敗: {e}")
        except Exception as e:
            print(f"❌ 翻譯生成失敗: {e}")
# Run the diagnostic only when this file is executed as a script, not when it
# is imported.
if __name__ == "__main__":
    test_clean_docx_translation()