Document_Translator/debug_translation.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Debug script to trace translation file generation issue
"""
import sys
import os
# Fix encoding for Windows console
if sys.stdout.encoding != 'utf-8':
    sys.stdout.reconfigure(encoding='utf-8')
if sys.stderr.encoding != 'utf-8':
    sys.stderr.reconfigure(encoding='utf-8')
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'app'))
from pathlib import Path
from app.services.document_processor import DocumentProcessor
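
# This script exercises the three DocumentProcessor methods used by the real
# translation pipeline: extract_docx_segments(), should_translate_text() and
# insert_docx_translations().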


def debug_docx_processing(file_path):
    """Debug DOCX processing to understand why translations aren't being inserted"""
    print(f"=== Debugging DOCX file: {file_path} ===")
    if not Path(file_path).exists():
        print(f"ERROR: File does not exist: {file_path}")
        return

    processor = DocumentProcessor()
    try:
        # Extract segments
        segments = processor.extract_docx_segments(file_path)
        print(f"Extracted {len(segments)} segments:")
        for i, seg in enumerate(segments):
            print(f" Segment {i+1}:")
            print(f" Kind: {seg.kind}")
            print(f" Context: {seg.ctx}")
            print(f" Text: {repr(seg.text[:100])}")
            print(f" Should translate: {processor.should_translate_text(seg.text, 'auto')}")
            print()

        # Simulate translation map
        sample_translation_map = {}
        target_languages = ['vi', 'en']
        for target_lang in target_languages:
            for seg in segments:
                if processor.should_translate_text(seg.text, 'auto'):
                    # Simulate a translation
                    key = (target_lang, seg.text)
                    sample_translation_map[key] = f"[TRANSLATED_{target_lang.upper()}] {seg.text}"

        print(f"Built translation map with {len(sample_translation_map)} entries:")
        for key, value in list(sample_translation_map.items())[:5]:
            print(f" {key[0]} | {repr(key[1][:50])} -> {repr(value[:50])}")
        print()

        # Test translation insertion
        output_path = str(Path(file_path).parent / "debug_translated.docx")
        print(f"Testing translation insertion to: {output_path}")
        ok_count, skip_count = processor.insert_docx_translations(
            file_path=file_path,
            segments=segments,
            translation_map=sample_translation_map,
            target_languages=target_languages,
            output_path=output_path
        )
        print(f"Translation insertion result: {ok_count} OK, {skip_count} skipped")
        if Path(output_path).exists():
            print(f"SUCCESS: Output file created with size {Path(output_path).stat().st_size} bytes")
        else:
            print("ERROR: Output file was not created")
    except Exception as e:
        print(f"ERROR during processing: {str(e)}")
        import traceback
        traceback.print_exc()
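

# check_jobs() imports the Flask app factory and models inside the function, so a
# failure to load the web app (missing config, database, etc.) is reported by the
# except block below instead of preventing the file-level debugging above.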
def check_jobs():
    """Check for jobs and debug them"""
    try:
        from app import create_app
        from app.models.job import TranslationJob

        app = create_app()
        with app.app_context():
            # Check all recent jobs
            all_jobs = TranslationJob.query.order_by(TranslationJob.created_at.desc()).limit(5).all()
            print(f"\n=== Found {len(all_jobs)} recent jobs ===")
            for job in all_jobs:
                print(f"Job {job.job_uuid}: {job.original_filename}")
                print(f" Status: {job.status}")
                print(f" File path: {job.file_path}")
                print(f" File exists: {Path(job.file_path).exists() if job.file_path else 'N/A'}")
                print(f" Target languages: {job.target_languages}")
                print(f" Total tokens: {job.total_tokens}")
                print(f" Total cost: {job.total_cost}")

                # Check API usage stats
                from app.models.stats import APIUsageStats
                api_stats = APIUsageStats.query.filter_by(job_id=job.id).all()
                print(f" API calls made: {len(api_stats)}")
                for stat in api_stats[:3]:  # Show first 3 calls
                    print(f" - {stat.api_endpoint}: {stat.total_tokens} tokens, ${stat.cost:.4f}, success: {stat.success}")
                    if not stat.success:
                        print(f" Error: {stat.error_message}")

                if job.file_path and Path(job.file_path).exists() and job.status == 'COMPLETED':
                    print(f" >>> Debugging COMPLETED job file: {job.file_path}")
                    debug_docx_processing(job.file_path)

                # Check translated files
                translated_files = job.get_translated_files()
                print(f" >>> Found {len(translated_files)} translated files:")
                for tf in translated_files:
                    print(f" - {tf.filename} ({tf.language_code}) - Size: {tf.file_size} bytes")
                    if Path(tf.file_path).exists():
                        print(f" File exists: {tf.file_path}")
                    else:
                        print(f" File MISSING: {tf.file_path}")
                print()
            return all_jobs
    except Exception as e:
        print(f"Error checking jobs: {str(e)}")
        import traceback
        traceback.print_exc()
        return []


if __name__ == "__main__":
    if len(sys.argv) > 1:
        # Debug specific file
        debug_docx_processing(sys.argv[1])
    else:
        # Debug recent jobs
        check_jobs()