#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Debug script to trace translation file generation issue
"""

import sys
import os

# Force UTF-8 output on consoles (notably the Windows console) whose
# default codec cannot represent characters this script may print.
#
# Robustness fixes over the naive `sys.stdout.encoding != 'utf-8'` check:
#   * the stream's `encoding` attribute can be missing or None when stdout
#     is redirected/replaced (pipes, pytest capture, IDEs) -> use getattr
#     with a fallback instead of attribute access;
#   * codec names are case-insensitive ('UTF-8', 'utf8', ...) -> normalise
#     before comparing;
#   * replaced streams may not implement reconfigure() (it is a
#     io.TextIOWrapper method, Python 3.7+) -> guard with hasattr.
for _stream in (sys.stdout, sys.stderr):
    _enc = (getattr(_stream, 'encoding', None) or '').lower().replace('-', '')
    if _enc != 'utf8' and hasattr(_stream, 'reconfigure'):
        _stream.reconfigure(encoding='utf-8')

# Make the bundled application package importable when this script is run
# directly from the repository checkout.
# NOTE(review): this inserts <script_dir>/app, yet the imports below use the
# `app.` prefix (which resolves relative to <script_dir>) — presumably the
# package layout makes both work; verify against the repo structure.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'app'))

from pathlib import Path
from app.services.document_processor import DocumentProcessor
def debug_docx_processing(file_path):
    """Trace a DOCX file through segment extraction and a simulated
    translation pass to diagnose why translations are not being inserted.

    Prints each extracted segment, builds a fake translation map for the
    'vi' and 'en' targets, runs the insertion step, and reports whether
    the output document was actually written.
    """
    print(f"=== Debugging DOCX file: {file_path} ===")

    if not Path(file_path).exists():
        print(f"ERROR: File does not exist: {file_path}")
        return

    processor = DocumentProcessor()

    try:
        # Step 1: pull the translatable segments out of the document.
        segments = processor.extract_docx_segments(file_path)
        print(f"Extracted {len(segments)} segments:")

        for idx, segment in enumerate(segments, 1):
            print(f"  Segment {idx}:")
            print(f"    Kind: {segment.kind}")
            print(f"    Context: {segment.ctx}")
            print(f"    Text: {segment.text[:100]!r}")
            print(f"    Should translate: {processor.should_translate_text(segment.text, 'auto')}")
            print()

        # Step 2: fabricate a translation for every translatable segment,
        # keyed by (target language, source text) like the real pipeline.
        sample_translation_map = {}
        target_languages = ['vi', 'en']

        for target_lang in target_languages:
            for segment in segments:
                if not processor.should_translate_text(segment.text, 'auto'):
                    continue
                map_key = (target_lang, segment.text)
                sample_translation_map[map_key] = f"[TRANSLATED_{target_lang.upper()}] {segment.text}"

        print(f"Built translation map with {len(sample_translation_map)} entries:")
        for (lang_code, source_text), translated_text in list(sample_translation_map.items())[:5]:
            print(f"  {lang_code} | {source_text[:50]!r} -> {translated_text[:50]!r}")
        print()

        # Step 3: run the insertion step against a scratch output file.
        output_path = str(Path(file_path).parent / "debug_translated.docx")
        print(f"Testing translation insertion to: {output_path}")

        ok_count, skip_count = processor.insert_docx_translations(
            file_path=file_path,
            segments=segments,
            translation_map=sample_translation_map,
            target_languages=target_languages,
            output_path=output_path,
        )

        print(f"Translation insertion result: {ok_count} OK, {skip_count} skipped")

        if Path(output_path).exists():
            print(f"SUCCESS: Output file created with size {Path(output_path).stat().st_size} bytes")
        else:
            print("ERROR: Output file was not created")

    except Exception as e:
        # Debug tool: surface the failure and its traceback, never crash.
        print(f"ERROR during processing: {str(e)}")
        import traceback
        traceback.print_exc()
def check_jobs():
    """Inspect the most recent translation jobs and debug their files.

    Loads the Flask app, lists up to five recent jobs with their status,
    API usage and produced files, and re-runs the DOCX debug pass on any
    completed job whose source file is still on disk.

    Returns the list of jobs examined (empty list on failure).
    """
    try:
        from app import create_app
        from app.models.job import TranslationJob

        flask_app = create_app()
        with flask_app.app_context():
            # Newest jobs first, capped at five.
            recent_jobs = (TranslationJob.query
                           .order_by(TranslationJob.created_at.desc())
                           .limit(5)
                           .all())

            print(f"\n=== Found {len(recent_jobs)} recent jobs ===")
            for job in recent_jobs:
                print(f"Job {job.job_uuid}: {job.original_filename}")
                print(f"  Status: {job.status}")
                print(f"  File path: {job.file_path}")
                source_state = Path(job.file_path).exists() if job.file_path else 'N/A'
                print(f"  File exists: {source_state}")
                print(f"  Target languages: {job.target_languages}")
                print(f"  Total tokens: {job.total_tokens}")
                print(f"  Total cost: {job.total_cost}")

                # Per-job API usage breakdown.
                from app.models.stats import APIUsageStats
                usage_rows = APIUsageStats.query.filter_by(job_id=job.id).all()
                print(f"  API calls made: {len(usage_rows)}")
                for usage in usage_rows[:3]:  # Show first 3 calls
                    print(f"    - {usage.api_endpoint}: {usage.total_tokens} tokens, ${usage.cost:.4f}, success: {usage.success}")
                    if not usage.success:
                        print(f"      Error: {usage.error_message}")

                # Only completed jobs with a surviving source file are worth
                # re-running through the DOCX debug pass.
                if job.file_path and Path(job.file_path).exists() and job.status == 'COMPLETED':
                    print(f"  >>> Debugging COMPLETED job file: {job.file_path}")
                    debug_docx_processing(job.file_path)

                # Files the job claims to have produced.
                produced_files = job.get_translated_files()
                print(f"  >>> Found {len(produced_files)} translated files:")
                for produced in produced_files:
                    print(f"    - {produced.filename} ({produced.language_code}) - Size: {produced.file_size} bytes")
                    if Path(produced.file_path).exists():
                        print(f"      File exists: {produced.file_path}")
                    else:
                        print(f"      File MISSING: {produced.file_path}")
                print()

        return recent_jobs

    except Exception as e:
        # Debug tool: report the failure and keep a usable return value.
        print(f"Error checking jobs: {str(e)}")
        import traceback
        traceback.print_exc()
        return []
if __name__ == "__main__":
    cli_args = sys.argv[1:]
    if cli_args:
        # A file path was supplied: debug that specific DOCX file.
        debug_docx_processing(cli_args[0])
    else:
        # No arguments: inspect the most recent jobs instead.
        check_jobs()