#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Debug script to trace translation file generation issue

Usage:
    python <this script> path/to/file.docx   # debug one specific DOCX file
    python <this script>                     # inspect the 5 most recent jobs
"""

import os
import sys
import traceback
from itertools import islice
from pathlib import Path


def _force_utf8(stream):
    """Switch *stream* to UTF-8 output when it supports reconfiguration.

    Streams that are missing (``None``) or that have been replaced by an
    object without ``reconfigure`` (for example a capture buffer) are left
    untouched instead of raising ``AttributeError`` at import time.
    """
    reconfigure = getattr(stream, 'reconfigure', None)
    if not callable(reconfigure):
        return
    encoding = (getattr(stream, 'encoding', None) or '').lower()
    if encoding not in ('utf-8', 'utf8'):
        reconfigure(encoding='utf-8')


# Fix encoding for Windows console
_force_utf8(sys.stdout)
_force_utf8(sys.stderr)

sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'app'))

from app.services.document_processor import DocumentProcessor  # noqa: E402


def _print_segments(processor, segments):
    """Print kind, context, a text preview and the translate decision per segment."""
    print(f"Extracted {len(segments)} segments:")
    for number, seg in enumerate(segments, 1):
        print(f" Segment {number}:")
        print(f" Kind: {seg.kind}")
        print(f" Context: {seg.ctx}")
        print(f" Text: {repr(seg.text[:100])}")
        print(f" Should translate: {processor.should_translate_text(seg.text, 'auto')}")
        print()


def _build_sample_translation_map(processor, segments, target_languages):
    """Return a fake ``{(language, source_text): translation}`` map.

    The translate decision is evaluated once per segment: the call is made
    with the fixed source language ``'auto'`` and never receives the target
    language, so repeating it for every target language is redundant.
    """
    translatable_texts = [
        seg.text
        for seg in segments
        if processor.should_translate_text(seg.text, 'auto')
    ]
    return {
        (target_lang, text): f"[TRANSLATED_{target_lang.upper()}] {text}"
        for target_lang in target_languages
        for text in translatable_texts
    }


def debug_docx_processing(file_path):
    """Debug DOCX processing to understand why translations aren't being inserted.

    Extracts the segments of ``file_path``, builds a simulated translation
    map for the languages ``vi`` and ``en``, asks the processor to insert
    those translations and reports whether the output file was produced.

    Args:
        file_path: Path of the DOCX file to inspect.

    Side effects:
        Writes ``debug_translated.docx`` next to the input file and prints a
        report to stdout. Processing errors are printed together with their
        traceback; they are not re-raised.
    """
    print(f"=== Debugging DOCX file: {file_path} ===")

    if not Path(file_path).exists():
        print(f"ERROR: File does not exist: {file_path}")
        return

    processor = DocumentProcessor()

    try:
        # Extract segments
        segments = processor.extract_docx_segments(file_path)
        _print_segments(processor, segments)

        # Simulate translation map
        target_languages = ['vi', 'en']
        sample_translation_map = _build_sample_translation_map(
            processor, segments, target_languages
        )

        print(f"Built translation map with {len(sample_translation_map)} entries:")
        # Only the first five entries are shown to keep the report short.
        for key, value in islice(sample_translation_map.items(), 5):
            print(f" {key[0]} | {repr(key[1][:50])} -> {repr(value[:50])}")
        print()

        # Test translation insertion
        output_path = str(Path(file_path).parent / "debug_translated.docx")
        print(f"Testing translation insertion to: {output_path}")

        ok_count, skip_count = processor.insert_docx_translations(
            file_path=file_path,
            segments=segments,
            translation_map=sample_translation_map,
            target_languages=target_languages,
            output_path=output_path
        )

        print(f"Translation insertion result: {ok_count} OK, {skip_count} skipped")

        if Path(output_path).exists():
            print(f"SUCCESS: Output file created with size {Path(output_path).stat().st_size} bytes")
        else:
            print("ERROR: Output file was not created")

    except Exception as e:
        # Top-level boundary of a diagnostic tool: report everything, keep going.
        print(f"ERROR during processing: {e}")
        traceback.print_exc()


def _print_translated_files(job):
    """List the translated files recorded for *job* and whether each exists on disk."""
    translated_files = job.get_translated_files()
    print(f" >>> Found {len(translated_files)} translated files:")
    for tf in translated_files:
        print(f" - {tf.filename} ({tf.language_code}) - Size: {tf.file_size} bytes")
        # Guard against a missing path the same way job.file_path is guarded.
        if tf.file_path and Path(tf.file_path).exists():
            print(f" File exists: {tf.file_path}")
        else:
            print(f" File MISSING: {tf.file_path}")


def check_jobs():
    """Check for jobs and debug them.

    Prints a summary of the five most recently created translation jobs,
    the first three API usage records of each, and, for COMPLETED jobs whose
    source file still exists, runs ``debug_docx_processing`` on that file and
    lists the translated files.

    Returns:
        The list of inspected jobs, or an empty list when anything fails
        (the error and its traceback are printed).
    """
    try:
        # Imported lazily so an import failure is reported by the handler below.
        from app import create_app
        from app.models.job import TranslationJob
        from app.models.stats import APIUsageStats

        app = create_app()

        with app.app_context():
            # Check all recent jobs
            all_jobs = (
                TranslationJob.query
                .order_by(TranslationJob.created_at.desc())
                .limit(5)
                .all()
            )

            print(f"\n=== Found {len(all_jobs)} recent jobs ===")
            for job in all_jobs:
                print(f"Job {job.job_uuid}: {job.original_filename}")
                print(f" Status: {job.status}")
                print(f" File path: {job.file_path}")
                print(f" File exists: {Path(job.file_path).exists() if job.file_path else 'N/A'}")
                print(f" Target languages: {job.target_languages}")
                print(f" Total tokens: {job.total_tokens}")
                print(f" Total cost: {job.total_cost}")

                # Check API usage stats
                api_stats = APIUsageStats.query.filter_by(job_id=job.id).all()
                print(f" API calls made: {len(api_stats)}")
                for stat in api_stats[:3]:  # Show first 3 calls
                    print(f" - {stat.api_endpoint}: {stat.total_tokens} tokens, ${stat.cost:.4f}, success: {stat.success}")
                    if not stat.success:
                        print(f" Error: {stat.error_message}")

                if job.file_path and Path(job.file_path).exists() and job.status == 'COMPLETED':
                    print(f" >>> Debugging COMPLETED job file: {job.file_path}")
                    debug_docx_processing(job.file_path)

                    # Check translated files
                    _print_translated_files(job)
                print()

            return all_jobs

    except Exception as e:
        # Top-level boundary of a diagnostic tool: report and fall back to "no jobs".
        print(f"Error checking jobs: {e}")
        traceback.print_exc()
        return []


def main():
    """Debug the file named on the command line, or the recent jobs when none is given."""
    if len(sys.argv) > 1:
        # Debug specific file
        debug_docx_processing(sys.argv[1])
    else:
        # Debug recent jobs
        check_jobs()


if __name__ == "__main__":
    main()