2ND
This commit is contained in:
143
debug_translation.py
Normal file
143
debug_translation.py
Normal file
@@ -0,0 +1,143 @@
|
||||
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Debug script to trace translation file generation issue
"""

import sys
import os

# Fix encoding for Windows console.
# BUG FIX: the original compared the encoding name case-sensitively
# ('UTF-8' would not match 'utf-8') and called reconfigure() unguarded,
# which raises AttributeError when the stream has been replaced (e.g.
# redirected output). Normalize the name and guard the call.
for _stream in (sys.stdout, sys.stderr):
    if (_stream.encoding or '').lower() != 'utf-8' and hasattr(_stream, 'reconfigure'):
        _stream.reconfigure(encoding='utf-8')

# Make the bundled 'app' package importable when running from the repo root.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'app'))

from pathlib import Path
from app.services.document_processor import DocumentProcessor
def debug_docx_processing(file_path):
    """Trace DOCX processing end to end to show why translations may not be inserted.

    Prints every extracted segment, builds a fake translation map, runs the
    real insertion routine against it, and reports whether an output file
    actually appeared on disk.
    """
    print(f"=== Debugging DOCX file: {file_path} ===")

    # Guard clause: nothing to debug without an input file.
    if not Path(file_path).exists():
        print(f"ERROR: File does not exist: {file_path}")
        return

    processor = DocumentProcessor()

    try:
        # Step 1: extract and dump every segment the processor finds.
        segments = processor.extract_docx_segments(file_path)
        print(f"Extracted {len(segments)} segments:")

        for idx, segment in enumerate(segments, start=1):
            print(f"  Segment {idx}:")
            print(f"    Kind: {segment.kind}")
            print(f"    Context: {segment.ctx}")
            print(f"    Text: {repr(segment.text[:100])}")
            print(f"    Should translate: {processor.should_translate_text(segment.text, 'auto')}")
            print()

        # Step 2: fabricate a translation for every translatable segment,
        # keyed by (language, source text) exactly like the real pipeline.
        target_languages = ['vi', 'en']
        translation_map = {
            (lang, segment.text): f"[TRANSLATED_{lang.upper()}] {segment.text}"
            for lang in target_languages
            for segment in segments
            if processor.should_translate_text(segment.text, 'auto')
        }

        print(f"Built translation map with {len(translation_map)} entries:")
        # Preview only the first few entries to keep the output readable.
        for (lang, src_text), translated in list(translation_map.items())[:5]:
            print(f"  {lang} | {repr(src_text[:50])} -> {repr(translated[:50])}")
        print()

        # Step 3: run the real insertion routine against the fake map.
        output_path = str(Path(file_path).parent / "debug_translated.docx")
        print(f"Testing translation insertion to: {output_path}")

        ok_count, skip_count = processor.insert_docx_translations(
            file_path=file_path,
            segments=segments,
            translation_map=translation_map,
            target_languages=target_languages,
            output_path=output_path
        )

        print(f"Translation insertion result: {ok_count} OK, {skip_count} skipped")

        # Step 4: confirm the output file actually landed on disk.
        if Path(output_path).exists():
            print(f"SUCCESS: Output file created with size {Path(output_path).stat().st_size} bytes")
        else:
            print("ERROR: Output file was not created")

    except Exception as e:
        # Best-effort debug tool: report the failure instead of crashing.
        print(f"ERROR during processing: {str(e)}")
        import traceback
        traceback.print_exc()
def check_jobs():
    """Inspect the five most recent translation jobs and debug their files.

    Boots the Flask app, prints status/cost/API-usage details for each job,
    re-runs the DOCX debug pipeline on any COMPLETED job whose source file
    still exists, and lists the registered translated files with
    on-disk existence checks.

    Returns:
        list: The jobs that were inspected, or [] on any error.
    """
    try:
        # Lazy imports: these require a configured Flask application, which
        # file-mode debugging (debug_docx_processing) does not need.
        from app import create_app
        from app.models.job import TranslationJob

        app = create_app()
        with app.app_context():
            # Check all recent jobs (newest first, capped at 5).
            all_jobs = TranslationJob.query.order_by(TranslationJob.created_at.desc()).limit(5).all()

            print(f"\n=== Found {len(all_jobs)} recent jobs ===")
            for job in all_jobs:
                print(f"Job {job.job_uuid}: {job.original_filename}")
                print(f"  Status: {job.status}")
                print(f"  File path: {job.file_path}")
                print(f"  File exists: {Path(job.file_path).exists() if job.file_path else 'N/A'}")
                print(f"  Target languages: {job.target_languages}")
                print(f"  Total tokens: {job.total_tokens}")
                print(f"  Total cost: {job.total_cost}")

                # Check API usage stats recorded against this job.
                from app.models.stats import APIUsageStats
                api_stats = APIUsageStats.query.filter_by(job_id=job.id).all()
                print(f"  API calls made: {len(api_stats)}")
                for stat in api_stats[:3]:  # Show first 3 calls
                    print(f"    - {stat.api_endpoint}: {stat.total_tokens} tokens, ${stat.cost:.4f}, success: {stat.success}")
                    if not stat.success:
                        print(f"      Error: {stat.error_message}")

                # Re-run the DOCX debug pipeline on completed jobs whose
                # source file is still present on disk.
                if job.file_path and Path(job.file_path).exists() and job.status == 'COMPLETED':
                    print(f"  >>> Debugging COMPLETED job file: {job.file_path}")
                    debug_docx_processing(job.file_path)

                # Check translated files registered for this job.
                translated_files = job.get_translated_files()
                print(f"  >>> Found {len(translated_files)} translated files:")
                for tf in translated_files:
                    print(f"    - {tf.filename} ({tf.language_code}) - Size: {tf.file_size} bytes")
                    if Path(tf.file_path).exists():
                        print(f"      File exists: {tf.file_path}")
                    else:
                        print(f"      File MISSING: {tf.file_path}")
                print()

            return all_jobs

    except Exception as e:
        # Best-effort debug tool: report and return an empty result rather
        # than crashing the script.
        print(f"Error checking jobs: {str(e)}")
        import traceback
        traceback.print_exc()
        return []
if __name__ == "__main__":
    # With a path argument: debug that specific file directly.
    # With no arguments: inspect the most recent jobs instead.
    if len(sys.argv) < 2:
        check_jobs()
    else:
        debug_docx_processing(sys.argv[1])
Reference in New Issue
Block a user