feat: add translated PDF export with layout preservation

Adds the ability to download translated documents as PDF files while
preserving the original document layout. Key changes:

- Add apply_translations() function to merge translation JSON with UnifiedDocument
- Add generate_translated_pdf() method to PDFGeneratorService
- Add POST /api/v2/translate/{task_id}/pdf endpoint
- Add downloadTranslatedPdf() method and PDF button in frontend
- Add comprehensive unit tests (52 tests: merge, PDF generation, API endpoints)
- Archive add-translated-pdf-export proposal

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
egg
2025-12-02 12:33:31 +08:00
parent 8d9b69ba93
commit a07aad96b3
15 changed files with 2663 additions and 2 deletions

View File

@@ -3601,6 +3601,100 @@ class PDFGeneratorService:
except Exception as e:
logger.error(f"Failed to draw image element {element.element_id}: {e}")
def generate_translated_pdf(
self,
result_json_path: Path,
translation_json_path: Path,
output_path: Path,
source_file_path: Optional[Path] = None
) -> bool:
"""
Generate layout-preserving PDF with translated content.
This method loads the original result JSON and translation JSON,
merges them to replace original content with translations, and
generates a PDF with the translated content at original positions.
Args:
result_json_path: Path to original result JSON file (UnifiedDocument format)
translation_json_path: Path to translation JSON file
output_path: Path to save generated translated PDF
source_file_path: Optional path to original source file
Returns:
True if successful, False otherwise
"""
import tempfile
try:
# Import apply_translations from translation service
from app.services.translation_service import apply_translations
# Load original result JSON
logger.info(f"Loading result JSON: {result_json_path}")
with open(result_json_path, 'r', encoding='utf-8') as f:
result_json = json.load(f)
# Load translation JSON
logger.info(f"Loading translation JSON: {translation_json_path}")
with open(translation_json_path, 'r', encoding='utf-8') as f:
translation_json = json.load(f)
# Extract translations dict from translation JSON
translations = translation_json.get('translations', {})
if not translations:
logger.warning("No translations found in translation JSON")
# Still generate PDF with original content as fallback
return self.generate_layout_pdf(
json_path=result_json_path,
output_path=output_path,
source_file_path=source_file_path
)
# Apply translations to result JSON
translated_doc = apply_translations(result_json, translations)
target_lang = translation_json.get('target_lang', 'unknown')
logger.info(
f"Generating translated PDF: {len(translations)} translations applied, "
f"target_lang={target_lang}"
)
# Write translated JSON to a temporary file and use existing generate_layout_pdf
with tempfile.NamedTemporaryFile(
mode='w',
suffix='_translated.json',
delete=False,
encoding='utf-8'
) as tmp_file:
json.dump(translated_doc, tmp_file, ensure_ascii=False, indent=2)
tmp_path = Path(tmp_file.name)
try:
# Use existing PDF generation with translated content
success = self.generate_layout_pdf(
json_path=tmp_path,
output_path=output_path,
source_file_path=source_file_path
)
return success
finally:
# Clean up temporary file
if tmp_path.exists():
tmp_path.unlink()
except FileNotFoundError as e:
logger.error(f"File not found: {e}")
return False
except json.JSONDecodeError as e:
logger.error(f"Invalid JSON: {e}")
return False
except Exception as e:
logger.error(f"Failed to generate translated PDF: {e}")
import traceback
traceback.print_exc()
return False
# Singleton instance
pdf_generator_service = PDFGeneratorService()