feat: add translated PDF export with layout preservation

Adds the ability to download translated documents as PDF files while
preserving the original document layout. Key changes:

- Add apply_translations() function to merge translation JSON with UnifiedDocument
- Add generate_translated_pdf() method to PDFGeneratorService
- Add POST /api/v2/translate/{task_id}/pdf endpoint
- Add downloadTranslatedPdf() method and PDF button in frontend
- Add comprehensive unit tests (52 tests: merge, PDF generation, API endpoints)
- Archive add-translated-pdf-export proposal

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
egg
2025-12-02 12:33:31 +08:00
parent 8d9b69ba93
commit a07aad96b3
15 changed files with 2663 additions and 2 deletions

View File

@@ -501,3 +501,139 @@ async def delete_translation(
logger.info(f"Deleted translation {lang} for task {task_id}")
return None
@router.post("/{task_id}/pdf")
async def download_translated_pdf(
task_id: str,
lang: str = Query(..., description="Target language code"),
db: Session = Depends(get_db),
current_user: User = Depends(get_current_user)
):
"""
Download a translated PDF with layout preservation.
- **task_id**: Task UUID
- **lang**: Target language code (e.g., 'en', 'ja')
Returns PDF file with translated content preserving original layout.
"""
from app.services.pdf_generator_service import pdf_generator_service
from app.services.translation_service import list_available_translations
import tempfile
# Verify task ownership
task = task_service.get_task_by_id(
db=db,
task_id=task_id,
user_id=current_user.id
)
if not task:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail="Task not found"
)
if not task.result_json_path:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail="OCR result not found"
)
result_json_path = Path(task.result_json_path)
if not result_json_path.exists():
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail="Result file not found"
)
# Find translation file
result_dir = result_json_path.parent
base_name = result_json_path.stem.replace('_result', '').replace('edit_', '')
translation_file = result_dir / f"{base_name}_translated_{lang}.json"
# Also try with edit_ prefix removed differently
if not translation_file.exists():
translation_file = result_dir / f"edit_translated_{lang}.json"
if not translation_file.exists():
# List available translations for error message
available = list_available_translations(result_dir)
if available:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail=f"Translation for language '{lang}' not found. Available translations: {', '.join(available)}"
)
else:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail=f"No translations found for this task. Please translate the document first."
)
# Check translation status in translation JSON
try:
with open(translation_file, 'r', encoding='utf-8') as f:
translation_data = json.load(f)
if not translation_data.get('translations'):
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail="Translation file is empty or incomplete"
)
except json.JSONDecodeError:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail="Invalid translation file format"
)
# Generate translated PDF to temp file
output_filename = f"{task_id}_translated_{lang}.pdf"
with tempfile.NamedTemporaryFile(suffix='.pdf', delete=False) as tmp_file:
output_path = Path(tmp_file.name)
try:
# Get source file path for images if available
source_file_path = None
if task.file_path and Path(task.file_path).exists():
source_file_path = Path(task.file_path)
success = pdf_generator_service.generate_translated_pdf(
result_json_path=result_json_path,
translation_json_path=translation_file,
output_path=output_path,
source_file_path=source_file_path
)
if not success:
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail="Failed to generate translated PDF"
)
logger.info(f"Generated translated PDF for task {task_id}, lang={lang}")
return FileResponse(
path=str(output_path),
filename=output_filename,
media_type="application/pdf",
headers={
"Content-Disposition": f'attachment; filename="{output_filename}"'
}
)
except HTTPException:
# Clean up temp file on HTTP errors
if output_path.exists():
output_path.unlink()
raise
except Exception as e:
# Clean up temp file on unexpected errors
if output_path.exists():
output_path.unlink()
logger.exception(f"Failed to generate translated PDF for task {task_id}")
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail=f"Failed to generate translated PDF: {str(e)}"
)