feat: add translated PDF format selection (layout/reflow)
- Add generate_translated_layout_pdf() method for layout-preserving translated PDFs
- Add generate_translated_pdf() method for reflow translated PDFs
- Update translate router to accept format parameter (layout/reflow)
- Update frontend with dropdown to select translated PDF format
- Fix reflow PDF table cell extraction from content dict
- Add embedded images handling in reflow PDF tables
- Archive improve-translated-text-fitting openspec proposal

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -645,16 +645,22 @@ async def download_markdown(
|
||||
@router.get("/{task_id}/download/pdf", summary="Download PDF result")
|
||||
async def download_pdf(
|
||||
task_id: str,
|
||||
format: Optional[str] = Query(
|
||||
None,
|
||||
description="PDF format: 'layout' (default) preserves original coordinates, 'reflow' provides flowing text with consistent font sizes"
|
||||
),
|
||||
db: Session = Depends(get_db),
|
||||
current_user: User = Depends(get_current_user)
|
||||
):
|
||||
"""
|
||||
Download task result as layout-preserving PDF file
|
||||
Download task result as PDF file
|
||||
|
||||
- **task_id**: Task UUID
|
||||
- **format**: Optional format parameter
|
||||
- `layout` (default): Preserves original document layout and coordinates
|
||||
- `reflow`: Flowing text with consistent font sizes for better readability
|
||||
|
||||
Returns a PDF that preserves the original document layout using OCR results.
|
||||
The PDF is generated from OCR JSON data and cached for subsequent requests.
|
||||
Returns a PDF generated from OCR JSON data. The PDF is cached for subsequent requests.
|
||||
"""
|
||||
from pathlib import Path
|
||||
from app.services.pdf_generator_service import pdf_generator_service
|
||||
@@ -679,12 +685,15 @@ async def download_pdf(
|
||||
detail="Task is not completed yet. Please wait for OCR processing to finish."
|
||||
)
|
||||
|
||||
# Check if PDF path is stored in database
|
||||
if task.result_pdf_path and Path(task.result_pdf_path).exists():
|
||||
# Determine format (default to layout)
|
||||
use_reflow = format and format.lower() == "reflow"
|
||||
|
||||
# Check if PDF path is stored in database (only for layout format, as reflow is always generated)
|
||||
if not use_reflow and task.result_pdf_path and Path(task.result_pdf_path).exists():
|
||||
pdf_path = Path(task.result_pdf_path)
|
||||
logger.info(f"Using pre-generated PDF from database: {pdf_path.name}")
|
||||
else:
|
||||
# Fallback: Try to generate PDF on-demand
|
||||
# Generate PDF on-demand
|
||||
result_dir = Path(settings.result_dir) / task_id
|
||||
|
||||
# Use stored JSON path or construct it
|
||||
@@ -700,13 +709,14 @@ async def download_pdf(
|
||||
)
|
||||
json_path = json_files[0]
|
||||
|
||||
# Construct PDF path based on JSON filename
|
||||
pdf_filename = json_path.stem.replace("_result", "_layout") + ".pdf"
|
||||
# Construct PDF path based on JSON filename and format
|
||||
format_suffix = "_reflow" if use_reflow else "_layout"
|
||||
pdf_filename = json_path.stem.replace("_result", format_suffix) + ".pdf"
|
||||
pdf_path = result_dir / pdf_filename
|
||||
|
||||
# Generate PDF if it doesn't exist
|
||||
if not pdf_path.exists():
|
||||
logger.info(f"Generating layout-preserving PDF for task {task_id}")
|
||||
logger.info(f"Generating {'reflow' if use_reflow else 'layout-preserving'} PDF for task {task_id}")
|
||||
|
||||
# Get source file path if available
|
||||
source_file = None
|
||||
@@ -714,12 +724,20 @@ async def download_pdf(
|
||||
if task_file and task_file.stored_path and Path(task_file.stored_path).exists():
|
||||
source_file = Path(task_file.stored_path)
|
||||
|
||||
# Generate PDF
|
||||
success = pdf_generator_service.generate_layout_pdf(
|
||||
json_path=json_path,
|
||||
output_path=pdf_path,
|
||||
source_file_path=source_file
|
||||
)
|
||||
# Generate PDF based on format
|
||||
if use_reflow:
|
||||
# For reflow, pass result_dir as source_file_path (contains extracted images)
|
||||
success = pdf_generator_service.generate_reflow_pdf(
|
||||
json_path=json_path,
|
||||
output_path=pdf_path,
|
||||
source_file_path=result_dir
|
||||
)
|
||||
else:
|
||||
success = pdf_generator_service.generate_layout_pdf(
|
||||
json_path=json_path,
|
||||
output_path=pdf_path,
|
||||
source_file_path=source_file
|
||||
)
|
||||
|
||||
if not success:
|
||||
raise HTTPException(
|
||||
@@ -743,8 +761,10 @@ async def download_pdf(
|
||||
detail=error_msg
|
||||
)
|
||||
|
||||
# Return file
|
||||
filename = f"{task.filename or task_id}_result.pdf"
|
||||
# Return file with format indication in filename
|
||||
base_name = task.filename or task_id
|
||||
format_suffix = "_reflow" if use_reflow else "_layout"
|
||||
filename = f"{base_name}{format_suffix}.pdf"
|
||||
return FileResponse(
|
||||
path=str(pdf_path),
|
||||
filename=filename,
|
||||
|
||||
@@ -507,16 +507,18 @@ async def delete_translation(
|
||||
async def download_translated_pdf(
|
||||
task_id: str,
|
||||
lang: str = Query(..., description="Target language code"),
|
||||
format: str = Query("reflow", description="PDF format: 'layout' or 'reflow'"),
|
||||
db: Session = Depends(get_db),
|
||||
current_user: User = Depends(get_current_user)
|
||||
):
|
||||
"""
|
||||
Download a translated PDF with layout preservation.
|
||||
Download a translated PDF.
|
||||
|
||||
- **task_id**: Task UUID
|
||||
- **lang**: Target language code (e.g., 'en', 'ja')
|
||||
- **format**: PDF format - 'layout' (preserves positions with text wrapping) or 'reflow' (flowing layout)
|
||||
|
||||
Returns PDF file with translated content preserving original layout.
|
||||
Returns PDF file with translated content.
|
||||
"""
|
||||
from app.services.pdf_generator_service import pdf_generator_service
|
||||
from app.services.translation_service import list_available_translations
|
||||
@@ -587,26 +589,37 @@ async def download_translated_pdf(
|
||||
detail="Invalid translation file format"
|
||||
)
|
||||
|
||||
# Determine format: only 'layout' (case-insensitive) selects layout mode; any other value falls back to reflow
|
||||
use_layout = format.lower() == 'layout'
|
||||
|
||||
# Generate translated PDF to temp file
|
||||
output_filename = f"{task_id}_translated_{lang}.pdf"
|
||||
format_suffix = '_layout' if use_layout else '_reflow'
|
||||
output_filename = f"{task_id}_translated_{lang}{format_suffix}.pdf"
|
||||
|
||||
with tempfile.NamedTemporaryFile(suffix='.pdf', delete=False) as tmp_file:
|
||||
output_path = Path(tmp_file.name)
|
||||
|
||||
try:
|
||||
# Get source file path for images if available
|
||||
source_file_path = None
|
||||
if task.files and len(task.files) > 0:
|
||||
stored_path = task.files[0].stored_path
|
||||
if stored_path and Path(stored_path).exists():
|
||||
source_file_path = Path(stored_path)
|
||||
# Use result_dir as image source (contains extracted images)
|
||||
image_dir = result_json_path.parent
|
||||
|
||||
success = pdf_generator_service.generate_translated_pdf(
|
||||
result_json_path=result_json_path,
|
||||
translation_json_path=translation_file,
|
||||
output_path=output_path,
|
||||
source_file_path=source_file_path
|
||||
)
|
||||
# Choose PDF generation method based on format
|
||||
if use_layout:
|
||||
# Layout mode: preserve original positions with text wrapping
|
||||
success = pdf_generator_service.generate_translated_layout_pdf(
|
||||
result_json_path=result_json_path,
|
||||
translation_json_path=translation_file,
|
||||
output_path=output_path,
|
||||
source_file_path=image_dir
|
||||
)
|
||||
else:
|
||||
# Reflow mode: flowing layout
|
||||
success = pdf_generator_service.generate_translated_pdf(
|
||||
result_json_path=result_json_path,
|
||||
translation_json_path=translation_file,
|
||||
output_path=output_path,
|
||||
source_file_path=image_dir
|
||||
)
|
||||
|
||||
if not success:
|
||||
raise HTTPException(
|
||||
|
||||
Reference in New Issue
Block a user