feat: add translated PDF format selection (layout/reflow)

- Add generate_translated_layout_pdf() method for layout-preserving translated PDFs
- Add generate_translated_pdf() method for reflow translated PDFs
- Update translate router to accept format parameter (layout/reflow)
- Update frontend with dropdown to select translated PDF format
- Fix reflow PDF table cell extraction from content dict
- Add embedded images handling in reflow PDF tables
- Archive improve-translated-text-fitting openspec proposal

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
egg
2025-12-03 10:10:28 +08:00
parent 0dcea4a7e7
commit 08adf3d01d
15 changed files with 1384 additions and 1222 deletions

View File

@@ -507,16 +507,18 @@ async def delete_translation(
async def download_translated_pdf(
task_id: str,
lang: str = Query(..., description="Target language code"),
format: str = Query("reflow", description="PDF format: 'layout' or 'reflow'"),
db: Session = Depends(get_db),
current_user: User = Depends(get_current_user)
):
"""
Download a translated PDF with layout preservation.
Download a translated PDF.
- **task_id**: Task UUID
- **lang**: Target language code (e.g., 'en', 'ja')
- **format**: PDF format - 'layout' (preserves positions with text wrapping) or 'reflow' (flowing layout)
Returns PDF file with translated content preserving original layout.
Returns PDF file with translated content.
"""
from app.services.pdf_generator_service import pdf_generator_service
from app.services.translation_service import list_available_translations
@@ -587,26 +589,37 @@ async def download_translated_pdf(
detail="Invalid translation file format"
)
# Select layout mode (case-insensitive); any value other than 'layout' falls back to reflow
use_layout = format.lower() == 'layout'
# Generate translated PDF to temp file
output_filename = f"{task_id}_translated_{lang}.pdf"
format_suffix = '_layout' if use_layout else '_reflow'
output_filename = f"{task_id}_translated_{lang}{format_suffix}.pdf"
with tempfile.NamedTemporaryFile(suffix='.pdf', delete=False) as tmp_file:
output_path = Path(tmp_file.name)
try:
# Get source file path for images if available
source_file_path = None
if task.files and len(task.files) > 0:
stored_path = task.files[0].stored_path
if stored_path and Path(stored_path).exists():
source_file_path = Path(stored_path)
# Use result_dir as image source (contains extracted images)
image_dir = result_json_path.parent
success = pdf_generator_service.generate_translated_pdf(
result_json_path=result_json_path,
translation_json_path=translation_file,
output_path=output_path,
source_file_path=source_file_path
)
# Choose PDF generation method based on format
if use_layout:
# Layout mode: preserve original positions with text wrapping
success = pdf_generator_service.generate_translated_layout_pdf(
result_json_path=result_json_path,
translation_json_path=translation_file,
output_path=output_path,
source_file_path=image_dir
)
else:
# Reflow mode: flowing layout
success = pdf_generator_service.generate_translated_pdf(
result_json_path=result_json_path,
translation_json_path=translation_file,
output_path=output_path,
source_file_path=image_dir
)
if not success:
raise HTTPException(