feat: optimize task file generation and add visualization download

Backend changes:
- Disable PP-Structure debug file generation by default
- Decouple raw_ocr_regions.json generation from the debug flag (critical file; still generated when debug output is disabled)
- Add visualization folder download endpoint as ZIP
- Add has_visualization field to TaskDetailResponse
- Stop generating Markdown files
- Save translated PDFs to task folder with caching

Frontend changes:
- Replace JSON/MD download buttons with PDF buttons in TaskHistoryPage
- Add visualization download button in TaskDetailPage
- Fix task switching issue on the Processing page by resetting isNotFound

Archive two OpenSpec proposals:
- optimize-task-files-and-visualization
- simplify-frontend-add-billing

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
egg
2025-12-12 19:11:50 +08:00
parent 65abd51d60
commit efa7e4175c
14 changed files with 534 additions and 97 deletions

View File

@@ -637,17 +637,31 @@ async def download_translated_pdf(
# Validate format parameter
use_layout = format.lower() == 'layout'
# Generate translated PDF to temp file
# Generate translated PDF to task result folder (not temp)
# Use base name from result JSON (e.g., "scan" or "edit")
result_dir = result_json_path.parent
base_name = result_json_path.stem.replace('_result', '')
format_suffix = '_layout' if use_layout else '_reflow'
output_filename = f"{task_id}_translated_{lang}{format_suffix}.pdf"
output_filename = f"{base_name}_translated_{lang}{format_suffix}.pdf"
output_path = result_dir / output_filename
with tempfile.NamedTemporaryFile(suffix='.pdf', delete=False) as tmp_file:
output_path = Path(tmp_file.name)
# Check if PDF already exists and is newer than translation JSON
if output_path.exists():
pdf_mtime = output_path.stat().st_mtime
translation_mtime = translation_file.stat().st_mtime
if pdf_mtime >= translation_mtime:
# PDF is up-to-date, serve directly
logger.info(f"Serving cached translated PDF: {output_path}")
return FileResponse(
path=str(output_path),
filename=output_filename,
media_type="application/pdf",
headers={
"Content-Disposition": f'attachment; filename="{output_filename}"'
}
)
try:
# Use result_dir as image source (contains extracted images)
image_dir = result_json_path.parent
# Choose PDF generation method based on format
if use_layout:
# Layout mode: preserve original positions with text wrapping
@@ -655,7 +669,7 @@ async def download_translated_pdf(
result_json_path=result_json_path,
translation_json_path=translation_file,
output_path=output_path,
source_file_path=image_dir
source_file_path=result_dir
)
else:
# Reflow mode: flowing layout
@@ -663,7 +677,7 @@ async def download_translated_pdf(
result_json_path=result_json_path,
translation_json_path=translation_file,
output_path=output_path,
source_file_path=image_dir
source_file_path=result_dir
)
if not success:
@@ -672,7 +686,7 @@ async def download_translated_pdf(
detail="Failed to generate translated PDF"
)
logger.info(f"Generated translated PDF for task {task_id}, lang={lang}")
logger.info(f"Generated translated PDF: {output_path}")
return FileResponse(
path=str(output_path),
@@ -684,14 +698,8 @@ async def download_translated_pdf(
)
except HTTPException:
# Clean up temp file on HTTP errors
if output_path.exists():
output_path.unlink()
raise
except Exception as e:
# Clean up temp file on unexpected errors
if output_path.exists():
output_path.unlink()
logger.exception(f"Failed to generate translated PDF for task {task_id}")
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,