feat: optimize task file generation and add visualization download

Backend changes:
- Disable PP-Structure debug file generation by default
- Separate raw_ocr_regions.json generation from debug flag (critical file)
- Add visualization folder download endpoint as ZIP
- Add has_visualization field to TaskDetailResponse
- Stop generating Markdown files
- Save translated PDFs to task folder with caching

Frontend changes:
- Replace JSON/MD download buttons with PDF buttons in TaskHistoryPage
- Add visualization download button in TaskDetailPage
- Fix Processing page task switching issue (reset isNotFound)

Archives two OpenSpec proposals:
- optimize-task-files-and-visualization
- simplify-frontend-add-billing

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
egg
2025-12-12 19:11:50 +08:00
parent 65abd51d60
commit efa7e4175c
14 changed files with 534 additions and 97 deletions

View File

@@ -10,9 +10,11 @@ import shutil
import hashlib
from fastapi import APIRouter, Depends, HTTPException, status, Query, UploadFile, File, BackgroundTasks
from fastapi.responses import FileResponse
from fastapi.responses import FileResponse, StreamingResponse
from sqlalchemy.orm import Session
import json
import zipfile
import io
from datetime import datetime
from app.core.deps import get_db, get_current_user
@@ -413,8 +415,6 @@ async def get_task(
processing_track = None
if task.result_json_path:
try:
import json
from pathlib import Path
result_path = Path(task.result_json_path)
if result_path.exists():
with open(result_path) as f:
@@ -430,9 +430,18 @@ async def get_task(
except Exception:
pass # Silently ignore errors reading the result file
# Create response with processing_track
# Check for visualization folder (OCR Track only)
has_visualization = False
result_dir = Path(settings.result_dir) / task_id
visualization_dir = result_dir / "visualization"
if visualization_dir.exists() and visualization_dir.is_dir():
png_files = list(visualization_dir.glob("*.png"))
has_visualization = len(png_files) > 0
# Create response with processing_track and has_visualization
response = TaskDetailResponse.model_validate(task)
response.processing_track = processing_track
response.has_visualization = has_visualization
return response
@@ -1198,6 +1207,87 @@ async def download_unified(
)
@router.get("/{task_id}/download/visualization", summary="Download visualization images as ZIP")
async def download_visualization_zip(
    task_id: str,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """
    Download visualization images as ZIP file.

    Only available for OCR Track tasks with visualization folder.
    Returns a ZIP file containing all PNG images from the visualization folder.

    - **task_id**: Task UUID

    Error responses:

    - **404**: task not found, visualization folder missing, or no PNG images in it
    - **400**: task exists but is not completed
    - **500**: any unexpected failure while building the archive
    """
    # Size of each chunk handed to the streaming response.
    chunk_size = 64 * 1024

    try:
        # Look the task up scoped to the current user.
        task = task_service.get_task_by_id(
            db=db,
            task_id=task_id,
            user_id=current_user.id
        )

        if not task:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail="Task not found"
            )

        if task.status != TaskStatus.COMPLETED:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail="Task not completed"
            )

        # Check for visualization folder.
        # is_dir() is already False for a missing path, so no separate exists() check.
        visualization_dir = Path(settings.result_dir) / task_id / "visualization"
        if not visualization_dir.is_dir():
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail="Visualization folder not found. This feature is only available for OCR Track tasks."
            )

        # Collect PNG files; skip anything matching *.png that is not a regular
        # file (e.g. a directory) so only real images end up in the archive.
        png_files = sorted(
            entry for entry in visualization_dir.glob("*.png") if entry.is_file()
        )
        if not png_files:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail="No visualization images found"
            )

        # Create ZIP in memory.
        # NOTE(review): this is blocking file I/O inside an async endpoint;
        # consider offloading to a threadpool if visualization folders get large.
        zip_buffer = io.BytesIO()
        with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file:
            for png_file in png_files:
                # Store under the bare filename so the archive has a flat layout.
                zip_file.write(png_file, png_file.name)

        zip_buffer.seek(0)

        logger.info(f"Created visualization ZIP for task {task_id} with {len(png_files)} images")

        # Iterating a BytesIO directly yields "lines" split on b"\n", which
        # fragments binary data into many tiny chunks. Read fixed-size chunks
        # until read() returns b"" instead.
        return StreamingResponse(
            iter(lambda: zip_buffer.read(chunk_size), b""),
            media_type="application/zip",
            headers={
                "Content-Disposition": f"attachment; filename={task_id}_visualization.zip"
            }
        )

    except HTTPException:
        # Deliberate HTTP errors raised above pass through untouched.
        raise
    except Exception as e:
        logger.exception(f"Failed to download visualization for task {task_id}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to download: {str(e)}"
        )
# ===== Preprocessing Preview Endpoints =====
@router.post("/{task_id}/preview/preprocessing", response_model=PreprocessingPreviewResponse, summary="Preview preprocessing effect")