feat: optimize task file generation and add visualization download
Backend changes: - Disable PP-Structure debug file generation by default - Separate raw_ocr_regions.json generation from debug flag (critical file) - Add visualization folder download endpoint as ZIP - Add has_visualization field to TaskDetailResponse - Stop generating Markdown files - Save translated PDFs to task folder with caching Frontend changes: - Replace JSON/MD download buttons with PDF buttons in TaskHistoryPage - Add visualization download button in TaskDetailPage - Fix Processing page task switching issue (reset isNotFound) Archives two OpenSpec proposals: - optimize-task-files-and-visualization - simplify-frontend-add-billing 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -376,8 +376,8 @@ class Settings(BaseSettings):
|
||||
|
||||
# ===== Debug Configuration =====
|
||||
# Enable debug outputs for PP-StructureV3 analysis
|
||||
pp_structure_debug_enabled: bool = Field(default=True) # Save debug files for PP-StructureV3
|
||||
pp_structure_debug_visualization: bool = Field(default=True) # Generate visualization images
|
||||
pp_structure_debug_enabled: bool = Field(default=False) # Save debug files for PP-StructureV3
|
||||
pp_structure_debug_visualization: bool = Field(default=False) # Generate visualization images
|
||||
|
||||
# Performance tuning
|
||||
use_fp16_inference: bool = Field(default=False) # Half-precision (if supported)
|
||||
|
||||
@@ -10,9 +10,11 @@ import shutil
|
||||
import hashlib
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, status, Query, UploadFile, File, BackgroundTasks
|
||||
from fastapi.responses import FileResponse
|
||||
from fastapi.responses import FileResponse, StreamingResponse
|
||||
from sqlalchemy.orm import Session
|
||||
import json
|
||||
import zipfile
|
||||
import io
|
||||
from datetime import datetime
|
||||
|
||||
from app.core.deps import get_db, get_current_user
|
||||
@@ -413,8 +415,6 @@ async def get_task(
|
||||
processing_track = None
|
||||
if task.result_json_path:
|
||||
try:
|
||||
import json
|
||||
from pathlib import Path
|
||||
result_path = Path(task.result_json_path)
|
||||
if result_path.exists():
|
||||
with open(result_path) as f:
|
||||
@@ -430,9 +430,18 @@ async def get_task(
|
||||
except Exception:
|
||||
pass # Silently ignore errors reading the result file
|
||||
|
||||
# Create response with processing_track
|
||||
# Check for visualization folder (OCR Track only)
|
||||
has_visualization = False
|
||||
result_dir = Path(settings.result_dir) / task_id
|
||||
visualization_dir = result_dir / "visualization"
|
||||
if visualization_dir.exists() and visualization_dir.is_dir():
|
||||
png_files = list(visualization_dir.glob("*.png"))
|
||||
has_visualization = len(png_files) > 0
|
||||
|
||||
# Create response with processing_track and has_visualization
|
||||
response = TaskDetailResponse.model_validate(task)
|
||||
response.processing_track = processing_track
|
||||
response.has_visualization = has_visualization
|
||||
return response
|
||||
|
||||
|
||||
@@ -1198,6 +1207,87 @@ async def download_unified(
|
||||
)
|
||||
|
||||
|
||||
@router.get("/{task_id}/download/visualization", summary="Download visualization images as ZIP")
async def download_visualization_zip(
    task_id: str,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """
    Download visualization images as a ZIP file.

    Only available for OCR Track tasks that produced a ``visualization``
    folder under the task's result directory. The ZIP contains every PNG
    from that folder (flat, filenames only), streamed from an in-memory
    buffer.

    - **task_id**: Task UUID

    Raises:
        HTTPException 404: task not found, visualization folder missing,
            or no PNG images present.
        HTTPException 400: task is not in COMPLETED state.
        HTTPException 500: unexpected failure while building the archive.
    """
    try:
        # Ownership check is enforced by scoping the lookup to the current user.
        task = task_service.get_task_by_id(
            db=db,
            task_id=task_id,
            user_id=current_user.id
        )

        if not task:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail="Task not found"
            )

        if task.status != TaskStatus.COMPLETED:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail="Task not completed"
            )

        # Visualization images live in <result_dir>/<task_id>/visualization/.
        result_dir = Path(settings.result_dir) / task_id
        visualization_dir = result_dir / "visualization"

        if not visualization_dir.exists() or not visualization_dir.is_dir():
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail="Visualization folder not found. This feature is only available for OCR Track tasks."
            )

        png_files = list(visualization_dir.glob("*.png"))
        if not png_files:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail="No visualization images found"
            )

        # Build the ZIP in memory; sorted order gives deterministic archives.
        zip_buffer = io.BytesIO()
        with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file:
            for png_file in sorted(png_files):
                # Store with just the filename (no directory components).
                zip_file.write(png_file, png_file.name)

        zip_buffer.seek(0)

        logger.info(f"Created visualization ZIP for task {task_id} with {len(png_files)} images")

        return StreamingResponse(
            zip_buffer,
            media_type="application/zip",
            headers={
                # Quote the filename for RFC 6266 compliance, consistent with
                # the other download endpoints in this router.
                "Content-Disposition": f'attachment; filename="{task_id}_visualization.zip"'
            }
        )

    except HTTPException:
        raise
    except Exception as e:
        logger.exception(f"Failed to download visualization for task {task_id}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to download: {str(e)}"
        )
|
||||
|
||||
|
||||
# ===== Preprocessing Preview Endpoints =====
|
||||
|
||||
@router.post("/{task_id}/preview/preprocessing", response_model=PreprocessingPreviewResponse, summary="Preview preprocessing effect")
|
||||
|
||||
@@ -637,17 +637,31 @@ async def download_translated_pdf(
|
||||
# Validate format parameter
|
||||
use_layout = format.lower() == 'layout'
|
||||
|
||||
# Generate translated PDF to temp file
|
||||
# Generate translated PDF to task result folder (not temp)
|
||||
# Use base name from result JSON (e.g., "scan" or "edit")
|
||||
result_dir = result_json_path.parent
|
||||
base_name = result_json_path.stem.replace('_result', '')
|
||||
format_suffix = '_layout' if use_layout else '_reflow'
|
||||
output_filename = f"{task_id}_translated_{lang}{format_suffix}.pdf"
|
||||
output_filename = f"{base_name}_translated_{lang}{format_suffix}.pdf"
|
||||
output_path = result_dir / output_filename
|
||||
|
||||
with tempfile.NamedTemporaryFile(suffix='.pdf', delete=False) as tmp_file:
|
||||
output_path = Path(tmp_file.name)
|
||||
# Check if PDF already exists and is newer than translation JSON
|
||||
if output_path.exists():
|
||||
pdf_mtime = output_path.stat().st_mtime
|
||||
translation_mtime = translation_file.stat().st_mtime
|
||||
if pdf_mtime >= translation_mtime:
|
||||
# PDF is up-to-date, serve directly
|
||||
logger.info(f"Serving cached translated PDF: {output_path}")
|
||||
return FileResponse(
|
||||
path=str(output_path),
|
||||
filename=output_filename,
|
||||
media_type="application/pdf",
|
||||
headers={
|
||||
"Content-Disposition": f'attachment; filename="{output_filename}"'
|
||||
}
|
||||
)
|
||||
|
||||
try:
|
||||
# Use result_dir as image source (contains extracted images)
|
||||
image_dir = result_json_path.parent
|
||||
|
||||
# Choose PDF generation method based on format
|
||||
if use_layout:
|
||||
# Layout mode: preserve original positions with text wrapping
|
||||
@@ -655,7 +669,7 @@ async def download_translated_pdf(
|
||||
result_json_path=result_json_path,
|
||||
translation_json_path=translation_file,
|
||||
output_path=output_path,
|
||||
source_file_path=image_dir
|
||||
source_file_path=result_dir
|
||||
)
|
||||
else:
|
||||
# Reflow mode: flowing layout
|
||||
@@ -663,7 +677,7 @@ async def download_translated_pdf(
|
||||
result_json_path=result_json_path,
|
||||
translation_json_path=translation_file,
|
||||
output_path=output_path,
|
||||
source_file_path=image_dir
|
||||
source_file_path=result_dir
|
||||
)
|
||||
|
||||
if not success:
|
||||
@@ -672,7 +686,7 @@ async def download_translated_pdf(
|
||||
detail="Failed to generate translated PDF"
|
||||
)
|
||||
|
||||
logger.info(f"Generated translated PDF for task {task_id}, lang={lang}")
|
||||
logger.info(f"Generated translated PDF: {output_path}")
|
||||
|
||||
return FileResponse(
|
||||
path=str(output_path),
|
||||
@@ -684,14 +698,8 @@ async def download_translated_pdf(
|
||||
)
|
||||
|
||||
except HTTPException:
|
||||
# Clean up temp file on HTTP errors
|
||||
if output_path.exists():
|
||||
output_path.unlink()
|
||||
raise
|
||||
except Exception as e:
|
||||
# Clean up temp file on unexpected errors
|
||||
if output_path.exists():
|
||||
output_path.unlink()
|
||||
logger.exception(f"Failed to generate translated PDF for task {task_id}")
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
|
||||
@@ -187,6 +187,8 @@ class TaskDetailResponse(TaskResponse):
|
||||
files: List[TaskFileResponse] = []
|
||||
# Dual-track processing field (extracted from result metadata)
|
||||
processing_track: Optional[ProcessingTrackEnum] = None
|
||||
# Visualization availability (OCR Track only)
|
||||
has_visualization: bool = False
|
||||
|
||||
|
||||
class TaskListResponse(BaseModel):
|
||||
|
||||
@@ -1510,14 +1510,25 @@ class OCRService:
|
||||
'height': ocr_height
|
||||
}]
|
||||
|
||||
# Generate PP-StructureV3 debug outputs if enabled
|
||||
# Always save raw_ocr_regions.json (required for PDF generation and translation)
|
||||
if output_dir:
|
||||
try:
|
||||
import json
|
||||
ocr_json_path = output_dir / f"{image_path.stem}_raw_ocr_regions.json"
|
||||
with open(ocr_json_path, 'w', encoding='utf-8') as f:
|
||||
json.dump(text_regions, f, ensure_ascii=False, indent=2)
|
||||
logger.info(f"Saved raw OCR regions to {ocr_json_path}")
|
||||
except Exception as ocr_save_error:
|
||||
logger.warning(f"Failed to save raw OCR regions: {ocr_save_error}")
|
||||
|
||||
# Generate PP-StructureV3 debug outputs if enabled (debug files only)
|
||||
if settings.pp_structure_debug_enabled and output_dir:
|
||||
try:
|
||||
from app.services.pp_structure_debug import PPStructureDebug
|
||||
debug_service = PPStructureDebug(output_dir)
|
||||
|
||||
# Save raw results as JSON
|
||||
debug_service.save_raw_results(
|
||||
# Save PP-Structure raw results and summary (debug only)
|
||||
debug_service.save_debug_results(
|
||||
pp_structure_results={
|
||||
'elements': layout_data.get('elements', []),
|
||||
'total_elements': layout_data.get('total_elements', 0),
|
||||
@@ -2536,7 +2547,7 @@ class OCRService:
|
||||
source_file_path: Optional[Path] = None
|
||||
) -> Tuple[Optional[Path], Optional[Path], Optional[Path]]:
|
||||
"""
|
||||
Save OCR results to JSON, Markdown, and layout-preserving PDF files
|
||||
Save OCR results to JSON and layout-preserving PDF files
|
||||
|
||||
Args:
|
||||
result: OCR result (UnifiedDocument or dictionary)
|
||||
@@ -2546,9 +2557,11 @@ class OCRService:
|
||||
|
||||
Returns:
|
||||
Tuple of (json_path, markdown_path, pdf_path)
|
||||
Note: markdown_path is always None (Markdown generation removed)
|
||||
"""
|
||||
try:
|
||||
output_dir.mkdir(parents=True, exist_ok=True)
|
||||
markdown_path = None # Markdown generation removed
|
||||
|
||||
# Use UnifiedDocumentExporter for standardized export
|
||||
if isinstance(result, UnifiedDocument) and UnifiedDocumentExporter is not None:
|
||||
@@ -2560,31 +2573,16 @@ class OCRService:
|
||||
include_metadata=True,
|
||||
include_statistics=True
|
||||
)
|
||||
|
||||
markdown_path = output_dir / f"{file_id}_output.md"
|
||||
UnifiedDocumentExporter.export_to_markdown(
|
||||
result,
|
||||
markdown_path,
|
||||
include_metadata_header=False # Keep output clean
|
||||
)
|
||||
|
||||
markdown_content = result.extract_all_text()
|
||||
else:
|
||||
# Legacy path for dict results
|
||||
result_dict = result if isinstance(result, dict) else result.to_dict()
|
||||
markdown_content = result.get('markdown_content', '') if isinstance(result, dict) else ''
|
||||
|
||||
# Save JSON
|
||||
json_path = output_dir / f"{file_id}_result.json"
|
||||
with open(json_path, 'w', encoding='utf-8') as f:
|
||||
json.dump(result_dict, f, ensure_ascii=False, indent=2)
|
||||
|
||||
# Save Markdown
|
||||
markdown_path = output_dir / f"{file_id}_output.md"
|
||||
with open(markdown_path, 'w', encoding='utf-8') as f:
|
||||
f.write(markdown_content)
|
||||
|
||||
logger.info(f"Results saved: {json_path.name}, {markdown_path.name}")
|
||||
logger.info(f"Results saved: {json_path.name}")
|
||||
|
||||
# Generate layout-preserving PDF
|
||||
pdf_path = None
|
||||
|
||||
@@ -107,6 +107,50 @@ class PPStructureDebug:
|
||||
|
||||
return saved_files
|
||||
|
||||
def save_debug_results(
    self,
    pp_structure_results: Dict[str, Any],
    raw_ocr_regions: List[Dict[str, Any]],
    filename_prefix: str = "debug"
) -> Dict[str, Path]:
    """
    Persist the debug-only artifacts for a PP-StructureV3 run.

    Writes two JSON files into ``self.output_dir``: the raw PP-Structure
    analysis results and a comparison summary. Each write is independent —
    a failure in one is logged and does not prevent the other.
    Note: raw_ocr_regions.json itself is NOT written here; that file is
    produced elsewhere because it is required beyond debugging.

    Args:
        pp_structure_results: Raw PP-StructureV3 analysis results
        raw_ocr_regions: Raw OCR text regions (for summary generation only)
        filename_prefix: Prefix for output files

    Returns:
        Dictionary with paths to saved files
    """
    saved_files: Dict[str, Path] = {}

    # 1) Raw PP-StructureV3 results (converted to JSON-safe values first).
    pp_json_path = self.output_dir / f"{filename_prefix}_pp_structure_raw.json"
    try:
        payload = self._make_serializable(pp_structure_results)
        pp_json_path.write_text(
            json.dumps(payload, ensure_ascii=False, indent=2),
            encoding='utf-8',
        )
        saved_files['pp_structure'] = pp_json_path
        logger.info(f"Saved PP-StructureV3 raw results to {pp_json_path}")
    except Exception as e:
        logger.error(f"Failed to save PP-StructureV3 results: {e}")

    # 2) Summary comparing PP-Structure output against the raw OCR regions.
    summary_path = self.output_dir / f"{filename_prefix}_debug_summary.json"
    try:
        summary = self._generate_summary(pp_structure_results, raw_ocr_regions)
        summary_path.write_text(
            json.dumps(summary, ensure_ascii=False, indent=2),
            encoding='utf-8',
        )
        saved_files['summary'] = summary_path
        logger.info(f"Saved debug summary to {summary_path}")
    except Exception as e:
        logger.error(f"Failed to save debug summary: {e}")

    return saved_files
|
||||
|
||||
def generate_visualization(
|
||||
self,
|
||||
image_path: Path,
|
||||
|
||||
@@ -255,14 +255,8 @@ class UnifiedDocumentExporter:
|
||||
logger.error(f"Failed to export JSON: {e}")
|
||||
results['json'] = None
|
||||
|
||||
# Export Markdown
|
||||
try:
|
||||
md_path = output_dir / f"{file_id}_output.md"
|
||||
UnifiedDocumentExporter.export_to_markdown(document, md_path)
|
||||
results['markdown'] = md_path
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to export Markdown: {e}")
|
||||
results['markdown'] = None
|
||||
# Markdown export removed - no longer generating _output.md files
|
||||
results['markdown'] = None
|
||||
|
||||
# Export plain text
|
||||
try:
|
||||
@@ -469,13 +463,13 @@ def save_unified_document(
|
||||
document: The UnifiedDocument to save
|
||||
output_dir: Output directory
|
||||
file_id: Base filename
|
||||
formats: List of formats to export (default: ['json', 'markdown'])
|
||||
formats: List of formats to export (default: ['json'])
|
||||
|
||||
Returns:
|
||||
Dictionary mapping format names to output paths
|
||||
"""
|
||||
if formats is None:
|
||||
formats = ['json', 'markdown']
|
||||
formats = ['json']
|
||||
|
||||
results = {}
|
||||
output_dir = Path(output_dir)
|
||||
@@ -488,9 +482,9 @@ def save_unified_document(
|
||||
UnifiedDocumentExporter.export_to_json(document, path)
|
||||
results['json'] = path
|
||||
elif fmt == 'markdown':
|
||||
path = output_dir / f"{file_id}_output.md"
|
||||
UnifiedDocumentExporter.export_to_markdown(document, path)
|
||||
results['markdown'] = path
|
||||
# Markdown export removed - skip silently
|
||||
results['markdown'] = None
|
||||
continue
|
||||
elif fmt == 'text':
|
||||
path = output_dir / f"{file_id}_text.txt"
|
||||
UnifiedDocumentExporter.export_to_text(document, path)
|
||||
|
||||
Reference in New Issue
Block a user