From efa7e4175c273d15eff5ca14ffa00b3fadc865de Mon Sep 17 00:00:00 2001
From: egg <lin4637lin4637@gmail.com>
Date: Fri, 12 Dec 2025 19:11:50 +0800
Subject: [PATCH] feat: optimize task file generation and add visualization
 download
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Backend changes:
- Disable PP-Structure debug file generation by default
- Separate raw_ocr_regions.json generation from debug flag (critical file)
- Add visualization folder download endpoint as ZIP
- Add has_visualization field to TaskDetailResponse
- Stop generating Markdown files
- Save translated PDFs to task folder with caching

Frontend changes:
- Replace JSON/MD download buttons with PDF buttons in TaskHistoryPage
- Add visualization download button in TaskDetailPage
- Fix Processing page task switching issue (reset isNotFound)

Archives two OpenSpec proposals:
- optimize-task-files-and-visualization
- simplify-frontend-add-billing

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 backend/app/core/config.py                    |   4 +-
 backend/app/routers/tasks.py                  |  98 ++++++++-
 backend/app/routers/translate.py              |  40 ++--
 backend/app/schemas/task.py                   |   2 +
 backend/app/services/ocr_service.py           |  38 ++--
 backend/app/services/pp_structure_debug.py    |  44 ++++
 .../app/services/unified_document_exporter.py |  20 +-
 frontend/src/hooks/useTaskValidation.ts       |   5 +
 frontend/src/pages/TaskDetailPage.tsx         |  31 +++
 frontend/src/pages/TaskHistoryPage.tsx        |  63 ++----
 frontend/src/services/apiV2.ts                |  16 ++
 frontend/src/types/apiV2.ts                   |   1 +
 .../proposal.md                               | 201 ++++++++++++++++++
 .../tasks.md                                  |  68 ++++++
 14 files changed, 534 insertions(+), 97 deletions(-)
 create mode 100644 openspec/changes/archive/2025-12-12-optimize-task-files-and-visualization/proposal.md
 create mode 100644 openspec/changes/archive/2025-12-12-optimize-task-files-and-visualization/tasks.md

diff --git a/backend/app/core/config.py b/backend/app/core/config.py
index 0840c3f..91905da 100644
--- a/backend/app/core/config.py
+++ b/backend/app/core/config.py
@@ -376,8 +376,8 @@ class Settings(BaseSettings):
 
     # ===== Debug Configuration =====
     # Enable debug outputs for PP-StructureV3 analysis
-    pp_structure_debug_enabled: bool = Field(default=True)  # Save debug files for PP-StructureV3
-    pp_structure_debug_visualization: bool = Field(default=True)  # Generate visualization images
+    pp_structure_debug_enabled: bool = Field(default=False)  # Save debug files for PP-StructureV3
+    pp_structure_debug_visualization: bool = Field(default=False)  # Generate visualization images
 
     # Performance tuning
     use_fp16_inference: bool = Field(default=False)  # Half-precision (if supported)
diff --git a/backend/app/routers/tasks.py b/backend/app/routers/tasks.py
index 14b66fe..a9ab3d1 100644
--- a/backend/app/routers/tasks.py
+++ b/backend/app/routers/tasks.py
@@ -10,9 +10,11 @@ import shutil
 import hashlib
 
 from fastapi import APIRouter, Depends, HTTPException, status, Query, UploadFile, File, BackgroundTasks
-from fastapi.responses import FileResponse
+from fastapi.responses import FileResponse, StreamingResponse
 from sqlalchemy.orm import Session
 import json
+import zipfile
+import io
 from datetime import datetime
 
 from app.core.deps import get_db, get_current_user
@@ -413,8 +415,6 @@ async def get_task(
     processing_track = None
     if task.result_json_path:
         try:
-            import json
-            from pathlib import Path
             result_path = Path(task.result_json_path)
             if result_path.exists():
                 with open(result_path) as f:
@@ -430,9 +430,18 @@ async def get_task(
         except Exception:
             pass  # Silently ignore errors reading the result file
 
-    # Create response with processing_track
+    # Check for visualization folder (OCR Track only)
+    has_visualization = False
+    result_dir = Path(settings.result_dir) / task_id
+    visualization_dir = result_dir / "visualization"
+    if visualization_dir.exists() and visualization_dir.is_dir():
+        png_files = list(visualization_dir.glob("*.png"))
+        has_visualization = len(png_files) > 0
+
+    # Create response with processing_track and has_visualization
     response = TaskDetailResponse.model_validate(task)
     response.processing_track = processing_track
+    response.has_visualization = has_visualization
     return response
 
 
@@ -1198,6 +1207,87 @@ async def download_unified(
         )
 
 
+@router.get("/{task_id}/download/visualization", summary="Download visualization images as ZIP")
+async def download_visualization_zip(
+    task_id: str,
+    db: Session = Depends(get_db),
+    current_user: User = Depends(get_current_user)
+):
+    """
+    Download visualization images as ZIP file.
+
+    Only available for OCR Track tasks with visualization folder.
+    Returns a ZIP file containing all PNG images from the visualization folder.
+
+    - **task_id**: Task UUID
+    """
+    try:
+        # Get task details
+        task = task_service.get_task_by_id(
+            db=db,
+            task_id=task_id,
+            user_id=current_user.id
+        )
+
+        if not task:
+            raise HTTPException(
+                status_code=status.HTTP_404_NOT_FOUND,
+                detail="Task not found"
+            )
+
+        if task.status != TaskStatus.COMPLETED:
+            raise HTTPException(
+                status_code=status.HTTP_400_BAD_REQUEST,
+                detail="Task not completed"
+            )
+
+        # Check for visualization folder
+        result_dir = Path(settings.result_dir) / task_id
+        visualization_dir = result_dir / "visualization"
+
+        if not visualization_dir.exists() or not visualization_dir.is_dir():
+            raise HTTPException(
+                status_code=status.HTTP_404_NOT_FOUND,
+                detail="Visualization folder not found. This feature is only available for OCR Track tasks."
+            )
+
+        # Get all PNG files
+        png_files = list(visualization_dir.glob("*.png"))
+        if not png_files:
+            raise HTTPException(
+                status_code=status.HTTP_404_NOT_FOUND,
+                detail="No visualization images found"
+            )
+
+        # Create ZIP in memory
+        zip_buffer = io.BytesIO()
+        with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file:
+            for png_file in sorted(png_files):
+                # Add file with relative name (just the filename)
+                zip_file.write(png_file, png_file.name)
+
+        zip_buffer.seek(0)
+
+        logger.info(f"Created visualization ZIP for task {task_id} with {len(png_files)} images")
+
+        return StreamingResponse(
+            zip_buffer,
+            media_type="application/zip",
+            headers={
+                "Content-Disposition": f"attachment; filename={task_id}_visualization.zip"
+            }
+        )
+
+    except HTTPException:
+        raise
+    except Exception as e:
+        logger.exception(f"Failed to download visualization for task {task_id}")
+        raise HTTPException(
+            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+            detail=f"Failed to download: {str(e)}"
+        )
+
+
 # ===== Preprocessing Preview Endpoints =====
 
 @router.post("/{task_id}/preview/preprocessing", response_model=PreprocessingPreviewResponse, summary="Preview preprocessing effect")
diff --git a/backend/app/routers/translate.py b/backend/app/routers/translate.py
index bbe9e12..87b93d7 100644
--- a/backend/app/routers/translate.py
+++ b/backend/app/routers/translate.py
@@ -637,17 +637,31 @@ async def download_translated_pdf(
     # Validate format parameter
     use_layout = format.lower() == 'layout'
 
-    # Generate translated PDF to temp file
+    # Generate translated PDF to task result folder (not temp)
+    # Use base name from result JSON (e.g., "scan" or "edit")
+    result_dir = result_json_path.parent
+    base_name = result_json_path.stem.replace('_result', '')
     format_suffix = '_layout' if use_layout else '_reflow'
-    output_filename = f"{task_id}_translated_{lang}{format_suffix}.pdf"
+    output_filename = f"{base_name}_translated_{lang}{format_suffix}.pdf"
+    output_path = result_dir / output_filename
 
-    with tempfile.NamedTemporaryFile(suffix='.pdf', delete=False) as tmp_file:
-        output_path = Path(tmp_file.name)
+    # Check if PDF already exists and is newer than translation JSON
+    if output_path.exists():
+        pdf_mtime = output_path.stat().st_mtime
+        translation_mtime = translation_file.stat().st_mtime
+        if pdf_mtime >= translation_mtime:
+            # PDF is up-to-date, serve directly
+            logger.info(f"Serving cached translated PDF: {output_path}")
+            return FileResponse(
+                path=str(output_path),
+                filename=output_filename,
+                media_type="application/pdf",
+                headers={
+                    "Content-Disposition": f'attachment; filename="{output_filename}"'
+                }
+            )
 
     try:
-        # Use result_dir as image source (contains extracted images)
-        image_dir = result_json_path.parent
-
         # Choose PDF generation method based on format
         if use_layout:
             # Layout mode: preserve original positions with text wrapping
@@ -655,7 +669,7 @@ async def download_translated_pdf(
                 result_json_path=result_json_path,
                 translation_json_path=translation_file,
                 output_path=output_path,
-                source_file_path=image_dir
+                source_file_path=result_dir
             )
         else:
             # Reflow mode: flowing layout
@@ -663,7 +677,7 @@ async def download_translated_pdf(
                 result_json_path=result_json_path,
                 translation_json_path=translation_file,
                 output_path=output_path,
-                source_file_path=image_dir
+                source_file_path=result_dir
             )
 
         if not success:
@@ -672,7 +686,7 @@ async def download_translated_pdf(
                 detail="Failed to generate translated PDF"
             )
 
-        logger.info(f"Generated translated PDF for task {task_id}, lang={lang}")
+        logger.info(f"Generated translated PDF: {output_path}")
 
         return FileResponse(
             path=str(output_path),
@@ -684,14 +698,8 @@ async def download_translated_pdf(
         )
 
     except HTTPException:
-        # Clean up temp file on HTTP errors
-        if output_path.exists():
-            output_path.unlink()
         raise
     except Exception as e:
-        # Clean up temp file on unexpected errors
-        if output_path.exists():
-            output_path.unlink()
         logger.exception(f"Failed to generate translated PDF for task {task_id}")
         raise HTTPException(
             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
diff --git a/backend/app/schemas/task.py b/backend/app/schemas/task.py
index 03ec9b4..2ed2d14 100644
--- a/backend/app/schemas/task.py
+++ b/backend/app/schemas/task.py
@@ -187,6 +187,8 @@ class TaskDetailResponse(TaskResponse):
     files: List[TaskFileResponse] = []
     # Dual-track processing field (extracted from result metadata)
     processing_track: Optional[ProcessingTrackEnum] = None
+    # Visualization availability (OCR Track only)
+    has_visualization: bool = False
 
 
 class TaskListResponse(BaseModel):
diff --git a/backend/app/services/ocr_service.py b/backend/app/services/ocr_service.py
index 3d603c0..69b14c1 100644
--- a/backend/app/services/ocr_service.py
+++ b/backend/app/services/ocr_service.py
@@ -1510,14 +1510,25 @@ class OCRService:
                     'height': ocr_height
                 }]
 
-                # Generate PP-StructureV3 debug outputs if enabled
+                # Always save raw_ocr_regions.json (required for PDF generation and translation)
+                if output_dir:
+                    try:
+                        import json
+                        ocr_json_path = output_dir / f"{image_path.stem}_raw_ocr_regions.json"
+                        with open(ocr_json_path, 'w', encoding='utf-8') as f:
+                            json.dump(text_regions, f, ensure_ascii=False, indent=2)
+                        logger.info(f"Saved raw OCR regions to {ocr_json_path}")
+                    except Exception as ocr_save_error:
+                        logger.warning(f"Failed to save raw OCR regions: {ocr_save_error}")
+
+                # Generate PP-StructureV3 debug outputs if enabled (debug files only)
                 if settings.pp_structure_debug_enabled and output_dir:
                     try:
                         from app.services.pp_structure_debug import PPStructureDebug
                         debug_service = PPStructureDebug(output_dir)
 
-                        # Save raw results as JSON
-                        debug_service.save_raw_results(
+                        # Save PP-Structure raw results and summary (debug only)
+                        debug_service.save_debug_results(
                             pp_structure_results={
                                 'elements': layout_data.get('elements', []),
                                 'total_elements': layout_data.get('total_elements', 0),
@@ -2536,7 +2547,7 @@ class OCRService:
         source_file_path: Optional[Path] = None
     ) -> Tuple[Optional[Path], Optional[Path], Optional[Path]]:
         """
-        Save OCR results to JSON, Markdown, and layout-preserving PDF files
+        Save OCR results to JSON and layout-preserving PDF files
 
         Args:
             result: OCR result (UnifiedDocument or dictionary)
@@ -2546,9 +2557,11 @@ class OCRService:
 
         Returns:
             Tuple of (json_path, markdown_path, pdf_path)
+            Note: markdown_path is always None (Markdown generation removed)
         """
         try:
             output_dir.mkdir(parents=True, exist_ok=True)
+            markdown_path = None  # Markdown generation removed
 
             # Use UnifiedDocumentExporter for standardized export
             if isinstance(result, UnifiedDocument) and UnifiedDocumentExporter is not None:
@@ -2560,31 +2573,16 @@ class OCRService:
                     include_metadata=True,
                     include_statistics=True
                 )
-
-                markdown_path = output_dir / f"{file_id}_output.md"
-                UnifiedDocumentExporter.export_to_markdown(
-                    result,
-                    markdown_path,
-                    include_metadata_header=False  # Keep output clean
-                )
-
-                markdown_content = result.extract_all_text()
             else:
                 # Legacy path for dict results
                 result_dict = result if isinstance(result, dict) else result.to_dict()
-                markdown_content = result.get('markdown_content', '') if isinstance(result, dict) else ''
 
                 # Save JSON
                 json_path = output_dir / f"{file_id}_result.json"
                 with open(json_path, 'w', encoding='utf-8') as f:
                     json.dump(result_dict, f, ensure_ascii=False, indent=2)
 
-                # Save Markdown
-                markdown_path = output_dir / f"{file_id}_output.md"
-                with open(markdown_path, 'w', encoding='utf-8') as f:
-                    f.write(markdown_content)
-
-            logger.info(f"Results saved: {json_path.name}, {markdown_path.name}")
+            logger.info(f"Results saved: {json_path.name}")
 
             # Generate layout-preserving PDF
             pdf_path = None
diff --git a/backend/app/services/pp_structure_debug.py b/backend/app/services/pp_structure_debug.py
index b7c1b82..1ea6173 100644
--- a/backend/app/services/pp_structure_debug.py
+++ b/backend/app/services/pp_structure_debug.py
@@ -107,6 +107,50 @@ class PPStructureDebug:
 
         return saved_files
 
+    def save_debug_results(
+        self,
+        pp_structure_results: Dict[str, Any],
+        raw_ocr_regions: List[Dict[str, Any]],
+        filename_prefix: str = "debug"
+    ) -> Dict[str, Path]:
+        """
+        Save debug-only files (PP-Structure raw results and summary).
+        Does NOT save raw_ocr_regions.json (that's handled separately).
+
+        Args:
+            pp_structure_results: Raw PP-StructureV3 analysis results
+            raw_ocr_regions: Raw OCR text regions (for summary generation only)
+            filename_prefix: Prefix for output files
+
+        Returns:
+            Dictionary with paths to saved files
+        """
+        saved_files = {}
+
+        # Save PP-StructureV3 results
+        pp_json_path = self.output_dir / f"{filename_prefix}_pp_structure_raw.json"
+        try:
+            serializable_results = self._make_serializable(pp_structure_results)
+            with open(pp_json_path, 'w', encoding='utf-8') as f:
+                json.dump(serializable_results, f, ensure_ascii=False, indent=2)
+            saved_files['pp_structure'] = pp_json_path
+            logger.info(f"Saved PP-StructureV3 raw results to {pp_json_path}")
+        except Exception as e:
+            logger.error(f"Failed to save PP-StructureV3 results: {e}")
+
+        # Save summary comparison
+        summary_path = self.output_dir / f"{filename_prefix}_debug_summary.json"
+        try:
+            summary = self._generate_summary(pp_structure_results, raw_ocr_regions)
+            with open(summary_path, 'w', encoding='utf-8') as f:
+                json.dump(summary, f, ensure_ascii=False, indent=2)
+            saved_files['summary'] = summary_path
+            logger.info(f"Saved debug summary to {summary_path}")
+        except Exception as e:
+            logger.error(f"Failed to save debug summary: {e}")
+
+        return saved_files
+
     def generate_visualization(
         self,
         image_path: Path,
diff --git a/backend/app/services/unified_document_exporter.py b/backend/app/services/unified_document_exporter.py
index 10dd809..d3fa46a 100644
--- a/backend/app/services/unified_document_exporter.py
+++ b/backend/app/services/unified_document_exporter.py
@@ -255,14 +255,8 @@ class UnifiedDocumentExporter:
             logger.error(f"Failed to export JSON: {e}")
             results['json'] = None
 
-        # Export Markdown
-        try:
-            md_path = output_dir / f"{file_id}_output.md"
-            UnifiedDocumentExporter.export_to_markdown(document, md_path)
-            results['markdown'] = md_path
-        except Exception as e:
-            logger.error(f"Failed to export Markdown: {e}")
-            results['markdown'] = None
+        # Markdown export removed - no longer generating _output.md files
+        results['markdown'] = None
 
         # Export plain text
         try:
@@ -469,13 +463,13 @@ def save_unified_document(
         document: The UnifiedDocument to save
         output_dir: Output directory
         file_id: Base filename
-        formats: List of formats to export (default: ['json', 'markdown'])
+        formats: List of formats to export (default: ['json'])
 
     Returns:
         Dictionary mapping format names to output paths
     """
     if formats is None:
-        formats = ['json', 'markdown']
+        formats = ['json']
 
     results = {}
     output_dir = Path(output_dir)
@@ -488,9 +482,9 @@ def save_unified_document(
                 UnifiedDocumentExporter.export_to_json(document, path)
                 results['json'] = path
             elif fmt == 'markdown':
-                path = output_dir / f"{file_id}_output.md"
-                UnifiedDocumentExporter.export_to_markdown(document, path)
-                results['markdown'] = path
+                # Markdown export removed - skip silently
+                results['markdown'] = None
+                continue
             elif fmt == 'text':
                 path = output_dir / f"{file_id}_text.txt"
                 UnifiedDocumentExporter.export_to_text(document, path)
diff --git a/frontend/src/hooks/useTaskValidation.ts b/frontend/src/hooks/useTaskValidation.ts
index 851b608..806a542 100644
--- a/frontend/src/hooks/useTaskValidation.ts
+++ b/frontend/src/hooks/useTaskValidation.ts
@@ -34,6 +34,11 @@ export function useTaskValidation(options?: {
 
   const [isNotFound, setIsNotFound] = useState(false)
 
+  // Reset isNotFound when taskId changes (new upload)
+  useEffect(() => {
+    setIsNotFound(false)
+  }, [taskId])
+
   const { data: taskDetail, isLoading, error, isFetching } = useQuery({
     queryKey: ['taskDetail', taskId],
     queryFn: () => apiClientV2.getTask(taskId!),
diff --git a/frontend/src/pages/TaskDetailPage.tsx b/frontend/src/pages/TaskDetailPage.tsx
index 95b6fe9..05f93b5 100644
--- a/frontend/src/pages/TaskDetailPage.tsx
+++ b/frontend/src/pages/TaskDetailPage.tsx
@@ -300,6 +300,24 @@ export default function TaskDetailPage() {
     }
   }
 
+  const handleDownloadVisualization = async () => {
+    if (!taskId) return
+    try {
+      await apiClientV2.downloadVisualization(taskId)
+      toast({
+        title: '下載成功',
+        description: '辨識結果圖片已下載',
+        variant: 'success',
+      })
+    } catch (error: any) {
+      toast({
+        title: '下載失敗',
+        description: error.response?.data?.detail || t('errors.networkError'),
+        variant: 'destructive',
+      })
+    }
+  }
+
   const getStatusBadge = (status: string) => {
     switch (status) {
       case 'completed':
@@ -477,6 +495,19 @@ export default function TaskDetailPage() {
                 <span>流式 PDF</span>
               </Button>
             </div>
+            {/* Visualization download for OCR Track */}
+            {taskDetail?.has_visualization && (
+              <div className="mt-3 pt-3 border-t">
+                <Button
+                  onClick={handleDownloadVisualization}
+                  variant="secondary"
+                  className="w-full gap-2"
+                >
+                  <Image className="w-4 h-4" />
+                  下載辨識結果圖片 (ZIP)
+                </Button>
+              </div>
+            )}
           </CardContent>
         </Card>
       )}
diff --git a/frontend/src/pages/TaskHistoryPage.tsx b/frontend/src/pages/TaskHistoryPage.tsx
index 55dc8ac..aef1e92 100644
--- a/frontend/src/pages/TaskHistoryPage.tsx
+++ b/frontend/src/pages/TaskHistoryPage.tsx
@@ -155,17 +155,11 @@ export default function TaskHistoryPage() {
   }
 
   // Download handlers
-  const handleDownload = async (taskId: string, format: 'json' | 'markdown' | 'pdf') => {
+  const handleDownloadPDF = async (taskId: string, format: 'layout' | 'reflow') => {
     try {
-      if (format === 'json') {
-        await apiClientV2.downloadJSON(taskId)
-      } else if (format === 'markdown') {
-        await apiClientV2.downloadMarkdown(taskId)
-      } else if (format === 'pdf') {
-        await apiClientV2.downloadPDF(taskId)
-      }
+      await apiClientV2.downloadPDF(taskId, format)
     } catch (err: any) {
-      alert(err.response?.data?.detail || `下載 ${format.toUpperCase()} 檔案失敗`)
+      alert(err.response?.data?.detail || `下載 PDF 檔案失敗`)
     }
   }
 
@@ -509,39 +503,24 @@ export default function TaskHistoryPage() {
                           {/* Download actions for completed tasks */}
                           {task.status === 'completed' && (
                             <>
-                              {task.result_json_path && (
-                                <Button
-                                  variant="outline"
-                                  size="sm"
-                                  onClick={() => handleDownload(task.task_id, 'json')}
-                                  title="下載 JSON"
-                                >
-                                  <Download className="w-3 h-3 mr-1" />
-                                  JSON
-                                </Button>
-                              )}
-                              {task.result_markdown_path && (
-                                <Button
-                                  variant="outline"
-                                  size="sm"
-                                  onClick={() => handleDownload(task.task_id, 'markdown')}
-                                  title="下載 Markdown"
-                                >
-                                  <Download className="w-3 h-3 mr-1" />
-                                  MD
-                                </Button>
-                              )}
-                              {task.result_pdf_path && (
-                                <Button
-                                  variant="outline"
-                                  size="sm"
-                                  onClick={() => handleDownload(task.task_id, 'pdf')}
-                                  title="下載 PDF"
-                                >
-                                  <Download className="w-3 h-3 mr-1" />
-                                  PDF
-                                </Button>
-                              )}
+                              <Button
+                                variant="outline"
+                                size="sm"
+                                onClick={() => handleDownloadPDF(task.task_id, 'layout')}
+                                title="下載版面 PDF"
+                              >
+                                <Download className="w-3 h-3 mr-1" />
+                                版面
+                              </Button>
+                              <Button
+                                variant="outline"
+                                size="sm"
+                                onClick={() => handleDownloadPDF(task.task_id, 'reflow')}
+                                title="下載流式 PDF"
+                              >
+                                <Download className="w-3 h-3 mr-1" />
+                                流式
+                              </Button>
                               <Button
                                 variant="outline"
                                 size="sm"
diff --git a/frontend/src/services/apiV2.ts b/frontend/src/services/apiV2.ts
index 3266714..3755f93 100644
--- a/frontend/src/services/apiV2.ts
+++ b/frontend/src/services/apiV2.ts
@@ -527,6 +527,22 @@ class ApiClientV2 {
     window.URL.revokeObjectURL(link.href)
   }
 
+  /**
+   * Download visualization images as ZIP (OCR Track only)
+   */
+  async downloadVisualization(taskId: string): Promise<void> {
+    const response = await this.client.get(`/tasks/${taskId}/download/visualization`, {
+      responseType: 'blob',
+    })
+
+    const blob = new Blob([response.data], { type: 'application/zip' })
+    const link = document.createElement('a')
+    link.href = window.URL.createObjectURL(blob)
+    link.download = `${taskId}_visualization.zip`
+    link.click()
+    window.URL.revokeObjectURL(link.href)
+  }
+
   // ==================== Preprocessing Preview APIs ====================
 
   /**
diff --git a/frontend/src/types/apiV2.ts b/frontend/src/types/apiV2.ts
index bf92689..8d2e416 100644
--- a/frontend/src/types/apiV2.ts
+++ b/frontend/src/types/apiV2.ts
@@ -197,6 +197,7 @@ export interface TaskFile {
 
 export interface TaskDetail extends Task {
   files: TaskFile[]
+  has_visualization?: boolean
 }
 
 export interface TaskListResponse {
diff --git a/openspec/changes/archive/2025-12-12-optimize-task-files-and-visualization/proposal.md b/openspec/changes/archive/2025-12-12-optimize-task-files-and-visualization/proposal.md
new file mode 100644
index 0000000..1121e8f
--- /dev/null
+++ b/openspec/changes/archive/2025-12-12-optimize-task-files-and-visualization/proposal.md
@@ -0,0 +1,201 @@
+# Proposal: 優化任務檔案生成與視覺化下載
+
+## Summary
+
+優化 OCR/Direct Track 任務處理過程中的檔案生成策略，移除不必要的檔案，並提供視覺化圖片下載功能。
+
+## 檔案變更總覽
+
+### OCR Track 檔案變更
+
+| 檔案 | 目前狀態 | 變更後 | 影響 |
+|-----|---------|-------|------|
+| `*_result.json` | 生成 | **保留** | 核心資料，API/前端依賴 |
+| `*_output.md` | 生成 | **停止生成** | 移除前端下載按鈕 |
+| `*_layout.pdf` / `*_reflow.pdf` | 生成 | **保留** | 主要輸出格式 |
+| `*_raw_ocr_regions.json` | 生成 | **保留** | 翻譯服務依賴 |
+| `*_scan_page_N.png` | 生成 | **保留** | OCR 處理和 PDF 生成需要 |
+| `visualization/*.png` | 生成 | **保留** | 新增下載功能 |
+| `standalone_img_*.png` | 生成 | **保留** | result.json 引用，PDF 生成需要 |
+| `img_in_table_*.png` | 生成 | **保留** | result.json 引用，PDF 生成需要 |
+| `pp3_*.png` | 生成 | **保留** | result.json 引用，PDF 生成需要 |
+| `*_pp_structure_raw.json` | 生成 | **停止生成** | 純 Debug，預設關閉 |
+| `*_debug_summary.json` | 生成 | **停止生成** | 純 Debug，預設關閉 |
+| `*_pp_structure_viz.png` | 生成 | **停止生成** | 純 Debug，預設關閉 |
+
+### Direct Track 檔案變更
+
+| 檔案 | 目前狀態 | 變更後 | 影響 |
+|-----|---------|-------|------|
+| `*_result.json` | 生成 | **保留** | 核心資料，API/前端依賴 |
+| `*_output.md` | 生成 | **停止生成** | 移除前端下載按鈕 |
+| `*_layout.pdf` / `*_reflow.pdf` | 生成 | **保留** | 主要輸出格式 |
+| `f66673cc_p*_img*.png` | 生成 | **保留** | result.json 引用，PDF 生成需要 |
+| `f66673cc_p*_chart*.png` | 生成 | **保留** | result.json 引用，PDF 生成需要 |
+
+### 變更摘要
+
+| Track | 停止生成的檔案 | 預估節省空間 |
+|-------|--------------|-------------|
+| OCR Track | `*_output.md`, `*_pp_structure_raw.json`, `*_debug_summary.json`, `*_pp_structure_viz.png` | ~300-1500 KB/頁 |
+| Direct Track | `*_output.md` | ~1-3 KB/檔案 |
+
+## 後端變更
+
+### 1. config.py - 修改預設值
+
+```python
+# 修改前
+pp_structure_debug_enabled: bool = Field(default=True)
+pp_structure_debug_visualization: bool = Field(default=True)
+
+# 修改後
+pp_structure_debug_enabled: bool = Field(default=False)
+pp_structure_debug_visualization: bool = Field(default=False)
+```
+
+**影響**：OCR Track 不再生成 debug 檔案（`*_pp_structure_raw.json`, `*_debug_summary.json`, `*_pp_structure_viz.png`）
+
+### 2. unified_document_exporter.py - 停止生成 Markdown
+
+修改 `export_all()` 方法，不再生成 `*_output.md` 檔案。
+
+**影響**：兩個 Track 都不再生成 Markdown 檔案
+
+### 3. ocr_service.py - 更新 save_results()
+
+修改 `save_results()` 方法，不再生成 Markdown 檔案，返回值調整。
+
+### 4. tasks.py (router) - 移除 Markdown 下載端點
+
+移除或標記棄用 `GET /api/v2/tasks/{task_id}/download/markdown` 端點。
+
+### 5. tasks.py (router) - 新增 visualization 下載端點
+
+```python
+@router.get("/{task_id}/download/visualization")
+async def download_visualization_zip(task_id: str, ...):
+    """
+    Download visualization images as ZIP file.
+    Only available for OCR Track tasks with visualization folder.
+    """
+    # 檢查 visualization 資料夾是否存在
+    # 打包資料夾內所有 PNG 為 ZIP
+    # 返回 StreamingResponse (application/zip)
+```
+
+### 6. Task model/schema - 更新欄位
+
+- 移除 `result_markdown_path` 欄位使用（保留欄位但不再寫入）
+- 新增 `has_visualization: bool` 到 TaskDetail response
+
+## 前端變更
+
+### 1. TaskHistoryPage.tsx - 移除 Markdown 下載按鈕
+
+```tsx
+// 移除此段
+{task.result_markdown_path && (
+  <Button onClick={() => handleDownload(task.task_id, 'markdown')}>
+    MD
+  </Button>
+)}
+```
+
+### 2. ResultsPage.tsx - 移除 Markdown 下載按鈕
+
+```tsx
+// 移除此段
+<Button onClick={handleDownloadMarkdown}>
+  Markdown
+</Button>
+```
+
+### 3. apiV2.ts - 移除/新增 API 方法
+
+```typescript
+// 移除
+async downloadMarkdown(taskId: string): Promise<void>
+
+// 新增
+async downloadVisualization(taskId: string): Promise<void>
+```
+
+### 4. types/apiV2.ts - 更新 TaskDetail type
+
+```typescript
+export interface TaskDetail {
+  // ... 現有欄位
+  has_visualization?: boolean  // 新增
+}
+```
+
+### 5. TaskDetailPage.tsx - 新增 visualization 下載按鈕
+
+```tsx
+// OCR Track 且有 visualization 時顯示
+{task.has_visualization && (
+  <Button onClick={handleDownloadVisualization}>
+    <ImageIcon className="w-4 h-4 mr-2" />
+    下載辨識結果圖片
+  </Button>
+)}
+```
+
+## 依賴關係確認
+
+### 必須保留的檔案及原因
+
+| 檔案 | 依賴來源 | 用途 |
+|-----|---------|------|
+| `*_result.json` | API、前端、翻譯服務 | 核心結構化資料 |
+| `*_raw_ocr_regions.json` | `translation_service.py` | OCR Track 翻譯時讀取 |
+| `*_scan_page_N.png` | `pdf_generator_service.py` | Reflow PDF 生成 |
+| `visualization/*.png` | 使用者下載 | OCR 辨識結果視覺化 |
+| 所有提取的圖片 | `*_result.json` 中的 `saved_path` | PDF 生成時嵌入圖片 |
+
+### 可移除的檔案及原因
+
+| 檔案 | 原因 |
+|-----|------|
+| `*_output.md` | 前端移除下載按鈕後無使用場景 |
+| `*_pp_structure_raw.json` | 純 Debug 用途，生產環境不需要 |
+| `*_debug_summary.json` | 純 Debug 用途，生產環境不需要 |
+| `*_pp_structure_viz.png` | 純 Debug 用途，生產環境不需要 |
+
+## 設定說明
+
+### 後端設定 (.env.local)
+
+```bash
+# Debug 檔案生成（預設關閉）
+PP_STRUCTURE_DEBUG_ENABLED=false
+PP_STRUCTURE_DEBUG_VISUALIZATION=false
+
+# 如需開啟 debug 檔案生成
+PP_STRUCTURE_DEBUG_ENABLED=true
+PP_STRUCTURE_DEBUG_VISUALIZATION=true
+```
+
+### 前端設定
+
+無需額外設定，移除下載按鈕後自動生效。
+
+## 向後相容性
+
+1. **API 端點** - `GET /download/markdown` 可保留但返回 404 或棄用訊息
+2. **資料庫欄位** - `result_markdown_path` 欄位保留，但新任務不再寫入
+3. **舊任務** - 已存在的 Markdown 檔案不受影響，仍可下載
+
+## Implementation Plan
+
+1. 後端：修改 config.py 預設值（關閉 debug）
+2. 後端：修改 unified_document_exporter.py 停止生成 Markdown
+3. 後端：修改 ocr_service.py save_results() 不生成 Markdown
+4. 後端：新增 visualization 下載端點
+5. 後端：更新 TaskDetail response 加入 has_visualization
+6. 前端：移除 TaskHistoryPage Markdown 下載按鈕
+7. 前端：移除 ResultsPage Markdown 下載按鈕
+8. 前端：移除 apiV2.ts downloadMarkdown 方法
+9. 前端：新增 visualization 下載功能
+10. 測試並驗證
diff --git a/openspec/changes/archive/2025-12-12-optimize-task-files-and-visualization/tasks.md b/openspec/changes/archive/2025-12-12-optimize-task-files-and-visualization/tasks.md
new file mode 100644
index 0000000..c4a934f
--- /dev/null
+++ b/openspec/changes/archive/2025-12-12-optimize-task-files-and-visualization/tasks.md
@@ -0,0 +1,68 @@
+# Tasks: 優化任務檔案生成與視覺化下載
+
+## 1. 後端設定優化
+
+- [x] 1.1 修改 `config.py` debug 預設值
+  - `pp_structure_debug_enabled`: `True` → `False`
+  - `pp_structure_debug_visualization`: `True` → `False`
+
+## 2. 後端 Visualization 下載 API
+
+- [x] 2.1 在 `tasks.py` 新增 visualization 下載端點
+  - `GET /api/v2/tasks/{task_id}/download/visualization`
+  - 檢查 visualization 資料夾是否存在
+  - 打包資料夾內所有 PNG 為 ZIP
+  - 返回 StreamingResponse (application/zip)
+
+- [x] 2.2 在 TaskDetail response 中加入 `has_visualization` 欄位
+  - 檢查 task result directory 下是否有 visualization 資料夾
+  - 回傳 boolean 值
+
+## 3. 前端 Visualization 下載功能
+
+- [x] 3.1 在 `types/apiV2.ts` 更新 TaskDetail type
+  - 新增 `has_visualization?: boolean`
+
+- [x] 3.2 在 `apiV2.ts` 新增下載方法
+  - `downloadVisualization(taskId: string): Promise<void>`
+
+- [x] 3.3 在 `TaskDetailPage.tsx` 新增下載按鈕
+  - 只有 `has_visualization = true` 時顯示
+  - 點擊後下載 ZIP 檔案
+
+## 4. 停止生成 Markdown 檔案
+
+- [x] 4.1 修改 `ocr_service.py` 的 `save_results()` 方法
+  - 移除 Markdown 檔案生成
+  - 返回值中 `markdown_path` 始終為 `None`
+
+- [x] 4.2 修改 `unified_document_exporter.py`
+  - `export_all()`: 移除 Markdown 導出
+  - `save_unified_document()`: 移除 Markdown 支援
+
+- [x] 4.3 前端 TaskHistoryPage.tsx 移除 JSON/MD 下載按鈕
+  - 改為版面 PDF 和流式 PDF 兩個下載按鈕
+
+## 5. 確保 raw_ocr_regions.json 正常生成
+
+- [x] 5.1 將 `raw_ocr_regions.json` 生成從 debug 區塊分離
+  - 獨立於 `pp_structure_debug_enabled` 設定
+  - 此檔案為 PDF 生成和翻譯服務所必需
+
+- [x] 5.2 在 `pp_structure_debug.py` 新增 `save_debug_results()` 方法
+  - 只保存純 debug 檔案（`_pp_structure_raw.json`, `_debug_summary.json`）
+  - 不再重複保存 `_raw_ocr_regions.json`
+
+## 6. Bug 修復
+
+- [x] 6.1 修復 Processing 頁面不切換到新任務的問題
+  - 在 `useTaskValidation.ts` 中加入 taskId 變化時重置 `isNotFound` 的邏輯
+
+## 7. 測試與驗證
+
+- [x] 7.1 驗證 TypeScript 編譯通過
+- [ ] 7.2 驗證 `*_raw_ocr_regions.json` 仍正常生成
+- [ ] 7.3 驗證 visualization 資料夾仍正常生成
+- [ ] 7.4 測試 visualization 下載功能
+- [ ] 7.5 驗證 PDF 內容正常顯示
+- [ ] 7.6 驗證新任務上傳後 Processing 頁面正確切換