From efa7e4175c273d15eff5ca14ffa00b3fadc865de Mon Sep 17 00:00:00 2001 From: egg Date: Fri, 12 Dec 2025 19:11:50 +0800 Subject: [PATCH] feat: optimize task file generation and add visualization download MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Backend changes: - Disable PP-Structure debug file generation by default - Separate raw_ocr_regions.json generation from debug flag (critical file) - Add visualization folder download endpoint as ZIP - Add has_visualization field to TaskDetailResponse - Stop generating Markdown files - Save translated PDFs to task folder with caching Frontend changes: - Replace JSON/MD download buttons with PDF buttons in TaskHistoryPage - Add visualization download button in TaskDetailPage - Fix Processing page task switching issue (reset isNotFound) Archives two OpenSpec proposals: - optimize-task-files-and-visualization - simplify-frontend-add-billing 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- backend/app/core/config.py | 4 +- backend/app/routers/tasks.py | 98 ++++++++- backend/app/routers/translate.py | 40 ++-- backend/app/schemas/task.py | 2 + backend/app/services/ocr_service.py | 38 ++-- backend/app/services/pp_structure_debug.py | 44 ++++ .../app/services/unified_document_exporter.py | 20 +- frontend/src/hooks/useTaskValidation.ts | 5 + frontend/src/pages/TaskDetailPage.tsx | 31 +++ frontend/src/pages/TaskHistoryPage.tsx | 63 ++---- frontend/src/services/apiV2.ts | 16 ++ frontend/src/types/apiV2.ts | 1 + .../proposal.md | 201 ++++++++++++++++++ .../tasks.md | 68 ++++++ 14 files changed, 534 insertions(+), 97 deletions(-) create mode 100644 openspec/changes/archive/2025-12-12-optimize-task-files-and-visualization/proposal.md create mode 100644 openspec/changes/archive/2025-12-12-optimize-task-files-and-visualization/tasks.md diff --git a/backend/app/core/config.py b/backend/app/core/config.py index 0840c3f..91905da 100644 --- a/backend/app/core/config.py +++ b/backend/app/core/config.py @@ -376,8 +376,8 @@ class Settings(BaseSettings): # ===== Debug Configuration ===== # Enable debug outputs for PP-StructureV3 analysis - pp_structure_debug_enabled: bool = Field(default=True) # Save debug files for PP-StructureV3 - pp_structure_debug_visualization: bool = Field(default=True) # Generate visualization images + pp_structure_debug_enabled: bool = Field(default=False) # Save debug files for PP-StructureV3 + pp_structure_debug_visualization: bool = Field(default=False) # Generate visualization images # Performance tuning use_fp16_inference: bool = Field(default=False) # Half-precision (if supported) diff --git a/backend/app/routers/tasks.py b/backend/app/routers/tasks.py index 14b66fe..a9ab3d1 100644 --- a/backend/app/routers/tasks.py +++ b/backend/app/routers/tasks.py @@ -10,9 +10,11 @@ import shutil import hashlib from fastapi import APIRouter, Depends, HTTPException, status, Query, UploadFile, File, BackgroundTasks -from fastapi.responses import FileResponse +from fastapi.responses import FileResponse, StreamingResponse from sqlalchemy.orm import Session import json +import zipfile +import io from datetime import datetime from app.core.deps import get_db, get_current_user @@ -413,8 +415,6 @@ async def get_task( processing_track = None if task.result_json_path: try: - import json - from pathlib import Path result_path = Path(task.result_json_path) if result_path.exists(): with open(result_path) as f: @@ -430,9 +430,18 @@ async def get_task( except Exception: pass # Silently ignore errors reading the result file - # Create response with processing_track + # Check for visualization folder (OCR Track only) + has_visualization = False + result_dir = Path(settings.result_dir) / task_id + visualization_dir = result_dir / "visualization" + if visualization_dir.exists() and visualization_dir.is_dir(): + png_files = list(visualization_dir.glob("*.png")) + has_visualization = len(png_files) > 0 + + # Create response with processing_track and has_visualization response = TaskDetailResponse.model_validate(task) response.processing_track = processing_track + response.has_visualization = has_visualization return response @@ -1198,6 +1207,87 @@ async def download_unified( ) +@router.get("/{task_id}/download/visualization", summary="Download visualization images as ZIP") +async def download_visualization_zip( + task_id: str, + db: Session = Depends(get_db), + current_user: User = Depends(get_current_user) +): + """ + Download visualization images as ZIP file. + + Only available for OCR Track tasks with visualization folder. + Returns a ZIP file containing all PNG images from the visualization folder. + + - **task_id**: Task UUID + """ + try: + # Get task details + task = task_service.get_task_by_id( + db=db, + task_id=task_id, + user_id=current_user.id + ) + + if not task: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail="Task not found" + ) + + if task.status != TaskStatus.COMPLETED: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="Task not completed" + ) + + # Check for visualization folder + result_dir = Path(settings.result_dir) / task_id + visualization_dir = result_dir / "visualization" + + if not visualization_dir.exists() or not visualization_dir.is_dir(): + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail="Visualization folder not found. This feature is only available for OCR Track tasks." + ) + + # Get all PNG files + png_files = list(visualization_dir.glob("*.png")) + if not png_files: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail="No visualization images found" + ) + + # Create ZIP in memory + zip_buffer = io.BytesIO() + with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file: + for png_file in sorted(png_files): + # Add file with relative name (just the filename) + zip_file.write(png_file, png_file.name) + + zip_buffer.seek(0) + + logger.info(f"Created visualization ZIP for task {task_id} with {len(png_files)} images") + + return StreamingResponse( + zip_buffer, + media_type="application/zip", + headers={ + "Content-Disposition": f"attachment; filename={task_id}_visualization.zip" + } + ) + + except HTTPException: + raise + except Exception as e: + logger.exception(f"Failed to download visualization for task {task_id}") + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail=f"Failed to download: {str(e)}" + ) + + # ===== Preprocessing Preview Endpoints ===== @router.post("/{task_id}/preview/preprocessing", response_model=PreprocessingPreviewResponse, summary="Preview preprocessing effect") diff --git a/backend/app/routers/translate.py b/backend/app/routers/translate.py index bbe9e12..87b93d7 100644 --- a/backend/app/routers/translate.py +++ b/backend/app/routers/translate.py @@ -637,17 +637,31 @@ async def download_translated_pdf( # Validate format parameter use_layout = format.lower() == 'layout' - # Generate translated PDF to temp file + # Generate translated PDF to task result folder (not temp) + # Use base name from result JSON (e.g., "scan" or "edit") + result_dir = result_json_path.parent + base_name = result_json_path.stem.replace('_result', '') format_suffix = '_layout' if use_layout else '_reflow' - output_filename = f"{task_id}_translated_{lang}{format_suffix}.pdf" + output_filename = f"{base_name}_translated_{lang}{format_suffix}.pdf" + output_path = result_dir / output_filename - with tempfile.NamedTemporaryFile(suffix='.pdf', delete=False) as tmp_file: - output_path = Path(tmp_file.name) + # Check if PDF already exists and is newer than translation JSON + if output_path.exists(): + pdf_mtime = output_path.stat().st_mtime + translation_mtime = translation_file.stat().st_mtime + if pdf_mtime >= translation_mtime: + # PDF is up-to-date, serve directly + logger.info(f"Serving cached translated PDF: {output_path}") + return FileResponse( + path=str(output_path), + filename=output_filename, + media_type="application/pdf", + headers={ + "Content-Disposition": f'attachment; filename="{output_filename}"' + } + ) try: - # Use result_dir as image source (contains extracted images) - image_dir = result_json_path.parent - # Choose PDF generation method based on format if use_layout: # Layout mode: preserve original positions with text wrapping @@ -655,7 +669,7 @@ async def download_translated_pdf( result_json_path=result_json_path, translation_json_path=translation_file, output_path=output_path, - source_file_path=image_dir + source_file_path=result_dir ) else: # Reflow mode: flowing layout @@ -663,7 +677,7 @@ async def download_translated_pdf( result_json_path=result_json_path, translation_json_path=translation_file, output_path=output_path, - source_file_path=image_dir + source_file_path=result_dir ) if not success: @@ -672,7 +686,7 @@ async def download_translated_pdf( detail="Failed to generate translated PDF" ) - logger.info(f"Generated translated PDF for task {task_id}, lang={lang}") + logger.info(f"Generated translated PDF: {output_path}") return FileResponse( path=str(output_path), @@ -684,14 +698,8 @@ async def download_translated_pdf( ) except HTTPException: - # Clean up temp file on HTTP errors - if output_path.exists(): - output_path.unlink() raise except Exception as e: - # Clean up temp file on unexpected errors - if output_path.exists(): - output_path.unlink() logger.exception(f"Failed to generate translated PDF for task {task_id}") raise HTTPException( status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, diff --git a/backend/app/schemas/task.py b/backend/app/schemas/task.py index 03ec9b4..2ed2d14 100644 --- a/backend/app/schemas/task.py +++ b/backend/app/schemas/task.py @@ -187,6 +187,8 @@ class TaskDetailResponse(TaskResponse): files: List[TaskFileResponse] = [] # Dual-track processing field (extracted from result metadata) processing_track: Optional[ProcessingTrackEnum] = None + # Visualization availability (OCR Track only) + has_visualization: bool = False class TaskListResponse(BaseModel): diff --git a/backend/app/services/ocr_service.py b/backend/app/services/ocr_service.py index 3d603c0..69b14c1 100644 --- a/backend/app/services/ocr_service.py +++ b/backend/app/services/ocr_service.py @@ -1510,14 +1510,25 @@ class OCRService: 'height': ocr_height }] - # Generate PP-StructureV3 debug outputs if enabled + # Always save raw_ocr_regions.json (required for PDF generation and translation) + if output_dir: + try: + import json + ocr_json_path = output_dir / f"{image_path.stem}_raw_ocr_regions.json" + with open(ocr_json_path, 'w', encoding='utf-8') as f: + json.dump(text_regions, f, ensure_ascii=False, indent=2) + logger.info(f"Saved raw OCR regions to {ocr_json_path}") + except Exception as ocr_save_error: + logger.warning(f"Failed to save raw OCR regions: {ocr_save_error}") + + # Generate PP-StructureV3 debug outputs if enabled (debug files only) if settings.pp_structure_debug_enabled and output_dir: try: from app.services.pp_structure_debug import PPStructureDebug debug_service = PPStructureDebug(output_dir) - # Save raw results as JSON - debug_service.save_raw_results( + # Save PP-Structure raw results and summary (debug only) + debug_service.save_debug_results( pp_structure_results={ 'elements': layout_data.get('elements', []), 'total_elements': layout_data.get('total_elements', 0), @@ -2536,7 +2547,7 @@ class OCRService: source_file_path: Optional[Path] = None ) -> Tuple[Optional[Path], Optional[Path], Optional[Path]]: """ - Save OCR results to JSON, Markdown, and layout-preserving PDF files + Save OCR results to JSON and layout-preserving PDF files Args: result: OCR result (UnifiedDocument or dictionary) @@ -2546,9 +2557,11 @@ class OCRService: Returns: Tuple of (json_path, markdown_path, pdf_path) + Note: markdown_path is always None (Markdown generation removed) """ try: output_dir.mkdir(parents=True, exist_ok=True) + markdown_path = None # Markdown generation removed # Use UnifiedDocumentExporter for standardized export if isinstance(result, UnifiedDocument) and UnifiedDocumentExporter is not None: @@ -2560,31 +2573,16 @@ class OCRService: include_metadata=True, include_statistics=True ) - - markdown_path = output_dir / f"{file_id}_output.md" - UnifiedDocumentExporter.export_to_markdown( - result, - markdown_path, - include_metadata_header=False # Keep output clean - ) - - markdown_content = result.extract_all_text() else: # Legacy path for dict results result_dict = result if isinstance(result, dict) else result.to_dict() - markdown_content = result.get('markdown_content', '') if isinstance(result, dict) else '' # Save JSON json_path = output_dir / f"{file_id}_result.json" with open(json_path, 'w', encoding='utf-8') as f: json.dump(result_dict, f, ensure_ascii=False, indent=2) - # Save Markdown - markdown_path = output_dir / f"{file_id}_output.md" - with open(markdown_path, 'w', encoding='utf-8') as f: - f.write(markdown_content) - - logger.info(f"Results saved: {json_path.name}, {markdown_path.name}") + logger.info(f"Results saved: {json_path.name}") # Generate layout-preserving PDF pdf_path = None diff --git a/backend/app/services/pp_structure_debug.py b/backend/app/services/pp_structure_debug.py index b7c1b82..1ea6173 100644 --- a/backend/app/services/pp_structure_debug.py +++ b/backend/app/services/pp_structure_debug.py @@ -107,6 +107,50 @@ class PPStructureDebug: return saved_files + def save_debug_results( + self, + pp_structure_results: Dict[str, Any], + raw_ocr_regions: List[Dict[str, Any]], + filename_prefix: str = "debug" + ) -> Dict[str, Path]: + """ + Save debug-only files (PP-Structure raw results and summary). + Does NOT save raw_ocr_regions.json (that's handled separately). + + Args: + pp_structure_results: Raw PP-StructureV3 analysis results + raw_ocr_regions: Raw OCR text regions (for summary generation only) + filename_prefix: Prefix for output files + + Returns: + Dictionary with paths to saved files + """ + saved_files = {} + + # Save PP-StructureV3 results + pp_json_path = self.output_dir / f"{filename_prefix}_pp_structure_raw.json" + try: + serializable_results = self._make_serializable(pp_structure_results) + with open(pp_json_path, 'w', encoding='utf-8') as f: + json.dump(serializable_results, f, ensure_ascii=False, indent=2) + saved_files['pp_structure'] = pp_json_path + logger.info(f"Saved PP-StructureV3 raw results to {pp_json_path}") + except Exception as e: + logger.error(f"Failed to save PP-StructureV3 results: {e}") + + # Save summary comparison + summary_path = self.output_dir / f"{filename_prefix}_debug_summary.json" + try: + summary = self._generate_summary(pp_structure_results, raw_ocr_regions) + with open(summary_path, 'w', encoding='utf-8') as f: + json.dump(summary, f, ensure_ascii=False, indent=2) + saved_files['summary'] = summary_path + logger.info(f"Saved debug summary to {summary_path}") + except Exception as e: + logger.error(f"Failed to save debug summary: {e}") + + return saved_files + def generate_visualization( self, image_path: Path, diff --git a/backend/app/services/unified_document_exporter.py b/backend/app/services/unified_document_exporter.py index 10dd809..d3fa46a 100644 --- a/backend/app/services/unified_document_exporter.py +++ b/backend/app/services/unified_document_exporter.py @@ -255,14 +255,8 @@ class UnifiedDocumentExporter: logger.error(f"Failed to export JSON: {e}") results['json'] = None - # Export Markdown - try: - md_path = output_dir / f"{file_id}_output.md" - UnifiedDocumentExporter.export_to_markdown(document, md_path) - results['markdown'] = md_path - except Exception as e: - logger.error(f"Failed to export Markdown: {e}") - results['markdown'] = None + # Markdown export removed - no longer generating _output.md files + results['markdown'] = None # Export plain text try: @@ -469,13 +463,13 @@ def save_unified_document( document: The UnifiedDocument to save output_dir: Output directory file_id: Base filename - formats: List of formats to export (default: ['json', 'markdown']) + formats: List of formats to export (default: ['json']) Returns: Dictionary mapping format names to output paths """ if formats is None: - formats = ['json', 'markdown'] + formats = ['json'] results = {} output_dir = Path(output_dir) @@ -488,9 +482,9 @@ def save_unified_document( UnifiedDocumentExporter.export_to_json(document, path) results['json'] = path elif fmt == 'markdown': - path = output_dir / f"{file_id}_output.md" - UnifiedDocumentExporter.export_to_markdown(document, path) - results['markdown'] = path + # Markdown export removed - skip silently + results['markdown'] = None + continue elif fmt == 'text': path = output_dir / f"{file_id}_text.txt" UnifiedDocumentExporter.export_to_text(document, path) diff --git a/frontend/src/hooks/useTaskValidation.ts b/frontend/src/hooks/useTaskValidation.ts index 851b608..806a542 100644 --- a/frontend/src/hooks/useTaskValidation.ts +++ b/frontend/src/hooks/useTaskValidation.ts @@ -34,6 +34,11 @@ export function useTaskValidation(options?: { const [isNotFound, setIsNotFound] = useState(false) + // Reset isNotFound when taskId changes (new upload) + useEffect(() => { + setIsNotFound(false) + }, [taskId]) + const { data: taskDetail, isLoading, error, isFetching } = useQuery({ queryKey: ['taskDetail', taskId], queryFn: () => apiClientV2.getTask(taskId!), diff --git a/frontend/src/pages/TaskDetailPage.tsx b/frontend/src/pages/TaskDetailPage.tsx index 95b6fe9..05f93b5 100644 --- a/frontend/src/pages/TaskDetailPage.tsx +++ b/frontend/src/pages/TaskDetailPage.tsx @@ -300,6 +300,24 @@ export default function TaskDetailPage() { } } + const handleDownloadVisualization = async () => { + if (!taskId) return + try { + await apiClientV2.downloadVisualization(taskId) + toast({ + title: '下載成功', + description: '辨識結果圖片已下載', + variant: 'success', + }) + } catch (error: any) { + toast({ + title: '下載失敗', + description: error.response?.data?.detail || t('errors.networkError'), + variant: 'destructive', + }) + } + } + const getStatusBadge = (status: string) => { switch (status) { case 'completed': @@ -477,6 +495,19 @@ export default function TaskDetailPage() { 流式 PDF + {/* Visualization download for OCR Track */} + {taskDetail?.has_visualization && ( +
+ +
+ )} )} diff --git a/frontend/src/pages/TaskHistoryPage.tsx b/frontend/src/pages/TaskHistoryPage.tsx index 55dc8ac..aef1e92 100644 --- a/frontend/src/pages/TaskHistoryPage.tsx +++ b/frontend/src/pages/TaskHistoryPage.tsx @@ -155,17 +155,11 @@ export default function TaskHistoryPage() { } // Download handlers - const handleDownload = async (taskId: string, format: 'json' | 'markdown' | 'pdf') => { + const handleDownloadPDF = async (taskId: string, format: 'layout' | 'reflow') => { try { - if (format === 'json') { - await apiClientV2.downloadJSON(taskId) - } else if (format === 'markdown') { - await apiClientV2.downloadMarkdown(taskId) - } else if (format === 'pdf') { - await apiClientV2.downloadPDF(taskId) - } + await apiClientV2.downloadPDF(taskId, format) } catch (err: any) { - alert(err.response?.data?.detail || `下載 ${format.toUpperCase()} 檔案失敗`) + alert(err.response?.data?.detail || `下載 PDF 檔案失敗`) } } @@ -509,39 +503,24 @@ export default function TaskHistoryPage() { {/* Download actions for completed tasks */} {task.status === 'completed' && ( <> - {task.result_json_path && ( - - )} - {task.result_markdown_path && ( - - )} - {task.result_pdf_path && ( - - )} + + +)} +``` + +### 2. ResultsPage.tsx - 移除 Markdown 下載按鈕 + +```tsx +// 移除此段 + +``` + +### 3. apiV2.ts - 移除/新增 API 方法 + +```typescript +// 移除 +async downloadMarkdown(taskId: string): Promise + +// 新增 +async downloadVisualization(taskId: string): Promise +``` + +### 4. types/apiV2.ts - 更新 TaskDetail type + +```typescript +export interface TaskDetail { + // ... 現有欄位 + has_visualization?: boolean // 新增 +} +``` + +### 5. TaskDetailPage.tsx - 新增 visualization 下載按鈕 + +```tsx +// OCR Track 且有 visualization 時顯示 +{task.has_visualization && ( + +)} +``` + +## 依賴關係確認 + +### 必須保留的檔案及原因 + +| 檔案 | 依賴來源 | 用途 | +|-----|---------|------| +| `*_result.json` | API、前端、翻譯服務 | 核心結構化資料 | +| `*_raw_ocr_regions.json` | `translation_service.py` | OCR Track 翻譯時讀取 | +| `*_scan_page_N.png` | `pdf_generator_service.py` | Reflow PDF 生成 | +| `visualization/*.png` | 使用者下載 | OCR 辨識結果視覺化 | +| 所有提取的圖片 | `*_result.json` 中的 `saved_path` | PDF 生成時嵌入圖片 | + +### 可移除的檔案及原因 + +| 檔案 | 原因 | +|-----|------| +| `*_output.md` | 前端移除下載按鈕後無使用場景 | +| `*_pp_structure_raw.json` | 純 Debug 用途,生產環境不需要 | +| `*_debug_summary.json` | 純 Debug 用途,生產環境不需要 | +| `*_pp_structure_viz.png` | 純 Debug 用途,生產環境不需要 | + +## 設定說明 + +### 後端設定 (.env.local) + +```bash +# Debug 檔案生成(預設關閉) +PP_STRUCTURE_DEBUG_ENABLED=false +PP_STRUCTURE_DEBUG_VISUALIZATION=false + +# 如需開啟 debug 檔案生成 +PP_STRUCTURE_DEBUG_ENABLED=true +PP_STRUCTURE_DEBUG_VISUALIZATION=true +``` + +### 前端設定 + +無需額外設定,移除下載按鈕後自動生效。 + +## 向後相容性 + +1. **API 端點** - `GET /download/markdown` 可保留但返回 404 或棄用訊息 +2. **資料庫欄位** - `result_markdown_path` 欄位保留,但新任務不再寫入 +3. **舊任務** - 已存在的 Markdown 檔案不受影響,仍可下載 + +## Implementation Plan + +1. 後端:修改 config.py 預設值(關閉 debug) +2. 後端:修改 unified_document_exporter.py 停止生成 Markdown +3. 後端:修改 ocr_service.py save_results() 不生成 Markdown +4. 後端:新增 visualization 下載端點 +5. 後端:更新 TaskDetail response 加入 has_visualization +6. 前端:移除 TaskHistoryPage Markdown 下載按鈕 +7. 前端:移除 ResultsPage Markdown 下載按鈕 +8. 前端:移除 apiV2.ts downloadMarkdown 方法 +9. 前端:新增 visualization 下載功能 +10. 測試並驗證 diff --git a/openspec/changes/archive/2025-12-12-optimize-task-files-and-visualization/tasks.md b/openspec/changes/archive/2025-12-12-optimize-task-files-and-visualization/tasks.md new file mode 100644 index 0000000..c4a934f --- /dev/null +++ b/openspec/changes/archive/2025-12-12-optimize-task-files-and-visualization/tasks.md @@ -0,0 +1,68 @@ +# Tasks: 優化任務檔案生成與視覺化下載 + +## 1. 後端設定優化 + +- [x] 1.1 修改 `config.py` debug 預設值 + - `pp_structure_debug_enabled`: `True` → `False` + - `pp_structure_debug_visualization`: `True` → `False` + +## 2. 後端 Visualization 下載 API + +- [x] 2.1 在 `tasks.py` 新增 visualization 下載端點 + - `GET /api/v2/tasks/{task_id}/download/visualization` + - 檢查 visualization 資料夾是否存在 + - 打包資料夾內所有 PNG 為 ZIP + - 返回 StreamingResponse (application/zip) + +- [x] 2.2 在 TaskDetail response 中加入 `has_visualization` 欄位 + - 檢查 task result directory 下是否有 visualization 資料夾 + - 回傳 boolean 值 + +## 3. 前端 Visualization 下載功能 + +- [x] 3.1 在 `types/apiV2.ts` 更新 TaskDetail type + - 新增 `has_visualization?: boolean` + +- [x] 3.2 在 `apiV2.ts` 新增下載方法 + - `downloadVisualization(taskId: string): Promise` + +- [x] 3.3 在 `TaskDetailPage.tsx` 新增下載按鈕 + - 只有 `has_visualization = true` 時顯示 + - 點擊後下載 ZIP 檔案 + +## 4. 停止生成 Markdown 檔案 + +- [x] 4.1 修改 `ocr_service.py` 的 `save_results()` 方法 + - 移除 Markdown 檔案生成 + - 返回值中 `markdown_path` 始終為 `None` + +- [x] 4.2 修改 `unified_document_exporter.py` + - `export_all()`: 移除 Markdown 導出 + - `export_formats()`: 移除 Markdown 支援 + +- [x] 4.3 前端 TaskHistoryPage.tsx 移除 JSON/MD 下載按鈕 + - 改為版面 PDF 和流式 PDF 兩個下載按鈕 + +## 5. 確保 raw_ocr_regions.json 正常生成 + +- [x] 5.1 將 `raw_ocr_regions.json` 生成從 debug 區塊分離 + - 獨立於 `pp_structure_debug_enabled` 設定 + - 此檔案為 PDF 生成和翻譯服務所必需 + +- [x] 5.2 在 `pp_structure_debug.py` 新增 `save_debug_results()` 方法 + - 只保存純 debug 檔案(`_pp_structure_raw.json`, `_debug_summary.json`) + - 不再重複保存 `_raw_ocr_regions.json` + +## 6. Bug 修復 + +- [x] 6.1 修復 Processing 頁面不切換到新任務的問題 + - 在 `useTaskValidation.ts` 中加入 taskId 變化時重置 `isNotFound` 的邏輯 + +## 7. 測試與驗證 + +- [x] 7.1 驗證 TypeScript 編譯通過 +- [ ] 7.2 驗證 `*_raw_ocr_regions.json` 仍正常生成 +- [ ] 7.3 驗證 visualization 資料夾仍正常生成 +- [ ] 7.4 測試 visualization 下載功能 +- [ ] 7.5 驗證 PDF 內容正常顯示 +- [ ] 7.6 驗證新任務上傳後 Processing 頁面正確切換