From d20751d56bc07ed356629d16ddd15410a772c606 Mon Sep 17 00:00:00 2001 From: egg Date: Fri, 12 Dec 2025 17:05:16 +0800 Subject: [PATCH] feat: add batch processing for multiple file uploads MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add BatchState management in taskStore with progress tracking - Implement batch processing service with concurrency control - Direct Track: max 5 parallel tasks - OCR Track: sequential processing (GPU VRAM limit) - Refactor ProcessingPage to support batch mode with BatchProcessingPanel - Update UploadPage to initialize batch state for multi-file uploads - Add i18n translations for batch processing (zh-TW, en-US) 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- .../src/components/BatchProcessingPanel.tsx | 435 ++++++++++++++++++ frontend/src/i18n/locales/en-US.json | 35 +- frontend/src/i18n/locales/zh-TW.json | 35 +- frontend/src/pages/ProcessingPage.tsx | 21 +- frontend/src/pages/UploadPage.tsx | 18 +- frontend/src/services/batchProcessing.ts | 324 +++++++++++++ frontend/src/store/taskStore.ts | 324 ++++++++++++- .../proposal.md | 43 ++ .../specs/frontend-ui/spec.md | 100 ++++ .../2025-12-12-add-batch-processing/tasks.md | 42 ++ openspec/specs/frontend-ui/spec.md | 97 ++++ 11 files changed, 1469 insertions(+), 5 deletions(-) create mode 100644 frontend/src/components/BatchProcessingPanel.tsx create mode 100644 frontend/src/services/batchProcessing.ts create mode 100644 openspec/changes/archive/2025-12-12-add-batch-processing/proposal.md create mode 100644 openspec/changes/archive/2025-12-12-add-batch-processing/specs/frontend-ui/spec.md create mode 100644 openspec/changes/archive/2025-12-12-add-batch-processing/tasks.md diff --git a/frontend/src/components/BatchProcessingPanel.tsx b/frontend/src/components/BatchProcessingPanel.tsx new file mode 100644 index 0000000..8ca6d72 --- /dev/null +++ 
b/frontend/src/components/BatchProcessingPanel.tsx @@ -0,0 +1,435 @@ +/** + * BatchProcessingPanel - Batch processing UI component + * + * Displays batch processing settings, task list, and progress for multi-file processing. + */ + +import { useState, useEffect, useCallback } from 'react' +import { useNavigate } from 'react-router-dom' +import { useTranslation } from 'react-i18next' +import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card' +import { Progress } from '@/components/ui/progress' +import { Button } from '@/components/ui/button' +import { Badge } from '@/components/ui/badge' +import { useToast } from '@/components/ui/toast' +import { + Play, + Pause, + CheckCircle, + XCircle, + FileText, + Loader2, + Settings, + ListChecks, + Zap, + Eye, + RotateCcw, +} from 'lucide-react' +import { + useBatchState, + useBatchProgress, + useBatchOptions, + useTaskStore, + type BatchStrategy, +} from '@/store/taskStore' +import { analyzeBatchTasks, processBatch, cancelBatch } from '@/services/batchProcessing' +import type { LayoutModel, PreprocessingMode } from '@/types/apiV2' + +export default function BatchProcessingPanel() { + const { t } = useTranslation() + const navigate = useNavigate() + const { toast } = useToast() + + const batchState = useBatchState() + const progress = useBatchProgress() + const batchOptions = useBatchOptions() + const { setBatchOptions, clearBatch } = useTaskStore() + + const [isAnalyzing, setIsAnalyzing] = useState(false) + const [isProcessing, setIsProcessing] = useState(false) + + // Calculate progress percentage + const progressPercentage = + progress.total > 0 ? 
Math.round(((progress.completed + progress.failed) / progress.total) * 100) : 0 + + // Analyze all tasks when component mounts + useEffect(() => { + const runAnalysis = async () => { + if (batchState.taskIds.length > 0 && !isAnalyzing) { + // Check if any task needs analysis + const needsAnalysis = batchState.taskIds.some( + (taskId) => !batchState.taskStates[taskId]?.recommendedTrack + ) + if (needsAnalysis) { + setIsAnalyzing(true) + try { + await analyzeBatchTasks(batchState.taskIds) + } finally { + setIsAnalyzing(false) + } + } + } + } + runAnalysis() + }, [batchState.taskIds.length]) // Only run once when tasks are loaded + + // Handle start processing + const handleStartProcessing = useCallback(async () => { + setIsProcessing(true) + try { + await processBatch() + toast({ + title: t('batch.processingComplete', { defaultValue: '批次處理完成' }), + description: t('batch.processingCompleteDesc', { + defaultValue: `已完成 ${useTaskStore.getState().batchState.progress.completed} 個任務`, + completed: useTaskStore.getState().batchState.progress.completed, // read the live store value: the `progress` captured by this callback predates processBatch() + }), + variant: 'success', + }) + } catch (error) { + toast({ + title: t('batch.processingError', { defaultValue: '批次處理錯誤' }), + description: error instanceof Error ? 
error.message : '未知錯誤', + variant: 'destructive', + }) + } finally { + setIsProcessing(false) + } + }, [progress.completed, t, toast]) + + // Handle cancel processing + const handleCancelProcessing = useCallback(async () => { + try { + await cancelBatch() + toast({ + title: t('batch.processingCancelled', { defaultValue: '批次處理已取消' }), + variant: 'default', + }) + } catch (error) { + console.error('Cancel error:', error) + } + }, [t, toast]) + + // Handle clear batch and go to upload + const handleClearAndUpload = useCallback(() => { + clearBatch() + navigate('/upload') + }, [clearBatch, navigate]) + + // Handle view results + const handleViewResults = useCallback(() => { + navigate('/tasks') + }, [navigate]) + + // Get task status icon + const getStatusIcon = (status: string) => { + switch (status) { + case 'completed': + return + case 'processing': + return + case 'failed': + return + default: + return + } + } + + // Get status badge + const getStatusBadge = (status: string) => { + switch (status) { + case 'completed': + return {t('processing.completed', { defaultValue: '已完成' })} + case 'processing': + return {t('processing.processing', { defaultValue: '處理中' })} + case 'failed': + return {t('processing.failed', { defaultValue: '失敗' })} + default: + return {t('processing.pending', { defaultValue: '等待中' })} + } + } + + // Get track badge + const getTrackBadge = (track: string | null) => { + if (!track) return null + const label = track === 'direct' ? 'Direct' : track === 'ocr' ? 'OCR' : track.toUpperCase() + const variant = track === 'direct' ? 'outline' : 'secondary' + return {label} + } + + const allCompleted = progress.completed + progress.failed === progress.total && progress.total > 0 + const canStart = !isAnalyzing && !isProcessing && !batchState.isProcessing && progress.pending > 0 + + return ( +
+ {/* Page Header */} +
+
+
+

{t('batch.title', { defaultValue: '批次處理' })}

+

+ {t('batch.subtitle', { + defaultValue: '共 {{count}} 個檔案', + count: progress.total, + })} +

+
+ {allCompleted && ( +
+ + + {t('batch.allComplete', { defaultValue: '全部完成' })} + +
+ )} +
+
+ + {/* Overall Progress */} + + +
+
+
+ +
+ {t('batch.progress', { defaultValue: '批次進度' })} +
+
+ {progress.completed > 0 && ( + + {t('batch.completed', { defaultValue: '已完成' })}: {progress.completed} + + )} + {progress.processing > 0 && ( + + {t('batch.processing', { defaultValue: '處理中' })}: {progress.processing} + + )} + {progress.failed > 0 && ( + + {t('batch.failed', { defaultValue: '失敗' })}: {progress.failed} + + )} + {progress.pending > 0 && ( + + {t('batch.pending', { defaultValue: '等待中' })}: {progress.pending} + + )} +
+
+
+ + {/* Progress bar */} +
+
+ + {t('batch.overallProgress', { defaultValue: '整體進度' })} + + {progressPercentage}% +
+ +
+ + {/* Action buttons */} +
+ {!allCompleted && canStart && ( + + )} + + {(isProcessing || batchState.isProcessing) && ( + + )} + + {allCompleted && ( + <> + + + + )} + + {isAnalyzing && ( +
+ + {t('batch.analyzing', { defaultValue: '分析文件中...' })} +
+ )} +
+
+
+ + {/* Batch Settings */} + + +
+
+ +
+ {t('batch.settings', { defaultValue: '批次設定' })} +
+
+ + {/* Processing Strategy */} +
+ +
+ {[ + { value: 'auto', label: t('batch.strategyAuto', { defaultValue: '自動判斷' }), desc: '系統自動選擇最佳處理方式' }, + { value: 'force_ocr', label: t('batch.strategyOcr', { defaultValue: '全部 OCR' }), desc: '強制使用 OCR 處理' }, + { value: 'force_direct', label: t('batch.strategyDirect', { defaultValue: '全部 Direct' }), desc: '強制使用 Direct 處理' }, + ].map((option) => ( + + ))} +
+
+ + {/* Layout Model (for OCR) */} + {batchOptions.strategy !== 'force_direct' && ( +
+ +
+ {[ + { value: 'chinese', label: 'Chinese', desc: '中文文件最佳' }, + { value: 'default', label: 'Default', desc: '英文學術論文' }, + { value: 'cdla', label: 'CDLA', desc: '中文版面分析' }, + ].map((option) => ( + + ))} +
+
+ )} + + {/* Preprocessing Mode (for OCR) */} + {batchOptions.strategy !== 'force_direct' && ( +
+ +
+ {[ + { value: 'auto', label: t('batch.preprocessAuto', { defaultValue: '自動' }), desc: '自動分析並套用' }, + { value: 'manual', label: t('batch.preprocessManual', { defaultValue: '手動' }), desc: '使用預設配置' }, + { value: 'disabled', label: t('batch.preprocessDisabled', { defaultValue: '停用' }), desc: '不進行預處理' }, + ].map((option) => ( + + ))} +
+
+ )} + + {/* Concurrency Info */} +
+
+ + + {t('batch.concurrencyInfo', { + defaultValue: 'Direct Track 最多 5 並行處理,OCR Track 依序處理 (GPU 限制)', + })} + +
+
+
+
+ + {/* Task List */} + + +
+
+ +
+ {t('batch.taskList', { defaultValue: '任務列表' })} +
+
+ +
+ {batchState.taskIds.map((taskId) => { + const taskState = batchState.taskStates[taskId] + if (!taskState) return null + + return ( +
+ {/* Status icon */} +
{getStatusIcon(taskState.status)}
+ + {/* File info */} +
+

+ {taskState.filename || taskId} +

+ {taskState.error && ( +

{taskState.error}

+ )} +
+ + {/* Track badge */} +
+ {getTrackBadge(taskState.track || taskState.recommendedTrack)} +
+ + {/* Status badge */} +
{getStatusBadge(taskState.status)}
+
+ ) + })} +
+
+
+
+ ) +} diff --git a/frontend/src/i18n/locales/en-US.json b/frontend/src/i18n/locales/en-US.json index 4020eb6..ce7d35e 100644 --- a/frontend/src/i18n/locales/en-US.json +++ b/frontend/src/i18n/locales/en-US.json @@ -40,7 +40,8 @@ "fileCount": "{{count}} file(s) selected", "clearAll": "Clear All", "removeFile": "Remove", - "selectedFiles": "Selected Files" + "selectedFiles": "Selected Files", + "filesUploaded": "Successfully uploaded {{count}} file(s)" }, "processing": { "title": "OCR Processing", @@ -234,5 +235,37 @@ "title": "Translation", "comingSoon": "Coming Soon", "description": "Document translation feature is under development" + }, + "batch": { + "title": "Batch Processing", + "subtitle": "{{count}} files in total", + "progress": "Batch Progress", + "overallProgress": "Overall Progress", + "settings": "Batch Settings", + "taskList": "Task List", + "strategy": "Processing Strategy", + "strategyAuto": "Auto Detect", + "strategyOcr": "All OCR", + "strategyDirect": "All Direct", + "layoutModel": "Layout Model (OCR)", + "preprocessingMode": "Preprocessing Mode (OCR)", + "preprocessAuto": "Auto", + "preprocessManual": "Manual", + "preprocessDisabled": "Disabled", + "startProcessing": "Start Batch Processing", + "cancelProcessing": "Cancel Processing", + "viewResults": "View Results", + "uploadMore": "Upload More", + "analyzing": "Analyzing documents...", + "allComplete": "All Complete", + "completed": "Completed", + "processing": "Processing", + "failed": "Failed", + "pending": "Pending", + "processingComplete": "Batch Processing Complete", + "processingCompleteDesc": "Completed {{completed}} task(s)", + "processingError": "Batch Processing Error", + "processingCancelled": "Batch Processing Cancelled", + "concurrencyInfo": "Direct Track: max 5 parallel, OCR Track: sequential (GPU limitation)" } } diff --git a/frontend/src/i18n/locales/zh-TW.json b/frontend/src/i18n/locales/zh-TW.json index 4c27486..d477de6 100644 --- a/frontend/src/i18n/locales/zh-TW.json +++ 
b/frontend/src/i18n/locales/zh-TW.json @@ -40,7 +40,8 @@ "fileCount": "已選擇 {{count}} 個檔案", "clearAll": "清除全部", "removeFile": "移除", - "selectedFiles": "已選擇的檔案" + "selectedFiles": "已選擇的檔案", + "filesUploaded": "成功上傳 {{count}} 個檔案" }, "processing": { "title": "OCR 處理中", @@ -234,5 +235,37 @@ "title": "翻譯功能", "comingSoon": "即將推出", "description": "文件翻譯功能正在開發中,敬請期待" + }, + "batch": { + "title": "批次處理", + "subtitle": "共 {{count}} 個檔案", + "progress": "批次進度", + "overallProgress": "整體進度", + "settings": "批次設定", + "taskList": "任務列表", + "strategy": "處理策略", + "strategyAuto": "自動判斷", + "strategyOcr": "全部 OCR", + "strategyDirect": "全部 Direct", + "layoutModel": "版面模型 (OCR)", + "preprocessingMode": "預處理模式 (OCR)", + "preprocessAuto": "自動", + "preprocessManual": "手動", + "preprocessDisabled": "停用", + "startProcessing": "開始批次處理", + "cancelProcessing": "取消處理", + "viewResults": "查看結果", + "uploadMore": "上傳更多", + "analyzing": "分析文件中...", + "allComplete": "全部完成", + "completed": "已完成", + "processing": "處理中", + "failed": "失敗", + "pending": "等待中", + "processingComplete": "批次處理完成", + "processingCompleteDesc": "已完成 {{completed}} 個任務", + "processingError": "批次處理錯誤", + "processingCancelled": "批次處理已取消", + "concurrencyInfo": "Direct Track 最多 5 並行處理,OCR Track 依序處理 (GPU 限制)" } } diff --git a/frontend/src/pages/ProcessingPage.tsx b/frontend/src/pages/ProcessingPage.tsx index 0cadcf7..d5eb9e2 100644 --- a/frontend/src/pages/ProcessingPage.tsx +++ b/frontend/src/pages/ProcessingPage.tsx @@ -14,11 +14,30 @@ import PreprocessingSettings from '@/components/PreprocessingSettings' import PreprocessingPreview from '@/components/PreprocessingPreview' import ProcessingTrackSelector from '@/components/ProcessingTrackSelector' import TaskNotFound from '@/components/TaskNotFound' +import BatchProcessingPanel from '@/components/BatchProcessingPanel' import { useTaskValidation } from '@/hooks/useTaskValidation' -import { useTaskStore, useProcessingState } from '@/store/taskStore' +import { useTaskStore, 
useProcessingState, useIsBatchMode } from '@/store/taskStore' import type { LayoutModel, ProcessingOptions, PreprocessingMode, PreprocessingConfig, ProcessingTrack } from '@/types/apiV2' +/** + * ProcessingPage - Main entry point + * Routes to batch or single task processing based on state + */ export default function ProcessingPage() { + const isBatchMode = useIsBatchMode() + + // Route to appropriate component + if (isBatchMode) { + return + } + + return +} + +/** + * SingleTaskProcessing - Original single task processing UI + */ +function SingleTaskProcessing() { const { t } = useTranslation() const navigate = useNavigate() const { toast } = useToast() diff --git a/frontend/src/pages/UploadPage.tsx b/frontend/src/pages/UploadPage.tsx index 5bb10de..94684c8 100644 --- a/frontend/src/pages/UploadPage.tsx +++ b/frontend/src/pages/UploadPage.tsx @@ -7,6 +7,7 @@ import { Button } from '@/components/ui/button' import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card' import { useToast } from '@/components/ui/toast' import { useUploadStore } from '@/store/uploadStore' +import { useTaskStore } from '@/store/taskStore' import { apiClientV2 } from '@/services/apiV2' import { FileText, X, Upload, Trash2, CheckCircle2, ArrowRight } from 'lucide-react' @@ -16,6 +17,8 @@ export default function UploadPage() { const { toast } = useToast() const [selectedFiles, setSelectedFiles] = useState([]) const { setBatchId, setUploadProgress } = useUploadStore() + const initBatch = useTaskStore((state) => state.initBatch) + const setCurrentTask = useTaskStore((state) => state.setCurrentTask) const uploadMutation = useMutation({ mutationFn: async (files: File[]) => { @@ -33,9 +36,22 @@ export default function UploadPage() { if (tasks.length > 0) { setBatchId(tasks[0].task_id as unknown as number) } + + // Initialize batch state with all uploaded tasks + if (tasks.length > 1) { + // Multiple files: use batch mode + initBatch(tasks.map(t => ({ + taskId: t.task_id, + 
filename: t.filename, + }))) + } else if (tasks.length === 1) { + // Single file: use single task mode + setCurrentTask(tasks[0].task_id, tasks[0].filename) + } + toast({ title: t('upload.uploadSuccess'), - description: `成功上傳 ${tasks.length} 個檔案`, + description: t('upload.filesUploaded', { count: tasks.length }), variant: 'success', }) navigate('/processing') diff --git a/frontend/src/services/batchProcessing.ts b/frontend/src/services/batchProcessing.ts new file mode 100644 index 0000000..4ad8820 --- /dev/null +++ b/frontend/src/services/batchProcessing.ts @@ -0,0 +1,324 @@ +/** + * Batch Processing Service + * + * Handles batch processing of multiple tasks with parallel execution for Direct Track + * and queue processing for OCR Track (GPU VRAM limitation). + * + * Concurrency limits: + * - Direct Track: Max 5 concurrent tasks (CPU-bound) + * - OCR Track: Max 1 concurrent task (GPU VRAM limited) + */ + +import { apiClientV2 } from '@/services/apiV2' +import { useTaskStore, type BatchTaskState, type BatchStrategy } from '@/store/taskStore' +import type { ProcessingTrack, ProcessingOptions } from '@/types/apiV2' + +// Concurrency limits +const DIRECT_TRACK_CONCURRENCY = 5 +const OCR_TRACK_CONCURRENCY = 1 + +// Polling interval for task status (ms) +const POLL_INTERVAL = 2000 + +/** + * Analyze all tasks in batch to determine recommended processing track + */ +export async function analyzeBatchTasks(taskIds: string[]): Promise { + const { updateBatchTaskState, setBatchAnalyzing } = useTaskStore.getState() + + setBatchAnalyzing(true) + + try { + // Analyze all tasks in parallel (analysis is lightweight) + const analysisPromises = taskIds.map(async (taskId) => { + try { + const result = await apiClientV2.analyzeDocument(taskId) + updateBatchTaskState(taskId, { + recommendedTrack: result.recommended_track, + analysisResult: result, + }) + return { taskId, success: true, result } + } catch (error) { + const errorMessage = error instanceof Error ? 
error.message : 'Analysis failed' + updateBatchTaskState(taskId, { + error: errorMessage, + }) + return { taskId, success: false, error: errorMessage } + } + }) + + await Promise.all(analysisPromises) + } finally { + setBatchAnalyzing(false) + } +} + +/** + * Determine the actual track to use based on strategy and recommendation + */ +function determineTrack( + strategy: BatchStrategy, + recommendedTrack: ProcessingTrack | null +): ProcessingTrack { + switch (strategy) { + case 'force_ocr': + return 'ocr' + case 'force_direct': + return 'direct' + case 'auto': + default: + // Use recommended track from analysis, fallback to 'auto' + return recommendedTrack || 'auto' + } +} + +/** + * Build processing options for a task + */ +function buildProcessingOptions( + track: ProcessingTrack, + batchOptions: { + layoutModel: 'chinese' | 'default' | 'cdla' + preprocessingMode: 'auto' | 'manual' | 'disabled' + language: string + } +): ProcessingOptions { + const options: ProcessingOptions = { + use_dual_track: true, + force_track: track, + language: batchOptions.language, + } + + // Only add OCR-specific options for OCR track + if (track === 'ocr') { + options.layout_model = batchOptions.layoutModel + options.preprocessing_mode = batchOptions.preprocessingMode + } + + return options +} + +/** + * Process a single task and poll for completion + */ +async function processTask( + taskId: string, + options: ProcessingOptions, + onStatusUpdate: (status: BatchTaskState['status'], error?: string) => void +): Promise { + try { + // Start processing + onStatusUpdate('processing') + await apiClientV2.startTask(taskId, options) + + // Poll for completion + while (true) { + await sleep(POLL_INTERVAL) + + const task = await apiClientV2.getTask(taskId) + + if (task.status === 'completed') { + onStatusUpdate('completed') + return true + } + + if (task.status === 'failed') { + onStatusUpdate('failed', task.error_message || 'Processing failed') + return false + } + + // Still processing, 
continue polling + } + } catch (error) { + const errorMessage = error instanceof Error ? error.message : 'Unknown error' + onStatusUpdate('failed', errorMessage) + return false + } +} + +/** + * Sleep utility + */ +function sleep(ms: number): Promise { + return new Promise((resolve) => setTimeout(resolve, ms)) +} + +/** + * Process tasks with concurrency limit + * + * Tasks that have not started yet are dropped as soon as the store's + * batchState.isProcessing is false (set by stopBatchProcessing, which + * cancelBatch calls); tasks already running are still awaited. + */ +async function processWithConcurrency( + tasks: Array<{ taskId: string; options: ProcessingOptions }>, + concurrency: number, + onTaskUpdate: (taskId: string, status: BatchTaskState['status'], error?: string) => void +): Promise { + const queue = [...tasks] + const running: Promise[] = [] + + while (queue.length > 0 || running.length > 0) { + // Batch was stopped/cancelled: do not launch anything that is still queued + if (!useTaskStore.getState().batchState.isProcessing) { + queue.length = 0 + } + + // Start new tasks up to concurrency limit + while (running.length < concurrency && queue.length > 0) { + const task = queue.shift()! + const promise = processTask(task.taskId, task.options, (status, error) => { + onTaskUpdate(task.taskId, status, error) + }).then(() => { + // Remove from running when done + const index = running.indexOf(promise) + if (index > -1) { + running.splice(index, 1) + } + }) + running.push(promise) + } + + // Wait for at least one task to complete if we're at capacity + if (running.length >= concurrency || (queue.length === 0 && running.length > 0)) { + await Promise.race(running) + } + } +} + +/** + * Main batch processing function + * Processes Direct Track tasks in parallel (max 5) and OCR Track tasks sequentially (max 1) + */ +export async function processBatch(): Promise { + const store = useTaskStore.getState() + const { batchState, updateBatchTaskState, startBatchProcessing, stopBatchProcessing } = store + + if (!batchState.isActive || batchState.taskIds.length === 0) { + console.warn('No batch to process') + return + } + + // Start batch processing + startBatchProcessing() + + const { taskIds, taskStates, processingOptions } = batchState + + // Separate tasks by track + const directTasks: Array<{ taskId: string; options: 
ProcessingOptions }> = [] + const ocrTasks: Array<{ taskId: string; options: ProcessingOptions }> = [] + + for (const taskId of taskIds) { + const taskState = taskStates[taskId] + if (!taskState || taskState.status !== 'pending') continue + + const track = determineTrack( + processingOptions.strategy, + taskState.recommendedTrack + ) + + const options = buildProcessingOptions(track, processingOptions) + + // Update task with determined track + updateBatchTaskState(taskId, { track }) + + if (track === 'direct') { + directTasks.push({ taskId, options }) + } else { + // OCR, hybrid, or auto all go through OCR queue + ocrTasks.push({ taskId, options }) + } + } + + const onTaskUpdate = (taskId: string, status: BatchTaskState['status'], error?: string) => { + const updates: Partial = { status } + if (status === 'processing') { + updates.startedAt = new Date().toISOString() + } else if (status === 'completed' || status === 'failed') { + updates.completedAt = new Date().toISOString() + if (error) updates.error = error + } + updateBatchTaskState(taskId, updates) + } + + try { + // Process Direct and OCR tracks concurrently + // Direct: up to 5 parallel + // OCR: sequential (1 at a time) + await Promise.all([ + processWithConcurrency(directTasks, DIRECT_TRACK_CONCURRENCY, onTaskUpdate), + processWithConcurrency(ocrTasks, OCR_TRACK_CONCURRENCY, onTaskUpdate), + ]) + } finally { + stopBatchProcessing() + } +} + +/** + * Cancel batch processing + * Note: This cancels pending tasks but cannot cancel already-running tasks + */ +export async function cancelBatch(): Promise { + const store = useTaskStore.getState() + const { batchState, updateBatchTaskState, stopBatchProcessing } = store + + // Cancel all processing tasks + for (const taskId of batchState.taskIds) { + const taskState = batchState.taskStates[taskId] + if (taskState?.status === 'processing') { + try { + await apiClientV2.cancelTask(taskId) + updateBatchTaskState(taskId, { + status: 'failed', + error: 'Cancelled by 
user', + completedAt: new Date().toISOString(), + }) + } catch (error) { + console.error(`Failed to cancel task ${taskId}:`, error) + } + } + } + + stopBatchProcessing() +} + +/** + * Get batch processing summary + */ +export function getBatchSummary(): { + total: number + directCount: number + ocrCount: number + pendingCount: number + completedCount: number + failedCount: number +} { + const { batchState } = useTaskStore.getState() + const { taskIds, taskStates } = batchState + + let directCount = 0 + let ocrCount = 0 + let pendingCount = 0 + let completedCount = 0 + let failedCount = 0 + + for (const taskId of taskIds) { + const taskState = taskStates[taskId] + if (!taskState) continue + + if (taskState.track === 'direct') directCount++ + else if (taskState.track === 'ocr') ocrCount++ + + switch (taskState.status) { + case 'pending': + pendingCount++ + break + case 'completed': + completedCount++ + break + case 'failed': + failedCount++ + break + } + } + + return { + total: taskIds.length, + directCount, + ocrCount, + pendingCount, + completedCount, + failedCount, + } +} diff --git a/frontend/src/store/taskStore.ts b/frontend/src/store/taskStore.ts index 324b40c..d56999f 100644 --- a/frontend/src/store/taskStore.ts +++ b/frontend/src/store/taskStore.ts @@ -1,6 +1,6 @@ import { create } from 'zustand' import { persist } from 'zustand/middleware' -import type { Task, TaskStatus, ProcessingTrack, ProcessingOptions } from '@/types/apiV2' +import type { Task, TaskStatus, ProcessingTrack, ProcessingOptions, DocumentAnalysisResponse } from '@/types/apiV2' /** * Processing state for tracking ongoing operations @@ -12,6 +12,76 @@ export interface ProcessingState { options: ProcessingOptions | null } +// ==================== Batch Processing Types ==================== + +/** + * Processing strategy for batch operations + */ +export type BatchStrategy = 'auto' | 'force_ocr' | 'force_direct' + +/** + * Individual task state within a batch + */ +export interface 
BatchTaskState { + taskId: string + filename: string | null + status: TaskStatus + track: ProcessingTrack | null + recommendedTrack: ProcessingTrack | null + analysisResult: DocumentAnalysisResponse | null + error: string | null + startedAt: string | null + completedAt: string | null +} + +/** + * Batch progress tracking + */ +export interface BatchProgress { + total: number + pending: number + analyzing: number + processing: number + completed: number + failed: number +} + +/** + * Batch processing options (unified settings for all tasks) + */ +export interface BatchProcessingOptions { + strategy: BatchStrategy + layoutModel: 'chinese' | 'default' | 'cdla' + preprocessingMode: 'auto' | 'manual' | 'disabled' + language: string +} + +/** + * Batch state for managing multiple tasks + */ +export interface BatchState { + // Whether batch mode is active + isActive: boolean + + // All task IDs in this batch + taskIds: string[] + + // State for each task + taskStates: Record + + // Progress summary + progress: BatchProgress + + // Unified processing options + processingOptions: BatchProcessingOptions + + // Batch processing status + isProcessing: boolean + isAnalyzing: boolean + startedAt: string | null + completedAt: string | null +} + /** * Cached task info for quick display without API calls */ @@ -23,6 +93,43 @@ export interface CachedTask { processingTrack?: ProcessingTrack } +/** + * Default batch processing options + */ +const defaultBatchProcessingOptions: BatchProcessingOptions = { + strategy: 'auto', + layoutModel: 'chinese', + preprocessingMode: 'auto', + language: 'ch', +} + +/** + * Default batch progress + */ +const defaultBatchProgress: BatchProgress = { + total: 0, + pending: 0, + analyzing: 0, + processing: 0, + completed: 0, + failed: 0, +} + +/** + * Default batch state + */ +const defaultBatchState: BatchState = { + isActive: false, + taskIds: [], + taskStates: {}, + progress: defaultBatchProgress, + processingOptions: defaultBatchProcessingOptions, + 
isProcessing: false, + isAnalyzing: false, + startedAt: null, + completedAt: null, +} + /** * Task Store State * Centralized state management for task operations @@ -37,6 +144,9 @@ interface TaskState { // Recently accessed tasks cache (max 20) recentTasks: CachedTask[] + // Batch processing state + batchState: BatchState + // Actions setCurrentTask: (taskId: string | null, filename?: string | null) => void clearCurrentTask: () => void @@ -53,6 +163,16 @@ interface TaskState { // Get cached task getCachedTask: (taskId: string) => CachedTask | undefined + + // Batch processing actions + initBatch: (tasks: Array<{ taskId: string; filename: string | null }>) => void + clearBatch: () => void + setBatchOptions: (options: Partial) => void + updateBatchTaskState: (taskId: string, updates: Partial) => void + setBatchAnalyzing: (isAnalyzing: boolean) => void + startBatchProcessing: () => void + stopBatchProcessing: () => void + recalculateBatchProgress: () => void } /** @@ -76,6 +196,7 @@ export const useTaskStore = create()( options: null, }, recentTasks: [], + batchState: { ...defaultBatchState }, // Set current task setCurrentTask: (taskId, filename) => { @@ -207,6 +328,179 @@ export const useTaskStore = create()( getCachedTask: (taskId) => { return get().recentTasks.find(t => t.taskId === taskId) }, + + // ==================== Batch Processing Actions ==================== + + // Initialize batch with uploaded tasks + initBatch: (tasks) => { + const taskIds = tasks.map(t => t.taskId) + const taskStates: Record = {} + + for (const task of tasks) { + taskStates[task.taskId] = { + taskId: task.taskId, + filename: task.filename, + status: 'pending', + track: null, + recommendedTrack: null, + analysisResult: null, + error: null, + startedAt: null, + completedAt: null, + } + } + + set({ + batchState: { + isActive: true, + taskIds, + taskStates, + progress: { + total: tasks.length, + pending: tasks.length, + analyzing: 0, + processing: 0, + completed: 0, + failed: 0, + }, + 
processingOptions: { ...defaultBatchProcessingOptions }, + isProcessing: false, + isAnalyzing: false, + startedAt: null, + completedAt: null, + }, + }) + }, + + // Clear batch state + clearBatch: () => { + set({ batchState: { ...defaultBatchState } }) + }, + + // Update batch processing options + setBatchOptions: (options) => { + set((state) => ({ + batchState: { + ...state.batchState, + processingOptions: { + ...state.batchState.processingOptions, + ...options, + }, + }, + })) + }, + + // Update individual task state within batch + updateBatchTaskState: (taskId, updates) => { + set((state) => { + const currentTask = state.batchState.taskStates[taskId] + if (!currentTask) return state + + const updatedTaskStates = { + ...state.batchState.taskStates, + [taskId]: { + ...currentTask, + ...updates, + }, + } + + return { + batchState: { + ...state.batchState, + taskStates: updatedTaskStates, + }, + } + }) + + // Recalculate progress after update + get().recalculateBatchProgress() + }, + + // Set batch analyzing state + setBatchAnalyzing: (isAnalyzing) => { + set((state) => ({ + batchState: { + ...state.batchState, + isAnalyzing, + }, + })) + }, + + // Start batch processing + startBatchProcessing: () => { + set((state) => ({ + batchState: { + ...state.batchState, + isProcessing: true, + startedAt: new Date().toISOString(), + completedAt: null, + }, + })) + }, + + // Stop batch processing + stopBatchProcessing: () => { + set((state) => ({ + batchState: { + ...state.batchState, + isProcessing: false, + completedAt: new Date().toISOString(), + }, + })) + }, + + // Recalculate batch progress from task states + recalculateBatchProgress: () => { + set((state) => { + const { taskStates, taskIds } = state.batchState + let pending = 0 + let analyzing = 0 + let processing = 0 + let completed = 0 + let failed = 0 + + for (const taskId of taskIds) { + const taskState = taskStates[taskId] + if (!taskState) continue + + switch (taskState.status) { + case 'pending': + // Check if 
we're analyzing (has recommendedTrack means analyzed) + if (taskState.recommendedTrack !== null) { + pending++ + } else if (state.batchState.isAnalyzing) { + analyzing++ + } else { + pending++ + } + break + case 'processing': + processing++ + break + case 'completed': + completed++ + break + case 'failed': + failed++ + break + } + } + + return { + batchState: { + ...state.batchState, + progress: { + total: taskIds.length, + pending, + analyzing, + processing, + completed, + failed, + }, + }, + } + }) + }, }), { name: 'tool-ocr-task-store', @@ -236,3 +530,31 @@ export function useCurrentTask() { export function useProcessingState() { return useTaskStore((state) => state.processingState) } + +/** + * Helper hook for batch state + */ +export function useBatchState() { + return useTaskStore((state) => state.batchState) +} + +/** + * Helper hook for batch progress + */ +export function useBatchProgress() { + return useTaskStore((state) => state.batchState.progress) +} + +/** + * Helper hook for batch processing options + */ +export function useBatchOptions() { + return useTaskStore((state) => state.batchState.processingOptions) +} + +/** + * Helper hook to check if batch mode is active + */ +export function useIsBatchMode() { + return useTaskStore((state) => state.batchState.isActive && state.batchState.taskIds.length > 1) +} diff --git a/openspec/changes/archive/2025-12-12-add-batch-processing/proposal.md b/openspec/changes/archive/2025-12-12-add-batch-processing/proposal.md new file mode 100644 index 0000000..13acf31 --- /dev/null +++ b/openspec/changes/archive/2025-12-12-add-batch-processing/proposal.md @@ -0,0 +1,43 @@ +# Change: 新增批次處理功能 + +## Why + +目前系統支援批次上傳多個檔案,但處理時需要使用者逐一點選每個任務進行處理。這對於大量文件的處理場景非常不便。需要新增批次處理功能,讓使用者可以一次設定並啟動所有上傳的任務。 + +## What Changes + +### 1. 批次狀態管理 +- 擴展 taskStore 支援批次任務追蹤 +- 新增批次進度狀態(總數、已完成、處理中、失敗) +- 儲存批次統一設定 + +### 2. 
批次處理邏輯 +- 上傳完成後分析所有檔案決定處理軌道 +- 根據軌道類型分流處理: + - Direct Track:最多 5 個並行(CPU 運算) + - OCR Track:單一佇列(GPU VRAM 限制) +- 兩類任務可同時進行 + +### 3. 批次設定 UI +- 修改 ProcessingPage 支援多任務模式 +- 統一設定介面: + - 處理策略(自動判斷/強制 OCR/強制 Direct) + - Layout Model(OCR 專用) + - 預處理模式(OCR 專用) +- 批次進度顯示(整體進度 + 各任務狀態) + +### 4. 處理策略 +- **自動判斷**(推薦):系統分析每個檔案後自動選擇最佳 track +- **全部 OCR**:強制所有檔案使用 OCR track +- **全部 Direct**:強制所有 PDF 使用 Direct track + +## Impact + +- Affected specs: frontend-ui (修改) +- Affected code: + - `frontend/src/store/taskStore.ts` - 擴展批次狀態 + - `frontend/src/pages/ProcessingPage.tsx` - 支援多任務處理 + - `frontend/src/pages/UploadPage.tsx` - 傳遞多任務 ID + - `frontend/src/services/batchProcessing.ts` - 新增批次處理輔助函數 + - `frontend/src/i18n/locales/*.json` - 新增翻譯 +- 後端無需改動(利用現有 API) diff --git a/openspec/changes/archive/2025-12-12-add-batch-processing/specs/frontend-ui/spec.md b/openspec/changes/archive/2025-12-12-add-batch-processing/specs/frontend-ui/spec.md new file mode 100644 index 0000000..1fa95fe --- /dev/null +++ b/openspec/changes/archive/2025-12-12-add-batch-processing/specs/frontend-ui/spec.md @@ -0,0 +1,100 @@ +# Frontend UI Specification - Batch Processing + +## ADDED Requirements + +### Requirement: Batch Processing Support + +The system SHALL support batch processing of multiple uploaded files with a single configuration. 
+ +After uploading multiple files, the user SHALL be able to: +- Configure processing settings once for all files +- Start processing all files with one action +- Monitor progress of all files in a unified view + +#### Scenario: Multiple files uploaded +- **WHEN** user uploads multiple files +- **AND** navigates to processing page +- **THEN** the system displays batch processing mode +- **AND** shows all pending tasks in a list + +#### Scenario: Batch configuration +- **WHEN** user is in batch processing mode +- **THEN** user can select a processing strategy (auto/OCR/Direct) +- **AND** user can configure layout model for OCR tasks +- **AND** user can configure preprocessing for OCR tasks +- **AND** settings apply to all applicable tasks + +--- + +### Requirement: Batch Processing Strategy + +The system SHALL support three batch processing strategies: + +1. **Auto Detection** (default): System analyzes each file and selects optimal track +2. **Force OCR**: All files processed with OCR track +3. **Force Direct**: All PDF files processed with Direct track + +#### Scenario: Auto detection strategy +- **WHEN** user selects auto detection strategy +- **THEN** the system analyzes each file before processing +- **AND** assigns OCR or Direct track based on file characteristics + +#### Scenario: Force OCR strategy +- **WHEN** user selects force OCR strategy +- **THEN** all files are processed using OCR track +- **AND** layout model and preprocessing settings are applied + +#### Scenario: Force Direct strategy +- **WHEN** user selects force Direct strategy +- **AND** file is a PDF +- **THEN** the file is processed using Direct track + +--- + +### Requirement: Parallel Processing Limits + +The system SHALL enforce different parallelism limits based on processing track: + +- Direct Track: Maximum 5 concurrent tasks (CPU-based) +- OCR Track: Maximum 1 concurrent task (GPU VRAM constraint) + +Direct and OCR tasks MAY run simultaneously as they use different resources. 
+ +#### Scenario: Direct track parallelism +- **WHEN** batch contains multiple Direct track tasks +- **THEN** up to 5 tasks process concurrently +- **AND** remaining tasks wait in queue + +#### Scenario: OCR track serialization +- **WHEN** batch contains multiple OCR track tasks +- **THEN** only 1 task processes at a time +- **AND** remaining tasks wait in queue + +#### Scenario: Mixed track processing +- **WHEN** batch contains both Direct and OCR tasks +- **THEN** Direct tasks run in parallel pool (max 5) +- **AND** OCR tasks run in serial queue (max 1) +- **AND** both pools operate simultaneously + +--- + +### Requirement: Batch Progress Display + +The system SHALL display unified progress for batch processing. + +Progress display SHALL include: +- Overall progress (completed / total) +- Count by status (processing, completed, failed) +- Individual task status list +- Estimated time remaining (optional) + +#### Scenario: Batch progress monitoring +- **WHEN** batch processing is in progress +- **THEN** user sees overall completion percentage +- **AND** user sees count of tasks in each status +- **AND** user sees status of each individual task + +#### Scenario: Batch completion +- **WHEN** all tasks in batch are completed or failed +- **THEN** user sees final summary +- **AND** user can navigate to results page diff --git a/openspec/changes/archive/2025-12-12-add-batch-processing/tasks.md b/openspec/changes/archive/2025-12-12-add-batch-processing/tasks.md new file mode 100644 index 0000000..8f6af21 --- /dev/null +++ b/openspec/changes/archive/2025-12-12-add-batch-processing/tasks.md @@ -0,0 +1,42 @@ +# Tasks: 新增批次處理功能 + +## 1. 批次狀態管理 + +- [x] 1.1 擴展 taskStore 新增批次狀態介面(BatchState) +- [x] 1.2 實作批次任務追蹤(taskIds、taskStates) +- [x] 1.3 實作批次進度計算(total、completed、processing、failed) +- [x] 1.4 實作批次設定儲存(processingOptions) + +## 2. 
批次處理邏輯 + +- [x] 2.1 新增批次分析函數(分析所有任務決定 track) +- [x] 2.2 實作 Direct Track 並行處理(最多 5 並行) +- [x] 2.3 實作 OCR Track 佇列處理(單一佇列) +- [x] 2.4 實作混合模式處理(Direct 和 OCR 同時進行) +- [x] 2.5 實作任務狀態輪詢與更新 + +## 3. 上傳頁面調整 + +- [x] 3.1 修改 UploadPage 上傳完成後儲存所有 taskIds +- [x] 3.2 導航至 ProcessingPage 時傳遞批次模式標記 + +## 4. 處理頁面重構 + +- [x] 4.1 修改 ProcessingPage 支援批次模式 +- [x] 4.2 新增批次設定區塊(策略選擇、統一設定) +- [x] 4.3 新增批次進度顯示元件 +- [x] 4.4 新增任務列表顯示(各任務狀態) +- [x] 4.5 實作批次開始處理按鈕 + +## 5. i18n 翻譯 + +- [x] 5.1 新增批次處理相關中文翻譯 +- [x] 5.2 新增批次處理相關英文翻譯 + +## 6. 測試與驗證 + +- [x] 6.1 測試單檔案處理(向下相容) +- [x] 6.2 測試多檔案 Direct Track 並行 +- [x] 6.3 測試多檔案 OCR Track 佇列 +- [x] 6.4 測試混合模式處理 +- [x] 6.5 驗證 TypeScript 編譯通過 diff --git a/openspec/specs/frontend-ui/spec.md b/openspec/specs/frontend-ui/spec.md index 8c35d36..980442b 100644 --- a/openspec/specs/frontend-ui/spec.md +++ b/openspec/specs/frontend-ui/spec.md @@ -89,3 +89,100 @@ The application MAY display: - **WHEN** any page describes product features - **THEN** the descriptions are factual and verifiable +### Requirement: Batch Processing Support + +The system SHALL support batch processing of multiple uploaded files with a single configuration. 
+ +After uploading multiple files, the user SHALL be able to: +- Configure processing settings once for all files +- Start processing all files with one action +- Monitor progress of all files in a unified view + +#### Scenario: Multiple files uploaded +- **WHEN** user uploads multiple files +- **AND** navigates to processing page +- **THEN** the system displays batch processing mode +- **AND** shows all pending tasks in a list + +#### Scenario: Batch configuration +- **WHEN** user is in batch processing mode +- **THEN** user can select a processing strategy (auto/OCR/Direct) +- **AND** user can configure layout model for OCR tasks +- **AND** user can configure preprocessing for OCR tasks +- **AND** settings apply to all applicable tasks + +--- + +### Requirement: Batch Processing Strategy + +The system SHALL support three batch processing strategies: + +1. **Auto Detection** (default): System analyzes each file and selects optimal track +2. **Force OCR**: All files processed with OCR track +3. **Force Direct**: All PDF files processed with Direct track + +#### Scenario: Auto detection strategy +- **WHEN** user selects auto detection strategy +- **THEN** the system analyzes each file before processing +- **AND** assigns OCR or Direct track based on file characteristics + +#### Scenario: Force OCR strategy +- **WHEN** user selects force OCR strategy +- **THEN** all files are processed using OCR track +- **AND** layout model and preprocessing settings are applied + +#### Scenario: Force Direct strategy +- **WHEN** user selects force Direct strategy +- **AND** file is a PDF +- **THEN** the file is processed using Direct track + +--- + +### Requirement: Parallel Processing Limits + +The system SHALL enforce different parallelism limits based on processing track: + +- Direct Track: Maximum 5 concurrent tasks (CPU-based) +- OCR Track: Maximum 1 concurrent task (GPU VRAM constraint) + +Direct and OCR tasks MAY run simultaneously as they use different resources. 
+ +#### Scenario: Direct track parallelism +- **WHEN** batch contains multiple Direct track tasks +- **THEN** up to 5 tasks process concurrently +- **AND** remaining tasks wait in queue + +#### Scenario: OCR track serialization +- **WHEN** batch contains multiple OCR track tasks +- **THEN** only 1 task processes at a time +- **AND** remaining tasks wait in queue + +#### Scenario: Mixed track processing +- **WHEN** batch contains both Direct and OCR tasks +- **THEN** Direct tasks run in parallel pool (max 5) +- **AND** OCR tasks run in serial queue (max 1) +- **AND** both pools operate simultaneously + +--- + +### Requirement: Batch Progress Display + +The system SHALL display unified progress for batch processing. + +Progress display SHALL include: +- Overall progress (completed / total) +- Count by status (processing, completed, failed) +- Individual task status list +- Estimated time remaining (optional) + +#### Scenario: Batch progress monitoring +- **WHEN** batch processing is in progress +- **THEN** user sees overall completion percentage +- **AND** user sees count of tasks in each status +- **AND** user sees status of each individual task + +#### Scenario: Batch completion +- **WHEN** all tasks in batch are completed or failed +- **THEN** user sees final summary +- **AND** user can navigate to results page +