feat: add batch processing for multiple file uploads
- Add BatchState management in taskStore with progress tracking - Implement batch processing service with concurrency control - Direct Track: max 5 parallel tasks - OCR Track: sequential processing (GPU VRAM limit) - Refactor ProcessingPage to support batch mode with BatchProcessingPanel - Update UploadPage to initialize batch state for multi-file uploads - Add i18n translations for batch processing (zh-TW, en-US) 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
324
frontend/src/services/batchProcessing.ts
Normal file
324
frontend/src/services/batchProcessing.ts
Normal file
@@ -0,0 +1,324 @@
|
||||
/**
|
||||
* Batch Processing Service
|
||||
*
|
||||
* Handles batch processing of multiple tasks with parallel execution for Direct Track
|
||||
* and queue processing for OCR Track (GPU VRAM limitation).
|
||||
*
|
||||
* Concurrency limits:
|
||||
* - Direct Track: Max 5 concurrent tasks (CPU-bound)
|
||||
* - OCR Track: Max 1 concurrent task (GPU VRAM limited)
|
||||
*/
|
||||
|
||||
import { apiClientV2 } from '@/services/apiV2'
|
||||
import { useTaskStore, type BatchTaskState, type BatchStrategy } from '@/store/taskStore'
|
||||
import type { ProcessingTrack, ProcessingOptions } from '@/types/apiV2'
|
||||
|
||||
// Concurrency limits
|
||||
const DIRECT_TRACK_CONCURRENCY = 5
|
||||
const OCR_TRACK_CONCURRENCY = 1
|
||||
|
||||
// Polling interval for task status (ms)
|
||||
const POLL_INTERVAL = 2000
|
||||
|
||||
/**
|
||||
* Analyze all tasks in batch to determine recommended processing track
|
||||
*/
|
||||
export async function analyzeBatchTasks(taskIds: string[]): Promise<void> {
|
||||
const { updateBatchTaskState, setBatchAnalyzing } = useTaskStore.getState()
|
||||
|
||||
setBatchAnalyzing(true)
|
||||
|
||||
try {
|
||||
// Analyze all tasks in parallel (analysis is lightweight)
|
||||
const analysisPromises = taskIds.map(async (taskId) => {
|
||||
try {
|
||||
const result = await apiClientV2.analyzeDocument(taskId)
|
||||
updateBatchTaskState(taskId, {
|
||||
recommendedTrack: result.recommended_track,
|
||||
analysisResult: result,
|
||||
})
|
||||
return { taskId, success: true, result }
|
||||
} catch (error) {
|
||||
const errorMessage = error instanceof Error ? error.message : 'Analysis failed'
|
||||
updateBatchTaskState(taskId, {
|
||||
error: errorMessage,
|
||||
})
|
||||
return { taskId, success: false, error: errorMessage }
|
||||
}
|
||||
})
|
||||
|
||||
await Promise.all(analysisPromises)
|
||||
} finally {
|
||||
setBatchAnalyzing(false)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Determine the actual track to use based on strategy and recommendation
|
||||
*/
|
||||
function determineTrack(
|
||||
strategy: BatchStrategy,
|
||||
recommendedTrack: ProcessingTrack | null
|
||||
): ProcessingTrack {
|
||||
switch (strategy) {
|
||||
case 'force_ocr':
|
||||
return 'ocr'
|
||||
case 'force_direct':
|
||||
return 'direct'
|
||||
case 'auto':
|
||||
default:
|
||||
// Use recommended track from analysis, fallback to 'auto'
|
||||
return recommendedTrack || 'auto'
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Build processing options for a task
|
||||
*/
|
||||
function buildProcessingOptions(
|
||||
track: ProcessingTrack,
|
||||
batchOptions: {
|
||||
layoutModel: 'chinese' | 'default' | 'cdla'
|
||||
preprocessingMode: 'auto' | 'manual' | 'disabled'
|
||||
language: string
|
||||
}
|
||||
): ProcessingOptions {
|
||||
const options: ProcessingOptions = {
|
||||
use_dual_track: true,
|
||||
force_track: track,
|
||||
language: batchOptions.language,
|
||||
}
|
||||
|
||||
// Only add OCR-specific options for OCR track
|
||||
if (track === 'ocr') {
|
||||
options.layout_model = batchOptions.layoutModel
|
||||
options.preprocessing_mode = batchOptions.preprocessingMode
|
||||
}
|
||||
|
||||
return options
|
||||
}
|
||||
|
||||
/**
|
||||
* Process a single task and poll for completion
|
||||
*/
|
||||
async function processTask(
|
||||
taskId: string,
|
||||
options: ProcessingOptions,
|
||||
onStatusUpdate: (status: BatchTaskState['status'], error?: string) => void
|
||||
): Promise<boolean> {
|
||||
try {
|
||||
// Start processing
|
||||
onStatusUpdate('processing')
|
||||
await apiClientV2.startTask(taskId, options)
|
||||
|
||||
// Poll for completion
|
||||
while (true) {
|
||||
await sleep(POLL_INTERVAL)
|
||||
|
||||
const task = await apiClientV2.getTask(taskId)
|
||||
|
||||
if (task.status === 'completed') {
|
||||
onStatusUpdate('completed')
|
||||
return true
|
||||
}
|
||||
|
||||
if (task.status === 'failed') {
|
||||
onStatusUpdate('failed', task.error_message || 'Processing failed')
|
||||
return false
|
||||
}
|
||||
|
||||
// Still processing, continue polling
|
||||
}
|
||||
} catch (error) {
|
||||
const errorMessage = error instanceof Error ? error.message : 'Unknown error'
|
||||
onStatusUpdate('failed', errorMessage)
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Sleep utility
|
||||
*/
|
||||
function sleep(ms: number): Promise<void> {
|
||||
return new Promise((resolve) => setTimeout(resolve, ms))
|
||||
}
|
||||
|
||||
/**
|
||||
* Process tasks with concurrency limit
|
||||
*/
|
||||
async function processWithConcurrency(
|
||||
tasks: Array<{ taskId: string; options: ProcessingOptions }>,
|
||||
concurrency: number,
|
||||
onTaskUpdate: (taskId: string, status: BatchTaskState['status'], error?: string) => void
|
||||
): Promise<void> {
|
||||
const queue = [...tasks]
|
||||
const running: Promise<void>[] = []
|
||||
|
||||
while (queue.length > 0 || running.length > 0) {
|
||||
// Start new tasks up to concurrency limit
|
||||
while (running.length < concurrency && queue.length > 0) {
|
||||
const task = queue.shift()!
|
||||
const promise = processTask(task.taskId, task.options, (status, error) => {
|
||||
onTaskUpdate(task.taskId, status, error)
|
||||
}).then(() => {
|
||||
// Remove from running when done
|
||||
const index = running.indexOf(promise)
|
||||
if (index > -1) {
|
||||
running.splice(index, 1)
|
||||
}
|
||||
})
|
||||
running.push(promise)
|
||||
}
|
||||
|
||||
// Wait for at least one task to complete if we're at capacity
|
||||
if (running.length >= concurrency || (queue.length === 0 && running.length > 0)) {
|
||||
await Promise.race(running)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Main batch processing function
|
||||
* Processes Direct Track tasks in parallel (max 5) and OCR Track tasks sequentially (max 1)
|
||||
*/
|
||||
export async function processBatch(): Promise<void> {
|
||||
const store = useTaskStore.getState()
|
||||
const { batchState, updateBatchTaskState, startBatchProcessing, stopBatchProcessing } = store
|
||||
|
||||
if (!batchState.isActive || batchState.taskIds.length === 0) {
|
||||
console.warn('No batch to process')
|
||||
return
|
||||
}
|
||||
|
||||
// Start batch processing
|
||||
startBatchProcessing()
|
||||
|
||||
const { taskIds, taskStates, processingOptions } = batchState
|
||||
|
||||
// Separate tasks by track
|
||||
const directTasks: Array<{ taskId: string; options: ProcessingOptions }> = []
|
||||
const ocrTasks: Array<{ taskId: string; options: ProcessingOptions }> = []
|
||||
|
||||
for (const taskId of taskIds) {
|
||||
const taskState = taskStates[taskId]
|
||||
if (!taskState || taskState.status !== 'pending') continue
|
||||
|
||||
const track = determineTrack(
|
||||
processingOptions.strategy,
|
||||
taskState.recommendedTrack
|
||||
)
|
||||
|
||||
const options = buildProcessingOptions(track, processingOptions)
|
||||
|
||||
// Update task with determined track
|
||||
updateBatchTaskState(taskId, { track })
|
||||
|
||||
if (track === 'direct') {
|
||||
directTasks.push({ taskId, options })
|
||||
} else {
|
||||
// OCR, hybrid, or auto all go through OCR queue
|
||||
ocrTasks.push({ taskId, options })
|
||||
}
|
||||
}
|
||||
|
||||
const onTaskUpdate = (taskId: string, status: BatchTaskState['status'], error?: string) => {
|
||||
const updates: Partial<BatchTaskState> = { status }
|
||||
if (status === 'processing') {
|
||||
updates.startedAt = new Date().toISOString()
|
||||
} else if (status === 'completed' || status === 'failed') {
|
||||
updates.completedAt = new Date().toISOString()
|
||||
if (error) updates.error = error
|
||||
}
|
||||
updateBatchTaskState(taskId, updates)
|
||||
}
|
||||
|
||||
try {
|
||||
// Process Direct and OCR tracks concurrently
|
||||
// Direct: up to 5 parallel
|
||||
// OCR: sequential (1 at a time)
|
||||
await Promise.all([
|
||||
processWithConcurrency(directTasks, DIRECT_TRACK_CONCURRENCY, onTaskUpdate),
|
||||
processWithConcurrency(ocrTasks, OCR_TRACK_CONCURRENCY, onTaskUpdate),
|
||||
])
|
||||
} finally {
|
||||
stopBatchProcessing()
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Cancel batch processing
|
||||
* Note: This cancels pending tasks but cannot cancel already-running tasks
|
||||
*/
|
||||
export async function cancelBatch(): Promise<void> {
|
||||
const store = useTaskStore.getState()
|
||||
const { batchState, updateBatchTaskState, stopBatchProcessing } = store
|
||||
|
||||
// Cancel all processing tasks
|
||||
for (const taskId of batchState.taskIds) {
|
||||
const taskState = batchState.taskStates[taskId]
|
||||
if (taskState?.status === 'processing') {
|
||||
try {
|
||||
await apiClientV2.cancelTask(taskId)
|
||||
updateBatchTaskState(taskId, {
|
||||
status: 'failed',
|
||||
error: 'Cancelled by user',
|
||||
completedAt: new Date().toISOString(),
|
||||
})
|
||||
} catch (error) {
|
||||
console.error(`Failed to cancel task ${taskId}:`, error)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stopBatchProcessing()
|
||||
}
|
||||
|
||||
/**
|
||||
* Get batch processing summary
|
||||
*/
|
||||
export function getBatchSummary(): {
|
||||
total: number
|
||||
directCount: number
|
||||
ocrCount: number
|
||||
pendingCount: number
|
||||
completedCount: number
|
||||
failedCount: number
|
||||
} {
|
||||
const { batchState } = useTaskStore.getState()
|
||||
const { taskIds, taskStates } = batchState
|
||||
|
||||
let directCount = 0
|
||||
let ocrCount = 0
|
||||
let pendingCount = 0
|
||||
let completedCount = 0
|
||||
let failedCount = 0
|
||||
|
||||
for (const taskId of taskIds) {
|
||||
const taskState = taskStates[taskId]
|
||||
if (!taskState) continue
|
||||
|
||||
if (taskState.track === 'direct') directCount++
|
||||
else if (taskState.track === 'ocr') ocrCount++
|
||||
|
||||
switch (taskState.status) {
|
||||
case 'pending':
|
||||
pendingCount++
|
||||
break
|
||||
case 'completed':
|
||||
completedCount++
|
||||
break
|
||||
case 'failed':
|
||||
failedCount++
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
total: taskIds.length,
|
||||
directCount,
|
||||
ocrCount,
|
||||
pendingCount,
|
||||
completedCount,
|
||||
failedCount,
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user