feat: refactor dual-track architecture (Phase 1-5)

## Backend Changes
- **Service Layer Refactoring**:
  - Add ProcessingOrchestrator for unified document processing
  - Add PDFTableRenderer for table rendering extraction
  - Add PDFFontManager for font management with CJK support
  - Add MemoryPolicyEngine (73% code reduction from MemoryGuard)

- **Bug Fixes**:
  - Fix Direct Track table row span calculation
  - Fix OCR Track image path handling
  - Add cell_boxes coordinate validation
  - Filter out small decorative images
  - Add covering image detection

## Frontend Changes
- **State Management**:
  - Add TaskStore for centralized task state management
  - Add localStorage persistence for recent tasks
  - Add processing state tracking

- **Type Consolidation**:
  - Merge shared types from api.ts into apiV2.ts
  - Update imports in authStore, uploadStore, ResultsTable, SettingsPage

- **Page Integration**:
  - Integrate TaskStore in ProcessingPage and TaskDetailPage
  - Update useTaskValidation hook with cache sync

## Testing
- Direct Track: edit.pdf (3 pages, 1.281s), edit3.pdf (2 pages, 0.203s)
- Cell boxes validation: 43 valid, 0 invalid
- Table merging: 12 merged cells verified

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
egg
2025-12-07 07:18:27 +08:00
parent 8265be1741
commit eff9b0bcd5
19 changed files with 3637 additions and 173 deletions

View File

@@ -2,7 +2,7 @@ import { useTranslation } from 'react-i18next'
import { Table, TableBody, TableCell, TableHead, TableHeader, TableRow } from '@/components/ui/table'
import { Badge } from '@/components/ui/badge'
import { Button } from '@/components/ui/button'
import type { FileResult } from '@/types/api'
import type { FileResult } from '@/types/apiV2'
interface ResultsTableProps {
files: FileResult[]

View File

@@ -1,6 +1,7 @@
import { useEffect, useState } from 'react'
import { useQuery } from '@tanstack/react-query'
import { useUploadStore } from '@/store/uploadStore'
import { useTaskStore } from '@/store/taskStore'
import { apiClientV2 } from '@/services/apiV2'
import type { TaskDetail } from '@/types/apiV2'
@@ -15,13 +16,21 @@ interface UseTaskValidationResult {
/**
* Hook for validating task existence and handling deleted tasks gracefully.
* Shows loading state first, then either returns task data or marks as not found.
*
* This hook integrates with both uploadStore (legacy) and taskStore (new).
* The taskId is sourced from uploadStore.batchId for backward compatibility,
* while task metadata is synced to taskStore for caching and state management.
*/
export function useTaskValidation(options?: {
refetchInterval?: number | false | ((query: any) => number | false)
}): UseTaskValidationResult {
// Legacy: Get taskId from uploadStore
const { batchId, clearUpload } = useUploadStore()
const taskId = batchId ? String(batchId) : null
// New: Use taskStore for caching and state management
const { updateTaskCache, removeFromCache, clearCurrentTask } = useTaskStore()
const [isNotFound, setIsNotFound] = useState(false)
const { data: taskDetail, isLoading, error, isFetching } = useQuery({
@@ -40,16 +49,27 @@ export function useTaskValidation(options?: {
staleTime: 0,
})
// Handle 404 error - mark as not found immediately
// Sync task details to taskStore cache when data changes
useEffect(() => {
if (taskDetail) {
updateTaskCache(taskDetail)
}
}, [taskDetail, updateTaskCache])
// Handle 404 error - mark as not found and clean up cache
useEffect(() => {
if (error && (error as any)?.response?.status === 404) {
setIsNotFound(true)
if (taskId) {
removeFromCache(taskId)
}
}
}, [error])
}, [error, taskId, removeFromCache])
// Clear state and store
const clearAndReset = () => {
clearUpload()
clearUpload() // Legacy store
clearCurrentTask() // New store
setIsNotFound(false)
}

View File

@@ -16,6 +16,7 @@ import TableDetectionSelector from '@/components/TableDetectionSelector'
import ProcessingTrackSelector from '@/components/ProcessingTrackSelector'
import TaskNotFound from '@/components/TaskNotFound'
import { useTaskValidation } from '@/hooks/useTaskValidation'
import { useTaskStore, useProcessingState } from '@/store/taskStore'
import type { LayoutModel, ProcessingOptions, PreprocessingMode, PreprocessingConfig, TableDetectionConfig, ProcessingTrack } from '@/types/apiV2'
export default function ProcessingPage() {
@@ -23,6 +24,10 @@ export default function ProcessingPage() {
const navigate = useNavigate()
const { toast } = useToast()
// Use TaskStore for processing state management
const { startProcessing, stopProcessing, updateTaskStatus } = useTaskStore()
const processingState = useProcessingState()
// Use shared hook for task validation
const { taskId, taskDetail, isLoading: isValidating, isNotFound, clearAndReset } = useTaskValidation({
refetchInterval: (query) => {
@@ -93,9 +98,16 @@ export default function ProcessingPage() {
table_detection: tableDetectionConfig,
}
// Update TaskStore processing state
startProcessing(forceTrack, options)
return apiClientV2.startTask(taskId!, options)
},
onSuccess: () => {
// Update task status in cache
if (taskId) {
updateTaskStatus(taskId, 'processing', forceTrack || undefined)
}
toast({
title: '開始處理',
description: 'OCR 處理已開始',
@@ -103,6 +115,8 @@ export default function ProcessingPage() {
})
},
onError: (error: any) => {
// Stop processing state on error
stopProcessing()
toast({
title: t('errors.processingFailed'),
description: error.response?.data?.detail || t('errors.networkError'),
@@ -111,14 +125,25 @@ export default function ProcessingPage() {
},
})
// Auto-redirect when completed
// Handle task status changes - update store and redirect when completed
useEffect(() => {
if (taskDetail?.status === 'completed') {
// Stop processing state and update cache
stopProcessing()
if (taskId) {
updateTaskStatus(taskId, 'completed', taskDetail.processing_track)
}
setTimeout(() => {
navigate('/tasks')
}, 1000)
} else if (taskDetail?.status === 'failed') {
// Stop processing state on failure
stopProcessing()
if (taskId) {
updateTaskStatus(taskId, 'failed')
}
}
}, [taskDetail?.status, navigate])
}, [taskDetail?.status, taskDetail?.processing_track, taskId, navigate, stopProcessing, updateTaskStatus])
const handleStartProcessing = () => {
processOCRMutation.mutate()

View File

@@ -5,7 +5,7 @@ import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card'
import { Button } from '@/components/ui/button'
import { useToast } from '@/components/ui/toast'
import { apiClient } from '@/services/api'
import type { ExportRule } from '@/types/api'
import type { ExportRule } from '@/types/apiV2'
export default function SettingsPage() {
const { t } = useTranslation()

View File

@@ -7,6 +7,7 @@ import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card'
import PDFViewer from '@/components/PDFViewer'
import { useToast } from '@/components/ui/toast'
import { apiClientV2 } from '@/services/apiV2'
import { useTaskStore } from '@/store/taskStore'
import {
FileText,
Download,
@@ -63,6 +64,9 @@ export default function TaskDetailPage() {
const { toast } = useToast()
const queryClient = useQueryClient()
// TaskStore for caching
const { updateTaskCache } = useTaskStore()
// Translation state
const [targetLang, setTargetLang] = useState('en')
const [isTranslating, setIsTranslating] = useState(false)
@@ -84,6 +88,13 @@ export default function TaskDetailPage() {
},
})
// Sync task details to TaskStore cache
useEffect(() => {
if (taskDetail) {
updateTaskCache(taskDetail)
}
}, [taskDetail, updateTaskCache])
// Get processing metadata for completed tasks
const { data: processingMetadata } = useQuery({
queryKey: ['processingMetadata', taskId],

View File

@@ -13,8 +13,6 @@ import type { AxiosInstance } from 'axios'
import type {
LoginRequest,
ApiError,
} from '@/types/api'
import type {
LoginResponseV2,
UserInfo,
TaskCreate,

View File

@@ -1,6 +1,6 @@
import { create } from 'zustand'
import { persist } from 'zustand/middleware'
import type { User } from '@/types/api'
import type { User } from '@/types/apiV2'
interface AuthState {
user: User | null

View File

@@ -0,0 +1,234 @@
import { create } from 'zustand'
import { persist } from 'zustand/middleware'
import type { Task, TaskStatus, ProcessingTrack, ProcessingOptions } from '@/types/apiV2'
/**
 * Processing state for tracking ongoing operations.
 *
 * Lives in the task store next to the current task id. The store's
 * `partialize` option leaves this object out of the persisted snapshot,
 * so only `currentTaskId` and `recentTasks` are written to storage.
 */
export interface ProcessingState {
/** True between `startProcessing()` and the next `stopProcessing()` or reset. */
isProcessing: boolean
/**
 * `Date#toISOString()` timestamp taken when `startProcessing()` ran.
 * `stopProcessing()` keeps this value; only a full reset sets it back to null.
 */
startedAt: string | null
/** Track passed to `startProcessing()`; null when the caller did not force one. */
track: ProcessingTrack | null
/** Options passed to `startProcessing()`; null when none were supplied. */
options: ProcessingOptions | null
}
/**
 * Cached task info for quick display without API calls.
 *
 * This is the camelCase shape stored in `recentTasks`; `updateTaskCache`
 * builds it from either an API `Task` (snake_case fields) or another
 * `CachedTask`.
 */
export interface CachedTask {
/** Task identifier; used as the lookup key within the cache. */
taskId: string
/** File name for display; null when the source task had no (or an empty) filename. */
filename: string | null
/** Last status written to the cache for this task. */
status: TaskStatus
/**
 * ISO timestamp of the last cache write for this entry (set to "now" by the
 * store on every update) — not a server-side modification time.
 */
updatedAt: string
/** Processing track, when one is known for the task. */
processingTrack?: ProcessingTrack
}
/**
 * Task Store State
 * Centralized state management for task operations.
 *
 * Combines three pieces of data (current task id, processing state, and a
 * bounded cache of recent tasks) with the actions that mutate them.
 */
interface TaskState {
// Current active task (null when no task is selected)
currentTaskId: string | null
// Processing state for current task
processingState: ProcessingState
// Recently accessed tasks cache, most recently updated first (max 20)
recentTasks: CachedTask[]
// Actions
// Sets the current task id. When a non-null id is given together with a
// filename argument (anything other than undefined) and the task is not yet
// cached, a 'pending' cache entry is created for it.
setCurrentTask: (taskId: string | null, filename?: string | null) => void
// Resets currentTaskId to null and processingState to its idle values.
clearCurrentTask: () => void
// Processing state actions
// Marks processing as started (records the start time, track and options)
// and, if a current task is set, flips its cached status to 'processing'.
startProcessing: (track: ProcessingTrack | null, options?: ProcessingOptions) => void
// Sets isProcessing to false while keeping startedAt/track/options.
stopProcessing: () => void
// Cache management
// Inserts or replaces the entry for the task and moves it to the front;
// entries beyond the size limit are dropped from the end.
updateTaskCache: (task: Task | CachedTask) => void
// Updates status (and track, when given) of an already-cached task;
// tasks that are not in the cache are left untouched.
updateTaskStatus: (taskId: string, status: TaskStatus, track?: ProcessingTrack) => void
// Removes the task from the cache and clears currentTaskId if it matches.
removeFromCache: (taskId: string) => void
// Empties the cache and resets the current task and processing state.
clearCache: () => void
// Get cached task (undefined when the id is not in the cache)
getCachedTask: (taskId: string) => CachedTask | undefined
}
/**
 * Maximum number of recent tasks to cache.
 *
 * `updateTaskCache` truncates `recentTasks` to this length after putting the
 * updated entry first, so the least recently updated entries are dropped.
 */
const MAX_RECENT_TASKS = 20
/**
 * Build a fresh idle processing state (nothing running, no metadata).
 * A new object is returned on every call so resets never share a reference.
 */
function createIdleProcessingState(): ProcessingState {
  return {
    isProcessing: false,
    startedAt: null,
    track: null,
    options: null,
  }
}

/**
 * Task Store
 * Manages task state with localStorage persistence.
 *
 * State:
 * - `currentTaskId`   – id of the active task, or null.
 * - `processingState` – in-memory flags for the running operation (not persisted).
 * - `recentTasks`     – cache of recently touched tasks, newest first,
 *                       capped at MAX_RECENT_TASKS entries.
 *
 * Only `currentTaskId` and `recentTasks` are written to storage (see
 * `partialize` below), so `processingState` starts idle after a reload.
 */
export const useTaskStore = create<TaskState>()(
  persist(
    (set, get) => ({
      // Initial state
      currentTaskId: null,
      processingState: createIdleProcessingState(),
      recentTasks: [],

      // Set current task
      setCurrentTask: (taskId, filename) => {
        set({ currentTaskId: taskId })

        // Seed the cache only when the caller supplied a filename argument
        // (null counts as supplied, undefined does not) and the task is new;
        // an existing entry is left as-is so its status is not overwritten.
        if (taskId && filename !== undefined) {
          const existing = get().recentTasks.find((t) => t.taskId === taskId)
          if (!existing) {
            get().updateTaskCache({
              taskId,
              filename,
              status: 'pending',
              updatedAt: new Date().toISOString(),
            })
          }
        }
      },

      // Clear current task together with its processing state
      clearCurrentTask: () => {
        set({
          currentTaskId: null,
          processingState: createIdleProcessingState(),
        })
      },

      // Start processing
      startProcessing: (track, options) => {
        set({
          processingState: {
            isProcessing: true,
            startedAt: new Date().toISOString(),
            track,
            options: options || null,
          },
        })

        // Mirror the new status into the cache entry of the current task.
        // Nothing happens here when no current task has been set.
        const currentTaskId = get().currentTaskId
        if (currentTaskId) {
          get().updateTaskStatus(currentTaskId, 'processing', track || undefined)
        }
      },

      // Stop processing: only the flag is cleared; startedAt/track/options
      // stay available until the next start or reset.
      stopProcessing: () => {
        set((state) => ({
          processingState: {
            ...state.processingState,
            isProcessing: false,
          },
        }))
      },

      // Update task in cache
      updateTaskCache: (task) => {
        set((state) => {
          // API tasks use snake_case keys, cached tasks use camelCase.
          const taskId = 'task_id' in task ? task.task_id : task.taskId
          const cached: CachedTask = {
            taskId,
            // Empty or missing filenames are normalized to null.
            filename: task.filename || null,
            status: task.status,
            updatedAt: new Date().toISOString(),
            processingTrack: 'processing_track' in task ? task.processing_track : task.processingTrack,
          }

          // Remove existing entry if present
          const filtered = state.recentTasks.filter((t) => t.taskId !== taskId)

          // Add to front and limit size
          const updated = [cached, ...filtered].slice(0, MAX_RECENT_TASKS)
          return { recentTasks: updated }
        })
      },

      // Update task status in cache (entries for other ids are returned unchanged)
      updateTaskStatus: (taskId, status, track) => {
        set((state) => {
          const updated = state.recentTasks.map((t) => {
            if (t.taskId === taskId) {
              return {
                ...t,
                status,
                // Keep the previously known track when none is supplied.
                processingTrack: track || t.processingTrack,
                updatedAt: new Date().toISOString(),
              }
            }
            return t
          })
          return { recentTasks: updated }
        })
      },

      // Remove task from cache
      removeFromCache: (taskId) => {
        set((state) => {
          const recentTasks = state.recentTasks.filter((t) => t.taskId !== taskId)

          // Removing some other task leaves the current task untouched.
          if (state.currentTaskId !== taskId) {
            return { recentTasks }
          }

          // The removed task was the current one: drop the id AND reset the
          // processing state, so a stale `isProcessing: true` cannot outlive
          // the task it belonged to (same reset as clearCurrentTask).
          return {
            recentTasks,
            currentTaskId: null,
            processingState: createIdleProcessingState(),
          }
        })
      },

      // Clear all cached tasks and reset everything else
      clearCache: () => {
        set({
          recentTasks: [],
          currentTaskId: null,
          processingState: createIdleProcessingState(),
        })
      },

      // Get cached task by ID
      getCachedTask: (taskId) => {
        return get().recentTasks.find((t) => t.taskId === taskId)
      },
    }),
    {
      name: 'tool-ocr-task-store',
      // Only persist essential state, not processing state
      partialize: (state) => ({
        currentTaskId: state.currentTaskId,
        recentTasks: state.recentTasks,
      }),
    }
  )
)
/**
 * Hook that resolves the store's current task id to its cached entry.
 *
 * Subscribes to both `currentTaskId` and `recentTasks`, so the component
 * re-renders when either changes.
 *
 * @returns The matching cache entry, or null when no task is current or the
 *          current task is not in the cache.
 */
export function useCurrentTask() {
  const activeId = useTaskStore((store) => store.currentTaskId)
  const cache = useTaskStore((store) => store.recentTasks)

  // Both subscriptions are made before any branching so hook order is stable.
  const match = activeId ? cache.find((entry) => entry.taskId === activeId) : undefined
  return match || null
}
/**
 * Hook that subscribes a component to the store's `processingState` slice.
 *
 * @returns The current processing state object held by the task store.
 */
export function useProcessingState() {
  return useTaskStore(({ processingState }) => processingState)
}

View File

@@ -1,6 +1,6 @@
import { create } from 'zustand'
import { persist } from 'zustand/middleware'
import type { FileInfo } from '@/types/api'
import type { FileInfo } from '@/types/apiV2'
interface UploadState {
batchId: number | null

View File

@@ -374,3 +374,102 @@ export interface TranslationResult {
statistics: TranslationStatistics
translations: Record<string, any>
}
// ==================== Shared Types (from api.ts) ====================
/**
 * Authentication request for login.
 *
 * Carries the credentials as two plain strings.
 */
export interface LoginRequest {
/** Account name to authenticate as. */
username: string
/** Password for that account. */
password: string
}
/**
 * Legacy login response (V1 API).
 *
 * @deprecated Use LoginResponseV2 for V2 API
 */
export interface LoginResponse {
/** Token issued on successful login. */
access_token: string
/** Token scheme; NOTE(review): presumably "bearer" — confirm against the backend. */
token_type: string
/** Token lifetime; NOTE(review): presumably in seconds — confirm against the backend. */
expires_in: number
}
/**
 * User information (used by authStore).
 *
 * Only `id` and `username` are required; the remaining fields may be absent.
 */
export interface User {
/** Numeric user identifier. */
id: number
/** Login name of the user. */
username: string
/** E-mail address, when available. */
email?: string
/** Display name, when available; may be omitted or explicitly null. */
displayName?: string | null
}
/**
 * File information for upload tracking (imported by uploadStore).
 */
export interface FileInfo {
/** Numeric file identifier. */
id: number
/** Name of the file. */
filename: string
/** File size; NOTE(review): presumably in bytes — confirm against the backend. */
file_size: number
/** File format; NOTE(review): presumably an extension or type label such as "pdf" — confirm. */
file_format: string
/** Processing status of this file; restricted to the four listed values. */
status: 'pending' | 'processing' | 'completed' | 'failed'
}
/**
 * File result for batch processing display (imported by ResultsTable,
 * which takes a `FileResult[]` as its `files` prop).
 */
export interface FileResult {
/** Numeric file identifier. */
id: number
/** Name of the file. */
filename: string
/** Processing status of this file; restricted to the four listed values. */
status: 'pending' | 'processing' | 'completed' | 'failed'
/** Processing duration, when reported; NOTE(review): unit (presumably seconds) — confirm. */
processing_time?: number
/** Error message, when one was reported for this file. */
error?: string
}
/**
 * Export configuration rule (imported by SettingsPage).
 */
export interface ExportRule {
/** Numeric rule identifier. */
id: number
/** Name of the rule. */
rule_name: string
/** Free-form rule configuration; the value types are not constrained here. */
config_json: Record<string, any>
/** Optional CSS template attached to the rule. */
css_template?: string
/** Creation timestamp as a string; NOTE(review): presumably ISO 8601 — confirm. */
created_at: string
}
/**
 * Export request options.
 *
 * Identifies the batch to export and the output format, with an optional
 * rule reference and extra options.
 */
export interface ExportRequest {
/** Identifier of the batch to export. */
batch_id: number
/** Output format; restricted to the five listed values. */
format: 'txt' | 'json' | 'excel' | 'markdown' | 'pdf'
/** Optional rule identifier; NOTE(review): presumably an `ExportRule.id` — confirm. */
rule_id?: number
/** Optional additional export settings. */
options?: ExportOptions
}
/**
 * Export additional options.
 *
 * Every field is optional; how omitted fields are defaulted is not visible
 * in this file.
 */
export interface ExportOptions {
/** NOTE(review): presumably a minimum OCR confidence for included content; range/scale unknown — confirm. */
confidence_threshold?: number
/** Whether metadata should be included in the export. */
include_metadata?: boolean
/** Pattern for output file names; NOTE(review): placeholder syntax is defined elsewhere — confirm. */
filename_pattern?: string
/** CSS template to apply to the export. */
css_template?: string
}
/**
 * CSS template for export styling.
 *
 * Descriptor only: it carries a name and a description, not the CSS itself.
 */
export interface CSSTemplate {
/** Template name. */
name: string
/** Human-readable description of the template. */
description: string
}
/**
 * API error response.
 */
export interface ApiError {
/** Error detail text; ProcessingPage shows `error.response?.data?.detail` in its failure toast. */
detail: string
/** Numeric status code; NOTE(review): presumably mirrors the HTTP status — confirm. */
status_code: number
}