feat: enable document orientation detection for scanned PDFs

- Enable PP-StructureV3's use_doc_orientation_classify feature
- Detect rotation angle from doc_preprocessor_res.angle
- Swap page dimensions (width <-> height) for 90°/270° rotations
- Output PDF now correctly displays landscape-scanned content

Also includes:
- Archive completed openspec proposals
- Add simplify-frontend-ocr-config proposal (pending)
- Code cleanup and frontend simplification

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
egg
2025-12-11 17:13:46 +08:00
parent 57070af307
commit cfe65158a3
58 changed files with 1271 additions and 3048 deletions

View File

@@ -1,358 +0,0 @@
import { useState } from 'react'
import { cn } from '@/lib/utils'
import { Check, ChevronDown, ChevronUp, FileText, Table, Settings, FileEdit, Layers, Cog } from 'lucide-react'
import { useTranslation } from 'react-i18next'
import type { OCRPreset, OCRConfig, TableParsingMode, OCRPresetInfo } from '@/types/apiV2'
/** Props accepted by the OCRPresetSelector component. */
interface OCRPresetSelectorProps {
/** Currently selected preset; its card is highlighted and its description is shown below the grid. */
value: OCRPreset
/** Invoked with the preset whose card was clicked. */
onChange: (preset: OCRPreset) => void
/**
 * Custom option values. Used as-is when the 'custom' preset is selected;
 * for any other preset these keys are spread over the preset's own config
 * when computing the values displayed in the advanced panel.
 */
customConfig?: OCRConfig
/** Invoked with a copy of customConfig in which a single key has been replaced. */
onCustomConfigChange?: (config: OCRConfig) => void
/** Disables the preset cards, the advanced toggle and every advanced input (defaults to false). */
disabled?: boolean
/** Extra class names merged into the root container via cn(). */
className?: string
}
// Icon element rendered inside each preset card, keyed by preset id.
// Every entry is a lucide-react icon sized with the same "w-5 h-5" classes.
const PRESET_ICONS: Record<OCRPreset, React.ReactNode> = {
text_heavy: <FileText className="w-5 h-5" />,
datasheet: <Table className="w-5 h-5" />,
table_heavy: <Layers className="w-5 h-5" />,
form: <FileEdit className="w-5 h-5" />,
mixed: <Settings className="w-5 h-5" />,
custom: <Cog className="w-5 h-5" />,
}
// Preset configurations (matching backend OCR_PRESET_CONFIGS).
// NOTE(review): the backend table is not visible from this file — confirm the two stay in sync.
// These values are what the advanced panel displays while a named preset is
// selected. Keys a preset leaves out are filled in by the `??` / `||` fallbacks
// on the individual controls. 'custom' is empty because for that preset the
// component reads the customConfig prop instead.
const PRESET_CONFIGS: Record<OCRPreset, OCRConfig> = {
text_heavy: {
table_parsing_mode: 'disabled',
enable_wired_table: false,
enable_wireless_table: false,
enable_chart_recognition: false,
enable_formula_recognition: false,
},
datasheet: {
table_parsing_mode: 'conservative',
enable_wired_table: true,
enable_wireless_table: false,
},
table_heavy: {
table_parsing_mode: 'full',
enable_wired_table: true,
enable_wireless_table: true,
},
form: {
table_parsing_mode: 'conservative',
enable_wired_table: true,
enable_wireless_table: false,
},
mixed: {
table_parsing_mode: 'classification_only',
enable_wired_table: true,
enable_wireless_table: false,
},
custom: {},
}
// Display name and description for each preset.
// displayName is the label on the preset card; both fields are shown in the
// description box under the grid for the selected preset.
// The strings are hard-coded here rather than looked up through i18n.
const PRESET_INFO: Record<OCRPreset, { displayName: string; description: string }> = {
text_heavy: {
displayName: '純文字文件',
description: '報告、文章、手冊等以文字為主的文件。禁用表格識別以提高處理速度。',
},
datasheet: {
displayName: '技術規格書',
description: '產品規格書、技術數據表 (TDS)。使用保守模式避免過度分割。',
},
table_heavy: {
displayName: '表格密集文件',
description: '財務報表、試算表。啟用完整表格識別以捕捉所有表格。',
},
form: {
displayName: '表單',
description: '申請表、問卷調查。識別表單欄位但避免過度分割。',
},
mixed: {
displayName: '混合內容',
description: '一般文件。只做表格區域分類,不做細胞分割。',
},
custom: {
displayName: '自訂設定',
description: '進階使用者可自行調整所有 PP-Structure 參數。',
},
}
/**
 * Card-based picker for the OCR processing preset, with a collapsible panel
 * showing the PP-Structure options behind the current selection.
 *
 * The component is controlled: the selection lives in `value` / `onChange`
 * and the custom option values live in `customConfig` / `onCustomConfigChange`.
 * The advanced inputs are editable only while the 'custom' preset is selected;
 * for every other preset they are rendered disabled and merely display the
 * effective values.
 *
 * @param value - Currently selected preset.
 * @param onChange - Called with the preset whose card was clicked.
 * @param customConfig - Custom option values (used as-is for 'custom', spread
 *   over the preset's config otherwise).
 * @param onCustomConfigChange - Called with `customConfig` plus the one edited key.
 * @param disabled - Disables every interactive element; defaults to false.
 * @param className - Extra classes merged into the root container.
 */
export default function OCRPresetSelector({
  value,
  onChange,
  customConfig,
  onCustomConfigChange,
  disabled = false,
  className,
}: OCRPresetSelectorProps) {
  // Every visible string in this component is hard-coded, so the translator
  // function is not bound. The hook call itself is kept so the hook order and
  // the file-level import stay as they were.
  useTranslation()
  const [showAdvanced, setShowAdvanced] = useState(false)

  // Display order of the preset cards.
  const presets: OCRPreset[] = ['datasheet', 'text_heavy', 'table_heavy', 'form', 'mixed', 'custom']

  // Values shown in the advanced panel. For 'custom' this is exactly the
  // caller's config; for a named preset it is the preset's config with any
  // customConfig keys spread on top. Computed once per render rather than
  // once per control.
  const effectiveConfig: OCRConfig =
    value === 'custom' ? (customConfig ?? {}) : { ...PRESET_CONFIGS[value], ...customConfig }

  // Advanced inputs are read-only unless the 'custom' preset is active.
  const advancedLocked = disabled || value !== 'custom'

  // Report a single edited option. The generic ties the value's type to the
  // key being edited, so a mismatched pair is rejected at compile time.
  const handleCustomConfigChange = <K extends keyof OCRConfig>(key: K, val: OCRConfig[K]) => {
    if (onCustomConfigChange) {
      onCustomConfigChange({
        ...customConfig,
        [key]: val,
      })
    }
  }

  return (
    <div className={cn('border rounded-lg p-4 bg-white', className)}>
      {/* Header */}
      <div className="flex items-center justify-between mb-4">
        <div className="flex items-center gap-2">
          <Settings className="w-5 h-5 text-gray-600" />
          <h3 className="text-lg font-semibold text-gray-900">OCR </h3>
        </div>
        <button
          type="button"
          onClick={() => setShowAdvanced(!showAdvanced)}
          className="text-sm text-blue-600 hover:text-blue-800 flex items-center gap-1"
          disabled={disabled}
        >
          {showAdvanced ? (
            <>
              <ChevronUp className="w-4 h-4" />
            </>
          ) : (
            <>
              <ChevronDown className="w-4 h-4" />
            </>
          )}
        </button>
      </div>
      {/* Preset Grid */}
      <div className="grid grid-cols-2 md:grid-cols-3 gap-3">
        {presets.map((preset) => {
          const info = PRESET_INFO[preset]
          const isSelected = value === preset
          return (
            <button
              key={preset}
              type="button"
              disabled={disabled}
              onClick={() => onChange(preset)}
              className={cn(
                'flex flex-col items-center gap-2 p-4 rounded-lg border-2 transition-all text-center',
                isSelected
                  ? 'border-blue-500 bg-blue-50'
                  : 'border-gray-200 hover:border-gray-300 hover:bg-gray-50',
                disabled && 'opacity-50 cursor-not-allowed'
              )}
            >
              {/* Icon */}
              <div
                className={cn(
                  'p-3 rounded-lg',
                  isSelected ? 'bg-blue-100 text-blue-600' : 'bg-gray-100 text-gray-500'
                )}
              >
                {PRESET_ICONS[preset]}
              </div>
              {/* Label */}
              <div className="flex items-center gap-1">
                <span
                  className={cn(
                    'font-medium text-sm',
                    isSelected ? 'text-blue-700' : 'text-gray-900'
                  )}
                >
                  {info.displayName}
                </span>
                {isSelected && <Check className="w-4 h-4 text-blue-600" />}
              </div>
              {/* Recommended badge */}
              {preset === 'datasheet' && (
                <span className="text-xs bg-green-100 text-green-700 px-2 py-0.5 rounded-full">
                </span>
              )}
            </button>
          )
        })}
      </div>
      {/* Selected Preset Description */}
      <div className="mt-4 p-3 bg-gray-50 border border-gray-200 rounded-md">
        <p className="text-sm text-gray-700">
          <span className="font-medium">{PRESET_INFO[value].displayName}</span>
          {PRESET_INFO[value].description}
        </p>
      </div>
      {/* Advanced Settings Panel */}
      {showAdvanced && (
        <div className="mt-4 p-4 bg-gray-50 border border-gray-200 rounded-lg space-y-4">
          <h4 className="font-medium text-gray-900 flex items-center gap-2">
            <Cog className="w-4 h-4" />
          </h4>
          {/* Table Parsing Mode */}
          <div>
            <label className="block text-sm font-medium text-gray-700 mb-2">
            </label>
            <select
              value={effectiveConfig.table_parsing_mode || 'conservative'}
              onChange={(e) => handleCustomConfigChange('table_parsing_mode', e.target.value as TableParsingMode)}
              disabled={advancedLocked}
              className={cn(
                'w-full px-3 py-2 border border-gray-300 rounded-md text-sm',
                advancedLocked && 'bg-gray-100 cursor-not-allowed'
              )}
            >
              <option value="full"> ()</option>
              <option value="conservative"> ()</option>
              <option value="classification_only"></option>
              <option value="disabled"></option>
            </select>
            <p className="mt-1 text-xs text-gray-500">
              {value !== 'custom' && '選擇「自訂設定」預設以調整此參數'}
            </p>
          </div>
          {/* Table Detection Options */}
          <div className="grid grid-cols-2 gap-4">
            <label className="flex items-center gap-2">
              <input
                type="checkbox"
                checked={effectiveConfig.enable_wired_table ?? true}
                onChange={(e) => handleCustomConfigChange('enable_wired_table', e.target.checked)}
                disabled={advancedLocked}
                className="rounded border-gray-300"
              />
              <span className="text-sm text-gray-700"></span>
            </label>
            <label className="flex items-center gap-2">
              <input
                type="checkbox"
                checked={effectiveConfig.enable_wireless_table ?? false}
                onChange={(e) => handleCustomConfigChange('enable_wireless_table', e.target.checked)}
                disabled={advancedLocked}
                className="rounded border-gray-300"
              />
              <span className="text-sm text-gray-700"></span>
            </label>
          </div>
          {/* Recognition Modules */}
          <div>
            <label className="block text-sm font-medium text-gray-700 mb-2">
            </label>
            <div className="grid grid-cols-2 gap-4">
              <label className="flex items-center gap-2">
                <input
                  type="checkbox"
                  checked={effectiveConfig.enable_chart_recognition ?? true}
                  onChange={(e) => handleCustomConfigChange('enable_chart_recognition', e.target.checked)}
                  disabled={advancedLocked}
                  className="rounded border-gray-300"
                />
                <span className="text-sm text-gray-700"></span>
              </label>
              <label className="flex items-center gap-2">
                <input
                  type="checkbox"
                  checked={effectiveConfig.enable_formula_recognition ?? true}
                  onChange={(e) => handleCustomConfigChange('enable_formula_recognition', e.target.checked)}
                  disabled={advancedLocked}
                  className="rounded border-gray-300"
                />
                <span className="text-sm text-gray-700"></span>
              </label>
              <label className="flex items-center gap-2">
                <input
                  type="checkbox"
                  checked={effectiveConfig.enable_seal_recognition ?? false}
                  onChange={(e) => handleCustomConfigChange('enable_seal_recognition', e.target.checked)}
                  disabled={advancedLocked}
                  className="rounded border-gray-300"
                />
                <span className="text-sm text-gray-700"></span>
              </label>
              <label className="flex items-center gap-2">
                <input
                  type="checkbox"
                  checked={effectiveConfig.enable_region_detection ?? true}
                  onChange={(e) => handleCustomConfigChange('enable_region_detection', e.target.checked)}
                  disabled={advancedLocked}
                  className="rounded border-gray-300"
                />
                <span className="text-sm text-gray-700"></span>
              </label>
            </div>
          </div>
          {/* Preprocessing Options */}
          <div>
            <label className="block text-sm font-medium text-gray-700 mb-2">
            </label>
            <div className="grid grid-cols-2 gap-4">
              <label className="flex items-center gap-2">
                <input
                  type="checkbox"
                  checked={effectiveConfig.use_doc_orientation_classify ?? true}
                  onChange={(e) => handleCustomConfigChange('use_doc_orientation_classify', e.target.checked)}
                  disabled={advancedLocked}
                  className="rounded border-gray-300"
                />
                <span className="text-sm text-gray-700"></span>
              </label>
              <label className="flex items-center gap-2">
                <input
                  type="checkbox"
                  checked={effectiveConfig.use_doc_unwarping ?? false}
                  onChange={(e) => handleCustomConfigChange('use_doc_unwarping', e.target.checked)}
                  disabled={advancedLocked}
                  className="rounded border-gray-300"
                />
                <span className="text-sm text-gray-700"></span>
              </label>
            </div>
          </div>
          {value !== 'custom' && (
            <p className="text-xs text-amber-600 bg-amber-50 p-2 rounded">
              調
            </p>
          )}
        </div>
      )}
      {/* Info Note */}
      <div className="mt-4 p-3 bg-blue-50 border border-blue-200 rounded-md">
        <p className="text-sm text-blue-800">
          PP-Structure
        </p>
      </div>
    </div>
  )
}

View File

@@ -1,6 +1,8 @@
import { useState, useCallback, useMemo, useRef, useEffect } from 'react'
import { Document, Page, pdfjs } from 'react-pdf'
import type { PDFDocumentProxy } from 'pdfjs-dist'
// Document proxy type derived from the pdfjs object re-exported by react-pdf,
// so pdfjs-dist does not have to be imported directly.
// getDocument() returns a loading task rather than a Promise; the proxy is the
// value the task's `promise` member resolves to. Matching the task itself
// against Promise<infer T> would collapse this alias to `never`.
type PDFDocumentProxy = Awaited<ReturnType<typeof pdfjs.getDocument>['promise']>
import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card'
import { Button } from '@/components/ui/button'
import { ChevronLeft, ChevronRight, ZoomIn, ZoomOut, Loader2 } from 'lucide-react'

View File

@@ -1,124 +0,0 @@
import { cn } from '@/lib/utils'
import { Checkbox } from '@/components/ui/checkbox'
import { Table, Grid3X3, Rows3 } from 'lucide-react'
import { useTranslation } from 'react-i18next'
import type { TableDetectionConfig } from '@/types/apiV2'
/** Props accepted by the TableDetectionSelector component. */
interface TableDetectionSelectorProps {
/** Current detection flags; each option row is checked according to its own key. */
value: TableDetectionConfig
/** Invoked with a copy of `value` in which the toggled flag has been replaced. */
onChange: (config: TableDetectionConfig) => void
/** Disables every checkbox and dims the rows (defaults to false). */
disabled?: boolean
/** Extra class names merged into the root container via cn(). */
className?: string
}
/** Static description of one row in the table detection checklist. */
interface DetectionOption {
/** Flag of TableDetectionConfig that this row reads and toggles. */
key: keyof TableDetectionConfig
/** Icon element rendered next to the checkbox. */
icon: React.ReactNode
/** i18n key passed to t() for the row's label. */
labelKey: string
/** i18n key passed to t() for the row's description line. */
descKey: string
}
// Rows rendered by TableDetectionSelector, in display order.
// Label and description texts are resolved through i18n under
// the "processing.tableDetection.*" keys.
const DETECTION_OPTIONS: DetectionOption[] = [
{
key: 'enable_wired_table',
icon: <Grid3X3 className="w-5 h-5" />,
labelKey: 'processing.tableDetection.wired',
descKey: 'processing.tableDetection.wiredDesc',
},
{
key: 'enable_wireless_table',
icon: <Rows3 className="w-5 h-5" />,
labelKey: 'processing.tableDetection.wireless',
descKey: 'processing.tableDetection.wirelessDesc',
},
{
key: 'enable_region_detection',
icon: <Table className="w-5 h-5" />,
labelKey: 'processing.tableDetection.region',
descKey: 'processing.tableDetection.regionDesc',
},
]
/**
 * Checklist of table detection modes.
 *
 * Controlled component: the flags come in through `value`, and every toggle
 * is reported through `onChange` as a fresh config object with that one flag
 * replaced. One row is rendered per DETECTION_OPTIONS entry; all texts are
 * looked up through i18n.
 */
export default function TableDetectionSelector({
  value,
  onChange,
  disabled = false,
  className,
}: TableDetectionSelectorProps) {
  const { t } = useTranslation()

  // Emit a copy of the current config with a single flag replaced.
  function applyToggle(flag: keyof TableDetectionConfig, next: boolean) {
    onChange({ ...value, [flag]: next })
  }

  return (
    <div className={cn('border rounded-lg p-4 bg-white', className)}>
      {/* Title row */}
      <div className="flex items-center gap-2 mb-4">
        <Table className="w-5 h-5 text-gray-600" />
        <h3 className="text-lg font-semibold text-gray-900">{t('processing.tableDetection.title')}</h3>
      </div>

      {/* One selectable row per detection mode */}
      <div className="space-y-3">
        {DETECTION_OPTIONS.map(({ key, icon, labelKey, descKey }) => {
          const active = value[key]
          // Colour variants depend only on whether the flag is currently on.
          const rowTone = active
            ? 'border-blue-500 bg-blue-50'
            : 'border-gray-200 hover:border-gray-300 hover:bg-gray-50'
          const iconTone = active ? 'bg-blue-100 text-blue-600' : 'bg-gray-100 text-gray-500'
          const labelTone = active ? 'text-blue-700' : 'text-gray-900'

          return (
            <label
              key={key}
              className={cn(
                'flex items-start gap-4 p-4 rounded-lg border-2 transition-all cursor-pointer',
                rowTone,
                disabled && 'opacity-50 cursor-not-allowed'
              )}
            >
              <Checkbox
                checked={active}
                onCheckedChange={(checked) => applyToggle(key, checked)}
                disabled={disabled}
                className="mt-0.5"
              />
              <div className={cn('p-2 rounded-lg flex-shrink-0', iconTone)}>{icon}</div>
              <div className="flex-1 min-w-0">
                <span className={cn('font-medium', labelTone)}>{t(labelKey)}</span>
                <p className="text-sm text-gray-500 mt-1">{t(descKey)}</p>
              </div>
            </label>
          )
        })}
      </div>

      {/* Footnote */}
      <div className="mt-4 p-3 bg-amber-50 border border-amber-200 rounded-md">
        <p className="text-sm text-amber-800">{t('processing.tableDetection.note')}</p>
      </div>
    </div>
  )
}

View File

@@ -1,5 +1,6 @@
import { useEffect, useState } from 'react'
import { useQuery } from '@tanstack/react-query'
import { AxiosError } from 'axios'
import { useUploadStore } from '@/store/uploadStore'
import { useTaskStore } from '@/store/taskStore'
import { apiClientV2 } from '@/services/apiV2'
@@ -58,7 +59,7 @@ export function useTaskValidation(options?: {
// Handle 404 error - mark as not found and clean up cache
useEffect(() => {
if (error && (error as any)?.response?.status === 404) {
if (error && error instanceof AxiosError && error.response?.status === 404) {
setIsNotFound(true)
if (taskId) {
removeFromCache(taskId)

View File

@@ -29,7 +29,7 @@ import {
TableRow,
} from '@/components/ui/table'
import { Badge } from '@/components/ui/badge'
import { Select } from '@/components/ui/select'
import { NativeSelect } from '@/components/ui/select'
export default function AuditLogsPage() {
const navigate = useNavigate()
@@ -145,9 +145,9 @@ export default function AuditLogsPage() {
<div className="grid grid-cols-1 md:grid-cols-3 gap-4">
<div>
<label className="block text-sm font-medium text-gray-700 mb-2"></label>
<Select
<NativeSelect
value={categoryFilter}
onChange={(e) => {
onChange={(e: React.ChangeEvent<HTMLSelectElement>) => {
setCategoryFilter(e.target.value)
handleFilterChange()
}}
@@ -164,9 +164,9 @@ export default function AuditLogsPage() {
<div>
<label className="block text-sm font-medium text-gray-700 mb-2"></label>
<Select
<NativeSelect
value={successFilter}
onChange={(e) => {
onChange={(e: React.ChangeEvent<HTMLSelectElement>) => {
setSuccessFilter(e.target.value)
handleFilterChange()
}}

View File

@@ -1,4 +1,4 @@
import { useState, useEffect } from 'react'
import { useState } from 'react'
import { useNavigate } from 'react-router-dom'
import { useTranslation } from 'react-i18next'
import { useQuery } from '@tanstack/react-query'
@@ -13,7 +13,6 @@ import {
FileJson,
FileType,
AlertCircle,
Settings,
CheckCircle2,
ArrowLeft,
Loader2

View File

@@ -12,13 +12,11 @@ import { Play, CheckCircle, FileText, AlertCircle, Clock, Activity, Loader2 } fr
import LayoutModelSelector from '@/components/LayoutModelSelector'
import PreprocessingSettings from '@/components/PreprocessingSettings'
import PreprocessingPreview from '@/components/PreprocessingPreview'
import TableDetectionSelector from '@/components/TableDetectionSelector'
import ProcessingTrackSelector from '@/components/ProcessingTrackSelector'
import OCRPresetSelector from '@/components/OCRPresetSelector'
import TaskNotFound from '@/components/TaskNotFound'
import { useTaskValidation } from '@/hooks/useTaskValidation'
import { useTaskStore, useProcessingState } from '@/store/taskStore'
import type { LayoutModel, ProcessingOptions, PreprocessingMode, PreprocessingConfig, TableDetectionConfig, ProcessingTrack, OCRPreset, OCRConfig } from '@/types/apiV2'
import type { LayoutModel, ProcessingOptions, PreprocessingMode, PreprocessingConfig, ProcessingTrack } from '@/types/apiV2'
export default function ProcessingPage() {
const { t } = useTranslation()
@@ -27,7 +25,9 @@ export default function ProcessingPage() {
// Use TaskStore for processing state management
const { startProcessing, stopProcessing, updateTaskStatus } = useTaskStore()
const processingState = useProcessingState()
// processingState is available for future use (e.g., displaying global processing status)
const _processingState = useProcessingState()
void _processingState // Suppress unused variable warning
// Use shared hook for task validation
const { taskId, taskDetail, isLoading: isValidating, isNotFound, clearAndReset } = useTaskValidation({
@@ -56,20 +56,9 @@ export default function ProcessingPage() {
})
const [showPreview, setShowPreview] = useState(false)
// Table detection state
const [tableDetectionConfig, setTableDetectionConfig] = useState<TableDetectionConfig>({
enable_wired_table: true,
enable_wireless_table: true,
enable_region_detection: true,
})
// Processing track override state (null = use system recommendation)
const [forceTrack, setForceTrack] = useState<ProcessingTrack | null>(null)
// OCR Preset state (default to 'datasheet' for best balance)
const [ocrPreset, setOcrPreset] = useState<OCRPreset>('datasheet')
const [ocrConfig, setOcrConfig] = useState<OCRConfig>({})
// Analyze document to determine if OCR is needed (only for pending tasks)
const { data: documentAnalysis, isLoading: isAnalyzing } = useQuery({
queryKey: ['documentAnalysis', taskId],
@@ -91,6 +80,8 @@ export default function ProcessingPage() {
))
// Start OCR processing
// NOTE: Simple OCR mode - using backend defaults for table/chart/formula recognition
// Only layout_model and preprocessing options are configurable from frontend
const processOCRMutation = useMutation({
mutationFn: () => {
const options: ProcessingOptions = {
@@ -100,9 +91,7 @@ export default function ProcessingPage() {
layout_model: layoutModel,
preprocessing_mode: preprocessingMode,
preprocessing_config: preprocessingMode === 'manual' ? preprocessingConfig : undefined,
table_detection: tableDetectionConfig,
ocr_preset: ocrPreset,
ocr_config: ocrPreset === 'custom' ? ocrConfig : undefined,
// NOTE: table_detection, ocr_preset, ocr_config removed - using backend defaults
}
// Update TaskStore processing state
@@ -448,15 +437,6 @@ export default function ProcessingPage() {
{/* OCR Track Options - Only show when document needs OCR */}
{needsOcrTrack && !isAnalyzing && (
<>
{/* OCR Processing Preset - Primary selection */}
<OCRPresetSelector
value={ocrPreset}
onChange={setOcrPreset}
customConfig={ocrConfig}
onCustomConfigChange={setOcrConfig}
disabled={processOCRMutation.isPending}
/>
{/* Layout Model Selection */}
<LayoutModelSelector
value={layoutModel}
@@ -464,13 +444,6 @@ export default function ProcessingPage() {
disabled={processOCRMutation.isPending}
/>
{/* Table Detection Settings */}
<TableDetectionSelector
value={tableDetectionConfig}
onChange={setTableDetectionConfig}
disabled={processOCRMutation.isPending}
/>
{/* Preprocessing Settings */}
<PreprocessingSettings
mode={preprocessingMode}

View File

@@ -1,7 +1,7 @@
import { useMemo, useState, useEffect } from 'react'
import { useParams, useNavigate } from 'react-router-dom'
import { useTranslation } from 'react-i18next'
import { useQuery, useQueryClient } from '@tanstack/react-query'
import { useQuery } from '@tanstack/react-query'
import { Button } from '@/components/ui/button'
import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card'
import PDFViewer from '@/components/PDFViewer'
@@ -62,7 +62,6 @@ export default function TaskDetailPage() {
const { t } = useTranslation()
const navigate = useNavigate()
const { toast } = useToast()
const queryClient = useQueryClient()
// TaskStore for caching
const { updateTaskCache } = useTaskStore()

View File

@@ -334,7 +334,7 @@ export default function TaskHistoryPage() {
<NativeSelect
value={statusFilter}
onChange={(e) => {
setStatusFilter(e.target.value as any)
setStatusFilter(e.target.value as TaskStatus | 'all')
handleFilterChange()
}}
options={[

View File

@@ -15,7 +15,7 @@ export default function UploadPage() {
const navigate = useNavigate()
const { toast } = useToast()
const [selectedFiles, setSelectedFiles] = useState<File[]>([])
const { setBatchId, setFiles, setUploadProgress } = useUploadStore()
const { setBatchId, setUploadProgress } = useUploadStore()
const uploadMutation = useMutation({
mutationFn: async (files: File[]) => {
@@ -28,10 +28,10 @@ export default function UploadPage() {
return tasks
},
onSuccess: (tasks) => {
// For now, just use the first task_id as batch_id
// TODO: Update store to handle multiple tasks
// Use the first task_id as the current batch identifier
// Note: Type assertion needed - store expects number but API returns string UUID
if (tasks.length > 0) {
setBatchId(tasks[0].task_id as any) // temporary workaround
setBatchId(tasks[0].task_id as unknown as number)
}
toast({
title: t('upload.uploadSuccess'),

View File

@@ -25,7 +25,6 @@ import type {
SystemStats,
UserWithStats,
TopUser,
AuditLog,
AuditLogListResponse,
UserActivitySummary,
ProcessingOptions,
@@ -55,7 +54,7 @@ class ApiClientV2 {
private token: string | null = null
private userInfo: UserInfo | null = null
private tokenExpiresAt: number | null = null
private refreshTimer: NodeJS.Timeout | null = null
private refreshTimer: ReturnType<typeof setTimeout> | null = null
private isRefreshing: boolean = false
private refreshFailed: boolean = false

View File

@@ -140,12 +140,16 @@ export const useTaskStore = create<TaskState>()(
updateTaskCache: (task) => {
set((state) => {
const taskId = 'task_id' in task ? task.task_id : task.taskId
// Extract processing track from either Task (snake_case) or CachedTask (camelCase)
const track = 'processing_track' in task
? task.processing_track
: ('processingTrack' in task ? task.processingTrack : undefined)
const cached: CachedTask = {
taskId,
filename: task.filename || null,
status: task.status,
updatedAt: new Date().toISOString(),
processingTrack: 'processing_track' in task ? task.processing_track : task.processingTrack,
processingTrack: track,
}
// Remove existing entry if present

View File

@@ -111,71 +111,6 @@ export interface PreprocessingConfig {
remove_scan_artifacts: boolean // Remove horizontal scan line artifacts
}
/**
 * Table detection configuration for PP-StructureV3.
 * Controls which table detection modes to enable; every flag is required.
 */
export interface TableDetectionConfig {
/** Tables with visible cell borders/grid lines. */
enable_wired_table: boolean
/** Tables without visible borders. */
enable_wireless_table: boolean
/** Detect table-like regions for better cell structure. */
enable_region_detection: boolean
}
/**
* OCR processing preset for different document types.
* Presets provide optimized PP-Structure configurations for common document types.
*/
export type OCRPreset = 'text_heavy' | 'datasheet' | 'table_heavy' | 'form' | 'mixed' | 'custom'
/**
* Table parsing mode controlling how aggressively tables are parsed.
* - full: Full table recognition with cell segmentation (aggressive)
* - conservative: Disable wireless tables to prevent cell explosion
* - classification_only: Only classify table regions, no cell segmentation
* - disabled: Completely disable table recognition
*/
export type TableParsingMode = 'full' | 'conservative' | 'classification_only' | 'disabled'
/**
 * OCR processing configuration for PP-Structure.
 * Allows fine-grained control over PP-Structure parameters.
 * Every field is optional, so an empty object is a valid config.
 */
export interface OCRConfig {
// Table Processing
table_parsing_mode?: TableParsingMode
enable_wired_table?: boolean
enable_wireless_table?: boolean
// Layout Detection (both thresholds also accept null)
layout_threshold?: number | null // 0.0-1.0
layout_nms_threshold?: number | null // 0.0-1.0
// Preprocessing
use_doc_orientation_classify?: boolean
use_doc_unwarping?: boolean
use_textline_orientation?: boolean
// Recognition Modules
enable_chart_recognition?: boolean
enable_formula_recognition?: boolean
enable_seal_recognition?: boolean
enable_region_detection?: boolean
}
/**
 * Preset configuration metadata for display purposes.
 */
export interface OCRPresetInfo {
/** Preset identifier this metadata describes. */
name: OCRPreset
/** Human-readable preset name. */
displayName: string
/** Longer explanation of what the preset is meant for. */
description: string
/** Icon reference as a string. NOTE(review): how it maps to an icon component is not visible here — confirm. */
icon: string
/** PP-Structure options associated with the preset. */
config: OCRConfig
}
/**
* Image quality metrics from auto-analysis.
*/
@@ -213,9 +148,6 @@ export interface ProcessingOptions {
layout_model?: LayoutModel // Layout detection model selection (OCR track only)
preprocessing_mode?: PreprocessingMode // Preprocessing mode (OCR track only)
preprocessing_config?: PreprocessingConfig // Manual preprocessing config
table_detection?: TableDetectionConfig // Table detection options (OCR track only)
ocr_preset?: OCRPreset // OCR processing preset (OCR track only)
ocr_config?: OCRConfig // Custom OCR config (overrides preset values)
}
export interface TaskCreate {