feat: add table detection options and scan artifact removal

- Add TableDetectionSelector component for toggling wired-table, wireless-table, and region detection
- Add CV-based table line detector module (disabled due to poor performance)
- Add scan artifact removal preprocessing step (removes faint horizontal lines)
- Add PreprocessingConfig schema with remove_scan_artifacts option
- Update frontend PreprocessingSettings with scan artifact toggle
- Integrate table detection config into ProcessingPage
- Archive extract-table-cell-boxes proposal

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
egg
2025-11-30 13:21:50 +08:00
parent f5a2c8a750
commit 95ae1f1bdb
17 changed files with 1906 additions and 344 deletions

View File

@@ -12,9 +12,10 @@ import { Play, CheckCircle, FileText, AlertCircle, Clock, Activity, Loader2, Inf
import LayoutModelSelector from '@/components/LayoutModelSelector'
import PreprocessingSettings from '@/components/PreprocessingSettings'
import PreprocessingPreview from '@/components/PreprocessingPreview'
import TableDetectionSelector from '@/components/TableDetectionSelector'
import TaskNotFound from '@/components/TaskNotFound'
import { useTaskValidation } from '@/hooks/useTaskValidation'
import type { LayoutModel, ProcessingOptions, PreprocessingMode, PreprocessingConfig, DocumentAnalysisResponse } from '@/types/apiV2'
import type { LayoutModel, ProcessingOptions, PreprocessingMode, PreprocessingConfig, TableDetectionConfig, DocumentAnalysisResponse } from '@/types/apiV2'
export default function ProcessingPage() {
const { t } = useTranslation()
@@ -44,9 +45,17 @@ export default function ProcessingPage() {
sharpen: true,
sharpen_strength: 1.0,
binarize: false,
remove_scan_artifacts: true,
})
const [showPreview, setShowPreview] = useState(false)
// Table detection state
const [tableDetectionConfig, setTableDetectionConfig] = useState<TableDetectionConfig>({
enable_wired_table: true,
enable_wireless_table: true,
enable_region_detection: true,
})
// Analyze document to determine if OCR is needed (only for pending tasks)
const { data: documentAnalysis, isLoading: isAnalyzing } = useQuery({
queryKey: ['documentAnalysis', taskId],
@@ -70,6 +79,7 @@ export default function ProcessingPage() {
layout_model: layoutModel,
preprocessing_mode: preprocessingMode,
preprocessing_config: preprocessingMode === 'manual' ? preprocessingConfig : undefined,
table_detection: tableDetectionConfig,
}
return apiClientV2.startTask(taskId!, options)
@@ -441,6 +451,13 @@ export default function ProcessingPage() {
disabled={processOCRMutation.isPending}
/>
{/* Table Detection Settings */}
<TableDetectionSelector
value={tableDetectionConfig}
onChange={setTableDetectionConfig}
disabled={processOCRMutation.isPending}
/>
{/* Preprocessing Settings */}
<PreprocessingSettings
mode={preprocessingMode}