feat: simplify layout model selection and archive proposals

Changes:
- Replace PP-Structure 7-slider parameter UI with simple 3-option layout model selector
- Add layout model mapping: chinese (PP-DocLayout-S), default (PubLayNet), cdla
- Add LayoutModelSelector component and zh-TW translations
- Fix "default" model behavior with sentinel value for PubLayNet
- Add gap filling service for OCR track coverage improvement
- Add PP-Structure debug utilities
- Archive completed/incomplete proposals:
  - add-ocr-track-gap-filling (complete)
  - fix-ocr-track-table-rendering (incomplete)
  - simplify-ppstructure-model-selection (22/25 tasks complete)
- Add new layout model tests, archive old PP-Structure param tests
- Update OpenSpec ocr-processing spec with layout model requirements

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
egg
2025-11-27 13:27:00 +08:00
parent c65df754cf
commit 59206a6ab8
35 changed files with 3621 additions and 658 deletions

View File

@@ -9,10 +9,10 @@ import { Badge } from '@/components/ui/badge'
import { useToast } from '@/components/ui/toast'
import { apiClientV2 } from '@/services/apiV2'
import { Play, CheckCircle, FileText, AlertCircle, Clock, Activity, Loader2 } from 'lucide-react'
import PPStructureParams from '@/components/PPStructureParams'
import LayoutModelSelector from '@/components/LayoutModelSelector'
import TaskNotFound from '@/components/TaskNotFound'
import { useTaskValidation } from '@/hooks/useTaskValidation'
import type { PPStructureV3Params, ProcessingOptions } from '@/types/apiV2'
import type { LayoutModel, ProcessingOptions } from '@/types/apiV2'
export default function ProcessingPage() {
const { t } = useTranslation()
@@ -31,8 +31,8 @@ export default function ProcessingPage() {
},
})
// PP-StructureV3 parameters state
const [ppStructureParams, setPpStructureParams] = useState<PPStructureV3Params>({})
// Layout model state (default to 'chinese' for best Chinese document support)
const [layoutModel, setLayoutModel] = useState<LayoutModel>('chinese')
// Start OCR processing
const processOCRMutation = useMutation({
@@ -40,11 +40,7 @@ export default function ProcessingPage() {
const options: ProcessingOptions = {
use_dual_track: true,
language: 'ch',
}
// Only include pp_structure_params if user has customized them
if (Object.keys(ppStructureParams).length > 0) {
options.pp_structure_params = ppStructureParams
layout_model: layoutModel,
}
return apiClientV2.startTask(taskId!, options)
@@ -346,11 +342,11 @@ export default function ProcessingPage() {
</Card>
)}
{/* PP-StructureV3 Parameters (only show when task is pending) */}
{/* Layout Model Selection (only show when task is pending) */}
{isPending && (
<PPStructureParams
value={ppStructureParams}
onChange={setPpStructureParams}
<LayoutModelSelector
value={layoutModel}
onChange={setLayoutModel}
disabled={processOCRMutation.isPending}
/>
)}