feat: enable document orientation detection for scanned PDFs

- Enable PP-StructureV3's use_doc_orientation_classify feature
- Detect rotation angle from doc_preprocessor_res.angle
- Swap page dimensions (width <-> height) for 90°/270° rotations
- Output PDF now correctly displays landscape-scanned content

Also includes:
- Archive completed openspec proposals
- Add simplify-frontend-ocr-config proposal (pending)
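- Code cleanup and frontend simplification (remove the OCR preset and table detection selectors from ProcessingPage; table_detection, ocr_preset and ocr_config are no longer sent, so backend defaults apply)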

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
egg
2025-12-11 17:13:46 +08:00
parent 57070af307
commit cfe65158a3
58 changed files with 1271 additions and 3048 deletions

View File

@@ -12,13 +12,11 @@ import { Play, CheckCircle, FileText, AlertCircle, Clock, Activity, Loader2 } fr
import LayoutModelSelector from '@/components/LayoutModelSelector'
import PreprocessingSettings from '@/components/PreprocessingSettings'
import PreprocessingPreview from '@/components/PreprocessingPreview'
import TableDetectionSelector from '@/components/TableDetectionSelector'
import ProcessingTrackSelector from '@/components/ProcessingTrackSelector'
import OCRPresetSelector from '@/components/OCRPresetSelector'
import TaskNotFound from '@/components/TaskNotFound'
import { useTaskValidation } from '@/hooks/useTaskValidation'
import { useTaskStore, useProcessingState } from '@/store/taskStore'
import type { LayoutModel, ProcessingOptions, PreprocessingMode, PreprocessingConfig, TableDetectionConfig, ProcessingTrack, OCRPreset, OCRConfig } from '@/types/apiV2'
import type { LayoutModel, ProcessingOptions, PreprocessingMode, PreprocessingConfig, ProcessingTrack } from '@/types/apiV2'
export default function ProcessingPage() {
const { t } = useTranslation()
@@ -27,7 +25,9 @@ export default function ProcessingPage() {
// Use TaskStore for processing state management
const { startProcessing, stopProcessing, updateTaskStatus } = useTaskStore()
const processingState = useProcessingState()
// processingState is available for future use (e.g., displaying global processing status)
const _processingState = useProcessingState()
void _processingState // Suppress unused variable warning
// Use shared hook for task validation
const { taskId, taskDetail, isLoading: isValidating, isNotFound, clearAndReset } = useTaskValidation({
@@ -56,20 +56,9 @@ export default function ProcessingPage() {
})
const [showPreview, setShowPreview] = useState(false)
// Table detection state
const [tableDetectionConfig, setTableDetectionConfig] = useState<TableDetectionConfig>({
enable_wired_table: true,
enable_wireless_table: true,
enable_region_detection: true,
})
// Processing track override state (null = use system recommendation)
const [forceTrack, setForceTrack] = useState<ProcessingTrack | null>(null)
// OCR Preset state (default to 'datasheet' for best balance)
const [ocrPreset, setOcrPreset] = useState<OCRPreset>('datasheet')
const [ocrConfig, setOcrConfig] = useState<OCRConfig>({})
// Analyze document to determine if OCR is needed (only for pending tasks)
const { data: documentAnalysis, isLoading: isAnalyzing } = useQuery({
queryKey: ['documentAnalysis', taskId],
@@ -91,6 +80,8 @@ export default function ProcessingPage() {
))
// Start OCR processing
// NOTE: Simple OCR mode - using backend defaults for table/chart/formula recognition
// Only layout_model and preprocessing options are configurable from frontend
const processOCRMutation = useMutation({
mutationFn: () => {
const options: ProcessingOptions = {
@@ -100,9 +91,7 @@ export default function ProcessingPage() {
layout_model: layoutModel,
preprocessing_mode: preprocessingMode,
preprocessing_config: preprocessingMode === 'manual' ? preprocessingConfig : undefined,
table_detection: tableDetectionConfig,
ocr_preset: ocrPreset,
ocr_config: ocrPreset === 'custom' ? ocrConfig : undefined,
// NOTE: table_detection, ocr_preset, ocr_config removed - using backend defaults
}
// Update TaskStore processing state
@@ -448,15 +437,6 @@ export default function ProcessingPage() {
{/* OCR Track Options - Only show when document needs OCR */}
{needsOcrTrack && !isAnalyzing && (
<>
{/* OCR Processing Preset - Primary selection */}
<OCRPresetSelector
value={ocrPreset}
onChange={setOcrPreset}
customConfig={ocrConfig}
onCustomConfigChange={setOcrConfig}
disabled={processOCRMutation.isPending}
/>
{/* Layout Model Selection */}
<LayoutModelSelector
value={layoutModel}
@@ -464,13 +444,6 @@ export default function ProcessingPage() {
disabled={processOCRMutation.isPending}
/>
{/* Table Detection Settings */}
<TableDetectionSelector
value={tableDetectionConfig}
onChange={setTableDetectionConfig}
disabled={processOCRMutation.isPending}
/>
{/* Preprocessing Settings */}
<PreprocessingSettings
mode={preprocessingMode}