chore: backup before code cleanup
Backup commit before executing remove-unused-code proposal. This includes all pending changes and new features. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,227 @@
|
||||
# Design: OCR Processing Presets
|
||||
|
||||
## Architecture Overview
|
||||
|
||||
```
┌─────────────────────────────────────────────────────────────────┐
│                            Frontend                             │
├─────────────────────────────────────────────────────────────────┤
│  ┌──────────────────┐    ┌──────────────────────────────────┐   │
│  │ Preset Selector  │───▶│ Advanced Parameter Panel         │   │
│  │ (Simple Mode)    │    │ (Expert Mode)                    │   │
│  └──────────────────┘    └──────────────────────────────────┘   │
│           │                           │                         │
│           └───────────┬───────────────┘                         │
│                       ▼                                         │
│              ┌─────────────────┐                                │
│              │ OCR Config JSON │                                │
│              └─────────────────┘                                │
└─────────────────────────────────────────────────────────────────┘
                        │
                        ▼ POST /api/v2/tasks
┌─────────────────────────────────────────────────────────────────┐
│                             Backend                             │
├─────────────────────────────────────────────────────────────────┤
│  ┌──────────────────┐    ┌──────────────────────────────────┐   │
│  │ Preset Resolver  │───▶│ OCR Config Validator             │   │
│  └──────────────────┘    └──────────────────────────────────┘   │
│           │                           │                         │
│           └───────────┬───────────────┘                         │
│                       ▼                                         │
│              ┌─────────────────┐                                │
│              │ OCRService      │                                │
│              │ (with config)   │                                │
│              └─────────────────┘                                │
│                       │                                         │
│                       ▼                                         │
│              ┌─────────────────┐                                │
│              │ PPStructureV3   │                                │
│              │ (configured)    │                                │
│              └─────────────────┘                                │
└─────────────────────────────────────────────────────────────────┘
```
|
||||
|
||||
## Data Models
|
||||
|
||||
### OCRPreset Enum
|
||||
|
||||
```python
|
||||
class OCRPreset(str, Enum):
    """Named OCR processing presets, one per document category.

    Mixing in ``str`` makes every member compare equal to its raw string
    value, so members can be matched directly against string payloads such
    as the ``ocr_preset`` request field.
    """

    TEXT_HEAVY = "text_heavy"    # Reports, articles, manuals
    DATASHEET = "datasheet"      # Technical datasheets, TDS
    TABLE_HEAVY = "table_heavy"  # Financial reports, spreadsheets
    FORM = "form"                # Applications, surveys
    MIXED = "mixed"              # General documents
    CUSTOM = "custom"            # User-defined settings
|
||||
```
|
||||
|
||||
### OCRConfig Model
|
||||
|
||||
```python
|
||||
class OCRConfig(BaseModel):
    """OCR processing parameters.

    Fields are grouped into table processing, layout detection,
    preprocessing, and recognition modules. Every threshold field is
    constrained to the closed range [0.0, 1.0] via ``Field(ge=..., le=...)``.
    """

    # Table Processing
    table_parsing_mode: Literal[
        "full", "conservative", "classification_only", "disabled"
    ] = "conservative"
    table_layout_threshold: float = Field(default=0.65, ge=0.0, le=1.0)
    enable_wired_table: bool = True
    enable_wireless_table: bool = False  # Disabled by default (aggressive)

    # Layout Detection
    layout_detection_model: Optional[str] = "PP-DocLayout_plus-L"
    # NOTE(review): None presumably defers to the layout model's own default
    # thresholds -- confirm against the code that consumes this config.
    layout_threshold: Optional[float] = Field(default=None, ge=0.0, le=1.0)
    layout_nms_threshold: Optional[float] = Field(default=None, ge=0.0, le=1.0)
    layout_merge_mode: Optional[Literal["large", "small", "union"]] = "union"

    # Preprocessing
    use_doc_orientation_classify: bool = True
    use_doc_unwarping: bool = False  # Causes distortion
    use_textline_orientation: bool = True

    # Recognition Modules
    enable_chart_recognition: bool = True
    enable_formula_recognition: bool = True
    enable_seal_recognition: bool = False
    enable_region_detection: bool = True
|
||||
```
|
||||
|
||||
### Preset Definitions
|
||||
|
||||
```python
|
||||
# Per-preset OCR configuration. Any field not listed for a preset keeps the
# default declared on OCRConfig.
# NOTE(review): OCRPreset.CUSTOM has no entry here, so
# PRESET_CONFIGS[OCRPreset.CUSTOM] raises KeyError -- confirm that callers
# handle the custom preset before indexing.
PRESET_CONFIGS: Dict[OCRPreset, OCRConfig] = {
    OCRPreset.TEXT_HEAVY: OCRConfig(
        table_parsing_mode="disabled",
        table_layout_threshold=0.7,
        enable_wired_table=False,
        enable_wireless_table=False,
        enable_chart_recognition=False,
        enable_formula_recognition=False,
    ),
    OCRPreset.DATASHEET: OCRConfig(
        table_parsing_mode="conservative",
        table_layout_threshold=0.65,
        enable_wired_table=True,
        enable_wireless_table=False,  # Key: disable aggressive wireless
    ),
    OCRPreset.TABLE_HEAVY: OCRConfig(
        table_parsing_mode="full",
        table_layout_threshold=0.5,
        enable_wired_table=True,
        enable_wireless_table=True,
    ),
    OCRPreset.FORM: OCRConfig(
        table_parsing_mode="conservative",
        table_layout_threshold=0.6,
        enable_wired_table=True,
        enable_wireless_table=False,
    ),
    OCRPreset.MIXED: OCRConfig(
        table_parsing_mode="classification_only",
        table_layout_threshold=0.55,
    ),
}
|
||||
```
|
||||
|
||||
## API Design
|
||||
|
||||
### Task Creation with OCR Config
|
||||
|
||||
```http
|
||||
POST /api/v2/tasks
|
||||
Content-Type: multipart/form-data
|
||||
|
||||
file: <binary>
|
||||
processing_track: "ocr"
|
||||
ocr_preset: "datasheet" # Optional: use preset
|
||||
ocr_config: { # Optional: override specific params
|
||||
"table_layout_threshold": 0.7
|
||||
}
|
||||
```
|
||||
|
||||
### Get Available Presets
|
||||
|
||||
```http
|
||||
GET /api/v2/ocr/presets
|
||||
|
||||
Response:
|
||||
{
|
||||
"presets": [
|
||||
{
|
||||
"name": "datasheet",
|
||||
"display_name": "Technical Datasheet",
|
||||
"description": "Optimized for product specifications and technical documents",
|
||||
"icon": "description",
|
||||
"config": { ... }
|
||||
},
|
||||
...
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
## Frontend Components
|
||||
|
||||
### PresetSelector Component
|
||||
|
||||
```tsx
|
||||
/** Props for the PresetSelector component. */
interface PresetSelectorProps {
  /** The preset value passed to the selector. */
  value: OCRPreset;
  /** Callback that receives an OCRPreset. */
  onChange: (preset: OCRPreset) => void;
  /** Advanced-panel flag; presumably true while the panel is visible (confirm in the component). */
  showAdvanced: boolean;
  /** Callback taking no arguments; presumably flips `showAdvanced` in the parent (confirm). */
  onToggleAdvanced: () => void;
}
|
||||
|
||||
// Visual preset cards with icons:
|
||||
// 📄 Text Heavy - Reports & Articles
|
||||
// 📊 Datasheet - Technical Documents
|
||||
// 📈 Table Heavy - Financial Reports
|
||||
// 📝 Form - Applications & Surveys
|
||||
// 📑 Mixed - General Documents
|
||||
// ⚙️ Custom - Expert Settings
|
||||
```
|
||||
|
||||
### AdvancedConfigPanel Component
|
||||
|
||||
```tsx
|
||||
/** Props for the AdvancedConfigPanel component. */
interface AdvancedConfigPanelProps {
  /** The OCR configuration passed to the panel. */
  config: OCRConfig;
  /** Callback that receives a partial OCRConfig (only a subset of fields need be present). */
  onChange: (config: Partial<OCRConfig>) => void;
  /** To show which values differ from preset. */
  preset: OCRPreset;
}
|
||||
|
||||
// Sections:
|
||||
// - Table Processing (collapsed by default)
|
||||
// - Layout Detection (collapsed by default)
|
||||
// - Preprocessing (collapsed by default)
|
||||
// - Recognition Modules (collapsed by default)
|
||||
```
|
||||
|
||||
## Key Design Decisions
|
||||
|
||||
### 1. Preset as Default, Custom as Exception
|
||||
|
||||
Users should start with presets. Only expose the advanced panel when:
|
||||
- User explicitly clicks "Advanced Settings"
|
||||
- User selects "Custom" preset
|
||||
- User has previously saved custom settings
|
||||
|
||||
### 2. Conservative Defaults
|
||||
|
||||
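Presets default to conservative settings unless the document type calls for more aggressive table parsing (`table_heavy` enables wireless tables and lowers the threshold to 0.5; `mixed` uses a threshold of 0.55):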
|
||||
- `enable_wireless_table: false` (most aggressive, causes cell explosion)
|
||||
- `table_layout_threshold: 0.6+` (reduce false table detection)
|
||||
- `use_doc_unwarping: false` (causes distortion)
|
||||
|
||||
### 3. Config Inheritance
|
||||
|
||||
A custom config inherits from the selected preset; only the explicitly specified fields are overridden:
|
||||
```python
|
||||
# OCRConfig is a Pydantic model and has no dict-style .update(); merge the
# overrides into the preset's field values and rebuild the model so the
# overrides are validated too (e.g. threshold bounds).
# OCRPreset.CUSTOM has no PRESET_CONFIGS entry, so fall back to model defaults.
# (.dict() is the Pydantic v1 spelling; use .model_dump() on Pydantic v2.)
base_config = PRESET_CONFIGS.get(preset, OCRConfig())
final_config = OCRConfig(**{**base_config.dict(), **(custom_overrides or {})})
|
||||
```
|
||||
|
||||
### 4. No Patch Behaviors
|
||||
|
||||
All post-processing patches are disabled by default:
|
||||
- `cell_validation_enabled: false`
|
||||
- `gap_filling_enabled: false`
|
||||
- `table_content_rebuilder_enabled: false`
|
||||
|
||||
Focus on getting PP-Structure output right with proper configuration.
|
||||
Reference in New Issue
Block a user