feat: implement layout preprocessing backend
Backend implementation for add-layout-preprocessing proposal:
- Add LayoutPreprocessingService with CLAHE, sharpen, binarize
- Add auto-detection: analyze_image_quality() for contrast/edge metrics
- Integrate preprocessing into OCR pipeline (analyze_layout)
- Add Preview API: POST /api/v2/tasks/{id}/preview/preprocessing
- Add config options: layout_preprocessing_mode, thresholds
- Add schemas: PreprocessingConfig, PreprocessingPreviewResponse
Preprocessing only affects layout detection input.
Original images preserved for element extraction.
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -37,6 +37,79 @@ class LayoutModelEnum(str, Enum):
|
||||
CDLA = "cdla" # CDLA model - Alternative for Chinese layout
|
||||
|
||||
|
||||
class PreprocessingModeEnum(str, Enum):
    """Preprocessing mode for layout detection enhancement.

    - AUTO: Analyze image quality and automatically apply optimal preprocessing
    - MANUAL: Use user-specified preprocessing configuration
    - DISABLED: Skip preprocessing entirely

    Members also subclass ``str``, so each member compares equal to its
    lowercase string value (for example ``PreprocessingModeEnum.AUTO == "auto"``).
    """

    AUTO = "auto"  # Analyze and apply automatically (default)
    MANUAL = "manual"  # Use specified configuration
    DISABLED = "disabled"  # Skip preprocessing
|
||||
|
||||
|
||||
class PreprocessingContrastEnum(str, Enum):
    """Contrast enhancement method for preprocessing.

    - NONE: No contrast enhancement
    - HISTOGRAM: Standard histogram equalization
    - CLAHE: Contrast Limited Adaptive Histogram Equalization (recommended)

    Members also subclass ``str``, so each member compares equal to its
    lowercase string value (for example ``PreprocessingContrastEnum.CLAHE == "clahe"``).
    """

    NONE = "none"  # Leave contrast untouched
    HISTOGRAM = "histogram"  # Standard histogram equalization
    CLAHE = "clahe"  # Contrast Limited Adaptive Histogram Equalization
|
||||
|
||||
|
||||
class PreprocessingConfig(BaseModel):
    """Preprocessing configuration for layout detection enhancement.

    Used to configure image preprocessing before PP-Structure layout detection.
    Preprocessing helps detect tables with faint lines or low contrast borders.
    Original image is preserved for element extraction.
    """

    # Contrast enhancement method; CLAHE when not specified.
    contrast: PreprocessingContrastEnum = Field(
        default=PreprocessingContrastEnum.CLAHE,
        description="Contrast enhancement method"
    )
    # Sharpening is on by default.
    sharpen: bool = Field(
        default=True,
        description="Enable sharpening for faint lines"
    )
    # Binarization is off by default; described as an aggressive option.
    binarize: bool = Field(
        default=False,
        description="Enable binarization (aggressive, for very low contrast)"
    )
|
||||
|
||||
|
||||
class ImageQualityMetrics(BaseModel):
    """Image quality metrics from auto-analysis.

    Both fields are required (no default is provided).
    """

    # Contrast level, described as the standard deviation of the grayscale image.
    contrast: float = Field(..., description="Contrast level (std dev of grayscale)")
    # Edge strength, described as the mean of the Sobel gradient.
    edge_strength: float = Field(..., description="Edge strength (Sobel gradient mean)")
|
||||
|
||||
|
||||
class PreprocessingPreviewRequest(BaseModel):
    """Request for preprocessing preview.

    Every field has a default, so an empty request body previews page 1
    in AUTO mode with no manual configuration.
    """

    # Page number to preview; must be >= 1 (enforced by ``ge=1``).
    page: int = Field(default=1, ge=1, description="Page number to preview")
    # Preprocessing mode to preview; AUTO when not specified.
    mode: PreprocessingModeEnum = Field(
        default=PreprocessingModeEnum.AUTO,
        description="Preprocessing mode"
    )
    # Optional manual settings; per the description, only used when mode='manual'.
    config: Optional[PreprocessingConfig] = Field(
        None,
        description="Manual configuration (only used when mode='manual')"
    )
|
||||
|
||||
|
||||
class PreprocessingPreviewResponse(BaseModel):
    """Response for preprocessing preview.

    All fields are required (no default is provided).
    """

    # URL to the original image.
    original_url: str = Field(..., description="URL to original image")
    # URL to the preprocessed image.
    preprocessed_url: str = Field(..., description="URL to preprocessed image")
    # Image quality analysis (contrast and edge-strength metrics).
    quality_metrics: ImageQualityMetrics = Field(..., description="Image quality analysis")
    # Configuration chosen by auto-detection.
    auto_config: PreprocessingConfig = Field(..., description="Auto-detected configuration")
    # Mode that was applied to produce the preview.
    mode_used: PreprocessingModeEnum = Field(..., description="Mode that was applied")
|
||||
|
||||
|
||||
class TaskCreate(BaseModel):
|
||||
"""Task creation request"""
|
||||
filename: Optional[str] = Field(None, description="Original filename")
|
||||
@@ -195,6 +268,16 @@ class ProcessingOptions(BaseModel):
|
||||
description="Layout detection model: 'chinese' (recommended for Chinese docs), 'default' (English docs), 'cdla' (Chinese layout)"
|
||||
)
|
||||
|
||||
# Layout preprocessing (OCR track only)
|
||||
preprocessing_mode: PreprocessingModeEnum = Field(
|
||||
default=PreprocessingModeEnum.AUTO,
|
||||
description="Preprocessing mode: 'auto' (analyze and apply), 'manual' (use config), 'disabled'"
|
||||
)
|
||||
preprocessing_config: Optional[PreprocessingConfig] = Field(
|
||||
None,
|
||||
description="Manual preprocessing config (only used when preprocessing_mode='manual')"
|
||||
)
|
||||
|
||||
|
||||
class AnalyzeRequest(BaseModel):
|
||||
"""Document analysis request"""
|
||||
|
||||
Reference in New Issue
Block a user