feat: implement layout preprocessing backend

Backend implementation for add-layout-preprocessing proposal:
- Add LayoutPreprocessingService with CLAHE, sharpen, binarize
- Add auto-detection: analyze_image_quality() for contrast/edge metrics
- Integrate preprocessing into OCR pipeline (analyze_layout)
- Add Preview API: POST /api/v2/tasks/{id}/preview/preprocessing
- Add config options: layout_preprocessing_mode, thresholds
- Add schemas: PreprocessingConfig, PreprocessingPreviewResponse

Preprocessing only affects the layout detection input.
Original images are preserved for element extraction.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
egg
2025-11-27 15:17:20 +08:00
parent 06a5973f2e
commit ea0dd7456c
7 changed files with 800 additions and 22 deletions

View File

@@ -37,6 +37,79 @@ class LayoutModelEnum(str, Enum):
CDLA = "cdla" # CDLA model - Alternative for Chinese layout
class PreprocessingModeEnum(str, Enum):
    """Select how preprocessing is applied ahead of layout detection.

    Members:
        AUTO: Analyze image quality and automatically apply the optimal
            preprocessing.
        MANUAL: Use the user-specified preprocessing configuration.
        DISABLED: Skip preprocessing entirely.
    """

    AUTO = "auto"  # Analyze and apply automatically (default)
    MANUAL = "manual"  # Use specified configuration
    DISABLED = "disabled"  # Skip preprocessing
class PreprocessingContrastEnum(str, Enum):
    """Select the contrast enhancement method used during preprocessing.

    Members:
        NONE: No contrast enhancement.
        HISTOGRAM: Standard histogram equalization.
        CLAHE: Contrast Limited Adaptive Histogram Equalization (recommended).
    """

    NONE = "none"
    HISTOGRAM = "histogram"
    CLAHE = "clahe"
class PreprocessingConfig(BaseModel):
    """Image preprocessing settings applied before layout detection.

    Configures the image preprocessing that runs ahead of PP-Structure layout
    detection. Preprocessing helps detect tables with faint lines or
    low-contrast borders. The original image is preserved for element
    extraction.

    Attributes:
        contrast: Contrast enhancement method; defaults to CLAHE.
        sharpen: Whether to sharpen faint lines; defaults to True.
        binarize: Whether to binarize the image (aggressive, intended for
            very low contrast); defaults to False.
    """

    contrast: PreprocessingContrastEnum = Field(
        default=PreprocessingContrastEnum.CLAHE,
        description="Contrast enhancement method"
    )
    sharpen: bool = Field(
        default=True,
        description="Enable sharpening for faint lines"
    )
    binarize: bool = Field(
        default=False,
        description="Enable binarization (aggressive, for very low contrast)"
    )
class ImageQualityMetrics(BaseModel):
    """Image quality metrics produced by the automatic analysis.

    Attributes:
        contrast: Contrast level (standard deviation of the grayscale image).
        edge_strength: Edge strength (mean of the Sobel gradient).
    """

    contrast: float = Field(..., description="Contrast level (std dev of grayscale)")
    edge_strength: float = Field(..., description="Edge strength (Sobel gradient mean)")
class PreprocessingPreviewRequest(BaseModel):
    """Request body for a preprocessing preview.

    Attributes:
        page: Page number to preview; must be >= 1 and defaults to 1.
        mode: Preprocessing mode to apply; defaults to AUTO.
        config: Manual configuration; optional, and per its description only
            used when ``mode`` is 'manual'.
    """

    page: int = Field(default=1, ge=1, description="Page number to preview")
    mode: PreprocessingModeEnum = Field(
        default=PreprocessingModeEnum.AUTO,
        description="Preprocessing mode"
    )
    config: Optional[PreprocessingConfig] = Field(
        None,
        description="Manual configuration (only used when mode='manual')"
    )
class PreprocessingPreviewResponse(BaseModel):
    """Response body for a preprocessing preview.

    All fields are required.

    Attributes:
        original_url: URL to the original image.
        preprocessed_url: URL to the preprocessed image.
        quality_metrics: Image quality analysis results.
        auto_config: Auto-detected preprocessing configuration.
        mode_used: Preprocessing mode that was applied.
    """

    original_url: str = Field(..., description="URL to original image")
    preprocessed_url: str = Field(..., description="URL to preprocessed image")
    quality_metrics: ImageQualityMetrics = Field(..., description="Image quality analysis")
    auto_config: PreprocessingConfig = Field(..., description="Auto-detected configuration")
    mode_used: PreprocessingModeEnum = Field(..., description="Mode that was applied")
class TaskCreate(BaseModel):
"""Task creation request"""
filename: Optional[str] = Field(None, description="Original filename")
@@ -195,6 +268,16 @@ class ProcessingOptions(BaseModel):
description="Layout detection model: 'chinese' (recommended for Chinese docs), 'default' (English docs), 'cdla' (Chinese layout)"
)
# Layout preprocessing (OCR track only).
# preprocessing_mode defaults to AUTO. preprocessing_config is optional
# (defaults to None) and, per its description, is only used when
# preprocessing_mode is 'manual'.
preprocessing_mode: PreprocessingModeEnum = Field(
default=PreprocessingModeEnum.AUTO,
description="Preprocessing mode: 'auto' (analyze and apply), 'manual' (use config), 'disabled'"
)
preprocessing_config: Optional[PreprocessingConfig] = Field(
None,
description="Manual preprocessing config (only used when preprocessing_mode='manual')"
)
class AnalyzeRequest(BaseModel):
"""Document analysis request"""