feat: implement layout preprocessing backend

Backend implementation for the add-layout-preprocessing proposal:

- Add LayoutPreprocessingService with CLAHE, sharpen, binarize
- Add auto-detection: analyze_image_quality() for contrast/edge metrics
- Integrate preprocessing into OCR pipeline (analyze_layout)
- Add Preview API: POST /api/v2/tasks/{id}/preview/preprocessing
- Add config options: layout_preprocessing_mode, thresholds
- Add schemas: PreprocessingConfig, PreprocessingPreviewResponse

Preprocessing only affects the layout detection input.
Original images are preserved for element extraction.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-27 15:17:20 +08:00
parent 06a5973f2e
commit ea0dd7456c
7 changed files with 800 additions and 22 deletions
--- a/backend/app/schemas/task.py
+++ b/backend/app/schemas/task.py
@@ -37,6 +37,79 @@ class LayoutModelEnum(str, Enum):
    CDLA = "cdla"         # CDLA model - Alternative for Chinese layout


+class PreprocessingModeEnum(str, Enum):
+    """How preprocessing is selected for the layout detection input image.
+
+    - AUTO: Analyze image quality and automatically apply optimal preprocessing
+    - MANUAL: Use the user-specified PreprocessingConfig
+    - DISABLED: Skip preprocessing entirely
+    """
+    AUTO = "auto"         # Default mode of the request/option schemas in this module
+    MANUAL = "manual"     # Caller supplies the configuration to apply
+    DISABLED = "disabled" # Skip preprocessing
+
+
+class PreprocessingContrastEnum(str, Enum):
+    """Contrast enhancement method applied during preprocessing.
+
+    - NONE: No contrast enhancement
+    - HISTOGRAM: Standard histogram equalization
+    - CLAHE: Contrast Limited Adaptive Histogram Equalization (recommended)
+    """
+    NONE = "none"
+    HISTOGRAM = "histogram"
+    CLAHE = "clahe"  # Default value of PreprocessingConfig.contrast
+
+
+class PreprocessingConfig(BaseModel):
+    """Preprocessing configuration for layout detection enhancement.
+
+    Configures image preprocessing applied before PP-Structure layout detection.
+    Preprocessing helps detect tables with faint lines or low-contrast borders.
+    The original image is preserved for element extraction.
+    """
+    contrast: PreprocessingContrastEnum = Field(
+        default=PreprocessingContrastEnum.CLAHE,
+        description="Contrast enhancement method"
+    )
+    sharpen: bool = Field(
+        default=True,  # sharpening stays on unless the caller turns it off
+        description="Enable sharpening for faint lines"
+    )
+    binarize: bool = Field(
+        default=False,  # opt-in only; the description flags it as aggressive
+        description="Enable binarization (aggressive, for very low contrast)"
+    )
+
+
+class ImageQualityMetrics(BaseModel):
+    """Image quality metrics from auto-analysis; both fields are required."""
+    contrast: float = Field(..., description="Contrast level (std dev of grayscale)")
+    edge_strength: float = Field(..., description="Edge strength (Sobel gradient mean)")
+
+
+class PreprocessingPreviewRequest(BaseModel):
+    """Request for previewing preprocessing on a single page."""
+    page: int = Field(default=1, ge=1, description="Page number to preview")  # ge=1: values below 1 fail validation
+    mode: PreprocessingModeEnum = Field(
+        default=PreprocessingModeEnum.AUTO,
+        description="Preprocessing mode"
+    )
+    config: Optional[PreprocessingConfig] = Field(
+        None,  # optional: an omitted config is stored as None
+        description="Manual configuration (only used when mode='manual')"
+    )
+
+
+class PreprocessingPreviewResponse(BaseModel):
+    """Response for preprocessing preview; every field is required."""
+    original_url: str = Field(..., description="URL to original image")
+    preprocessed_url: str = Field(..., description="URL to preprocessed image")
+    quality_metrics: ImageQualityMetrics = Field(..., description="Image quality analysis")
+    auto_config: PreprocessingConfig = Field(..., description="Auto-detected configuration")
+    mode_used: PreprocessingModeEnum = Field(..., description="Mode that was applied")
+
+
 class TaskCreate(BaseModel):
    """Task creation request"""
    filename: Optional[str] = Field(None, description="Original filename")
@@ -195,6 +268,16 @@ class ProcessingOptions(BaseModel):
        description="Layout detection model: 'chinese' (recommended for Chinese docs), 'default' (English docs), 'cdla' (Chinese layout)"
    )

+    # Layout preprocessing (OCR track only)
+    preprocessing_mode: PreprocessingModeEnum = Field(
+        default=PreprocessingModeEnum.AUTO,
+        description="Preprocessing mode: 'auto' (analyze and apply), 'manual' (use config), 'disabled'"
+    )
+    preprocessing_config: Optional[PreprocessingConfig] = Field(
+        None,
+        description="Manual preprocessing config (only used when preprocessing_mode='manual')"
+    )
+

 class AnalyzeRequest(BaseModel):
    """Document analysis request"""