feat: implement layout preprocessing backend
Backend implementation for the add-layout-preprocessing proposal:
- Add LayoutPreprocessingService with CLAHE, sharpen, binarize
- Add auto-detection: analyze_image_quality() for contrast/edge metrics
- Integrate preprocessing into OCR pipeline (analyze_layout)
- Add Preview API: POST /api/v2/tasks/{id}/preview/preprocessing
- Add config options: layout_preprocessing_mode, thresholds
- Add schemas: PreprocessingConfig, PreprocessingPreviewResponse
Preprocessing affects only the input to layout detection.
Original images are preserved and still used for element extraction.
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -20,6 +20,8 @@ except ImportError:
|
||||
|
||||
import paddle
|
||||
from paddleocr import PPStructureV3
|
||||
from PIL import Image
|
||||
import numpy as np
|
||||
from app.models.unified_document import ElementType
|
||||
from app.core.config import settings
|
||||
from app.services.memory_manager import prediction_context
|
||||
@@ -78,15 +80,19 @@ class PPStructureEnhanced:
|
||||
self,
|
||||
image_path: Path,
|
||||
output_dir: Optional[Path] = None,
|
||||
current_page: int = 0
|
||||
current_page: int = 0,
|
||||
preprocessed_image: Optional[Image.Image] = None
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Analyze document with full PP-StructureV3 capabilities.
|
||||
|
||||
Args:
|
||||
image_path: Path to image file
|
||||
image_path: Path to original image file (used for cropping extracted images)
|
||||
output_dir: Optional output directory for saving extracted content
|
||||
current_page: Current page number (0-based)
|
||||
preprocessed_image: Optional preprocessed PIL Image for layout detection.
|
||||
If provided, this is used for PP-Structure prediction,
|
||||
but original image_path is still used for cropping images.
|
||||
|
||||
Returns:
|
||||
Dictionary with complete structure information including:
|
||||
@@ -97,6 +103,8 @@ class PPStructureEnhanced:
|
||||
"""
|
||||
try:
|
||||
logger.info(f"Enhanced PP-StructureV3 analysis on {image_path.name}")
|
||||
if preprocessed_image:
|
||||
logger.info("Using preprocessed image for layout detection")
|
||||
|
||||
# Perform structure analysis with semaphore control
|
||||
# This prevents OOM errors from multiple simultaneous predictions
|
||||
@@ -113,7 +121,16 @@ class PPStructureEnhanced:
|
||||
'error': 'Prediction slot timeout'
|
||||
}
|
||||
|
||||
results = self.structure_engine.predict(str(image_path))
|
||||
# Use preprocessed image if provided, otherwise use original path
|
||||
if preprocessed_image is not None:
|
||||
# Convert PIL to numpy array (BGR format for PP-Structure)
|
||||
predict_input = np.array(preprocessed_image)
|
||||
if len(predict_input.shape) == 3 and predict_input.shape[2] == 3:
|
||||
# Convert RGB to BGR
|
||||
predict_input = predict_input[:, :, ::-1]
|
||||
results = self.structure_engine.predict(predict_input)
|
||||
else:
|
||||
results = self.structure_engine.predict(str(image_path))
|
||||
|
||||
all_elements = []
|
||||
all_images = []
|
||||
|
||||
Reference in New Issue
Block a user