OCR/backend/app/services/layout_preprocessing_service.py
egg 95ae1f1bdb feat: add table detection options and scan artifact removal
- Add TableDetectionSelector component for wired/wireless/region detection
- Add CV-based table line detector module (disabled due to poor performance)
- Add scan artifact removal preprocessing step (removes faint horizontal lines)
- Add PreprocessingConfig schema with remove_scan_artifacts option
- Update frontend PreprocessingSettings with scan artifact toggle
- Integrate table detection config into ProcessingPage
- Archive extract-table-cell-boxes proposal

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-30 13:21:50 +08:00


"""
Tool_OCR - Layout Preprocessing Service
Image preprocessing to enhance layout detection for documents with faint lines/borders.
This service provides:
1. Image quality analysis (contrast, edge strength)
2. Contrast enhancement (histogram equalization, CLAHE, or document mode with background normalization)
3. Sharpening for faint lines
4. Scan artifact removal (faint horizontal scanner light-bar lines)
5. Bidirectional scaling toward the layout model's preferred resolution
6. Optional binarization for very low contrast documents
IMPORTANT: Preprocessing only affects layout detection input.
Original images are preserved for element extraction.
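
Typical usage (illustrative sketch; the file path and variable names are
hypothetical, not part of this module):

    service = get_layout_preprocessing_service()
    pil_image, result = service.preprocess_to_pil("scan.png")
    # ... run layout detection on pil_image, collecting bboxes ...
    if result.scaling_info and result.scaling_info.was_scaled:
        bboxes = service.scale_bboxes_to_original(
            bboxes, result.scaling_info.scale_factor)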
"""
import logging
from pathlib import Path
from typing import Optional, Tuple, Union
from dataclasses import dataclass
import cv2
import numpy as np
from PIL import Image
from app.core.config import settings
from app.schemas.task import (
PreprocessingConfig,
PreprocessingContrastEnum,
PreprocessingModeEnum,
ImageQualityMetrics,
)
logger = logging.getLogger(__name__)
@dataclass
class ScalingInfo:
"""Information about image scaling applied for layout detection."""
was_scaled: bool
scale_factor: float # Factor to multiply bbox coords to get original size (1.0 / actual_scale)
original_size: Tuple[int, int] # (width, height) of original image
scaled_size: Tuple[int, int] # (width, height) after scaling
@dataclass
class PreprocessingResult:
"""Result of preprocessing operation."""
image: np.ndarray
config_used: PreprocessingConfig
quality_metrics: ImageQualityMetrics
was_processed: bool
scaling_info: Optional[ScalingInfo] = None # Info about any scaling applied
class LayoutPreprocessingService:
"""
Service for preprocessing images to improve layout detection.
    The preprocessing pipeline:
    1. Analyze image quality (contrast, edge strength)
    2. Scale the image into the optimal range for layout detection
    3. Remove faint scan artifacts (if enabled)
    4. Apply contrast enhancement if needed (CLAHE, histogram, or document mode)
    5. Apply sharpening if edge strength is low
    6. Apply binarization if contrast is very low (optional)
    Apart from the tracked scaling step, all operations preserve the original
    color image dimensions.
"""
def __init__(self):
# Load thresholds from config
self.contrast_threshold = settings.layout_preprocessing_contrast_threshold
self.edge_threshold = settings.layout_preprocessing_edge_threshold
self.binarize_threshold = settings.layout_preprocessing_binarize_threshold
# Image scaling settings for layout detection (bidirectional)
self.scaling_enabled = settings.layout_image_scaling_enabled
self.scaling_max_dimension = settings.layout_image_scaling_max_dimension
self.scaling_min_dimension = settings.layout_image_scaling_min_dimension
self.scaling_target_dimension = settings.layout_image_scaling_target_dimension
# CLAHE parameters
self.clahe_clip_limit = 2.0
self.clahe_tile_grid_size = (8, 8)
# Document-specific CLAHE parameters (larger tiles for documents)
self.document_clahe_clip_limit = 3.0
self.document_clahe_tile_grid_size = (16, 16)
# Background normalization parameters for scanned documents
self.background_kernel_size = 51 # Morphological kernel size
# Sharpening kernel (unsharp mask style)
self.sharpen_kernel = np.array([
[0, -1, 0],
[-1, 5, -1],
[0, -1, 0]
], dtype=np.float32)
logger.info(
f"LayoutPreprocessingService initialized with thresholds: "
f"contrast={self.contrast_threshold}, edge={self.edge_threshold}, "
f"binarize={self.binarize_threshold}, "
f"scaling={'enabled' if self.scaling_enabled else 'disabled'} "
f"(min={self.scaling_min_dimension}, max={self.scaling_max_dimension}, target={self.scaling_target_dimension})"
)
def analyze_image_quality(self, image: np.ndarray) -> ImageQualityMetrics:
"""
Analyze image quality to determine preprocessing needs.
Args:
image: Input image (BGR or grayscale)
Returns:
ImageQualityMetrics with contrast and edge_strength
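
        Example (illustrative; img is a hypothetical BGR array loaded
        with cv2.imread, and the output values are made up):

            metrics = service.analyze_image_quality(img)
            # e.g. ImageQualityMetrics(contrast=38.21, edge_strength=9.74)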
"""
# Convert to grayscale if needed
if len(image.shape) == 3:
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
else:
gray = image
# Calculate contrast (standard deviation of pixel values)
contrast = float(np.std(gray))
# Calculate edge strength (mean of Sobel gradient magnitude)
sobel_x = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=3)
sobel_y = cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=3)
edge_strength = float(np.mean(np.sqrt(sobel_x**2 + sobel_y**2)))
return ImageQualityMetrics(
contrast=round(contrast, 2),
edge_strength=round(edge_strength, 2)
)
def _normalize_background(self, gray: np.ndarray) -> np.ndarray:
"""
Normalize image background to remove uneven illumination.
This is particularly effective for scanned documents where scanner
lighting may be uneven, or where paper has yellowed/stained areas.
Method:
1. Estimate background using morphological closing (fills in text/details)
2. Divide original by background estimate
3. Rescale to full 0-255 range
Args:
gray: Grayscale image (L channel or grayscale)
Returns:
Normalized grayscale image with uniform background
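
        Worked example of the normalization arithmetic (illustrative values):

            # a pixel of 180 over an estimated background of 200 maps to
            # 180 / 200 * 255 = 229.5 -> clipped, cast to uint8 -> 229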
"""
# Create structuring element for morphological operations
kernel_size = self.background_kernel_size
# Ensure kernel size is odd
if kernel_size % 2 == 0:
kernel_size += 1
kernel = cv2.getStructuringElement(
cv2.MORPH_ELLIPSE,
(kernel_size, kernel_size)
)
# Morphological closing estimates the background
# (dilate then erode - fills in dark features like text)
background = cv2.morphologyEx(gray, cv2.MORPH_CLOSE, kernel)
# Apply Gaussian blur to smooth the background estimate
background = cv2.GaussianBlur(background, (kernel_size, kernel_size), 0)
# Avoid division by zero
background = np.maximum(background, 1).astype(np.float32)
# Normalize: divide by background and rescale to 0-255
# This removes uneven illumination while preserving text/content
normalized = (gray.astype(np.float32) / background) * 255.0
# Clip and convert back to uint8
normalized = np.clip(normalized, 0, 255).astype(np.uint8)
logger.debug(
f"Background normalization applied: kernel={kernel_size}, "
f"background range=[{background.min():.0f}, {background.max():.0f}]"
)
return normalized
def remove_scan_artifacts(
self,
image: np.ndarray,
line_thickness: int = 5,
min_line_length_ratio: float = 0.3,
faint_threshold: int = 30
) -> np.ndarray:
"""
Remove horizontal scan line artifacts from scanned documents.
Scanner light bar artifacts appear as FAINT horizontal lines across the image.
Key distinction from table borders:
- Scan artifacts are LIGHT/FAINT (close to background color)
- Table borders are DARK/BOLD (high contrast)
Method:
1. Detect horizontal edges using Sobel filter
2. Filter to keep only FAINT edges (low contrast)
3. Find continuous horizontal segments
4. Remove only faint horizontal lines while preserving bold table borders
Args:
image: Input image (BGR)
line_thickness: Maximum thickness of lines to remove (pixels)
min_line_length_ratio: Minimum line length as ratio of image width (0.0-1.0)
faint_threshold: Maximum edge strength for "faint" lines (0-255)
Returns:
Image with scan artifacts removed (BGR)
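
        Example (illustrative; img is a hypothetical BGR scan with faint
        light-bar lines):

            cleaned = service.remove_scan_artifacts(img, faint_threshold=30)
            # Horizontal runs spanning at least 30% of the image width with
            # Sobel response in (5, 30) are inpainted; bold borders are kept.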
"""
h, w = image.shape[:2]
min_line_length = int(w * min_line_length_ratio)
# Convert to grayscale for detection
if len(image.shape) == 3:
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
else:
gray = image.copy()
# Step 1: Detect horizontal edges using Sobel (vertical gradient)
# Scan artifacts will have weak gradients, table borders will have strong gradients
sobel_y = cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=3)
        # Clip before casting: a bare uint8 cast wraps modulo 256, which would
        # alias strong (table border) gradients into the faint band below
        sobel_abs = np.clip(np.abs(sobel_y), 0, 255).astype(np.uint8)
# Step 2: Find FAINT horizontal edges only (low gradient magnitude)
# Strong edges (table borders) have high sobel values
# Faint edges (scan artifacts) have low sobel values
faint_edges = (sobel_abs > 5) & (sobel_abs < faint_threshold)
faint_edges = faint_edges.astype(np.uint8) * 255
# Step 3: Use horizontal morphological operations to find continuous lines
horizontal_kernel = cv2.getStructuringElement(
cv2.MORPH_RECT,
(min_line_length, 1)
)
# Opening removes short segments, keeping only long horizontal lines
horizontal_lines = cv2.morphologyEx(
faint_edges, cv2.MORPH_OPEN, horizontal_kernel, iterations=1
)
        # Dilate vertically to cover the full artifact thickness
        # (OpenCV kernel size is (width, height): 3 wide x line_thickness tall)
        dilate_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, line_thickness))
line_mask = cv2.dilate(horizontal_lines, dilate_kernel, iterations=1)
# Check if any artifacts were detected
artifact_pixels = np.sum(line_mask > 0)
if artifact_pixels < 100:
logger.debug("No faint scan artifacts detected")
return image
# Calculate artifact coverage
total_pixels = h * w
coverage_ratio = artifact_pixels / total_pixels
        # Faint artifacts should cover only a small portion of the image
        if coverage_ratio > 0.15:  # More than 15% is definitely too much
            logger.debug(f"Artifact detection rejected: coverage too high ({coverage_ratio:.2%})")
            return image
        if coverage_ratio > 0.05:  # More than 5% is suspicious but still processed
            logger.debug(f"Faint artifact detection: coverage={coverage_ratio:.2%} (processing anyway)")
# Use inpainting to remove artifacts
result = cv2.inpaint(image, line_mask, inpaintRadius=3, flags=cv2.INPAINT_TELEA)
logger.info(
f"Scan artifacts removed: {artifact_pixels} pixels ({coverage_ratio:.2%}), faint_threshold={faint_threshold}"
)
return result
def scale_for_layout_detection(
self,
image: np.ndarray,
force_scale: bool = False
) -> Tuple[np.ndarray, ScalingInfo]:
"""
Apply bidirectional scaling for optimal layout detection.
PP-Structure's layout detection model (RT-DETR based) works best with images
around 1600px on the longest side. Both too-large and too-small images
reduce detection accuracy:
- Too large (>2000px): Model's receptive field cannot capture entire structures
- Too small (<1200px): Insufficient detail for accurate detection
Scaling behavior:
- max_dim > max_dimension (2000): Scale DOWN to target (1600)
- max_dim < min_dimension (1200): Scale UP to target (1600)
- min_dimension <= max_dim <= max_dimension: No scaling (optimal range)
Args:
image: Input image (BGR)
force_scale: Force scaling to target even if in optimal range
Returns:
Tuple of (scaled_image, ScalingInfo)
ScalingInfo.scale_factor is the multiplier to convert scaled bbox
coordinates back to original image coordinates.
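
        Example (illustrative; a hypothetical 3000x4000 scan with the
        min/max/target values shown above):

            scaled, info = service.scale_for_layout_detection(img)
            # max_dim 4000 > 2000, so actual_scale = 1600 / 4000 = 0.4:
            # 3000x4000 -> 1200x1600, and info.scale_factor = 2.5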
"""
h, w = image.shape[:2]
original_size = (w, h)
max_dim = max(h, w)
# Determine if scaling is needed and direction
should_downscale = self.scaling_enabled and max_dim > self.scaling_max_dimension
should_upscale = self.scaling_enabled and max_dim < self.scaling_min_dimension
should_scale = should_downscale or should_upscale or force_scale
if not should_scale:
return image, ScalingInfo(
was_scaled=False,
scale_factor=1.0,
original_size=original_size,
scaled_size=original_size
)
# Calculate scale factor to reach target dimension
actual_scale = self.scaling_target_dimension / max_dim
new_w = int(w * actual_scale)
new_h = int(h * actual_scale)
# Choose interpolation method based on scale direction
if actual_scale < 1.0:
# Downscaling: INTER_AREA is best for shrinking (anti-aliasing)
interpolation = cv2.INTER_AREA
direction = "DOWN"
else:
# Upscaling: INTER_CUBIC provides smooth enlargement
interpolation = cv2.INTER_CUBIC
direction = "UP"
scaled_image = cv2.resize(image, (new_w, new_h), interpolation=interpolation)
# scale_factor is the inverse - used to scale bbox coords back to original
scale_factor = 1.0 / actual_scale
logger.info(
f"Scaled {direction} for layout detection: {w}x{h} -> {new_w}x{new_h} "
f"(scale_factor={scale_factor:.3f} to restore original coords)"
)
return scaled_image, ScalingInfo(
was_scaled=True,
scale_factor=scale_factor,
original_size=original_size,
scaled_size=(new_w, new_h)
)
@staticmethod
def scale_bbox_to_original(
bbox: Tuple[float, float, float, float],
scale_factor: float
) -> Tuple[float, float, float, float]:
"""
Scale a bounding box from scaled coordinates back to original image coordinates.
Args:
bbox: Bounding box as (x1, y1, x2, y2) in scaled image coordinates
scale_factor: Factor to multiply (from ScalingInfo.scale_factor)
Returns:
Bounding box in original image coordinates
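
        Example (illustrative, continuing a 0.4x downscale where
        scale_factor = 2.5):

            scale_bbox_to_original((100, 50, 400, 200), 2.5)
            # -> (250.0, 125.0, 1000.0, 500.0)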
"""
x1, y1, x2, y2 = bbox
return (
x1 * scale_factor,
y1 * scale_factor,
x2 * scale_factor,
y2 * scale_factor
)
@staticmethod
def scale_bboxes_to_original(
bboxes: list,
scale_factor: float
) -> list:
"""
Scale multiple bounding boxes from scaled coordinates to original.
Args:
bboxes: List of bounding boxes, each as (x1, y1, x2, y2)
scale_factor: Factor to multiply (from ScalingInfo.scale_factor)
Returns:
List of bounding boxes in original image coordinates
"""
return [
LayoutPreprocessingService.scale_bbox_to_original(bbox, scale_factor)
for bbox in bboxes
]
def get_auto_config(self, metrics: ImageQualityMetrics) -> PreprocessingConfig:
"""
Determine optimal preprocessing config based on image quality.
Auto-detection calculates appropriate strength values:
- Lower image contrast → Higher contrast_strength
- Lower edge strength → Higher sharpen_strength
- Binarization is disabled by default (rarely beneficial)
Args:
metrics: Image quality metrics from analyze_image_quality()
Returns:
PreprocessingConfig with recommended settings
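
        Example (illustrative; assumes the default thresholds of 40 for
        contrast and 15 for edge strength noted in the comments below):

            config = service.get_auto_config(
                ImageQualityMetrics(contrast=20.0, edge_strength=5.0))
            # -> contrast=CLAHE (strength 2.0), sharpen=True (strength 1.67)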
"""
# Determine contrast enhancement and strength
if metrics.contrast < self.contrast_threshold:
contrast = PreprocessingContrastEnum.CLAHE
# Calculate strength based on how far below threshold
# contrast=40 threshold, contrast=20 → strength=2.0, contrast=30 → strength=1.5
contrast_ratio = (self.contrast_threshold - metrics.contrast) / self.contrast_threshold
contrast_strength = min(1.0 + contrast_ratio * 2.0, 3.0) # Range: 1.0 to 3.0
else:
contrast = PreprocessingContrastEnum.NONE
contrast_strength = 1.0
# Determine sharpening and strength
if metrics.edge_strength < self.edge_threshold:
sharpen = True
# Calculate strength based on how far below threshold
# edge=15 threshold, edge=5 → strength=1.67, edge=10 → strength=1.33
edge_ratio = (self.edge_threshold - metrics.edge_strength) / self.edge_threshold
sharpen_strength = min(1.0 + edge_ratio * 1.0, 2.0) # Range: 1.0 to 2.0
else:
sharpen = False
sharpen_strength = 1.0
        # Binarization is disabled unconditionally - it rarely helps and often hurts.
        # A possible future heuristic: enable it only for extremely low contrast
        # (e.g. < 15), which indicates a scan quality problem.
        binarize = False
# Scan artifact removal is always enabled in auto mode for scanned documents
remove_scan_artifacts = True
logger.debug(
f"Auto config: contrast={contrast} strength={contrast_strength:.2f}, "
f"sharpen={sharpen} strength={sharpen_strength:.2f}, binarize={binarize}, "
f"remove_scan_artifacts={remove_scan_artifacts}"
)
return PreprocessingConfig(
contrast=contrast,
contrast_strength=round(contrast_strength, 2),
sharpen=sharpen,
sharpen_strength=round(sharpen_strength, 2),
binarize=binarize,
remove_scan_artifacts=remove_scan_artifacts
)
def apply_contrast_enhancement(
self,
image: np.ndarray,
method: PreprocessingContrastEnum,
strength: float = 1.0
) -> np.ndarray:
"""
Apply contrast enhancement to image.
Args:
image: Input image (BGR)
method: Enhancement method (none, histogram, clahe)
strength: Enhancement strength (0.5-3.0, default 1.0)
- 0.5: Subtle enhancement
- 1.0: Normal enhancement
- 2.0: Strong enhancement
- 3.0: Maximum enhancement
Returns:
Enhanced image (BGR)
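
        Example (illustrative; img is a hypothetical BGR array):

            enhanced = service.apply_contrast_enhancement(
                img, PreprocessingContrastEnum.CLAHE, strength=2.0)
            # CLAHE runs on the LAB L channel with clipLimit = 2.0 * 2.0 = 4.0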
"""
if method == PreprocessingContrastEnum.NONE:
return image
# Convert to LAB color space for better enhancement
lab = cv2.cvtColor(image, cv2.COLOR_BGR2LAB)
l_channel, a_channel, b_channel = cv2.split(lab)
if method == PreprocessingContrastEnum.HISTOGRAM:
# Standard histogram equalization (strength affects blending)
l_equalized = cv2.equalizeHist(l_channel)
# Blend original with equalized based on strength
alpha = min(strength, 1.0) # Cap at 1.0 for histogram
l_enhanced = cv2.addWeighted(l_equalized, alpha, l_channel, 1 - alpha, 0)
elif method == PreprocessingContrastEnum.CLAHE:
# Contrast Limited Adaptive Histogram Equalization
# clipLimit controls contrast amplification: 2.0 is default, up to 6.0 for strong
clip_limit = self.clahe_clip_limit * strength # 2.0 * 1.0 = 2.0, 2.0 * 2.0 = 4.0
clahe = cv2.createCLAHE(
clipLimit=clip_limit,
tileGridSize=self.clahe_tile_grid_size
)
l_enhanced = clahe.apply(l_channel)
elif method == PreprocessingContrastEnum.DOCUMENT:
# Document-specific enhancement for scanned documents
# Step 1: Background normalization to remove uneven illumination
l_normalized = self._normalize_background(l_channel)
# Step 2: CLAHE with larger tiles optimized for documents
clip_limit = self.document_clahe_clip_limit * strength
clahe = cv2.createCLAHE(
clipLimit=clip_limit,
tileGridSize=self.document_clahe_tile_grid_size
)
l_enhanced = clahe.apply(l_normalized)
else:
return image
# Merge channels and convert back to BGR
enhanced_lab = cv2.merge([l_enhanced, a_channel, b_channel])
enhanced_bgr = cv2.cvtColor(enhanced_lab, cv2.COLOR_LAB2BGR)
return enhanced_bgr
def apply_sharpening(self, image: np.ndarray, strength: float = 1.0) -> np.ndarray:
"""
Apply sharpening to enhance edges and faint lines using unsharp mask.
Args:
image: Input image (BGR)
strength: Sharpening strength (0.5-2.0, default 1.0)
- 0.5: Subtle sharpening
- 1.0: Normal sharpening
- 1.5: Strong sharpening
- 2.0: Maximum sharpening
Returns:
Sharpened image (BGR)
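
        Example (illustrative; img is a hypothetical BGR array):

            sharp = service.apply_sharpening(img, strength=1.5)
            # computes img * 2.5 - blurred * 1.5, saturated to [0, 255]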
"""
# Use unsharp mask technique for better control
# 1. Create blurred version
# 2. Subtract from original (scaled by strength)
# 3. Add back to original
        # Fixed-sigma Gaussian blur; strength scales the unsharp mask weight below
        sigma = 1.0
blurred = cv2.GaussianBlur(image, (0, 0), sigma)
# Unsharp mask: original + (original - blurred) * strength
# This is equivalent to: original * (1 + strength) - blurred * strength
sharpened = cv2.addWeighted(image, 1.0 + strength, blurred, -strength, 0)
# Clip values to valid range
sharpened = np.clip(sharpened, 0, 255).astype(np.uint8)
return sharpened
def apply_binarization(self, image: np.ndarray) -> np.ndarray:
"""
Apply adaptive binarization for very low contrast documents.
Args:
image: Input image (BGR)
Returns:
Binarized image (BGR, but grayscale values)
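
        Example (illustrative; img is a hypothetical BGR array):

            binary = service.apply_binarization(img)
            # Gaussian-weighted adaptive threshold over 11x11 blocks (C=2),
            # returned as 3-channel BGR for pipeline consistency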
"""
# Convert to grayscale
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# Apply adaptive thresholding
binary = cv2.adaptiveThreshold(
gray,
255,
cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
cv2.THRESH_BINARY,
blockSize=11,
C=2
)
# Convert back to BGR for consistency
binary_bgr = cv2.cvtColor(binary, cv2.COLOR_GRAY2BGR)
return binary_bgr
def preprocess(
self,
image: Union[np.ndarray, Image.Image, str, Path],
mode: PreprocessingModeEnum = PreprocessingModeEnum.AUTO,
config: Optional[PreprocessingConfig] = None,
apply_scaling: bool = True
) -> PreprocessingResult:
"""
Preprocess image for layout detection.
        The preprocessing pipeline:
        1. Load image from path/PIL if needed
        2. Analyze image quality (on original image for accurate metrics)
        3. Scale the image into the optimal range for layout detection (bidirectional)
        4. Remove faint scan artifacts if enabled
        5. Apply contrast enhancement if needed
        6. Apply sharpening if needed
        7. Apply binarization if requested (not recommended)
IMPORTANT: When scaling is applied, all bounding boxes from layout detection
must be scaled back to original coordinates using ScalingInfo.scale_factor.
The original image should be used for element extraction (cropping).
Args:
image: Input image (numpy array, PIL Image, or path)
mode: Preprocessing mode (auto, manual, disabled)
config: Manual configuration (required if mode='manual')
            apply_scaling: Whether to apply automatic bidirectional scaling (default True)
Returns:
PreprocessingResult with preprocessed image and metadata
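
        Example (illustrative; the path is hypothetical):

            result = service.preprocess("scan.png", mode=PreprocessingModeEnum.AUTO)
            # result.image: preprocessed (and possibly scaled) BGR array
            # result.scaling_info.scale_factor: maps bboxes back to original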
"""
        # Load image if path provided
        if isinstance(image, (str, Path)):
            image_path = image
            image = cv2.imread(str(image_path))
            if image is None:
                raise ValueError(f"Failed to load image: {image_path}")
        elif isinstance(image, Image.Image):
            # Convert PIL to OpenCV format (BGR); convert("RGB") normalizes
            # RGBA/grayscale modes before the colorspace swap
            image = cv2.cvtColor(np.array(image.convert("RGB")), cv2.COLOR_RGB2BGR)
# Analyze quality on ORIGINAL image (before scaling) for accurate metrics
metrics = self.analyze_image_quality(image)
logger.debug(f"Image quality: contrast={metrics.contrast}, edge_strength={metrics.edge_strength}")
# Apply scaling for layout detection (even if preprocessing is disabled)
if apply_scaling:
scaled_image, scaling_info = self.scale_for_layout_detection(image)
else:
h, w = image.shape[:2]
scaled_image = image
scaling_info = ScalingInfo(
was_scaled=False,
scale_factor=1.0,
original_size=(w, h),
scaled_size=(w, h)
)
# Determine configuration
if mode == PreprocessingModeEnum.DISABLED:
# Even when preprocessing is disabled, we still return scaled image
# for better layout detection. Original image is preserved for cropping.
return PreprocessingResult(
image=scaled_image,
config_used=PreprocessingConfig(
contrast=PreprocessingContrastEnum.NONE,
sharpen=False,
binarize=False,
remove_scan_artifacts=False
),
quality_metrics=metrics,
was_processed=scaling_info.was_scaled, # True if scaling was applied
scaling_info=scaling_info
)
if mode == PreprocessingModeEnum.AUTO:
config = self.get_auto_config(metrics)
logger.debug(f"Auto config: {config}")
elif config is None:
# Manual mode but no config provided, use defaults
config = PreprocessingConfig()
# Apply preprocessing pipeline on SCALED image
processed = scaled_image.copy()
was_processed = scaling_info.was_scaled # Start with True if already scaled
# Step 0: Remove scan artifacts BEFORE any enhancement
# This prevents scanner light bar lines from being enhanced and misdetected as table borders
if getattr(config, 'remove_scan_artifacts', True): # Default True for backwards compatibility
processed = self.remove_scan_artifacts(processed)
was_processed = True
logger.debug("Applied scan artifact removal")
# Step 1: Contrast enhancement
if config.contrast != PreprocessingContrastEnum.NONE:
processed = self.apply_contrast_enhancement(
processed,
config.contrast,
strength=config.contrast_strength
)
was_processed = True
logger.debug(f"Applied contrast enhancement: {config.contrast} (strength={config.contrast_strength})")
# Step 2: Sharpening
if config.sharpen:
processed = self.apply_sharpening(processed, strength=config.sharpen_strength)
was_processed = True
logger.debug(f"Applied sharpening (strength={config.sharpen_strength})")
# Step 3: Binarization (last step, overwrites color)
if config.binarize:
processed = self.apply_binarization(processed)
was_processed = True
logger.debug("Applied binarization")
return PreprocessingResult(
image=processed,
config_used=config,
quality_metrics=metrics,
was_processed=was_processed,
scaling_info=scaling_info
)
def preprocess_to_pil(
self,
image: Union[np.ndarray, Image.Image, str, Path],
mode: PreprocessingModeEnum = PreprocessingModeEnum.AUTO,
config: Optional[PreprocessingConfig] = None,
apply_scaling: bool = True
) -> Tuple[Image.Image, PreprocessingResult]:
"""
Preprocess image and return as PIL Image.
Convenience method for integration with PP-Structure which accepts PIL images.
        IMPORTANT: When result.scaling_info.was_scaled is True, all bounding boxes
        from PP-Structure must be scaled back to original coordinates using:
            original_bbox = (x1 * scale_factor, y1 * scale_factor,
                             x2 * scale_factor, y2 * scale_factor)
        where scale_factor = result.scaling_info.scale_factor
Args:
image: Input image
mode: Preprocessing mode
config: Manual configuration
            apply_scaling: Whether to apply automatic bidirectional scaling (default True)
Returns:
Tuple of (PIL Image for PP-Structure, PreprocessingResult with scaling info)
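
        Example (illustrative; the path and detection step are hypothetical):

            pil_image, result = service.preprocess_to_pil("scan.png")
            # ... run PP-Structure on pil_image, collecting bboxes ...
            if result.scaling_info.was_scaled:
                bboxes = service.scale_bboxes_to_original(
                    bboxes, result.scaling_info.scale_factor)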
"""
result = self.preprocess(image, mode, config, apply_scaling=apply_scaling)
# Convert BGR to RGB for PIL
rgb_image = cv2.cvtColor(result.image, cv2.COLOR_BGR2RGB)
pil_image = Image.fromarray(rgb_image)
return pil_image, result
def save_preview(
self,
original: np.ndarray,
preprocessed: np.ndarray,
output_dir: Path,
prefix: str = "preview"
) -> Tuple[Path, Path]:
"""
Save original and preprocessed images for preview.
Args:
original: Original image (BGR)
preprocessed: Preprocessed image (BGR)
output_dir: Directory to save images
prefix: Filename prefix
Returns:
Tuple of (original_path, preprocessed_path)
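
        Example (illustrative; the directory and prefix are hypothetical):

            orig_path, pre_path = service.save_preview(
                original, result.image, Path("/tmp/previews"), prefix="page_1")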
"""
output_dir = Path(output_dir)
output_dir.mkdir(parents=True, exist_ok=True)
original_path = output_dir / f"{prefix}_original.png"
preprocessed_path = output_dir / f"{prefix}_preprocessed.png"
cv2.imwrite(str(original_path), original)
cv2.imwrite(str(preprocessed_path), preprocessed)
return original_path, preprocessed_path
# Singleton instance
_layout_preprocessing_service: Optional[LayoutPreprocessingService] = None
def get_layout_preprocessing_service() -> LayoutPreprocessingService:
"""Get or create the layout preprocessing service singleton."""
global _layout_preprocessing_service
if _layout_preprocessing_service is None:
_layout_preprocessing_service = LayoutPreprocessingService()
return _layout_preprocessing_service