Major improvements to preprocessing controls:

Backend:
- Add contrast_strength (0.5-3.0) and sharpen_strength (0.5-2.0) to PreprocessingConfig
- Auto-detection now calculates optimal strength based on image quality metrics:
  - Lower contrast → Higher contrast_strength
  - Lower edge strength → Higher sharpen_strength
- Disable binarization in auto mode (rarely beneficial)
- CLAHE clipLimit now scales with contrast_strength
- Sharpening uses unsharp mask with variable strength

Frontend:
- Add strength sliders for contrast and sharpen in manual mode
- Sliders show current value and strength level (輕微/正常/強/最強)
- Move binarize option to collapsible "進階選項" section
- Updated i18n translations for strength labels

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
427 lines
15 KiB
Python
427 lines
15 KiB
Python
"""
|
|
Tool_OCR - Layout Preprocessing Service
|
|
Image preprocessing to enhance layout detection for documents with faint lines/borders.
|
|
|
|
This service provides:
|
|
1. Image quality analysis (contrast, edge strength)
|
|
2. Contrast enhancement (histogram equalization, CLAHE)
|
|
3. Sharpening for faint lines
|
|
4. Optional binarization for very low contrast documents
|
|
|
|
IMPORTANT: Preprocessing only affects layout detection input.
|
|
Original images are preserved for element extraction.
|
|
"""
|
|
|
|
import logging
|
|
from pathlib import Path
|
|
from typing import Optional, Tuple, Union
|
|
from dataclasses import dataclass
|
|
|
|
import cv2
|
|
import numpy as np
|
|
from PIL import Image
|
|
|
|
from app.core.config import settings
|
|
from app.schemas.task import (
|
|
PreprocessingConfig,
|
|
PreprocessingContrastEnum,
|
|
PreprocessingModeEnum,
|
|
ImageQualityMetrics,
|
|
)
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
@dataclass
class PreprocessingResult:
    """
    Outcome of one preprocessing run.

    Bundles the resulting image with the configuration and the quality
    measurements that belong to it.
    """

    # Image after the pipeline ran (the input image when nothing was applied).
    image: np.ndarray
    # Configuration whose steps were applied to produce `image`.
    config_used: PreprocessingConfig
    # Contrast / edge-strength metrics measured on the input image.
    quality_metrics: ImageQualityMetrics
    # True when at least one pipeline step modified the image.
    was_processed: bool
|
|
|
|
|
|
class LayoutPreprocessingService:
    """
    Service for preprocessing images to improve layout detection.

    The preprocessing pipeline:
    1. Analyze image quality (contrast, edge strength)
    2. Apply contrast enhancement if configured (CLAHE or histogram)
    3. Apply sharpening if configured
    4. Apply binarization if configured (auto mode never enables it)

    Width and height of the image are preserved by every step. Steps that
    need colour data expand single-channel input to 3-channel BGR.
    """

    def __init__(self):
        """Read detection thresholds from settings and set the filter parameters."""
        # Thresholds below which auto mode enables the matching step.
        self.contrast_threshold = settings.layout_preprocessing_contrast_threshold
        self.edge_threshold = settings.layout_preprocessing_edge_threshold
        # Loaded from settings but not read by any method of this class:
        # auto mode always leaves binarization off.
        self.binarize_threshold = settings.layout_preprocessing_binarize_threshold

        # CLAHE parameters; the clip limit is the base value that
        # apply_contrast_enhancement() multiplies by the requested strength.
        self.clahe_clip_limit = 2.0
        self.clahe_tile_grid_size = (8, 8)

        # 3x3 sharpening kernel. apply_sharpening() uses an unsharp mask and
        # does not read this attribute; it is kept so the attribute remains
        # available to any external code that relies on it.
        self.sharpen_kernel = np.array([
            [0, -1, 0],
            [-1, 5, -1],
            [0, -1, 0]
        ], dtype=np.float32)

        logger.info(
            f"LayoutPreprocessingService initialized with thresholds: "
            f"contrast={self.contrast_threshold}, edge={self.edge_threshold}, "
            f"binarize={self.binarize_threshold}"
        )

    @staticmethod
    def _ensure_bgr(image: np.ndarray) -> np.ndarray:
        """Return a 3-channel BGR version of a single-channel image; pass others through."""
        if image.ndim == 2 or (image.ndim == 3 and image.shape[2] == 1):
            return cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
        return image

    def analyze_image_quality(self, image: np.ndarray) -> ImageQualityMetrics:
        """
        Analyze image quality to determine preprocessing needs.

        Args:
            image: Input image (BGR or grayscale)

        Returns:
            ImageQualityMetrics with contrast and edge_strength, both rounded
            to two decimals
        """
        # Convert to grayscale if needed
        if len(image.shape) == 3:
            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        else:
            gray = image

        # Contrast is measured as the standard deviation of pixel values
        contrast = float(np.std(gray))

        # Edge strength is the mean Sobel gradient magnitude
        sobel_x = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=3)
        sobel_y = cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=3)
        edge_strength = float(np.mean(np.sqrt(sobel_x**2 + sobel_y**2)))

        return ImageQualityMetrics(
            contrast=round(contrast, 2),
            edge_strength=round(edge_strength, 2)
        )

    def get_auto_config(self, metrics: ImageQualityMetrics) -> PreprocessingConfig:
        """
        Determine optimal preprocessing config based on image quality.

        Auto-detection calculates appropriate strength values:
        - Lower image contrast → Higher contrast_strength
        - Lower edge strength → Higher sharpen_strength
        - Binarization is always disabled (rarely beneficial)

        Args:
            metrics: Image quality metrics from analyze_image_quality()

        Returns:
            PreprocessingConfig with recommended settings
        """
        # Determine contrast enhancement and strength
        if metrics.contrast < self.contrast_threshold:
            contrast = PreprocessingContrastEnum.CLAHE
            # Strength grows linearly with the relative shortfall below the
            # threshold. Example with a threshold of 40:
            # contrast=20 → strength=2.0, contrast=30 → strength=1.5
            contrast_ratio = (self.contrast_threshold - metrics.contrast) / self.contrast_threshold
            contrast_strength = min(1.0 + contrast_ratio * 2.0, 3.0)  # Capped at 3.0
        else:
            contrast = PreprocessingContrastEnum.NONE
            contrast_strength = 1.0

        # Determine sharpening and strength
        if metrics.edge_strength < self.edge_threshold:
            sharpen = True
            # Same scheme as above. Example with a threshold of 15:
            # edge=5 → strength=1.67, edge=10 → strength=1.33
            edge_ratio = (self.edge_threshold - metrics.edge_strength) / self.edge_threshold
            sharpen_strength = min(1.0 + edge_ratio * 1.0, 2.0)  # Capped at 2.0
        else:
            sharpen = False
            sharpen_strength = 1.0

        # Binarization rarely helps and often hurts, so auto mode never turns
        # it on; it can only be requested through a manual config.
        binarize = False

        logger.debug(
            f"Auto config: contrast={contrast} strength={contrast_strength:.2f}, "
            f"sharpen={sharpen} strength={sharpen_strength:.2f}, binarize={binarize}"
        )

        return PreprocessingConfig(
            contrast=contrast,
            contrast_strength=round(contrast_strength, 2),
            sharpen=sharpen,
            sharpen_strength=round(sharpen_strength, 2),
            binarize=binarize
        )

    def apply_contrast_enhancement(
        self,
        image: np.ndarray,
        method: PreprocessingContrastEnum,
        strength: float = 1.0
    ) -> np.ndarray:
        """
        Apply contrast enhancement to image.

        Args:
            image: Input image (BGR; single-channel input is expanded to BGR)
            method: Enhancement method (none, histogram, clahe)
            strength: Enhancement strength (0.5-3.0, default 1.0)
                - 0.5: Subtle enhancement
                - 1.0: Normal enhancement
                - 2.0: Strong enhancement
                - 3.0: Maximum enhancement

        Returns:
            Enhanced image (BGR). The input is returned unchanged when the
            method is NONE or not one of the handled values.
        """
        if method not in (
            PreprocessingContrastEnum.HISTOGRAM,
            PreprocessingContrastEnum.CLAHE,
        ):
            return image

        # The LAB conversion below needs colour input.
        image = self._ensure_bgr(image)

        # Work on the lightness channel only so colours are not shifted
        lab = cv2.cvtColor(image, cv2.COLOR_BGR2LAB)
        l_channel, a_channel, b_channel = cv2.split(lab)

        if method == PreprocessingContrastEnum.HISTOGRAM:
            # Standard histogram equalization, blended with the original.
            l_equalized = cv2.equalizeHist(l_channel)
            # Strengths above 1.0 cannot go beyond full equalization
            alpha = min(strength, 1.0)
            l_enhanced = cv2.addWeighted(l_equalized, alpha, l_channel, 1 - alpha, 0)
        else:
            # Contrast Limited Adaptive Histogram Equalization.
            # clipLimit scales with strength: 2.0 * 1.0 = 2.0, 2.0 * 2.0 = 4.0
            clip_limit = self.clahe_clip_limit * strength
            clahe = cv2.createCLAHE(
                clipLimit=clip_limit,
                tileGridSize=self.clahe_tile_grid_size
            )
            l_enhanced = clahe.apply(l_channel)

        # Merge channels and convert back to BGR
        enhanced_lab = cv2.merge([l_enhanced, a_channel, b_channel])
        enhanced_bgr = cv2.cvtColor(enhanced_lab, cv2.COLOR_LAB2BGR)

        return enhanced_bgr

    def apply_sharpening(self, image: np.ndarray, strength: float = 1.0) -> np.ndarray:
        """
        Apply sharpening to enhance edges and faint lines using unsharp mask.

        Args:
            image: Input image (BGR)
            strength: Sharpening strength (0.5-2.0, default 1.0)
                - 0.5: Subtle sharpening
                - 1.0: Normal sharpening
                - 1.5: Strong sharpening
                - 2.0: Maximum sharpening

        Returns:
            Sharpened image (BGR, uint8)
        """
        # Unsharp mask:
        # 1. Create blurred version
        # 2. Subtract it from the original (scaled by strength)
        # 3. Add the difference back to the original

        # The blur radius is fixed; only the blend weight depends on strength.
        sigma = 1.0
        blurred = cv2.GaussianBlur(image, (0, 0), sigma)

        # original + (original - blurred) * strength
        # == original * (1 + strength) - blurred * strength
        sharpened = cv2.addWeighted(image, 1.0 + strength, blurred, -strength, 0)

        # Clip values to valid range
        sharpened = np.clip(sharpened, 0, 255).astype(np.uint8)

        return sharpened

    def apply_binarization(self, image: np.ndarray) -> np.ndarray:
        """
        Apply adaptive binarization for very low contrast documents.

        Args:
            image: Input image (BGR; single-channel input is also accepted)

        Returns:
            Binarized image (BGR, but grayscale values)
        """
        # Convert to grayscale (via BGR so single-channel input works too)
        gray = cv2.cvtColor(self._ensure_bgr(image), cv2.COLOR_BGR2GRAY)

        # Apply adaptive thresholding
        binary = cv2.adaptiveThreshold(
            gray,
            255,
            cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY,
            blockSize=11,
            C=2
        )

        # Convert back to BGR for consistency
        binary_bgr = cv2.cvtColor(binary, cv2.COLOR_GRAY2BGR)

        return binary_bgr

    def preprocess(
        self,
        image: Union[np.ndarray, Image.Image, str, Path],
        mode: PreprocessingModeEnum = PreprocessingModeEnum.AUTO,
        config: Optional[PreprocessingConfig] = None
    ) -> PreprocessingResult:
        """
        Preprocess image for layout detection.

        Args:
            image: Input image (numpy array, PIL Image, or path)
            mode: Preprocessing mode (auto, manual, disabled)
            config: Manual configuration; ignored in auto mode, and replaced
                by a default PreprocessingConfig() when omitted in manual mode

        Returns:
            PreprocessingResult with preprocessed image and metadata

        Raises:
            ValueError: If a path is given and the image cannot be loaded
        """
        # Load image if path provided
        if isinstance(image, (str, Path)):
            # Keep the path in its own variable: imread returns None on
            # failure and the error message must still name the file.
            image_path = str(image)
            image = cv2.imread(image_path)
            if image is None:
                raise ValueError(f"Failed to load image: {image_path}")
        elif isinstance(image, Image.Image):
            # Normalize to RGB first so grayscale / palette / CMYK images
            # convert correctly, then switch to OpenCV channel order (BGR)
            image = cv2.cvtColor(np.array(image.convert("RGB")), cv2.COLOR_RGB2BGR)

        # Analyze quality
        metrics = self.analyze_image_quality(image)
        logger.debug(f"Image quality: contrast={metrics.contrast}, edge_strength={metrics.edge_strength}")

        # Disabled mode: hand the image back untouched
        if mode == PreprocessingModeEnum.DISABLED:
            return PreprocessingResult(
                image=image,
                config_used=PreprocessingConfig(
                    contrast=PreprocessingContrastEnum.NONE,
                    sharpen=False,
                    binarize=False
                ),
                quality_metrics=metrics,
                was_processed=False
            )

        if mode == PreprocessingModeEnum.AUTO:
            config = self.get_auto_config(metrics)
            logger.debug(f"Auto config: {config}")
        elif config is None:
            # Manual mode but no config provided, use defaults
            config = PreprocessingConfig()

        # Apply preprocessing pipeline on a copy so the caller's array is kept
        processed = image.copy()
        was_processed = False

        # Step 1: Contrast enhancement
        if config.contrast != PreprocessingContrastEnum.NONE:
            processed = self.apply_contrast_enhancement(
                processed,
                config.contrast,
                strength=config.contrast_strength
            )
            was_processed = True
            logger.debug(f"Applied contrast enhancement: {config.contrast} (strength={config.contrast_strength})")

        # Step 2: Sharpening
        if config.sharpen:
            processed = self.apply_sharpening(processed, strength=config.sharpen_strength)
            was_processed = True
            logger.debug(f"Applied sharpening (strength={config.sharpen_strength})")

        # Step 3: Binarization (last step, overwrites color)
        if config.binarize:
            processed = self.apply_binarization(processed)
            was_processed = True
            logger.debug("Applied binarization")

        return PreprocessingResult(
            image=processed,
            config_used=config,
            quality_metrics=metrics,
            was_processed=was_processed
        )

    def preprocess_to_pil(
        self,
        image: Union[np.ndarray, Image.Image, str, Path],
        mode: PreprocessingModeEnum = PreprocessingModeEnum.AUTO,
        config: Optional[PreprocessingConfig] = None
    ) -> Tuple[Image.Image, PreprocessingResult]:
        """
        Preprocess image and return as PIL Image.

        Convenience method for integration with PP-Structure which accepts PIL images.

        Args:
            image: Input image
            mode: Preprocessing mode
            config: Manual configuration

        Returns:
            Tuple of (PIL Image, PreprocessingResult)
        """
        result = self.preprocess(image, mode, config)

        # Convert BGR to RGB for PIL; a single-channel result (grayscale
        # array passed through untouched) is expanded to colour first.
        rgb_image = cv2.cvtColor(self._ensure_bgr(result.image), cv2.COLOR_BGR2RGB)
        pil_image = Image.fromarray(rgb_image)

        return pil_image, result

    def save_preview(
        self,
        original: np.ndarray,
        preprocessed: np.ndarray,
        output_dir: Path,
        prefix: str = "preview"
    ) -> Tuple[Path, Path]:
        """
        Save original and preprocessed images for preview.

        The output directory is created if it does not exist. A failed write
        is logged as a warning; the paths are returned either way.

        Args:
            original: Original image (BGR)
            preprocessed: Preprocessed image (BGR)
            output_dir: Directory to save images
            prefix: Filename prefix

        Returns:
            Tuple of (original_path, preprocessed_path)
        """
        output_dir = Path(output_dir)
        output_dir.mkdir(parents=True, exist_ok=True)

        original_path = output_dir / f"{prefix}_original.png"
        preprocessed_path = output_dir / f"{prefix}_preprocessed.png"

        # cv2.imwrite reports failure through its return value rather than an
        # exception, so check it instead of silently returning a missing file.
        for target_path, pixels in (
            (original_path, original),
            (preprocessed_path, preprocessed),
        ):
            if not cv2.imwrite(str(target_path), pixels):
                logger.warning(f"Failed to write preview image: {target_path}")

        return original_path, preprocessed_path
|
|
|
|
|
|
# Module-wide cached service; stays None until the accessor is first called.
_layout_preprocessing_service: Optional[LayoutPreprocessingService] = None


def get_layout_preprocessing_service() -> LayoutPreprocessingService:
    """
    Return the shared LayoutPreprocessingService.

    The instance is built on the first call and reused by every later call.
    """
    global _layout_preprocessing_service

    service = _layout_preprocessing_service
    if service is None:
        service = LayoutPreprocessingService()
        _layout_preprocessing_service = service
    return service
|