feat: add contrast/sharpen strength controls, disable binarization in auto mode

Major improvements to preprocessing controls:

Backend:
- Add contrast_strength (0.5-3.0) and sharpen_strength (0.5-2.0) to PreprocessingConfig
- Auto-detection now calculates optimal strength based on image quality metrics:
  - Lower contrast → Higher contrast_strength
  - Lower edge strength → Higher sharpen_strength
- Disable binarization in auto mode (rarely beneficial)
- CLAHE clipLimit now scales with contrast_strength
- Sharpening uses unsharp mask with variable strength

Frontend:
- Add strength sliders for contrast and sharpen in manual mode
- Sliders show current value and strength level (輕微/正常/強/最強)
- Move binarize option to collapsible "進階選項" section
- Updated i18n translations for strength labels

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
egg
2025-11-27 17:18:44 +08:00
parent f6d2957592
commit 5982fff71c
7 changed files with 212 additions and 53 deletions

View File

@@ -116,7 +116,9 @@ def process_task_ocr(
from app.schemas.task import PreprocessingContrastEnum from app.schemas.task import PreprocessingContrastEnum
preprocess_config_obj = PreprocessingConfig( preprocess_config_obj = PreprocessingConfig(
contrast=PreprocessingContrastEnum(preprocessing_config.get("contrast", "clahe")), contrast=PreprocessingContrastEnum(preprocessing_config.get("contrast", "clahe")),
contrast_strength=preprocessing_config.get("contrast_strength", 1.0),
sharpen=preprocessing_config.get("sharpen", True), sharpen=preprocessing_config.get("sharpen", True),
sharpen_strength=preprocessing_config.get("sharpen_strength", 1.0),
binarize=preprocessing_config.get("binarize", False) binarize=preprocessing_config.get("binarize", False)
) )
@@ -772,7 +774,9 @@ async def start_task(
if options.preprocessing_config: if options.preprocessing_config:
preprocessing_config = { preprocessing_config = {
"contrast": options.preprocessing_config.contrast.value, "contrast": options.preprocessing_config.contrast.value,
"contrast_strength": options.preprocessing_config.contrast_strength,
"sharpen": options.preprocessing_config.sharpen, "sharpen": options.preprocessing_config.sharpen,
"sharpen_strength": options.preprocessing_config.sharpen_strength,
"binarize": options.preprocessing_config.binarize "binarize": options.preprocessing_config.binarize
} }
logger.info(f"Preprocessing: mode={preprocessing_mode}, config={preprocessing_config}") logger.info(f"Preprocessing: mode={preprocessing_mode}, config={preprocessing_config}")

View File

@@ -72,13 +72,25 @@ class PreprocessingConfig(BaseModel):
default=PreprocessingContrastEnum.CLAHE, default=PreprocessingContrastEnum.CLAHE,
description="Contrast enhancement method" description="Contrast enhancement method"
) )
contrast_strength: float = Field(
default=1.0,
ge=0.5,
le=3.0,
description="Contrast enhancement strength (0.5=subtle, 1.0=normal, 2.0=strong, 3.0=maximum)"
)
sharpen: bool = Field( sharpen: bool = Field(
default=True, default=True,
description="Enable sharpening for faint lines" description="Enable sharpening for faint lines"
) )
sharpen_strength: float = Field(
default=1.0,
ge=0.5,
le=2.0,
description="Sharpening strength (0.5=subtle, 1.0=normal, 1.5=strong, 2.0=maximum)"
)
binarize: bool = Field( binarize: bool = Field(
default=False, default=False,
description="Enable binarization (aggressive, for very low contrast)" description="Enable binarization (aggressive, for very low contrast). Not recommended for most documents."
) )

View File

@@ -110,34 +110,61 @@ class LayoutPreprocessingService:
""" """
Determine optimal preprocessing config based on image quality. Determine optimal preprocessing config based on image quality.
Auto-detection calculates appropriate strength values:
- Lower image contrast → Higher contrast_strength
- Lower edge strength → Higher sharpen_strength
- Binarization is disabled by default (rarely beneficial)
Args: Args:
metrics: Image quality metrics from analyze_image_quality() metrics: Image quality metrics from analyze_image_quality()
Returns: Returns:
PreprocessingConfig with recommended settings PreprocessingConfig with recommended settings
""" """
# Determine contrast enhancement # Determine contrast enhancement and strength
if metrics.contrast < self.contrast_threshold: if metrics.contrast < self.contrast_threshold:
contrast = PreprocessingContrastEnum.CLAHE contrast = PreprocessingContrastEnum.CLAHE
# Calculate strength based on how far below threshold
# contrast=40 threshold, contrast=20 → strength=2.0, contrast=30 → strength=1.5
contrast_ratio = (self.contrast_threshold - metrics.contrast) / self.contrast_threshold
contrast_strength = min(1.0 + contrast_ratio * 2.0, 3.0) # Range: 1.0 to 3.0
else: else:
contrast = PreprocessingContrastEnum.NONE contrast = PreprocessingContrastEnum.NONE
contrast_strength = 1.0
# Determine sharpening # Determine sharpening and strength
sharpen = metrics.edge_strength < self.edge_threshold if metrics.edge_strength < self.edge_threshold:
sharpen = True
# Calculate strength based on how far below threshold
# edge=15 threshold, edge=5 → strength=1.67, edge=10 → strength=1.33
edge_ratio = (self.edge_threshold - metrics.edge_strength) / self.edge_threshold
sharpen_strength = min(1.0 + edge_ratio * 1.0, 2.0) # Range: 1.0 to 2.0
else:
sharpen = False
sharpen_strength = 1.0
# Determine binarization (only for very low contrast) # Binarization is disabled by default - it rarely helps and often hurts
binarize = metrics.contrast < self.binarize_threshold # Auto mode never enables it, not even for extremely low contrast; it can still be turned on manually
binarize = False # Disabled by default
logger.debug(
f"Auto config: contrast={contrast} strength={contrast_strength:.2f}, "
f"sharpen={sharpen} strength={sharpen_strength:.2f}, binarize={binarize}"
)
return PreprocessingConfig( return PreprocessingConfig(
contrast=contrast, contrast=contrast,
contrast_strength=round(contrast_strength, 2),
sharpen=sharpen, sharpen=sharpen,
sharpen_strength=round(sharpen_strength, 2),
binarize=binarize binarize=binarize
) )
def apply_contrast_enhancement( def apply_contrast_enhancement(
self, self,
image: np.ndarray, image: np.ndarray,
method: PreprocessingContrastEnum method: PreprocessingContrastEnum,
strength: float = 1.0
) -> np.ndarray: ) -> np.ndarray:
""" """
Apply contrast enhancement to image. Apply contrast enhancement to image.
@@ -145,6 +172,11 @@ class LayoutPreprocessingService:
Args: Args:
image: Input image (BGR) image: Input image (BGR)
method: Enhancement method (none, histogram, clahe) method: Enhancement method (none, histogram, clahe)
strength: Enhancement strength (0.5-3.0, default 1.0)
- 0.5: Subtle enhancement
- 1.0: Normal enhancement
- 2.0: Strong enhancement
- 3.0: Maximum enhancement
Returns: Returns:
Enhanced image (BGR) Enhanced image (BGR)
@@ -157,12 +189,17 @@ class LayoutPreprocessingService:
l_channel, a_channel, b_channel = cv2.split(lab) l_channel, a_channel, b_channel = cv2.split(lab)
if method == PreprocessingContrastEnum.HISTOGRAM: if method == PreprocessingContrastEnum.HISTOGRAM:
# Standard histogram equalization # Standard histogram equalization (strength affects blending)
l_enhanced = cv2.equalizeHist(l_channel) l_equalized = cv2.equalizeHist(l_channel)
# Blend original with equalized based on strength
alpha = min(strength, 1.0) # Cap at 1.0 for histogram
l_enhanced = cv2.addWeighted(l_equalized, alpha, l_channel, 1 - alpha, 0)
elif method == PreprocessingContrastEnum.CLAHE: elif method == PreprocessingContrastEnum.CLAHE:
# Contrast Limited Adaptive Histogram Equalization # Contrast Limited Adaptive Histogram Equalization
# clipLimit controls contrast amplification: 2.0 is default, up to 6.0 for strong
clip_limit = self.clahe_clip_limit * strength # 2.0 * 1.0 = 2.0, 2.0 * 2.0 = 4.0
clahe = cv2.createCLAHE( clahe = cv2.createCLAHE(
clipLimit=self.clahe_clip_limit, clipLimit=clip_limit,
tileGridSize=self.clahe_tile_grid_size tileGridSize=self.clahe_tile_grid_size
) )
l_enhanced = clahe.apply(l_channel) l_enhanced = clahe.apply(l_channel)
@@ -175,18 +212,33 @@ class LayoutPreprocessingService:
return enhanced_bgr return enhanced_bgr
def apply_sharpening(self, image: np.ndarray) -> np.ndarray: def apply_sharpening(self, image: np.ndarray, strength: float = 1.0) -> np.ndarray:
""" """
Apply sharpening to enhance edges and faint lines. Apply sharpening to enhance edges and faint lines using unsharp mask.
Args: Args:
image: Input image (BGR) image: Input image (BGR)
strength: Sharpening strength (0.5-2.0, default 1.0)
- 0.5: Subtle sharpening
- 1.0: Normal sharpening
- 1.5: Strong sharpening
- 2.0: Maximum sharpening
Returns: Returns:
Sharpened image (BGR) Sharpened image (BGR)
""" """
# Apply unsharp mask style sharpening # Use unsharp mask technique for better control
sharpened = cv2.filter2D(image, -1, self.sharpen_kernel) # 1. Create blurred version
# 2. Subtract from original (scaled by strength)
# 3. Add back to original
# Gaussian blur with a fixed sigma; strength only scales the mask weights below
sigma = 1.0
blurred = cv2.GaussianBlur(image, (0, 0), sigma)
# Unsharp mask: original + (original - blurred) * strength
# This is equivalent to: original * (1 + strength) - blurred * strength
sharpened = cv2.addWeighted(image, 1.0 + strength, blurred, -strength, 0)
# Clip values to valid range # Clip values to valid range
sharpened = np.clip(sharpened, 0, 255).astype(np.uint8) sharpened = np.clip(sharpened, 0, 255).astype(np.uint8)
@@ -277,15 +329,19 @@ class LayoutPreprocessingService:
# Step 1: Contrast enhancement # Step 1: Contrast enhancement
if config.contrast != PreprocessingContrastEnum.NONE: if config.contrast != PreprocessingContrastEnum.NONE:
processed = self.apply_contrast_enhancement(processed, config.contrast) processed = self.apply_contrast_enhancement(
processed,
config.contrast,
strength=config.contrast_strength
)
was_processed = True was_processed = True
logger.debug(f"Applied contrast enhancement: {config.contrast}") logger.debug(f"Applied contrast enhancement: {config.contrast} (strength={config.contrast_strength})")
# Step 2: Sharpening # Step 2: Sharpening
if config.sharpen: if config.sharpen:
processed = self.apply_sharpening(processed) processed = self.apply_sharpening(processed, strength=config.sharpen_strength)
was_processed = True was_processed = True
logger.debug("Applied sharpening") logger.debug(f"Applied sharpening (strength={config.sharpen_strength})")
# Step 3: Binarization (last step, overwrites color) # Step 3: Binarization (last step, overwrites color)
if config.binarize: if config.binarize:

View File

@@ -41,6 +41,20 @@ export default function PreprocessingSettings({
onConfigChange({ ...config, [field]: value }) onConfigChange({ ...config, [field]: value })
} }
/**
 * Map a numeric strength value to its translated level label.
 *
 * The "subtle" (<= 0.75) and "normal" (<= 1.25) bands are the same for both
 * controls; only the upper bound of the "strong" band differs: 2.0 for
 * contrast and 1.5 for sharpen. Anything above that bound is "maximum".
 */
const getStrengthLabel = (value: number, type: 'contrast' | 'sharpen') => {
  // Upper bound of the "strong" band for the selected control.
  const strongCeiling = type === 'contrast' ? 2.0 : 1.5

  const labelKey =
    value <= 0.75
      ? 'processing.preprocessing.strength.subtle'
      : value <= 1.25
        ? 'processing.preprocessing.strength.normal'
        : value <= strongCeiling
          ? 'processing.preprocessing.strength.strong'
          : 'processing.preprocessing.strength.maximum'

  return t(labelKey)
}
return ( return (
<div className={cn('border rounded-lg p-4 bg-white', className)}> <div className={cn('border rounded-lg p-4 bg-white', className)}>
{/* Header */} {/* Header */}
@@ -131,14 +145,14 @@ export default function PreprocessingSettings({
{/* Manual Configuration (shown only when mode is 'manual') */} {/* Manual Configuration (shown only when mode is 'manual') */}
{mode === 'manual' && ( {mode === 'manual' && (
<div className="mt-4 p-3 bg-gray-50 rounded-lg border border-gray-200 space-y-3"> <div className="mt-4 p-3 bg-gray-50 rounded-lg border border-gray-200 space-y-4">
<h4 className="text-sm font-medium text-gray-700"> <h4 className="text-sm font-medium text-gray-700">
{t('processing.preprocessing.manualConfig')} {t('processing.preprocessing.manualConfig')}
</h4> </h4>
{/* Contrast Enhancement */} {/* Contrast Enhancement */}
<div> <div className="space-y-2">
<label className="block text-xs font-medium text-gray-600 mb-1.5"> <label className="block text-xs font-medium text-gray-600">
{t('processing.preprocessing.contrast.label')} {t('processing.preprocessing.contrast.label')}
</label> </label>
<select <select
@@ -157,38 +171,99 @@ export default function PreprocessingSettings({
</option> </option>
))} ))}
</select> </select>
{/* Contrast Strength Slider */}
{config.contrast !== 'none' && (
<div className="pt-1">
<div className="flex justify-between text-xs text-gray-500 mb-1">
<span>{t('processing.preprocessing.strength.label')}</span>
<span className="font-medium text-gray-700">
{config.contrast_strength.toFixed(1)} ({getStrengthLabel(config.contrast_strength, 'contrast')})
</span>
</div>
<input
type="range"
min="0.5"
max="3.0"
step="0.1"
value={config.contrast_strength}
onChange={(e) => handleConfigChange('contrast_strength', parseFloat(e.target.value))}
disabled={disabled}
className="w-full h-2 bg-gray-200 rounded-lg appearance-none cursor-pointer accent-blue-600"
/>
<div className="flex justify-between text-xs text-gray-400 mt-0.5">
<span>0.5</span>
<span>3.0</span>
</div>
</div>
)}
</div> </div>
{/* Sharpen Toggle */} {/* Sharpen Section */}
<label className="flex items-center gap-2 cursor-pointer"> <div className="space-y-2">
<input <label className="flex items-center gap-2 cursor-pointer">
type="checkbox" <input
checked={config.sharpen} type="checkbox"
onChange={(e) => handleConfigChange('sharpen', e.target.checked)} checked={config.sharpen}
disabled={disabled} onChange={(e) => handleConfigChange('sharpen', e.target.checked)}
className="w-4 h-4 rounded border-gray-300 text-blue-600 focus:ring-blue-500" disabled={disabled}
/> className="w-4 h-4 rounded border-gray-300 text-blue-600 focus:ring-blue-500"
<span className="text-sm text-gray-700"> />
{t('processing.preprocessing.sharpen')} <span className="text-sm text-gray-700">
</span> {t('processing.preprocessing.sharpen')}
</label> </span>
</label>
{/* Binarize Toggle */} {/* Sharpen Strength Slider */}
<label className="flex items-center gap-2 cursor-pointer"> {config.sharpen && (
<input <div className="pl-6 pt-1">
type="checkbox" <div className="flex justify-between text-xs text-gray-500 mb-1">
checked={config.binarize} <span>{t('processing.preprocessing.strength.label')}</span>
onChange={(e) => handleConfigChange('binarize', e.target.checked)} <span className="font-medium text-gray-700">
disabled={disabled} {config.sharpen_strength.toFixed(1)} ({getStrengthLabel(config.sharpen_strength, 'sharpen')})
className="w-4 h-4 rounded border-gray-300 text-blue-600 focus:ring-blue-500" </span>
/> </div>
<span className="text-sm text-gray-700"> <input
{t('processing.preprocessing.binarize')} type="range"
</span> min="0.5"
<span className="text-xs text-orange-600"> max="2.0"
({t('processing.preprocessing.binarizeWarning')}) step="0.1"
</span> value={config.sharpen_strength}
</label> onChange={(e) => handleConfigChange('sharpen_strength', parseFloat(e.target.value))}
disabled={disabled}
className="w-full h-2 bg-gray-200 rounded-lg appearance-none cursor-pointer accent-blue-600"
/>
<div className="flex justify-between text-xs text-gray-400 mt-0.5">
<span>0.5</span>
<span>2.0</span>
</div>
</div>
)}
</div>
{/* Binarize Toggle - Hidden by default, shown only in advanced mode */}
<details className="pt-2">
<summary className="text-xs text-gray-500 cursor-pointer hover:text-gray-700">
{t('processing.preprocessing.advanced')}
</summary>
<div className="mt-2 pl-2 border-l-2 border-gray-200">
<label className="flex items-center gap-2 cursor-pointer">
<input
type="checkbox"
checked={config.binarize}
onChange={(e) => handleConfigChange('binarize', e.target.checked)}
disabled={disabled}
className="w-4 h-4 rounded border-gray-300 text-blue-600 focus:ring-blue-500"
/>
<span className="text-sm text-gray-700">
{t('processing.preprocessing.binarize')}
</span>
<span className="text-xs text-orange-600">
({t('processing.preprocessing.binarizeWarning')})
</span>
</label>
</div>
</details>
</div> </div>
)} )}

View File

@@ -68,9 +68,9 @@
"title": "影像前處理", "title": "影像前處理",
"mode": { "mode": {
"auto": "自動模式", "auto": "自動模式",
"autoDesc": "系統自動分析影像品質,決定最佳的前處理方式", "autoDesc": "系統自動分析影像品質,決定最佳的前處理方式和強度",
"manual": "手動模式", "manual": "手動模式",
"manualDesc": "手動選擇前處理選項,完全控制處理流程", "manualDesc": "手動選擇前處理選項和強度,完全控制處理流程",
"disabled": "停用前處理", "disabled": "停用前處理",
"disabledDesc": "不進行任何前處理,直接使用原始影像" "disabledDesc": "不進行任何前處理,直接使用原始影像"
}, },
@@ -84,8 +84,16 @@
"clahe": "CLAHE 自適應均衡化" "clahe": "CLAHE 自適應均衡化"
}, },
"sharpen": "邊緣銳化", "sharpen": "邊緣銳化",
"strength": {
"label": "強度",
"subtle": "輕微",
"normal": "正常",
"strong": "強",
"maximum": "最強"
},
"advanced": "進階選項",
"binarize": "二值化處理", "binarize": "二值化處理",
"binarizeWarning": "可能影響顏色資訊", "binarizeWarning": "不建議使用",
"note": "前處理僅影響版面偵測階段,用於改善表格和文字區塊的識別。原始影像仍用於最終的 OCR 文字提取,確保最佳識別品質。" "note": "前處理僅影響版面偵測階段,用於改善表格和文字區塊的識別。原始影像仍用於最終的 OCR 文字提取,確保最佳識別品質。"
} }
}, },

View File

@@ -39,7 +39,9 @@ export default function ProcessingPage() {
const [preprocessingMode, setPreprocessingMode] = useState<PreprocessingMode>('auto') const [preprocessingMode, setPreprocessingMode] = useState<PreprocessingMode>('auto')
const [preprocessingConfig, setPreprocessingConfig] = useState<PreprocessingConfig>({ const [preprocessingConfig, setPreprocessingConfig] = useState<PreprocessingConfig>({
contrast: 'clahe', contrast: 'clahe',
contrast_strength: 1.0,
sharpen: true, sharpen: true,
sharpen_strength: 1.0,
binarize: false, binarize: false,
}) })

View File

@@ -100,7 +100,9 @@ export type PreprocessingContrast = 'none' | 'histogram' | 'clahe'
*/ */
export interface PreprocessingConfig { export interface PreprocessingConfig {
contrast: PreprocessingContrast contrast: PreprocessingContrast
contrast_strength: number // 0.5-3.0, default 1.0
sharpen: boolean sharpen: boolean
sharpen_strength: number // 0.5-2.0, default 1.0
binarize: boolean binarize: boolean
} }