diff --git a/backend/app/routers/tasks.py b/backend/app/routers/tasks.py index 539df1e..33c83c1 100644 --- a/backend/app/routers/tasks.py +++ b/backend/app/routers/tasks.py @@ -116,7 +116,9 @@ def process_task_ocr( from app.schemas.task import PreprocessingContrastEnum preprocess_config_obj = PreprocessingConfig( contrast=PreprocessingContrastEnum(preprocessing_config.get("contrast", "clahe")), + contrast_strength=preprocessing_config.get("contrast_strength", 1.0), sharpen=preprocessing_config.get("sharpen", True), + sharpen_strength=preprocessing_config.get("sharpen_strength", 1.0), binarize=preprocessing_config.get("binarize", False) ) @@ -772,7 +774,9 @@ async def start_task( if options.preprocessing_config: preprocessing_config = { "contrast": options.preprocessing_config.contrast.value, + "contrast_strength": options.preprocessing_config.contrast_strength, "sharpen": options.preprocessing_config.sharpen, + "sharpen_strength": options.preprocessing_config.sharpen_strength, "binarize": options.preprocessing_config.binarize } logger.info(f"Preprocessing: mode={preprocessing_mode}, config={preprocessing_config}") diff --git a/backend/app/schemas/task.py b/backend/app/schemas/task.py index f50500a..16a0cf4 100644 --- a/backend/app/schemas/task.py +++ b/backend/app/schemas/task.py @@ -72,13 +72,25 @@ class PreprocessingConfig(BaseModel): default=PreprocessingContrastEnum.CLAHE, description="Contrast enhancement method" ) + contrast_strength: float = Field( + default=1.0, + ge=0.5, + le=3.0, + description="Contrast enhancement strength (0.5=subtle, 1.0=normal, 2.0=strong, 3.0=maximum)" + ) sharpen: bool = Field( default=True, description="Enable sharpening for faint lines" ) + sharpen_strength: float = Field( + default=1.0, + ge=0.5, + le=2.0, + description="Sharpening strength (0.5=subtle, 1.0=normal, 1.5=strong, 2.0=maximum)" + ) binarize: bool = Field( default=False, - description="Enable binarization (aggressive, for very low contrast)" + description="Enable 
binarization (aggressive, for very low contrast). Not recommended for most documents." ) diff --git a/backend/app/services/layout_preprocessing_service.py b/backend/app/services/layout_preprocessing_service.py index 05a918a..6deccaf 100644 --- a/backend/app/services/layout_preprocessing_service.py +++ b/backend/app/services/layout_preprocessing_service.py @@ -110,34 +110,61 @@ class LayoutPreprocessingService: """ Determine optimal preprocessing config based on image quality. + Auto-detection calculates appropriate strength values: + - Lower image contrast → Higher contrast_strength + - Lower edge strength → Higher sharpen_strength + - Binarization is disabled by default (rarely beneficial) + Args: metrics: Image quality metrics from analyze_image_quality() Returns: PreprocessingConfig with recommended settings """ - # Determine contrast enhancement + # Determine contrast enhancement and strength if metrics.contrast < self.contrast_threshold: contrast = PreprocessingContrastEnum.CLAHE + # Calculate strength based on how far below threshold + # contrast=40 threshold, contrast=20 → strength=2.0, contrast=30 → strength=1.5 + contrast_ratio = (self.contrast_threshold - metrics.contrast) / self.contrast_threshold + contrast_strength = min(1.0 + contrast_ratio * 2.0, 3.0) # Range: 1.0 to 3.0 else: contrast = PreprocessingContrastEnum.NONE + contrast_strength = 1.0 - # Determine sharpening - sharpen = metrics.edge_strength < self.edge_threshold + # Determine sharpening and strength + if metrics.edge_strength < self.edge_threshold: + sharpen = True + # Calculate strength based on how far below threshold + # edge=15 threshold, edge=5 → strength=1.67, edge=10 → strength=1.33 + edge_ratio = (self.edge_threshold - metrics.edge_strength) / self.edge_threshold + sharpen_strength = min(1.0 + edge_ratio * 1.0, 2.0) # Range: 1.0 to 2.0 + else: + sharpen = False + sharpen_strength = 1.0 - # Determine binarization (only for very low contrast) - binarize = metrics.contrast < 
self.binarize_threshold + # Binarization is disabled by default - it rarely helps and often hurts + # Auto mode never enables it, regardless of contrast; it can still be requested via manual config + binarize = False # Disabled by default + + logger.debug( + f"Auto config: contrast={contrast} strength={contrast_strength:.2f}, " + f"sharpen={sharpen} strength={sharpen_strength:.2f}, binarize={binarize}" + ) return PreprocessingConfig( contrast=contrast, + contrast_strength=round(contrast_strength, 2), sharpen=sharpen, + sharpen_strength=round(sharpen_strength, 2), binarize=binarize ) def apply_contrast_enhancement( self, image: np.ndarray, - method: PreprocessingContrastEnum + method: PreprocessingContrastEnum, + strength: float = 1.0 ) -> np.ndarray: """ Apply contrast enhancement to image. @@ -145,6 +172,11 @@ class LayoutPreprocessingService: Args: image: Input image (BGR) method: Enhancement method (none, histogram, clahe) + strength: Enhancement strength (0.5-3.0, default 1.0) + - 0.5: Subtle enhancement + - 1.0: Normal enhancement + - 2.0: Strong enhancement + - 3.0: Maximum enhancement Returns: Enhanced image (BGR) @@ -157,12 +189,17 @@ class LayoutPreprocessingService: l_channel, a_channel, b_channel = cv2.split(lab) if method == PreprocessingContrastEnum.HISTOGRAM: - # Standard histogram equalization - l_enhanced = cv2.equalizeHist(l_channel) + # Standard histogram equalization (strength affects blending) + l_equalized = cv2.equalizeHist(l_channel) + # Blend original with equalized based on strength + alpha = min(strength, 1.0) # Cap at 1.0 for histogram + l_enhanced = cv2.addWeighted(l_equalized, alpha, l_channel, 1 - alpha, 0) elif method == PreprocessingContrastEnum.CLAHE: # Contrast Limited Adaptive Histogram Equalization + # clipLimit controls contrast amplification: 2.0 is default, up to 6.0 for strong + clip_limit = self.clahe_clip_limit * strength # 2.0 * 1.0 = 2.0, 2.0 * 2.0 = 4.0 clahe = cv2.createCLAHE( - clipLimit=self.clahe_clip_limit, + 
clipLimit=clip_limit, tileGridSize=self.clahe_tile_grid_size ) l_enhanced = clahe.apply(l_channel) @@ -175,18 +212,33 @@ class LayoutPreprocessingService: return enhanced_bgr - def apply_sharpening(self, image: np.ndarray) -> np.ndarray: + def apply_sharpening(self, image: np.ndarray, strength: float = 1.0) -> np.ndarray: """ - Apply sharpening to enhance edges and faint lines. + Apply sharpening to enhance edges and faint lines using unsharp mask. Args: image: Input image (BGR) + strength: Sharpening strength (0.5-2.0, default 1.0) + - 0.5: Subtle sharpening + - 1.0: Normal sharpening + - 1.5: Strong sharpening + - 2.0: Maximum sharpening Returns: Sharpened image (BGR) """ - # Apply unsharp mask style sharpening - sharpened = cv2.filter2D(image, -1, self.sharpen_kernel) + # Use unsharp mask technique for better control + # 1. Create blurred version + # 2. Subtract from original (scaled by strength) + # 3. Add back to original + + # Gaussian blur with a fixed sigma; strength only scales the mask weight below + sigma = 1.0 + blurred = cv2.GaussianBlur(image, (0, 0), sigma) + + # Unsharp mask: original + (original - blurred) * strength + # This is equivalent to: original * (1 + strength) - blurred * strength + sharpened = cv2.addWeighted(image, 1.0 + strength, blurred, -strength, 0) # Clip values to valid range sharpened = np.clip(sharpened, 0, 255).astype(np.uint8) @@ -277,15 +329,19 @@ class LayoutPreprocessingService: # Step 1: Contrast enhancement if config.contrast != PreprocessingContrastEnum.NONE: - processed = self.apply_contrast_enhancement(processed, config.contrast) + processed = self.apply_contrast_enhancement( + processed, + config.contrast, + strength=config.contrast_strength + ) was_processed = True - logger.debug(f"Applied contrast enhancement: {config.contrast}") + logger.debug(f"Applied contrast enhancement: {config.contrast} (strength={config.contrast_strength})") # Step 2: Sharpening if config.sharpen: - processed = self.apply_sharpening(processed) + processed = 
self.apply_sharpening(processed, strength=config.sharpen_strength) was_processed = True - logger.debug("Applied sharpening") + logger.debug(f"Applied sharpening (strength={config.sharpen_strength})") # Step 3: Binarization (last step, overwrites color) if config.binarize: diff --git a/frontend/src/components/PreprocessingSettings.tsx b/frontend/src/components/PreprocessingSettings.tsx index c31b6ff..90b9487 100644 --- a/frontend/src/components/PreprocessingSettings.tsx +++ b/frontend/src/components/PreprocessingSettings.tsx @@ -41,6 +41,20 @@ export default function PreprocessingSettings({ onConfigChange({ ...config, [field]: value }) } + const getStrengthLabel = (value: number, type: 'contrast' | 'sharpen') => { + if (type === 'contrast') { + if (value <= 0.75) return t('processing.preprocessing.strength.subtle') + if (value <= 1.25) return t('processing.preprocessing.strength.normal') + if (value <= 2.0) return t('processing.preprocessing.strength.strong') + return t('processing.preprocessing.strength.maximum') + } else { + if (value <= 0.75) return t('processing.preprocessing.strength.subtle') + if (value <= 1.25) return t('processing.preprocessing.strength.normal') + if (value <= 1.5) return t('processing.preprocessing.strength.strong') + return t('processing.preprocessing.strength.maximum') + } + } + return (
{/* Header */} @@ -131,14 +145,14 @@ export default function PreprocessingSettings({ {/* Manual Configuration (shown only when mode is 'manual') */} {mode === 'manual' && ( -
+

{t('processing.preprocessing.manualConfig')}

{/* Contrast Enhancement */} -
-
)} diff --git a/frontend/src/i18n/locales/zh-TW.json b/frontend/src/i18n/locales/zh-TW.json index d4296eb..e72f027 100644 --- a/frontend/src/i18n/locales/zh-TW.json +++ b/frontend/src/i18n/locales/zh-TW.json @@ -68,9 +68,9 @@ "title": "影像前處理", "mode": { "auto": "自動模式", - "autoDesc": "系統自動分析影像品質,決定最佳的前處理方式", + "autoDesc": "系統自動分析影像品質,決定最佳的前處理方式和強度", "manual": "手動模式", - "manualDesc": "手動選擇前處理選項,完全控制處理流程", + "manualDesc": "手動選擇前處理選項和強度,完全控制處理流程", "disabled": "停用前處理", "disabledDesc": "不進行任何前處理,直接使用原始影像" }, @@ -84,8 +84,16 @@ "clahe": "CLAHE 自適應均衡化" }, "sharpen": "邊緣銳化", + "strength": { + "label": "強度", + "subtle": "輕微", + "normal": "正常", + "strong": "強", + "maximum": "最強" + }, + "advanced": "進階選項", "binarize": "二值化處理", - "binarizeWarning": "可能影響顏色資訊", + "binarizeWarning": "不建議使用", "note": "前處理僅影響版面偵測階段,用於改善表格和文字區塊的識別。原始影像仍用於最終的 OCR 文字提取,確保最佳識別品質。" } }, diff --git a/frontend/src/pages/ProcessingPage.tsx b/frontend/src/pages/ProcessingPage.tsx index 59b3b86..9a86ce5 100644 --- a/frontend/src/pages/ProcessingPage.tsx +++ b/frontend/src/pages/ProcessingPage.tsx @@ -39,7 +39,9 @@ export default function ProcessingPage() { const [preprocessingMode, setPreprocessingMode] = useState('auto') const [preprocessingConfig, setPreprocessingConfig] = useState({ contrast: 'clahe', + contrast_strength: 1.0, sharpen: true, + sharpen_strength: 1.0, binarize: false, }) diff --git a/frontend/src/types/apiV2.ts b/frontend/src/types/apiV2.ts index 716c966..6f582c6 100644 --- a/frontend/src/types/apiV2.ts +++ b/frontend/src/types/apiV2.ts @@ -100,7 +100,9 @@ export type PreprocessingContrast = 'none' | 'histogram' | 'clahe' */ export interface PreprocessingConfig { contrast: PreprocessingContrast + contrast_strength: number // 0.5-3.0, default 1.0 sharpen: boolean + sharpen_strength: number // 0.5-2.0, default 1.0 binarize: boolean }