#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Image preprocessing utilities used to improve OCR recognition accuracy.

Author: PANJIT IT Team
Created: 2025-10-01
Modified: 2025-10-01
"""

import io
from typing import Optional, Tuple

import numpy as np
from PIL import Image, ImageEnhance, ImageFilter

from app.utils.logger import get_logger

logger = get_logger(__name__)

# Probe for OpenCV once at import time; every OpenCV code path below is
# guarded by this flag (through ``ImagePreprocessor.use_opencv``).
try:
    import cv2
    _HAS_OPENCV = True
    logger.info("OpenCV is available for advanced image preprocessing")
except ImportError:
    _HAS_OPENCV = False
    logger.warning("OpenCV not available, using PIL-only preprocessing")


class ImagePreprocessor:
    """Image preprocessor that cleans up scanned documents before OCR."""

    # Enhancement levels understood by the pipelines. Any other value is
    # processed by the ``else`` branches below, i.e. like 'low'.
    _KNOWN_LEVELS = ('low', 'medium', 'high')

    def __init__(self, use_opencv: bool = True):
        """
        Initialise the preprocessor.

        Args:
            use_opencv: Use OpenCV for the advanced pipeline when it could be
                imported; otherwise the PIL-only pipeline is used.
        """
        self.use_opencv = use_opencv and _HAS_OPENCV
        logger.info(f"ImagePreprocessor initialized (OpenCV: {self.use_opencv})")

    def preprocess_for_ocr(self, image_bytes: bytes, enhance_level: str = 'medium') -> bytes:
        """
        Preprocess an encoded image for OCR.

        Args:
            image_bytes: Raw encoded image data.
            enhance_level: Enhancement level ('low', 'medium', 'high').

        Returns:
            The processed image encoded as PNG. If any step fails, the
            original ``image_bytes`` is returned unchanged (best effort).
        """
        if enhance_level not in self._KNOWN_LEVELS:
            # Keep the historical behaviour (lightest pipeline) but make the
            # fallback visible instead of silent.
            logger.warning(
                f"Unknown enhance_level {enhance_level!r}; "
                f"falling back to the lightest processing path"
            )

        try:
            # 1. Load the image.
            with Image.open(io.BytesIO(image_bytes)) as image:
                original_mode = image.mode
                logger.debug(f"Original image: {image.size}, mode={original_mode}")

                # 2. Normalise to RGB unless already RGB or 8-bit grayscale.
                if image.mode not in ('RGB', 'L'):
                    image = image.convert('RGB')
                    logger.debug("Converted to RGB mode")

                # 3. Run the pipeline matching the available backend.
                if self.use_opencv:
                    processed_image = self._preprocess_with_opencv(image, enhance_level)
                else:
                    processed_image = self._preprocess_with_pil(image, enhance_level)

            # 4. Encode the result as PNG bytes.
            output_buffer = io.BytesIO()
            processed_image.save(output_buffer, format='PNG', optimize=True)
            processed_bytes = output_buffer.getvalue()

            logger.info(f"Image preprocessed: {len(image_bytes)} -> {len(processed_bytes)} bytes (level={enhance_level})")
            return processed_bytes

        except Exception as e:
            # Deliberate best effort: OCR should still run on the original.
            logger.error(f"Image preprocessing failed: {e}, returning original image")
            return image_bytes

    def _preprocess_with_opencv(self, image: Image.Image, level: str) -> Image.Image:
        """
        Run the advanced OpenCV pipeline.

        Steps: grayscale -> denoise -> CLAHE contrast -> sharpen ('high'
        only) -> adaptive threshold ('medium' and 'high' only).

        Args:
            image: PIL image, normally in mode 'RGB' or 'L'.
            level: Enhancement level ('low', 'medium', 'high').

        Returns:
            A single-channel PIL image.
        """
        # PIL Image -> NumPy array
        img_array = np.array(image)

        # 1. Grayscale. A mode 'L' image is already a 2-D single-channel
        #    array; passing it to cvtColor(..., COLOR_BGR2GRAY) raises, which
        #    previously made every grayscale scan skip preprocessing.
        if img_array.ndim == 2:
            gray = img_array
        elif img_array.ndim == 3 and img_array.shape[2] == 3:
            # PIL arrays are RGB-ordered, so convert from RGB directly.
            gray = cv2.cvtColor(img_array, cv2.COLOR_RGB2GRAY)
        else:
            # Any other channel layout (e.g. with alpha): let PIL convert.
            gray = np.array(image.convert('L'))
        logger.debug("Applied grayscale conversion (OpenCV)")

        # 2. Denoise, strength depending on the level.
        if level == 'high':
            denoised = cv2.fastNlMeansDenoising(
                gray, None, h=10, templateWindowSize=7, searchWindowSize=21
            )
            logger.debug("Applied strong denoising (h=10)")
        elif level == 'medium':
            denoised = cv2.fastNlMeansDenoising(
                gray, None, h=7, templateWindowSize=7, searchWindowSize=21
            )
            logger.debug("Applied medium denoising (h=7)")
        else:
            denoised = cv2.bilateralFilter(gray, 5, 50, 50)
            logger.debug("Applied light denoising (bilateral)")

        # 3. Contrast enhancement with CLAHE.
        clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
        enhanced = clahe.apply(denoised)
        logger.debug("Applied CLAHE contrast enhancement")

        # 4. Sharpen (only at the 'high' level).
        if level == 'high':
            # Explicit float kernel; coefficients sum to 1.
            kernel = np.array(
                [[-1, -1, -1],
                 [-1, 9, -1],
                 [-1, -1, -1]],
                dtype=np.float32,
            )
            sharpened = cv2.filter2D(enhanced, -1, kernel)
            logger.debug("Applied sharpening filter")
        else:
            sharpened = enhanced

        # 5. Adaptive binarisation ('medium' and 'high' only).
        if level in ('medium', 'high'):
            final_image = cv2.adaptiveThreshold(
                sharpened,
                255,
                cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                cv2.THRESH_BINARY,
                blockSize=11,
                C=2,
            )
            logger.debug("Applied adaptive thresholding")
        else:
            final_image = sharpened

        # NumPy array -> PIL Image
        return Image.fromarray(final_image)

    def _preprocess_with_pil(self, image: Image.Image, level: str) -> Image.Image:
        """
        Run the basic PIL-only pipeline (used when OpenCV is unavailable).

        Steps: grayscale -> contrast -> sharpen ('medium' and 'high') ->
        median filter ('high' only).

        Args:
            image: PIL image to process.
            level: Enhancement level ('low', 'medium', 'high').

        Returns:
            The processed mode 'L' PIL image.
        """
        # 1. Grayscale.
        gray = image.convert('L')
        logger.debug("Applied grayscale conversion (PIL)")

        # 2. Contrast enhancement.
        if level == 'high':
            contrast_factor = 2.0
        elif level == 'medium':
            contrast_factor = 1.5
        else:
            contrast_factor = 1.2
        enhanced = ImageEnhance.Contrast(gray).enhance(contrast_factor)
        logger.debug(f"Applied contrast enhancement (factor={contrast_factor})")

        # 3. Sharpening.
        if level in ('medium', 'high'):
            sharp_factor = 2.0 if level == 'high' else 1.5
            sharpened = ImageEnhance.Sharpness(enhanced).enhance(sharp_factor)
            logger.debug(f"Applied sharpening (factor={sharp_factor})")
        else:
            sharpened = enhanced

        # 4. Denoise with a median filter ('high' only).
        if level == 'high':
            denoised = sharpened.filter(ImageFilter.MedianFilter(size=3))
            logger.debug("Applied median filter (size=3)")
        else:
            denoised = sharpened

        return denoised

    def auto_detect_enhance_level(self, image_bytes: bytes) -> str:
        """
        Pick an enhancement level from simple image-quality metrics.

        Contrast is the standard deviation of the grayscale pixels. When
        OpenCV is in use, the variance of the Laplacian is also computed as
        a sharpness measure.

        Args:
            image_bytes: Raw encoded image data.

        Returns:
            Suggested level ('low', 'medium', 'high'); 'medium' when the
            image cannot be analysed.
        """
        try:
            with Image.open(io.BytesIO(image_bytes)) as image:
                img_array = np.array(image.convert('L'))

            contrast = np.std(img_array)

            if self.use_opencv:
                # Variance of the Laplacian as a sharpness estimate.
                laplacian_var = cv2.Laplacian(img_array, cv2.CV_64F).var()
                logger.debug(f"Image quality metrics: laplacian_var={laplacian_var:.2f}, contrast={contrast:.2f}")

                if laplacian_var < 50 or contrast < 40:
                    # Blurry or low contrast -> strongest enhancement.
                    return 'high'
                if laplacian_var < 100 or contrast < 60:
                    return 'medium'
                return 'low'

            # PIL-only: decide on contrast alone.
            if contrast < 40:
                return 'high'
            if contrast < 60:
                return 'medium'
            return 'low'

        except Exception as e:
            logger.error(f"Auto enhance level detection failed: {e}")
            return 'medium'  # default level when detection fails

    def preprocess_smart(self, image_bytes: bytes) -> bytes:
        """
        Detect the enhancement level automatically and preprocess with it.

        Args:
            image_bytes: Raw encoded image data.

        Returns:
            The processed image bytes, as returned by ``preprocess_for_ocr``.
        """
        enhance_level = self.auto_detect_enhance_level(image_bytes)
        logger.info(f"Auto-detected enhancement level: {enhance_level}")
        return self.preprocess_for_ocr(image_bytes, enhance_level)