249 lines
8.5 KiB
Python
249 lines
8.5 KiB
Python
#!/usr/bin/env python3
|
|
# -*- coding: utf-8 -*-
|
|
"""
|
|
圖像預處理工具 - 用於提升 OCR 識別準確度
|
|
|
|
Author: PANJIT IT Team
|
|
Created: 2025-10-01
|
|
Modified: 2025-10-01
|
|
"""
|
|
|
|
import io
|
|
import numpy as np
|
|
from PIL import Image, ImageEnhance, ImageFilter
|
|
from typing import Optional, Tuple
|
|
from app.utils.logger import get_logger
|
|
|
|
logger = get_logger(__name__)
|
|
|
|
# Probe for OpenCV once at import time. _HAS_OPENCV records the outcome so the
# preprocessor can fall back to a PIL-only pipeline when cv2 is not installed.
try:
    import cv2
except ImportError:
    _HAS_OPENCV = False
    logger.warning("OpenCV not available, using PIL-only preprocessing")
else:
    _HAS_OPENCV = True
    logger.info("OpenCV is available for advanced image preprocessing")
|
|
|
|
|
|
class ImagePreprocessor:
    """Image preprocessor that cleans up scanned documents to improve OCR quality.

    Uses an OpenCV-based pipeline when OpenCV was imported successfully and the
    caller asked for it; otherwise falls back to a PIL-only pipeline.
    """

    def __init__(self, use_opencv: bool = True):
        """Initialise the preprocessor.

        Args:
            use_opencv: Whether to use OpenCV for advanced processing. Only
                honoured when OpenCV is actually available in this process.
        """
        self.use_opencv = use_opencv and _HAS_OPENCV
        logger.info(f"ImagePreprocessor initialized (OpenCV: {self.use_opencv})")

    def preprocess_for_ocr(self, image_bytes: bytes,
                           enhance_level: str = 'medium') -> bytes:
        """Preprocess an encoded image before handing it to OCR.

        Args:
            image_bytes: Raw encoded image data.
            enhance_level: Enhancement level: 'low', 'medium' or 'high'.
                Any other value is processed the same way as 'low'.

        Returns:
            The processed image encoded as PNG. If any step fails, the error
            is logged and the original ``image_bytes`` is returned unchanged.
        """
        try:
            # 1. Decode the image.
            image = Image.open(io.BytesIO(image_bytes))
            logger.debug(f"Original image: {image.size}, mode={image.mode}")

            # 2. Normalise everything except RGB / grayscale to RGB.
            if image.mode not in ('RGB', 'L'):
                image = image.convert('RGB')
                logger.debug("Converted to RGB mode")

            # 3. Run the pipeline matching the available backend.
            if self.use_opencv:
                processed_image = self._preprocess_with_opencv(image, enhance_level)
            else:
                processed_image = self._preprocess_with_pil(image, enhance_level)

            # 4. Encode the result as PNG bytes.
            output_buffer = io.BytesIO()
            processed_image.save(output_buffer, format='PNG', optimize=True)
            processed_bytes = output_buffer.getvalue()

            logger.info(f"Image preprocessed: {len(image_bytes)} -> {len(processed_bytes)} bytes (level={enhance_level})")
            return processed_bytes

        except Exception as e:
            # Deliberate best effort: OCR on the untouched image is better
            # than failing the whole request.
            logger.error(f"Image preprocessing failed: {e}, returning original image")
            return image_bytes

    def _preprocess_with_opencv(self, image: Image.Image, level: str) -> Image.Image:
        """Run the OpenCV pipeline: grayscale, denoise, CLAHE, sharpen, threshold.

        Args:
            image: PIL image; ``preprocess_for_ocr`` passes mode 'RGB' or 'L'.
            level: 'low', 'medium' or 'high'; anything else behaves like 'low'.

        Returns:
            A single-channel PIL image built from the processed array.
        """
        pixels = np.array(image)

        # 1. Grayscale. A mode 'L' image is already a 2-D array; passing it to
        #    cvtColor(BGR2GRAY) raises because that conversion needs 3 or 4
        #    channels, which previously made every grayscale input fall back
        #    to the unprocessed original.
        if pixels.ndim == 2:
            gray = pixels
        elif pixels.ndim == 3 and pixels.shape[2] == 3:
            # PIL delivers RGB channel order, so convert straight from RGB.
            gray = cv2.cvtColor(pixels, cv2.COLOR_RGB2GRAY)
        else:
            # Unexpected channel layout: let PIL produce the gray plane.
            gray = np.array(image.convert('L'))
        logger.debug("Applied grayscale conversion (OpenCV)")

        # 2. Denoising, strength chosen by level.
        if level == 'high':
            denoised = cv2.fastNlMeansDenoising(gray, None, h=10, templateWindowSize=7, searchWindowSize=21)
            logger.debug("Applied strong denoising (h=10)")
        elif level == 'medium':
            denoised = cv2.fastNlMeansDenoising(gray, None, h=7, templateWindowSize=7, searchWindowSize=21)
            logger.debug("Applied medium denoising (h=7)")
        else:
            denoised = cv2.bilateralFilter(gray, 5, 50, 50)
            logger.debug("Applied light denoising (bilateral)")

        # 3. Contrast enhancement with CLAHE.
        clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
        enhanced = clahe.apply(denoised)
        logger.debug("Applied CLAHE contrast enhancement")

        # 4. Sharpening, only at the highest level.
        if level == 'high':
            kernel = np.array([[-1, -1, -1],
                               [-1, 9, -1],
                               [-1, -1, -1]])
            sharpened = cv2.filter2D(enhanced, -1, kernel)
            logger.debug("Applied sharpening filter")
        else:
            sharpened = enhanced

        # 5. Adaptive binarisation for 'medium' and 'high' only.
        if level in ('medium', 'high'):
            final_image = cv2.adaptiveThreshold(
                sharpened, 255,
                cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                cv2.THRESH_BINARY,
                blockSize=11,
                C=2,
            )
            logger.debug("Applied adaptive thresholding")
        else:
            final_image = sharpened

        return Image.fromarray(final_image)

    def _preprocess_with_pil(self, image: Image.Image, level: str) -> Image.Image:
        """Run the basic PIL pipeline used when OpenCV is not in use.

        Args:
            image: PIL image to process.
            level: 'low', 'medium' or 'high'; anything else behaves like 'low'.

        Returns:
            The processed grayscale PIL image.
        """
        # 1. Grayscale.
        gray = image.convert('L')
        logger.debug("Applied grayscale conversion (PIL)")

        # 2. Contrast enhancement, stronger at higher levels.
        if level == 'high':
            contrast_factor = 2.0
        elif level == 'medium':
            contrast_factor = 1.5
        else:
            contrast_factor = 1.2
        enhanced = ImageEnhance.Contrast(gray).enhance(contrast_factor)
        logger.debug(f"Applied contrast enhancement (factor={contrast_factor})")

        # 3. Sharpening for 'medium' and 'high'.
        if level in ('medium', 'high'):
            sharp_factor = 2.0 if level == 'high' else 1.5
            sharpened = ImageEnhance.Sharpness(enhanced).enhance(sharp_factor)
            logger.debug(f"Applied sharpening (factor={sharp_factor})")
        else:
            sharpened = enhanced

        # 4. Median-filter denoising, only at the highest level.
        if level == 'high':
            denoised = sharpened.filter(ImageFilter.MedianFilter(size=3))
            logger.debug("Applied median filter (size=3)")
        else:
            denoised = sharpened

        return denoised

    def auto_detect_enhance_level(self, image_bytes: bytes) -> str:
        """Pick an enhancement level from simple image-quality metrics.

        Contrast is the standard deviation of the grayscale pixels. When
        OpenCV is in use, sharpness (variance of the Laplacian) is considered
        as well.

        Args:
            image_bytes: Encoded image data.

        Returns:
            'high', 'medium' or 'low'. Returns 'medium' if detection fails.
        """
        try:
            image = Image.open(io.BytesIO(image_bytes))
            gray_pixels = np.array(image.convert('L'))
            contrast = np.std(gray_pixels)

            if self.use_opencv:
                laplacian_var = cv2.Laplacian(gray_pixels, cv2.CV_64F).var()
                logger.debug(f"Image quality metrics: laplacian_var={laplacian_var:.2f}, contrast={contrast:.2f}")
            else:
                # No sharpness metric without OpenCV: infinity never trips the
                # blur thresholds, so the decision rests on contrast alone.
                laplacian_var = float('inf')

            if laplacian_var < 50 or contrast < 40:
                # Blurry or low contrast.
                return 'high'
            if laplacian_var < 100 or contrast < 60:
                # Medium quality.
                return 'medium'
            # Good quality: light enhancement is enough.
            return 'low'

        except Exception as e:
            logger.error(f"Auto enhance level detection failed: {e}")
            return 'medium'

    def preprocess_smart(self, image_bytes: bytes) -> bytes:
        """Detect the enhancement level automatically, then preprocess with it.

        Args:
            image_bytes: Raw encoded image data.

        Returns:
            The processed image bytes, as returned by ``preprocess_for_ocr``.
        """
        enhance_level = self.auto_detect_enhance_level(image_bytes)
        logger.info(f"Auto-detected enhancement level: {enhance_level}")
        return self.preprocess_for_ocr(image_bytes, enhance_level)
|