feat: add table detection options and scan artifact removal

- Add TableDetectionSelector component for wired/wireless/region detection
- Add CV-based table line detector module (disabled due to poor performance)
- Add scan artifact removal preprocessing step (removes faint horizontal lines; see the sketch below)
- Add PreprocessingConfig schema with remove_scan_artifacts option
- Update frontend PreprocessingSettings with scan artifact toggle
- Integrate table detection config into ProcessingPage
- Archive extract-table-cell-boxes proposal
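
A minimal, hypothetical sketch of the scan-artifact removal idea, for illustration only (this is not the preprocessing code shipped in this commit; the function name and thresholds are assumptions): long, thin horizontal runs are isolated with a wide morphological kernel and inpainted away. Unlike the real step, this sketch makes no attempt to distinguish faint artifacts from genuine table borders.

import cv2
import numpy as np

def remove_horizontal_scan_artifacts(image: np.ndarray) -> np.ndarray:
    """Illustrative only: mask long horizontal runs and inpaint over them."""
    bgr = image if image.ndim == 3 else cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
    gray = cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY)
    binary = cv2.adaptiveThreshold(
        gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 15, 10
    )
    # A kernel far wider than it is tall keeps only long, near-horizontal runs
    kernel_width = max(40, gray.shape[1] // 20)
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (kernel_width, 1))
    line_mask = cv2.morphologyEx(binary, cv2.MORPH_OPEN, kernel)
    # Dilate slightly so the anti-aliased edges of each line are covered too
    line_mask = cv2.dilate(line_mask, np.ones((3, 3), np.uint8))
    # Fill the masked pixels from the surrounding background
    return cv2.inpaint(bgr, line_mask, 3, cv2.INPAINT_TELEA)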

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
Author: egg
Date: 2025-11-30 13:21:50 +08:00
Parent: f5a2c8a750
Commit: 95ae1f1bdb
17 changed files with 1906 additions and 344 deletions

@@ -0,0 +1,362 @@
"""
CV-based Table Line Detection Module
Uses OpenCV morphological operations to detect table lines and extract cell boundaries.
This is intended to be more reliable for wired/bordered tables than ML-based cell detection.
"""
import cv2
import numpy as np
from typing import List, Tuple, Optional
from pathlib import Path
import logging
logger = logging.getLogger(__name__)
class CVTableDetector:
"""
Detects table cell boundaries using computer vision techniques.
Works by detecting horizontal and vertical lines in the image.
"""
def __init__(
self,
min_line_length: int = 30,
line_thickness: int = 2,
min_cell_width: int = 20,
min_cell_height: int = 15
):
"""
Initialize the CV table detector.
Args:
min_line_length: Minimum length of lines to detect (in pixels)
line_thickness: Expected thickness of table lines (stored but not currently used)
min_cell_width: Minimum width of a valid cell
min_cell_height: Minimum height of a valid cell
"""
self.min_line_length = min_line_length
self.line_thickness = line_thickness
self.min_cell_width = min_cell_width
self.min_cell_height = min_cell_height
def detect_cells(
self,
image: np.ndarray,
table_bbox: Optional[List[float]] = None
) -> List[List[float]]:
"""
Detect cell boundaries in a table image.
Args:
image: Input image (BGR format)
table_bbox: Optional [x1, y1, x2, y2] to crop table region first
Returns:
List of cell bounding boxes [[x1, y1, x2, y2], ...]
"""
# Crop to table region if bbox provided
offset_x, offset_y = 0, 0
if table_bbox:
x1, y1, x2, y2 = [int(v) for v in table_bbox]
offset_x, offset_y = x1, y1
image = image[y1:y2, x1:x2]
if image.size == 0:
logger.warning("Empty image after cropping")
return []
# Convert to grayscale
if len(image.shape) == 3:
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
else:
gray = image
# Detect lines
horizontal_lines, vertical_lines = self._detect_lines(gray)
if horizontal_lines is None or vertical_lines is None:
logger.warning("Failed to detect table lines")
return []
# Find intersections to build grid
cells = self._build_cell_grid(horizontal_lines, vertical_lines, gray.shape)
# Convert to absolute coordinates
absolute_cells = []
for cell in cells:
abs_cell = [
cell[0] + offset_x,
cell[1] + offset_y,
cell[2] + offset_x,
cell[3] + offset_y
]
absolute_cells.append(abs_cell)
logger.info(f"[CV] Detected {len(absolute_cells)} cells from table lines")
return absolute_cells
def _detect_lines(
self,
gray: np.ndarray
) -> Tuple[Optional[np.ndarray], Optional[np.ndarray]]:
"""
Detect horizontal and vertical lines using morphological operations.
Args:
gray: Grayscale image
Returns:
Tuple of (horizontal_lines_mask, vertical_lines_mask)
"""
# Adaptive threshold for better line detection
binary = cv2.adaptiveThreshold(
gray, 255,
cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
cv2.THRESH_BINARY_INV,
11, 2
)
# Detect horizontal lines
h_kernel_length = max(self.min_line_length, gray.shape[1] // 30)
horizontal_kernel = cv2.getStructuringElement(
cv2.MORPH_RECT, (h_kernel_length, 1)
)
horizontal_lines = cv2.morphologyEx(
binary, cv2.MORPH_OPEN, horizontal_kernel, iterations=2
)
# Detect vertical lines
v_kernel_length = max(self.min_line_length, gray.shape[0] // 30)
vertical_kernel = cv2.getStructuringElement(
cv2.MORPH_RECT, (1, v_kernel_length)
)
vertical_lines = cv2.morphologyEx(
binary, cv2.MORPH_OPEN, vertical_kernel, iterations=2
)
return horizontal_lines, vertical_lines
def _build_cell_grid(
self,
horizontal_mask: np.ndarray,
vertical_mask: np.ndarray,
image_shape: Tuple[int, int]
) -> List[List[float]]:
"""
Build cell grid from detected line masks.
Args:
horizontal_mask: Binary mask of horizontal lines
vertical_mask: Binary mask of vertical lines
image_shape: (height, width) of the image
Returns:
List of cell bounding boxes
"""
height, width = image_shape[:2]
# Combine masks to find table structure
table_mask = cv2.add(horizontal_mask, vertical_mask)
# Find contours (cells are enclosed regions)
contours, hierarchy = cv2.findContours(
table_mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE
)
# Method 1: Use contours to find cells
cells_from_contours = self._cells_from_contours(contours, hierarchy)
# Method 2: Use line intersections to build grid
cells_from_grid = self._cells_from_line_intersections(
horizontal_mask, vertical_mask, height, width
)
# Use whichever method found more valid cells
if len(cells_from_grid) >= len(cells_from_contours):
return cells_from_grid
return cells_from_contours
def _cells_from_contours(
self,
contours,
hierarchy
) -> List[List[float]]:
"""Extract cell bounding boxes from contours."""
cells = []
for i, contour in enumerate(contours):
x, y, w, h = cv2.boundingRect(contour)
# Filter by minimum size
if w >= self.min_cell_width and h >= self.min_cell_height:
# Check if this is an inner contour (cell) not the outer table
if hierarchy is not None and hierarchy[0][i][3] != -1:
cells.append([float(x), float(y), float(x + w), float(y + h)])
return cells
def _cells_from_line_intersections(
self,
horizontal_mask: np.ndarray,
vertical_mask: np.ndarray,
height: int,
width: int
) -> List[List[float]]:
"""Build cells from line intersections (grid-based approach)."""
# Find horizontal line y-coordinates
h_projection = np.sum(horizontal_mask, axis=1)
h_lines = self._find_line_positions(h_projection, min_gap=self.min_cell_height)
# Find vertical line x-coordinates
v_projection = np.sum(vertical_mask, axis=0)
v_lines = self._find_line_positions(v_projection, min_gap=self.min_cell_width)
if len(h_lines) < 2 or len(v_lines) < 2:
logger.debug(f"Insufficient lines: {len(h_lines)} horizontal, {len(v_lines)} vertical")
return []
# Build cells from grid
cells = []
for i in range(len(h_lines) - 1):
for j in range(len(v_lines) - 1):
y1, y2 = h_lines[i], h_lines[i + 1]
x1, x2 = v_lines[j], v_lines[j + 1]
# Validate cell size
if (x2 - x1) >= self.min_cell_width and (y2 - y1) >= self.min_cell_height:
cells.append([float(x1), float(y1), float(x2), float(y2)])
return cells
def _find_line_positions(
self,
projection: np.ndarray,
min_gap: int
) -> List[int]:
"""
Find line positions from projection profile.
Args:
projection: 1D array of pixel sums
min_gap: Minimum gap between lines
Returns:
List of line positions
"""
# Threshold to find peaks (lines)
threshold = np.max(projection) * 0.3
peaks = projection > threshold
# Find transitions (line positions)
positions = []
in_peak = False
peak_start = 0
for i, is_peak in enumerate(peaks):
if is_peak and not in_peak:
peak_start = i
in_peak = True
elif not is_peak and in_peak:
# End of peak - use center
peak_center = (peak_start + i) // 2
if not positions or (peak_center - positions[-1]) >= min_gap:
positions.append(peak_center)
in_peak = False
# After the loop, close out a peak that runs to the end of the projection
if in_peak:
peak_center = (peak_start + len(peaks)) // 2
if not positions or (peak_center - positions[-1]) >= min_gap:
positions.append(peak_center)
return positions
def detect_and_merge_with_ml(
self,
image: np.ndarray,
table_bbox: List[float],
ml_cell_boxes: List[List[float]]
) -> List[List[float]]:
"""
Detect cells using CV and merge/validate with ML-detected boxes.
CV detection is used as the primary source for wired tables,
with ML boxes used to fill gaps or validate.
Args:
image: Input image
table_bbox: Table bounding box [x1, y1, x2, y2]
ml_cell_boxes: Cell boxes from ML model (RT-DETR-L)
Returns:
Merged/validated cell boxes
"""
cv_cells = self.detect_cells(image, table_bbox)
if not cv_cells:
# CV detection failed, fall back to ML
logger.info("[CV] No cells detected by CV, using ML cells")
return ml_cell_boxes
if not ml_cell_boxes:
# Only CV cells available
return cv_cells
# Validate: CV should find structured grid
# If CV found significantly fewer cells, there might be merged cells
cv_count = len(cv_cells)
ml_count = len(ml_cell_boxes)
logger.info(f"[CV] CV detected {cv_count} cells, ML detected {ml_count} cells")
# For wired tables, prefer CV detection (cleaner grid)
if cv_count >= ml_count * 0.5:
# CV found reasonable number of cells
return cv_cells
else:
# CV might have missed cells (possibly due to merged cells)
# Try to use ML boxes that don't overlap with CV cells
merged = list(cv_cells)
for ml_box in ml_cell_boxes:
if not self._has_significant_overlap(ml_box, cv_cells):
merged.append(ml_box)
return merged
def _has_significant_overlap(
self,
box: List[float],
boxes: List[List[float]],
threshold: float = 0.5
) -> bool:
"""Check if box significantly overlaps with any box in the list."""
for other in boxes:
iou = self._calculate_iou(box, other)
if iou > threshold:
return True
return False
def _calculate_iou(
self,
box1: List[float],
box2: List[float]
) -> float:
"""Calculate Intersection over Union of two boxes."""
x1 = max(box1[0], box2[0])
y1 = max(box1[1], box2[1])
x2 = min(box1[2], box2[2])
y2 = min(box1[3], box2[3])
if x2 <= x1 or y2 <= y1:
return 0.0
intersection = (x2 - x1) * (y2 - y1)
area1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
area2 = (box2[2] - box2[0]) * (box2[3] - box2[1])
union = area1 + area2 - intersection
return intersection / union if union > 0 else 0.0
def load_image(image_path: str) -> Optional[np.ndarray]:
"""Load image from path."""
path = Path(image_path)
if not path.exists():
logger.error(f"Image not found: {image_path}")
return None
return cv2.imread(str(path))
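
A minimal usage sketch for the detector above, assuming the module is importable as cv_table_detector (the diff does not show the file path, so the import path, file name, and example bounding box are assumptions):

# Hypothetical usage of CVTableDetector as defined in this file
from cv_table_detector import CVTableDetector, load_image

image = load_image("page_scan.png")
if image is not None:
    detector = CVTableDetector(min_cell_width=25, min_cell_height=18)
    # Restrict detection to a known table region [x1, y1, x2, y2] in pixels
    cells = detector.detect_cells(image, table_bbox=[120.0, 340.0, 1480.0, 920.0])
    for x1, y1, x2, y2 in cells:
        print(f"cell: ({x1:.0f}, {y1:.0f}) -> ({x2:.0f}, {y2:.0f})")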