""" CV-based Table Line Detection Module Uses OpenCV morphological operations to detect table lines and extract cell boundaries. This is more reliable for wired/bordered tables than ML-based cell detection. """ import cv2 import numpy as np from typing import List, Tuple, Optional from pathlib import Path import logging logger = logging.getLogger(__name__) class CVTableDetector: """ Detects table cell boundaries using computer vision techniques. Works by detecting horizontal and vertical lines in the image. """ def __init__( self, min_line_length: int = 30, line_thickness: int = 2, min_cell_width: int = 20, min_cell_height: int = 15 ): """ Initialize the CV table detector. Args: min_line_length: Minimum length of lines to detect (in pixels) line_thickness: Expected thickness of table lines min_cell_width: Minimum width of a valid cell min_cell_height: Minimum height of a valid cell """ self.min_line_length = min_line_length self.line_thickness = line_thickness self.min_cell_width = min_cell_width self.min_cell_height = min_cell_height def detect_cells( self, image: np.ndarray, table_bbox: Optional[List[float]] = None ) -> List[List[float]]: """ Detect cell boundaries in a table image. Args: image: Input image (BGR format) table_bbox: Optional [x1, y1, x2, y2] to crop table region first Returns: List of cell bounding boxes [[x1, y1, x2, y2], ...] """ # Crop to table region if bbox provided offset_x, offset_y = 0, 0 if table_bbox: x1, y1, x2, y2 = [int(v) for v in table_bbox] offset_x, offset_y = x1, y1 image = image[y1:y2, x1:x2] if image.size == 0: logger.warning("Empty image after cropping") return [] # Convert to grayscale if len(image.shape) == 3: gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) else: gray = image # Detect lines horizontal_lines, vertical_lines = self._detect_lines(gray) if horizontal_lines is None or vertical_lines is None: logger.warning("Failed to detect table lines") return [] # Find intersections to build grid cells = self._build_cell_grid(horizontal_lines, vertical_lines, gray.shape) # Convert to absolute coordinates absolute_cells = [] for cell in cells: abs_cell = [ cell[0] + offset_x, cell[1] + offset_y, cell[2] + offset_x, cell[3] + offset_y ] absolute_cells.append(abs_cell) logger.info(f"[CV] Detected {len(absolute_cells)} cells from table lines") return absolute_cells def _detect_lines( self, gray: np.ndarray ) -> Tuple[Optional[np.ndarray], Optional[np.ndarray]]: """ Detect horizontal and vertical lines using morphological operations. Args: gray: Grayscale image Returns: Tuple of (horizontal_lines_mask, vertical_lines_mask) """ # Adaptive threshold for better line detection binary = cv2.adaptiveThreshold( gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 2 ) # Detect horizontal lines h_kernel_length = max(self.min_line_length, gray.shape[1] // 30) horizontal_kernel = cv2.getStructuringElement( cv2.MORPH_RECT, (h_kernel_length, 1) ) horizontal_lines = cv2.morphologyEx( binary, cv2.MORPH_OPEN, horizontal_kernel, iterations=2 ) # Detect vertical lines v_kernel_length = max(self.min_line_length, gray.shape[0] // 30) vertical_kernel = cv2.getStructuringElement( cv2.MORPH_RECT, (1, v_kernel_length) ) vertical_lines = cv2.morphologyEx( binary, cv2.MORPH_OPEN, vertical_kernel, iterations=2 ) return horizontal_lines, vertical_lines def _build_cell_grid( self, horizontal_mask: np.ndarray, vertical_mask: np.ndarray, image_shape: Tuple[int, int] ) -> List[List[float]]: """ Build cell grid from detected line masks. 

        Args:
            horizontal_mask: Binary mask of horizontal lines
            vertical_mask: Binary mask of vertical lines
            image_shape: (height, width) of the image

        Returns:
            List of cell bounding boxes
        """
        height, width = image_shape[:2]

        # Combine masks to find table structure
        table_mask = cv2.add(horizontal_mask, vertical_mask)

        # Find contours (cells are enclosed regions)
        contours, hierarchy = cv2.findContours(
            table_mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE
        )

        # Method 1: Use contours to find cells
        cells_from_contours = self._cells_from_contours(contours, hierarchy)

        # Method 2: Use line intersections to build grid
        cells_from_grid = self._cells_from_line_intersections(
            horizontal_mask, vertical_mask, height, width
        )

        # Use whichever method found more valid cells
        if len(cells_from_grid) >= len(cells_from_contours):
            return cells_from_grid
        return cells_from_contours

    def _cells_from_contours(
        self,
        contours,
        hierarchy
    ) -> List[List[float]]:
        """Extract cell bounding boxes from contours."""
        cells = []
        for i, contour in enumerate(contours):
            x, y, w, h = cv2.boundingRect(contour)

            # Filter by minimum size
            if w >= self.min_cell_width and h >= self.min_cell_height:
                # Check if this is an inner contour (cell) not the outer table
                if hierarchy is not None and hierarchy[0][i][3] != -1:
                    cells.append([float(x), float(y), float(x + w), float(y + h)])

        return cells

    def _cells_from_line_intersections(
        self,
        horizontal_mask: np.ndarray,
        vertical_mask: np.ndarray,
        height: int,
        width: int
    ) -> List[List[float]]:
        """Build cells from line intersections (grid-based approach)."""
        # Find horizontal line y-coordinates
        h_projection = np.sum(horizontal_mask, axis=1)
        h_lines = self._find_line_positions(h_projection, min_gap=self.min_cell_height)

        # Find vertical line x-coordinates
        v_projection = np.sum(vertical_mask, axis=0)
        v_lines = self._find_line_positions(v_projection, min_gap=self.min_cell_width)

        if len(h_lines) < 2 or len(v_lines) < 2:
            logger.debug(f"Insufficient lines: {len(h_lines)} horizontal, {len(v_lines)} vertical")
            return []

        # Build cells from grid
        cells = []
        for i in range(len(h_lines) - 1):
            for j in range(len(v_lines) - 1):
                y1, y2 = h_lines[i], h_lines[i + 1]
                x1, x2 = v_lines[j], v_lines[j + 1]

                # Validate cell size
                if (x2 - x1) >= self.min_cell_width and (y2 - y1) >= self.min_cell_height:
                    cells.append([float(x1), float(y1), float(x2), float(y2)])

        return cells

    def _find_line_positions(
        self,
        projection: np.ndarray,
        min_gap: int
    ) -> List[int]:
        """
        Find line positions from projection profile.

        Args:
            projection: 1D array of pixel sums
            min_gap: Minimum gap between lines

        Returns:
            List of line positions
        """
        # Threshold to find peaks (lines)
        threshold = np.max(projection) * 0.3
        peaks = projection > threshold

        # Find transitions (line positions)
        positions = []
        in_peak = False
        peak_start = 0

        for i, is_peak in enumerate(peaks):
            if is_peak and not in_peak:
                peak_start = i
                in_peak = True
            elif not is_peak and in_peak:
                # End of peak - use center
                peak_center = (peak_start + i) // 2
                if not positions or (peak_center - positions[-1]) >= min_gap:
                    positions.append(peak_center)
                in_peak = False

        return positions

    def detect_and_merge_with_ml(
        self,
        image: np.ndarray,
        table_bbox: List[float],
        ml_cell_boxes: List[List[float]]
    ) -> List[List[float]]:
        """
        Detect cells using CV and merge/validate with ML-detected boxes.

        CV detection is used as the primary source for wired tables,
        with ML boxes used to fill gaps or validate.

        Args:
            image: Input image
            table_bbox: Table bounding box [x1, y1, x2, y2]
            ml_cell_boxes: Cell boxes from ML model (RT-DETR-L)

        Returns:
            Merged/validated cell boxes
        """
        cv_cells = self.detect_cells(image, table_bbox)

        if not cv_cells:
            # CV detection failed, fall back to ML
            logger.info("[CV] No cells detected by CV, using ML cells")
            return ml_cell_boxes

        if not ml_cell_boxes:
            # Only CV cells available
            return cv_cells

        # Validate: CV should find structured grid
        # If CV found significantly fewer cells, there might be merged cells
        cv_count = len(cv_cells)
        ml_count = len(ml_cell_boxes)

        logger.info(f"[CV] CV detected {cv_count} cells, ML detected {ml_count} cells")

        # For wired tables, prefer CV detection (cleaner grid)
        if cv_count >= ml_count * 0.5:
            # CV found reasonable number of cells
            return cv_cells
        else:
            # CV might have missed cells (possibly due to merged cells)
            # Try to use ML boxes that don't overlap with CV cells
            merged = list(cv_cells)
            for ml_box in ml_cell_boxes:
                if not self._has_significant_overlap(ml_box, cv_cells):
                    merged.append(ml_box)
            return merged

    def _has_significant_overlap(
        self,
        box: List[float],
        boxes: List[List[float]],
        threshold: float = 0.5
    ) -> bool:
        """Check if box significantly overlaps with any box in the list."""
        for other in boxes:
            iou = self._calculate_iou(box, other)
            if iou > threshold:
                return True
        return False

    def _calculate_iou(
        self,
        box1: List[float],
        box2: List[float]
    ) -> float:
        """Calculate Intersection over Union of two boxes."""
        x1 = max(box1[0], box2[0])
        y1 = max(box1[1], box2[1])
        x2 = min(box1[2], box2[2])
        y2 = min(box1[3], box2[3])

        if x2 <= x1 or y2 <= y1:
            return 0.0

        intersection = (x2 - x1) * (y2 - y1)
        area1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
        area2 = (box2[2] - box2[0]) * (box2[3] - box2[1])
        union = area1 + area2 - intersection

        return intersection / union if union > 0 else 0.0


def load_image(image_path: str) -> Optional[np.ndarray]:
    """Load image from path."""
    path = Path(image_path)
    if not path.exists():
        logger.error(f"Image not found: {image_path}")
        return None
    return cv2.imread(str(path))
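

# --- Usage sketch (illustrative only, not part of the original module) ---
# Shows how the detector might be driven end to end: CV-only cell detection
# followed by the CV/ML merge path. The image path "table_page.png", the
# full-page table_bbox, and `example_ml_boxes` are hypothetical placeholders;
# in practice the bbox and ML boxes would come from an upstream table/cell
# detector such as the RT-DETR-L model mentioned in the docstrings.
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    detector = CVTableDetector(min_cell_width=20, min_cell_height=15)
    img = load_image("table_page.png")  # hypothetical input path

    if img is not None:
        # Treat the whole page as the table region for this demo.
        table_bbox = [0.0, 0.0, float(img.shape[1]), float(img.shape[0])]

        # Pure CV grid detection.
        cv_cells = detector.detect_cells(img, table_bbox)
        print(f"CV-only cells: {len(cv_cells)}")

        # Merge with (hypothetical) ML-detected cell boxes.
        example_ml_boxes: List[List[float]] = []
        merged = detector.detect_and_merge_with_ml(img, table_bbox, example_ml_boxes)
        print(f"Merged cells: {len(merged)}")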