- Add TableDetectionSelector component for wired/wireless/region detection
- Add CV-based table line detector module (disabled due to poor performance)
- Add scan artifact removal preprocessing step (removes faint horizontal lines)
- Add PreprocessingConfig schema with remove_scan_artifacts option
- Update frontend PreprocessingSettings with scan artifact toggle
- Integrate table detection config into ProcessingPage
- Archive extract-table-cell-boxes proposal

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
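
The scan-artifact removal step listed above strips faint horizontal lines that scanners sometimes leave across a page. As a rough illustration of how such a step is commonly implemented with OpenCV morphology (a hedged sketch, not the preprocessing code added in this change; the function name and thresholds are hypothetical):

```python
import cv2
import numpy as np


def remove_faint_horizontal_lines(gray: np.ndarray, min_length: int = 100) -> np.ndarray:
    """Illustrative only: suppress thin horizontal scan artifacts in a grayscale page."""
    # Binarize so even faint lines become foreground
    binary = cv2.adaptiveThreshold(
        gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 15, 5
    )
    # Isolate long, thin horizontal structures with a wide, 1-px-tall kernel
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (min_length, 1))
    lines = cv2.morphologyEx(binary, cv2.MORPH_OPEN, kernel)
    # Paint the detected line pixels back to background (white)
    cleaned = gray.copy()
    cleaned[lines > 0] = 255
    return cleaned
```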
"""
|
|
CV-based Table Line Detection Module
|
|
|
|
Uses OpenCV morphological operations to detect table lines and extract cell boundaries.
|
|
This is more reliable for wired/bordered tables than ML-based cell detection.
|
|
"""
|
|
|
|
import cv2
|
|
import numpy as np
|
|
from typing import List, Tuple, Optional
|
|
from pathlib import Path
|
|
import logging
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class CVTableDetector:
    """
    Detects table cell boundaries using computer vision techniques.
    Works by detecting horizontal and vertical lines in the image.
    """

    def __init__(
        self,
        min_line_length: int = 30,
        line_thickness: int = 2,
        min_cell_width: int = 20,
        min_cell_height: int = 15
    ):
        """
        Initialize the CV table detector.

        Args:
            min_line_length: Minimum length of lines to detect (in pixels)
            line_thickness: Expected thickness of table lines
            min_cell_width: Minimum width of a valid cell
            min_cell_height: Minimum height of a valid cell
        """
        self.min_line_length = min_line_length
        self.line_thickness = line_thickness
        self.min_cell_width = min_cell_width
        self.min_cell_height = min_cell_height

    def detect_cells(
        self,
        image: np.ndarray,
        table_bbox: Optional[List[float]] = None
    ) -> List[List[float]]:
        """
        Detect cell boundaries in a table image.

        Args:
            image: Input image (BGR format)
            table_bbox: Optional [x1, y1, x2, y2] to crop table region first

        Returns:
            List of cell bounding boxes [[x1, y1, x2, y2], ...]
        """
        # Crop to table region if bbox provided
        offset_x, offset_y = 0, 0
        if table_bbox:
            x1, y1, x2, y2 = [int(v) for v in table_bbox]
            offset_x, offset_y = x1, y1
            image = image[y1:y2, x1:x2]

        if image.size == 0:
            logger.warning("Empty image after cropping")
            return []

        # Convert to grayscale
        if len(image.shape) == 3:
            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        else:
            gray = image

        # Detect lines
        horizontal_lines, vertical_lines = self._detect_lines(gray)

        if horizontal_lines is None or vertical_lines is None:
            logger.warning("Failed to detect table lines")
            return []

        # Find intersections to build grid
        cells = self._build_cell_grid(horizontal_lines, vertical_lines, gray.shape)

        # Convert to absolute coordinates
        absolute_cells = []
        for cell in cells:
            abs_cell = [
                cell[0] + offset_x,
                cell[1] + offset_y,
                cell[2] + offset_x,
                cell[3] + offset_y
            ]
            absolute_cells.append(abs_cell)

        logger.info(f"[CV] Detected {len(absolute_cells)} cells from table lines")
        return absolute_cells

    def _detect_lines(
        self,
        gray: np.ndarray
    ) -> Tuple[Optional[np.ndarray], Optional[np.ndarray]]:
        """
        Detect horizontal and vertical lines using morphological operations.

        Args:
            gray: Grayscale image

        Returns:
            Tuple of (horizontal_lines_mask, vertical_lines_mask)
        """
        # Adaptive threshold for better line detection
        binary = cv2.adaptiveThreshold(
            gray, 255,
            cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY_INV,
            11, 2
        )

        # Detect horizontal lines
        h_kernel_length = max(self.min_line_length, gray.shape[1] // 30)
        horizontal_kernel = cv2.getStructuringElement(
            cv2.MORPH_RECT, (h_kernel_length, 1)
        )
        horizontal_lines = cv2.morphologyEx(
            binary, cv2.MORPH_OPEN, horizontal_kernel, iterations=2
        )

        # Detect vertical lines
        v_kernel_length = max(self.min_line_length, gray.shape[0] // 30)
        vertical_kernel = cv2.getStructuringElement(
            cv2.MORPH_RECT, (1, v_kernel_length)
        )
        vertical_lines = cv2.morphologyEx(
            binary, cv2.MORPH_OPEN, vertical_kernel, iterations=2
        )

        return horizontal_lines, vertical_lines

    def _build_cell_grid(
        self,
        horizontal_mask: np.ndarray,
        vertical_mask: np.ndarray,
        image_shape: Tuple[int, int]
    ) -> List[List[float]]:
        """
        Build cell grid from detected line masks.

        Args:
            horizontal_mask: Binary mask of horizontal lines
            vertical_mask: Binary mask of vertical lines
            image_shape: (height, width) of the image

        Returns:
            List of cell bounding boxes
        """
        height, width = image_shape[:2]

        # Combine masks to find table structure
        table_mask = cv2.add(horizontal_mask, vertical_mask)

        # Find contours (cells are enclosed regions)
        contours, hierarchy = cv2.findContours(
            table_mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE
        )

        # Method 1: Use contours to find cells
        cells_from_contours = self._cells_from_contours(contours, hierarchy)

        # Method 2: Use line intersections to build grid
        cells_from_grid = self._cells_from_line_intersections(
            horizontal_mask, vertical_mask, height, width
        )

        # Use whichever method found more valid cells
        if len(cells_from_grid) >= len(cells_from_contours):
            return cells_from_grid
        return cells_from_contours

    def _cells_from_contours(
        self,
        contours,
        hierarchy
    ) -> List[List[float]]:
        """Extract cell bounding boxes from contours."""
        cells = []

        for i, contour in enumerate(contours):
            x, y, w, h = cv2.boundingRect(contour)

            # Filter by minimum size
            if w >= self.min_cell_width and h >= self.min_cell_height:
                # Check if this is an inner contour (cell) not the outer table
                if hierarchy is not None and hierarchy[0][i][3] != -1:
                    cells.append([float(x), float(y), float(x + w), float(y + h)])

        return cells

    def _cells_from_line_intersections(
        self,
        horizontal_mask: np.ndarray,
        vertical_mask: np.ndarray,
        height: int,
        width: int
    ) -> List[List[float]]:
        """Build cells from line intersections (grid-based approach)."""
        # Find horizontal line y-coordinates
        h_projection = np.sum(horizontal_mask, axis=1)
        h_lines = self._find_line_positions(h_projection, min_gap=self.min_cell_height)

        # Find vertical line x-coordinates
        v_projection = np.sum(vertical_mask, axis=0)
        v_lines = self._find_line_positions(v_projection, min_gap=self.min_cell_width)

        if len(h_lines) < 2 or len(v_lines) < 2:
            logger.debug(f"Insufficient lines: {len(h_lines)} horizontal, {len(v_lines)} vertical")
            return []

        # Build cells from grid
        cells = []
        for i in range(len(h_lines) - 1):
            for j in range(len(v_lines) - 1):
                y1, y2 = h_lines[i], h_lines[i + 1]
                x1, x2 = v_lines[j], v_lines[j + 1]

                # Validate cell size
                if (x2 - x1) >= self.min_cell_width and (y2 - y1) >= self.min_cell_height:
                    cells.append([float(x1), float(y1), float(x2), float(y2)])

        return cells

    def _find_line_positions(
        self,
        projection: np.ndarray,
        min_gap: int
    ) -> List[int]:
        """
        Find line positions from projection profile.

        Args:
            projection: 1D array of pixel sums
            min_gap: Minimum gap between lines

        Returns:
            List of line positions
        """
        # Threshold to find peaks (lines)
        threshold = np.max(projection) * 0.3
        peaks = projection > threshold

        # Find transitions (line positions)
        positions = []
        in_peak = False
        peak_start = 0

        for i, is_peak in enumerate(peaks):
            if is_peak and not in_peak:
                peak_start = i
                in_peak = True
            elif not is_peak and in_peak:
                # End of peak - use center
                peak_center = (peak_start + i) // 2
                if not positions or (peak_center - positions[-1]) >= min_gap:
                    positions.append(peak_center)
                in_peak = False

        # Handle a peak that runs to the end of the projection (e.g. a border line)
        if in_peak:
            peak_center = (peak_start + len(peaks)) // 2
            if not positions or (peak_center - positions[-1]) >= min_gap:
                positions.append(peak_center)

        return positions

    def detect_and_merge_with_ml(
        self,
        image: np.ndarray,
        table_bbox: List[float],
        ml_cell_boxes: List[List[float]]
    ) -> List[List[float]]:
        """
        Detect cells using CV and merge/validate with ML-detected boxes.

        CV detection is used as the primary source for wired tables,
        with ML boxes used to fill gaps or validate.

        Args:
            image: Input image
            table_bbox: Table bounding box [x1, y1, x2, y2]
            ml_cell_boxes: Cell boxes from ML model (RT-DETR-L)

        Returns:
            Merged/validated cell boxes
        """
        cv_cells = self.detect_cells(image, table_bbox)

        if not cv_cells:
            # CV detection failed, fall back to ML
            logger.info("[CV] No cells detected by CV, using ML cells")
            return ml_cell_boxes

        if not ml_cell_boxes:
            # Only CV cells available
            return cv_cells

        # Validate: CV should find structured grid
        # If CV found significantly fewer cells, there might be merged cells
        cv_count = len(cv_cells)
        ml_count = len(ml_cell_boxes)

        logger.info(f"[CV] CV detected {cv_count} cells, ML detected {ml_count} cells")

        # For wired tables, prefer CV detection (cleaner grid)
        if cv_count >= ml_count * 0.5:
            # CV found reasonable number of cells
            return cv_cells
        else:
            # CV might have missed cells (possibly due to merged cells)
            # Try to use ML boxes that don't overlap with CV cells
            merged = list(cv_cells)
            for ml_box in ml_cell_boxes:
                if not self._has_significant_overlap(ml_box, cv_cells):
                    merged.append(ml_box)
            return merged

    def _has_significant_overlap(
        self,
        box: List[float],
        boxes: List[List[float]],
        threshold: float = 0.5
    ) -> bool:
        """Check if box significantly overlaps with any box in the list."""
        for other in boxes:
            iou = self._calculate_iou(box, other)
            if iou > threshold:
                return True
        return False

    def _calculate_iou(
        self,
        box1: List[float],
        box2: List[float]
    ) -> float:
        """Calculate Intersection over Union of two boxes."""
        x1 = max(box1[0], box2[0])
        y1 = max(box1[1], box2[1])
        x2 = min(box1[2], box2[2])
        y2 = min(box1[3], box2[3])

        if x2 <= x1 or y2 <= y1:
            return 0.0

        intersection = (x2 - x1) * (y2 - y1)
        area1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
        area2 = (box2[2] - box2[0]) * (box2[3] - box2[1])
        union = area1 + area2 - intersection

        return intersection / union if union > 0 else 0.0


def load_image(image_path: str) -> Optional[np.ndarray]:
    """Load image from path."""
    path = Path(image_path)
    if not path.exists():
        logger.error(f"Image not found: {image_path}")
        return None
    return cv2.imread(str(path))
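

# --- Usage sketch (illustrative, not invoked by the pipeline) ---
# A minimal example of how CVTableDetector could be exercised on its own,
# assuming an input image and a table bounding box are already available.
# The file names and bbox values below are hypothetical placeholders.
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    detector = CVTableDetector(min_line_length=30, min_cell_width=20, min_cell_height=15)
    page = load_image("sample_table_page.png")  # hypothetical input path
    if page is not None:
        # In the real pipeline the table bbox would come from the upstream table detector
        cells = detector.detect_cells(page, table_bbox=[50.0, 100.0, 800.0, 600.0])
        # Draw detected cells for quick visual inspection
        for x1, y1, x2, y2 in cells:
            cv2.rectangle(page, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 1)
        cv2.imwrite("cells_debug.png", page)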