feat: add table detection options and scan artifact removal

- Add TableDetectionSelector component for wired/wireless/region detection
- Add CV-based table line detector module (disabled due to poor performance)
- Add scan artifact removal preprocessing step (removes faint horizontal lines; see the sketch below)
- Add PreprocessingConfig schema with remove_scan_artifacts option
- Update frontend PreprocessingSettings with scan artifact toggle
- Integrate table detection config into ProcessingPage
- Archive extract-table-cell-boxes proposal
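
A minimal, hypothetical sketch of the scan-artifact removal idea, for illustration only (this is not the preprocessing code shipped in this commit; the function name and thresholds are assumptions): long, thin horizontal runs are isolated with a wide morphological kernel and inpainted away. Unlike the real step, this sketch makes no attempt to distinguish faint artifacts from genuine table borders.

import cv2
import numpy as np

def remove_horizontal_scan_artifacts(image: np.ndarray) -> np.ndarray:
    """Illustrative only: mask long horizontal runs and inpaint over them."""
    bgr = image if image.ndim == 3 else cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
    gray = cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY)
    binary = cv2.adaptiveThreshold(
        gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 15, 10
    )
    # A kernel far wider than it is tall keeps only long, near-horizontal runs
    kernel_width = max(40, gray.shape[1] // 20)
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (kernel_width, 1))
    line_mask = cv2.morphologyEx(binary, cv2.MORPH_OPEN, kernel)
    # Dilate slightly so the anti-aliased edges of each line are covered too
    line_mask = cv2.dilate(line_mask, np.ones((3, 3), np.uint8))
    # Fill the masked pixels from the surrounding background
    return cv2.inpaint(bgr, line_mask, 3, cv2.INPAINT_TELEA)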

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
Author: egg
Date: 2025-11-30 13:21:50 +08:00
Parent: f5a2c8a750
Commit: 95ae1f1bdb
17 changed files with 1906 additions and 344 deletions

@@ -0,0 +1,362 @@
"""
CV-based Table Line Detection Module
Uses OpenCV morphological operations to detect table lines and extract cell boundaries.
This is intended to be more reliable for wired/bordered tables than ML-based cell detection.
"""
import cv2
import numpy as np
from typing import List, Tuple, Optional
from pathlib import Path
import logging
logger = logging.getLogger(__name__)
class CVTableDetector:
"""
Detects table cell boundaries using computer vision techniques.
Works by detecting horizontal and vertical lines in the image.
"""
def __init__(
self,
min_line_length: int = 30,
line_thickness: int = 2,
min_cell_width: int = 20,
min_cell_height: int = 15
):
"""
Initialize the CV table detector.
Args:
min_line_length: Minimum length of lines to detect (in pixels)
line_thickness: Expected thickness of table lines (stored but not currently used)
min_cell_width: Minimum width of a valid cell
min_cell_height: Minimum height of a valid cell
"""
self.min_line_length = min_line_length
self.line_thickness = line_thickness
self.min_cell_width = min_cell_width
self.min_cell_height = min_cell_height
def detect_cells(
self,
image: np.ndarray,
table_bbox: Optional[List[float]] = None
) -> List[List[float]]:
"""
Detect cell boundaries in a table image.
Args:
image: Input image (BGR format)
table_bbox: Optional [x1, y1, x2, y2] to crop table region first
Returns:
List of cell bounding boxes [[x1, y1, x2, y2], ...]
"""
# Crop to table region if bbox provided
offset_x, offset_y = 0, 0
if table_bbox:
x1, y1, x2, y2 = [int(v) for v in table_bbox]
offset_x, offset_y = x1, y1
image = image[y1:y2, x1:x2]
if image.size == 0:
logger.warning("Empty image after cropping")
return []
# Convert to grayscale
if len(image.shape) == 3:
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
else:
gray = image
# Detect lines
horizontal_lines, vertical_lines = self._detect_lines(gray)
if horizontal_lines is None or vertical_lines is None:
logger.warning("Failed to detect table lines")
return []
# Find intersections to build grid
cells = self._build_cell_grid(horizontal_lines, vertical_lines, gray.shape)
# Convert to absolute coordinates
absolute_cells = []
for cell in cells:
abs_cell = [
cell[0] + offset_x,
cell[1] + offset_y,
cell[2] + offset_x,
cell[3] + offset_y
]
absolute_cells.append(abs_cell)
logger.info(f"[CV] Detected {len(absolute_cells)} cells from table lines")
return absolute_cells
def _detect_lines(
self,
gray: np.ndarray
) -> Tuple[Optional[np.ndarray], Optional[np.ndarray]]:
"""
Detect horizontal and vertical lines using morphological operations.
Args:
gray: Grayscale image
Returns:
Tuple of (horizontal_lines_mask, vertical_lines_mask)
"""
# Adaptive threshold for better line detection
binary = cv2.adaptiveThreshold(
gray, 255,
cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
cv2.THRESH_BINARY_INV,
11, 2
)
# Detect horizontal lines
h_kernel_length = max(self.min_line_length, gray.shape[1] // 30)
horizontal_kernel = cv2.getStructuringElement(
cv2.MORPH_RECT, (h_kernel_length, 1)
)
horizontal_lines = cv2.morphologyEx(
binary, cv2.MORPH_OPEN, horizontal_kernel, iterations=2
)
# Detect vertical lines
v_kernel_length = max(self.min_line_length, gray.shape[0] // 30)
vertical_kernel = cv2.getStructuringElement(
cv2.MORPH_RECT, (1, v_kernel_length)
)
vertical_lines = cv2.morphologyEx(
binary, cv2.MORPH_OPEN, vertical_kernel, iterations=2
)
return horizontal_lines, vertical_lines
def _build_cell_grid(
self,
horizontal_mask: np.ndarray,
vertical_mask: np.ndarray,
image_shape: Tuple[int, int]
) -> List[List[float]]:
"""
Build cell grid from detected line masks.
Args:
horizontal_mask: Binary mask of horizontal lines
vertical_mask: Binary mask of vertical lines
image_shape: (height, width) of the image
Returns:
List of cell bounding boxes
"""
height, width = image_shape[:2]
# Combine masks to find table structure
table_mask = cv2.add(horizontal_mask, vertical_mask)
# Find contours (cells are enclosed regions)
contours, hierarchy = cv2.findContours(
table_mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE
)
# Method 1: Use contours to find cells
cells_from_contours = self._cells_from_contours(contours, hierarchy)
# Method 2: Use line intersections to build grid
cells_from_grid = self._cells_from_line_intersections(
horizontal_mask, vertical_mask, height, width
)
# Use whichever method found more valid cells
if len(cells_from_grid) >= len(cells_from_contours):
return cells_from_grid
return cells_from_contours
def _cells_from_contours(
self,
contours,
hierarchy
) -> List[List[float]]:
"""Extract cell bounding boxes from contours."""
cells = []
for i, contour in enumerate(contours):
x, y, w, h = cv2.boundingRect(contour)
# Filter by minimum size
if w >= self.min_cell_width and h >= self.min_cell_height:
# Check if this is an inner contour (cell) not the outer table
if hierarchy is not None and hierarchy[0][i][3] != -1:
cells.append([float(x), float(y), float(x + w), float(y + h)])
return cells
def _cells_from_line_intersections(
self,
horizontal_mask: np.ndarray,
vertical_mask: np.ndarray,
height: int,
width: int
) -> List[List[float]]:
"""Build cells from line intersections (grid-based approach)."""
# Find horizontal line y-coordinates
h_projection = np.sum(horizontal_mask, axis=1)
h_lines = self._find_line_positions(h_projection, min_gap=self.min_cell_height)
# Find vertical line x-coordinates
v_projection = np.sum(vertical_mask, axis=0)
v_lines = self._find_line_positions(v_projection, min_gap=self.min_cell_width)
if len(h_lines) < 2 or len(v_lines) < 2:
logger.debug(f"Insufficient lines: {len(h_lines)} horizontal, {len(v_lines)} vertical")
return []
# Build cells from grid
cells = []
for i in range(len(h_lines) - 1):
for j in range(len(v_lines) - 1):
y1, y2 = h_lines[i], h_lines[i + 1]
x1, x2 = v_lines[j], v_lines[j + 1]
# Validate cell size
if (x2 - x1) >= self.min_cell_width and (y2 - y1) >= self.min_cell_height:
cells.append([float(x1), float(y1), float(x2), float(y2)])
return cells
def _find_line_positions(
self,
projection: np.ndarray,
min_gap: int
) -> List[int]:
"""
Find line positions from projection profile.
Args:
projection: 1D array of pixel sums
min_gap: Minimum gap between lines
Returns:
List of line positions
"""
# Threshold to find peaks (lines)
threshold = np.max(projection) * 0.3
peaks = projection > threshold
# Find transitions (line positions)
positions = []
in_peak = False
peak_start = 0
for i, is_peak in enumerate(peaks):
if is_peak and not in_peak:
peak_start = i
in_peak = True
elif not is_peak and in_peak:
# End of peak - use center
peak_center = (peak_start + i) // 2
if not positions or (peak_center - positions[-1]) >= min_gap:
positions.append(peak_center)
in_peak = False
# After the loop, close out a peak that runs to the end of the projection
if in_peak:
peak_center = (peak_start + len(peaks)) // 2
if not positions or (peak_center - positions[-1]) >= min_gap:
positions.append(peak_center)
return positions
def detect_and_merge_with_ml(
self,
image: np.ndarray,
table_bbox: List[float],
ml_cell_boxes: List[List[float]]
) -> List[List[float]]:
"""
Detect cells using CV and merge/validate with ML-detected boxes.
CV detection is used as the primary source for wired tables,
with ML boxes used to fill gaps or validate.
Args:
image: Input image
table_bbox: Table bounding box [x1, y1, x2, y2]
ml_cell_boxes: Cell boxes from ML model (RT-DETR-L)
Returns:
Merged/validated cell boxes
"""
cv_cells = self.detect_cells(image, table_bbox)
if not cv_cells:
# CV detection failed, fall back to ML
logger.info("[CV] No cells detected by CV, using ML cells")
return ml_cell_boxes
if not ml_cell_boxes:
# Only CV cells available
return cv_cells
# Validate: CV should find structured grid
# If CV found significantly fewer cells, there might be merged cells
cv_count = len(cv_cells)
ml_count = len(ml_cell_boxes)
logger.info(f"[CV] CV detected {cv_count} cells, ML detected {ml_count} cells")
# For wired tables, prefer CV detection (cleaner grid)
if cv_count >= ml_count * 0.5:
# CV found reasonable number of cells
return cv_cells
else:
# CV might have missed cells (possibly due to merged cells)
# Try to use ML boxes that don't overlap with CV cells
merged = list(cv_cells)
for ml_box in ml_cell_boxes:
if not self._has_significant_overlap(ml_box, cv_cells):
merged.append(ml_box)
return merged
def _has_significant_overlap(
self,
box: List[float],
boxes: List[List[float]],
threshold: float = 0.5
) -> bool:
"""Check if box significantly overlaps with any box in the list."""
for other in boxes:
iou = self._calculate_iou(box, other)
if iou > threshold:
return True
return False
def _calculate_iou(
self,
box1: List[float],
box2: List[float]
) -> float:
"""Calculate Intersection over Union of two boxes."""
x1 = max(box1[0], box2[0])
y1 = max(box1[1], box2[1])
x2 = min(box1[2], box2[2])
y2 = min(box1[3], box2[3])
if x2 <= x1 or y2 <= y1:
return 0.0
intersection = (x2 - x1) * (y2 - y1)
area1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
area2 = (box2[2] - box2[0]) * (box2[3] - box2[1])
union = area1 + area2 - intersection
return intersection / union if union > 0 else 0.0
def load_image(image_path: str) -> Optional[np.ndarray]:
"""Load image from path."""
path = Path(image_path)
if not path.exists():
logger.error(f"Image not found: {image_path}")
return None
return cv2.imread(str(path))
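
A minimal usage sketch for the detector above, assuming the module is importable as cv_table_detector (the diff does not show the file path, so the import path, file name, and example bounding box are assumptions):

# Hypothetical usage of CVTableDetector as defined in this file
from cv_table_detector import CVTableDetector, load_image

image = load_image("page_scan.png")
if image is not None:
    detector = CVTableDetector(min_cell_width=25, min_cell_height=18)
    # Restrict detection to a known table region [x1, y1, x2, y2] in pixels
    cells = detector.detect_cells(image, table_bbox=[120.0, 340.0, 1480.0, 920.0])
    for x1, y1, x2, y2 in cells:
        print(f"cell: ({x1:.0f}, {y1:.0f}) -> ({x2:.0f}, {y2:.0f})")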