feat: add table detection options and scan artifact removal
- Add TableDetectionSelector component for wired/wireless/region detection
- Add CV-based table line detector module (disabled due to poor performance)
- Add scan artifact removal preprocessing step (removes faint horizontal lines)
- Add PreprocessingConfig schema with remove_scan_artifacts option
- Update frontend PreprocessingSettings with scan artifact toggle
- Integrate table detection config into ProcessingPage
- Archive extract-table-cell-boxes proposal

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -212,7 +212,8 @@ class GapFillingService:
|
||||
def _is_region_covered(
|
||||
self,
|
||||
region: TextRegion,
|
||||
pp_structure_elements: List[DocumentElement]
|
||||
pp_structure_elements: List[DocumentElement],
|
||||
skip_table_coverage: bool = True
|
||||
) -> bool:
|
||||
"""
|
||||
Check if a raw OCR region is covered by any PP-StructureV3 element.
|
||||
@@ -220,6 +221,9 @@ class GapFillingService:
|
||||
Args:
|
||||
region: Raw OCR text region
|
||||
pp_structure_elements: List of PP-StructureV3 elements
|
||||
skip_table_coverage: If True, don't consider TABLE elements as covering
|
||||
(allows raw OCR text inside tables to pass through
|
||||
for layered rendering)
|
||||
|
||||
Returns:
|
||||
True if the region is covered
|
||||
@@ -228,6 +232,12 @@ class GapFillingService:
|
||||
region_bbox = region.normalized_bbox
|
||||
|
||||
for element in pp_structure_elements:
|
||||
# Skip TABLE elements when checking coverage
|
||||
# This allows raw OCR text inside tables to be preserved
|
||||
# PDF generator will render: table borders + raw text positions
|
||||
if skip_table_coverage and element.type == ElementType.TABLE:
|
||||
continue
|
||||
|
||||
elem_bbox = (
|
||||
element.bbox.x0, element.bbox.y0,
|
||||
element.bbox.x1, element.bbox.y1
|
||||
|
||||
Reference in New Issue
Block a user