feat: add table detection options and scan artifact removal
- Add TableDetectionSelector component for wired/wireless/region detection
- Add CV-based table line detector module (disabled due to poor performance)
- Add scan artifact removal preprocessing step (removes faint horizontal lines)
- Add PreprocessingConfig schema with remove_scan_artifacts option
- Update frontend PreprocessingSettings with scan artifact toggle
- Integrate table detection config into ProcessingPage
- Archive extract-table-cell-boxes proposal

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -96,6 +96,35 @@ class PreprocessingConfig(BaseModel):
|
||||
default=False,
|
||||
description="Enable binarization (aggressive, for very low contrast). Not recommended for most documents."
|
||||
)
|
||||
remove_scan_artifacts: bool = Field(
|
||||
default=True,
|
||||
description="Remove horizontal scan line artifacts. Recommended for scanned documents to prevent misdetection of scanner light bar lines as table borders."
|
||||
)
|
||||
|
||||
|
||||
class TableDetectionConfig(BaseModel):
    """Switches for the table detection modes of PP-StructureV3.

    PP-StructureV3 uses specialized models for different table types, and
    each flag below turns one of those modes on or off:

    - ``enable_wired_table``: wired (bordered) tables that have visible
      cell borders/grid lines.
    - ``enable_wireless_table``: wireless (borderless) tables that have no
      visible borders and rely on alignment.
    - ``enable_region_detection``: detection of table-like regions for
      better cell structure.

    The flags are independent of one another, so several modes can be
    active at once. Every flag defaults to ``True``.
    """

    enable_wired_table: bool = Field(
        True,
        description=(
            "Enable wired (bordered) table detection. "
            "Best for tables with visible grid lines."
        ),
    )
    enable_wireless_table: bool = Field(
        True,
        description=(
            "Enable wireless (borderless) table detection. "
            "Best for tables without visible borders."
        ),
    )
    enable_region_detection: bool = Field(
        True,
        description=(
            "Enable region detection for better table structure inference."
        ),
    )
|
||||
|
||||
|
||||
class ImageQualityMetrics(BaseModel):
|
||||
@@ -294,6 +323,12 @@ class ProcessingOptions(BaseModel):
|
||||
description="Manual preprocessing config (only used when preprocessing_mode='manual')"
|
||||
)
|
||||
|
||||
# Table detection configuration (OCR track only)
|
||||
table_detection: Optional[TableDetectionConfig] = Field(
|
||||
None,
|
||||
description="Table detection config. If None, all table detection modes are enabled."
|
||||
)
|
||||
|
||||
|
||||
class AnalyzeRequest(BaseModel):
|
||||
"""Document analysis request"""
|
||||
|
||||
Reference in New Issue
Block a user