feat: add table detection options and scan artifact removal

- Add TableDetectionSelector component for wired/wireless/region detection
- Add CV-based table line detector module (disabled due to poor performance)
- Add scan artifact removal preprocessing step (removes faint horizontal lines)
- Add PreprocessingConfig schema with remove_scan_artifacts option
- Update frontend PreprocessingSettings with scan artifact toggle
- Integrate table detection config into ProcessingPage
- Archive extract-table-cell-boxes proposal

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
egg
2025-11-30 13:21:50 +08:00
parent f5a2c8a750
commit 95ae1f1bdb
17 changed files with 1906 additions and 344 deletions

View File

@@ -40,6 +40,7 @@ from app.schemas.task import (
PreprocessingPreviewRequest,
PreprocessingPreviewResponse,
ImageQualityMetrics,
TableDetectionConfig,
)
from app.services.task_service import task_service
from app.services.file_access_service import file_access_service
@@ -75,7 +76,8 @@ def process_task_ocr(
language: str = 'ch',
layout_model: Optional[str] = "chinese",
preprocessing_mode: Optional[str] = "auto",
preprocessing_config: Optional[dict] = None
preprocessing_config: Optional[dict] = None,
table_detection_config: Optional[dict] = None
):
"""
Background task to process OCR for a task with dual-track support.
@@ -94,6 +96,7 @@ def process_task_ocr(
layout_model: Layout detection model ('chinese', 'default', 'cdla')
preprocessing_mode: Preprocessing mode ('auto', 'manual', 'disabled')
preprocessing_config: Manual preprocessing config dict (contrast, sharpen, binarize)
table_detection_config: Table detection config dict (enable_wired_table, enable_wireless_table, enable_region_detection)
"""
from app.core.database import SessionLocal
from app.models.task import Task
@@ -106,6 +109,7 @@ def process_task_ocr(
logger.info(f"Starting OCR processing for task {task_id}, file: {filename}")
logger.info(f"Processing options: dual_track={use_dual_track}, force_track={force_track}, lang={language}")
logger.info(f"Preprocessing options: mode={preprocessing_mode}, config={preprocessing_config}")
logger.info(f"Table detection options: {table_detection_config}")
# Convert preprocessing parameters to proper types
preprocess_mode_enum = None
@@ -122,6 +126,15 @@ def process_task_ocr(
binarize=preprocessing_config.get("binarize", False)
)
# Convert table detection config to object
table_det_config_obj = None
if table_detection_config:
table_det_config_obj = TableDetectionConfig(
enable_wired_table=table_detection_config.get("enable_wired_table", True),
enable_wireless_table=table_detection_config.get("enable_wireless_table", True),
enable_region_detection=table_detection_config.get("enable_region_detection", True)
)
# Get task directly by database ID (bypass user isolation for background task)
task = db.query(Task).filter(Task.id == task_db_id).first()
if not task:
@@ -170,7 +183,8 @@ def process_task_ocr(
force_track=force_track,
layout_model=layout_model,
preprocessing_mode=preprocess_mode_enum,
preprocessing_config=preprocess_config_obj
preprocessing_config=preprocess_config_obj,
table_detection_config=table_det_config_obj
)
else:
# Fall back to traditional processing (no force_track support)
@@ -181,7 +195,8 @@ def process_task_ocr(
output_dir=result_dir,
layout_model=layout_model,
preprocessing_mode=preprocess_mode_enum,
preprocessing_config=preprocess_config_obj
preprocessing_config=preprocess_config_obj,
table_detection_config=table_det_config_obj
)
# Calculate processing time
@@ -754,6 +769,7 @@ async def start_task(
- **force_track**: Force specific processing track ('ocr' or 'direct')
- **language**: OCR language code (default: 'ch')
- **layout_model**: Layout detection model ('chinese', 'default', 'cdla')
- **table_detection**: Table detection config (enable_wired_table, enable_wireless_table, enable_region_detection)
"""
try:
# Parse processing options with defaults
@@ -781,6 +797,16 @@ async def start_task(
}
logger.info(f"Preprocessing: mode={preprocessing_mode}, config={preprocessing_config}")
# Extract table detection options
table_detection_config = None
if options.table_detection:
table_detection_config = {
"enable_wired_table": options.table_detection.enable_wired_table,
"enable_wireless_table": options.table_detection.enable_wireless_table,
"enable_region_detection": options.table_detection.enable_region_detection
}
logger.info(f"Table detection: {table_detection_config}")
# Get task details
task = task_service.get_task_by_id(
db=db,
@@ -829,11 +855,12 @@ async def start_task(
language=language,
layout_model=layout_model,
preprocessing_mode=preprocessing_mode,
preprocessing_config=preprocessing_config
preprocessing_config=preprocessing_config,
table_detection_config=table_detection_config
)
logger.info(f"Started OCR processing task {task_id} for user {current_user.email}")
logger.info(f"Options: dual_track={use_dual_track}, force_track={force_track}, lang={language}, layout_model={layout_model}, preprocessing={preprocessing_mode}")
logger.info(f"Options: dual_track={use_dual_track}, force_track={force_track}, lang={language}, layout_model={layout_model}, preprocessing={preprocessing_mode}, table_detection={table_detection_config}")
return task
except HTTPException: