diff --git a/backend/app/routers/tasks.py b/backend/app/routers/tasks.py index 3504f16..539df1e 100644 --- a/backend/app/routers/tasks.py +++ b/backend/app/routers/tasks.py @@ -73,7 +73,9 @@ def process_task_ocr( use_dual_track: bool = True, force_track: Optional[str] = None, language: str = 'ch', - layout_model: Optional[str] = "chinese" + layout_model: Optional[str] = "chinese", + preprocessing_mode: Optional[str] = "auto", + preprocessing_config: Optional[dict] = None ): """ Background task to process OCR for a task with dual-track support. @@ -90,6 +92,8 @@ def process_task_ocr( force_track: Force specific track ('ocr' or 'direct') language: OCR language code layout_model: Layout detection model ('chinese', 'default', 'cdla') + preprocessing_mode: Preprocessing mode ('auto', 'manual', 'disabled') + preprocessing_config: Manual preprocessing config dict (contrast, sharpen, binarize) """ from app.core.database import SessionLocal from app.models.task import Task @@ -101,6 +105,20 @@ def process_task_ocr( try: logger.info(f"Starting OCR processing for task {task_id}, file: {filename}") logger.info(f"Processing options: dual_track={use_dual_track}, force_track={force_track}, lang={language}") + logger.info(f"Preprocessing options: mode={preprocessing_mode}, config={preprocessing_config}") + + # Convert preprocessing parameters to proper types + preprocess_mode_enum = None + preprocess_config_obj = None + if preprocessing_mode: + preprocess_mode_enum = PreprocessingModeEnum(preprocessing_mode) + if preprocessing_config: + from app.schemas.task import PreprocessingContrastEnum + preprocess_config_obj = PreprocessingConfig( + contrast=PreprocessingContrastEnum(preprocessing_config.get("contrast", "clahe")), + sharpen=preprocessing_config.get("sharpen", True), + binarize=preprocessing_config.get("binarize", False) + ) # Get task directly by database ID (bypass user isolation for background task) task = db.query(Task).filter(Task.id == task_db_id).first() @@ -148,7 +166,9 @@ def process_task_ocr( output_dir=result_dir, use_dual_track=use_dual_track, force_track=force_track, - layout_model=layout_model + layout_model=layout_model, + preprocessing_mode=preprocess_mode_enum, + preprocessing_config=preprocess_config_obj ) else: # Fall back to traditional processing (no force_track support) @@ -157,7 +177,9 @@ def process_task_ocr( lang=language, detect_layout=True, output_dir=result_dir, - layout_model=layout_model + layout_model=layout_model, + preprocessing_mode=preprocess_mode_enum, + preprocessing_config=preprocess_config_obj ) # Calculate processing time @@ -744,6 +766,17 @@ async def start_task( layout_model = options.layout_model.value if options.layout_model else "chinese" logger.info(f"Using layout model: {layout_model}") + # Extract preprocessing options + preprocessing_mode = options.preprocessing_mode.value if options.preprocessing_mode else "auto" + preprocessing_config = None + if options.preprocessing_config: + preprocessing_config = { + "contrast": options.preprocessing_config.contrast.value, + "sharpen": options.preprocessing_config.sharpen, + "binarize": options.preprocessing_config.binarize + } + logger.info(f"Preprocessing: mode={preprocessing_mode}, config={preprocessing_config}") + # Get task details task = task_service.get_task_by_id( db=db, @@ -790,11 +823,13 @@ async def start_task( use_dual_track=use_dual_track, force_track=force_track, language=language, - layout_model=layout_model + layout_model=layout_model, + preprocessing_mode=preprocessing_mode, + preprocessing_config=preprocessing_config ) logger.info(f"Started OCR processing task {task_id} for user {current_user.email}") - logger.info(f"Options: dual_track={use_dual_track}, force_track={force_track}, lang={language}, layout_model={layout_model}") + logger.info(f"Options: dual_track={use_dual_track}, force_track={force_track}, lang={language}, layout_model={layout_model}, preprocessing={preprocessing_mode}") return task except HTTPException: