feat: implement layout preprocessing backend
Backend implementation for add-layout-preprocessing proposal:
- Add LayoutPreprocessingService with CLAHE, sharpen, binarize
- Add auto-detection: analyze_image_quality() for contrast/edge metrics
- Integrate preprocessing into OCR pipeline (analyze_layout)
- Add Preview API: POST /api/v2/tasks/{id}/preview/preprocessing
- Add config options: layout_preprocessing_mode, thresholds
- Add schemas: PreprocessingConfig, PreprocessingPreviewResponse
Preprocessing only affects layout detection input.
Original images preserved for element extraction.
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -35,6 +35,11 @@ from app.schemas.task import (
|
||||
ProcessingMetadata,
|
||||
TaskResponseWithMetadata,
|
||||
ExportOptions,
|
||||
PreprocessingModeEnum,
|
||||
PreprocessingConfig,
|
||||
PreprocessingPreviewRequest,
|
||||
PreprocessingPreviewResponse,
|
||||
ImageQualityMetrics,
|
||||
)
|
||||
from app.services.task_service import task_service
|
||||
from app.services.file_access_service import file_access_service
|
||||
@@ -1131,3 +1136,193 @@ async def download_unified(
|
||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
detail=f"Failed to download: {str(e)}"
|
||||
)
|
||||
|
||||
|
||||
# ===== Preprocessing Preview Endpoints =====
|
||||
|
||||
@router.post("/{task_id}/preview/preprocessing", response_model=PreprocessingPreviewResponse, summary="Preview preprocessing effect")
async def preview_preprocessing(
    task_id: str,
    request: PreprocessingPreviewRequest,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """
    Preview the effect of image preprocessing before OCR processing.

    Shows side-by-side comparison of original and preprocessed images,
    along with image quality metrics and auto-detected configuration.

    - **task_id**: Task UUID
    - **page**: Page number to preview (1-based)
    - **mode**: Preprocessing mode ('auto', 'manual', 'disabled')
    - **config**: Manual preprocessing config (only used when mode='manual')

    The original and preprocessed page images are written as PNG files to
    the task's `preview` directory, and the response carries URLs that
    serve them through the `preview/image` endpoint.

    Errors:

    - **404**: task, task file record, or stored source file not found
    - **400**: requested page does not exist in the source document
    - **500**: any other failure while rendering or preprocessing
    """
    # Imaging dependencies are imported at call time rather than at module
    # import time.
    from pdf2image import convert_from_path
    from PIL import Image
    from app.services.layout_preprocessing_service import get_layout_preprocessing_service

    # Tracked outside the try block so the finally clause can release them on
    # every exit path, including early HTTP errors.
    original_image = None
    preprocessed_image = None

    try:
        # Look the task up scoped to the current user; a falsy result is
        # reported as 404.
        task = task_service.get_task_by_id(
            db=db,
            task_id=task_id,
            user_id=current_user.id
        )
        if not task:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail="Task not found"
            )

        # Only the first file attached to the task is previewed.
        task_file = db.query(TaskFile).filter(TaskFile.task_id == task.id).first()
        if not task_file:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail="Task file not found"
            )

        file_path = Path(task_file.stored_path)
        if not file_path.exists():
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail="Source file not found"
            )

        # NOTE(review): rendering and preprocessing below run synchronously
        # inside an async handler; consider offloading to a worker thread if
        # request latency under load becomes a problem.
        page_num = request.page
        if file_path.suffix.lower() == '.pdf':
            # Render only the requested page of the PDF.
            images = convert_from_path(
                str(file_path),
                first_page=page_num,
                last_page=page_num,
                dpi=150
            )
            if not images:
                raise HTTPException(
                    status_code=status.HTTP_400_BAD_REQUEST,
                    detail=f"Page {page_num} not found in PDF"
                )
            original_image = images[0]
        else:
            # A plain image file is treated as a one-page document.
            if page_num != 1:
                raise HTTPException(
                    status_code=status.HTTP_400_BAD_REQUEST,
                    detail="Single image file only has page 1"
                )
            original_image = Image.open(file_path)

        preprocessing_service = get_layout_preprocessing_service()
        preprocessed_image, preprocess_result = preprocessing_service.preprocess_to_pil(
            original_image,
            mode=request.mode,
            config=request.config
        )

        # task_id is used in a filesystem path only after the task lookup
        # above has succeeded.
        preview_dir = Path(settings.result_dir) / task_id / "preview"
        preview_dir.mkdir(parents=True, exist_ok=True)

        # File names must match the pattern get_preview_image() reads back:
        # page_{page}_{type}.png
        original_path = preview_dir / f"page_{page_num}_original.png"
        preprocessed_path = preview_dir / f"page_{page_num}_preprocessed.png"

        original_image.save(str(original_path), "PNG")
        preprocessed_image.save(str(preprocessed_path), "PNG")

        # Relative URLs pointing at the preview image endpoint.
        base_url = f"/api/v2/tasks/{task_id}/preview/image"
        original_url = f"{base_url}?type=original&page={page_num}"
        preprocessed_url = f"{base_url}?type=preprocessed&page={page_num}"

        return PreprocessingPreviewResponse(
            original_url=original_url,
            preprocessed_url=preprocessed_url,
            quality_metrics=preprocess_result.quality_metrics,
            auto_config=preprocess_result.config_used,
            mode_used=request.mode
        )

    except HTTPException:
        # Deliberate HTTP errors pass through untouched.
        raise
    except Exception as e:
        logger.exception("Failed to preview preprocessing for task %s", task_id)
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to preview preprocessing: {str(e)}"
        ) from e
    finally:
        # Release file handles and pixel buffers explicitly instead of
        # waiting for garbage collection. The identity check avoids closing
        # the same object twice if the service returned its input unchanged.
        if preprocessed_image is not None and preprocessed_image is not original_image:
            preprocessed_image.close()
        if original_image is not None:
            original_image.close()
|
||||
|
||||
|
||||
@router.get("/{task_id}/preview/image", summary="Get preview image")
async def get_preview_image(
    task_id: str,
    # `type` shadows the builtin, but it is the public query-parameter name
    # used in the URLs built by the preprocessing preview endpoint, so it
    # is kept as-is.
    type: str = Query(..., description="Image type: 'original' or 'preprocessed'"),
    page: int = Query(1, ge=1, description="Page number"),
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """
    Get a preview image (original or preprocessed).

    - **task_id**: Task UUID
    - **type**: Image type ('original' or 'preprocessed')
    - **page**: Page number

    Serves the PNG stored at `preview/page_{page}_{type}.png` under the
    task's result directory.

    Errors:

    - **404**: task not found, or the preview image has not been generated
    - **400**: `type` is not 'original' or 'preprocessed'
    - **500**: any other failure
    """
    try:
        # Look the task up scoped to the current user before touching the
        # filesystem; a falsy result is reported as 404.
        task = task_service.get_task_by_id(
            db=db,
            task_id=task_id,
            user_id=current_user.id
        )
        if not task:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail="Task not found"
            )

        # Restricting `type` to the two known values also keeps arbitrary
        # text out of the file name built below.
        if type not in ('original', 'preprocessed'):
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail="Invalid type. Must be 'original' or 'preprocessed'"
            )

        preview_dir = Path(settings.result_dir) / task_id / "preview"
        image_filename = f"page_{page}_{type}.png"
        image_path = preview_dir / image_filename

        if not image_path.exists():
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail="Preview image not found. Please call preview/preprocessing first."
            )

        return FileResponse(
            path=str(image_path),
            media_type="image/png",
            filename=image_filename
        )

    except HTTPException:
        # Deliberate HTTP errors pass through untouched.
        raise
    except Exception as e:
        logger.exception("Failed to get preview image for task %s", task_id)
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to get preview image: {str(e)}"
        ) from e
|
||||
|
||||
Reference in New Issue
Block a user