feat: implement layout preprocessing backend

Backend implementation for add-layout-preprocessing proposal:
- Add LayoutPreprocessingService with CLAHE, sharpen, binarize
- Add auto-detection: analyze_image_quality() for contrast/edge metrics
- Integrate preprocessing into OCR pipeline (analyze_layout)
- Add Preview API: POST /api/v2/tasks/{id}/preview/preprocessing
- Add config options: layout_preprocessing_mode, thresholds
- Add schemas: PreprocessingConfig, PreprocessingPreviewResponse

Preprocessing only affects layout detection input.
Original images preserved for element extraction.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
egg
2025-11-27 15:17:20 +08:00
parent 06a5973f2e
commit ea0dd7456c
7 changed files with 800 additions and 22 deletions

View File

@@ -35,6 +35,11 @@ from app.schemas.task import (
ProcessingMetadata,
TaskResponseWithMetadata,
ExportOptions,
PreprocessingModeEnum,
PreprocessingConfig,
PreprocessingPreviewRequest,
PreprocessingPreviewResponse,
ImageQualityMetrics,
)
from app.services.task_service import task_service
from app.services.file_access_service import file_access_service
@@ -1131,3 +1136,193 @@ async def download_unified(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail=f"Failed to download: {str(e)}"
)
# ===== Preprocessing Preview Endpoints =====
@router.post("/{task_id}/preview/preprocessing", response_model=PreprocessingPreviewResponse, summary="Preview preprocessing effect")
async def preview_preprocessing(
    task_id: str,
    request: PreprocessingPreviewRequest,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """
    Preview the effect of image preprocessing before OCR processing.

    Shows side-by-side comparison of original and preprocessed images,
    along with image quality metrics and auto-detected configuration.

    - **task_id**: Task UUID
    - **page**: Page number to preview (1-based)
    - **mode**: Preprocessing mode ('auto', 'manual', 'disabled')
    - **config**: Manual preprocessing config (only used when mode='manual')
    """
    # Contract summary (kept out of the docstring, which is published as the
    # endpoint description):
    #   Returns: PreprocessingPreviewResponse whose two URLs point at the
    #            GET /{task_id}/preview/image endpoint for the PNGs saved here.
    #   Raises:  404 if the task, its TaskFile row, or the stored file is missing;
    #            400 if the requested page cannot be produced from the source;
    #            500 for any other failure (logged with traceback).
    # Heavy optional dependencies are imported lazily, only when a preview is requested.
    from pdf2image import convert_from_path
    from PIL import Image
    from app.services.layout_preprocessing_service import get_layout_preprocessing_service

    try:
        # Look the task up scoped to the current user's id.
        task = task_service.get_task_by_id(
            db=db,
            task_id=task_id,
            user_id=current_user.id
        )
        if not task:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail="Task not found"
            )

        # Only the first file attached to the task is previewed.
        task_file = db.query(TaskFile).filter(TaskFile.task_id == task.id).first()
        if not task_file:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail="Task file not found"
            )

        file_path = Path(task_file.stored_path)
        if not file_path.exists():
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail="Source file not found"
            )

        # Obtain the requested page as a PIL image.
        page_num = request.page
        if file_path.suffix.lower() == '.pdf':
            # Rasterize just the requested page rather than the whole document.
            images = convert_from_path(
                str(file_path),
                first_page=page_num,
                last_page=page_num,
                dpi=150
            )
            if not images:
                raise HTTPException(
                    status_code=status.HTTP_400_BAD_REQUEST,
                    detail=f"Page {page_num} not found in PDF"
                )
            original_image = images[0]
        else:
            # A plain image file is treated as a one-page document.
            if page_num != 1:
                raise HTTPException(
                    status_code=status.HTTP_400_BAD_REQUEST,
                    detail="Single image file only has page 1"
                )
            # Read the pixel data while the file is open, then release the
            # handle deterministically; the loaded image stays usable after
            # the with-block, so nothing is left open until garbage collection.
            with Image.open(file_path) as opened_image:
                opened_image.load()
            original_image = opened_image

        # Run the layout preprocessing with the caller's mode/config.
        preprocessing_service = get_layout_preprocessing_service()
        preprocessed_image, preprocess_result = preprocessing_service.preprocess_to_pil(
            original_image,
            mode=request.mode,
            config=request.config
        )

        # Preview PNGs live under <result_dir>/<task_id>/preview/.
        preview_dir = Path(settings.result_dir) / task_id / "preview"
        preview_dir.mkdir(parents=True, exist_ok=True)

        # File names follow the pattern page_<n>_<original|preprocessed>.png,
        # which is what the GET preview/image endpoint looks up.
        original_filename = f"page_{page_num}_original.png"
        preprocessed_filename = f"page_{page_num}_preprocessed.png"
        original_path = preview_dir / original_filename
        preprocessed_path = preview_dir / preprocessed_filename
        original_image.save(str(original_path), "PNG")
        preprocessed_image.save(str(preprocessed_path), "PNG")

        # Build URLs (relative paths that can be served)
        base_url = f"/api/v2/tasks/{task_id}/preview/image"
        original_url = f"{base_url}?type=original&page={page_num}"
        preprocessed_url = f"{base_url}?type=preprocessed&page={page_num}"

        return PreprocessingPreviewResponse(
            original_url=original_url,
            preprocessed_url=preprocessed_url,
            quality_metrics=preprocess_result.quality_metrics,
            auto_config=preprocess_result.config_used,
            mode_used=request.mode
        )
    except HTTPException:
        # Deliberate HTTP errors raised above pass through untouched.
        raise
    except Exception as e:
        logger.exception("Failed to preview preprocessing for task %s", task_id)
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to preview preprocessing: {str(e)}"
        ) from e
@router.get("/{task_id}/preview/image", summary="Get preview image")
async def get_preview_image(
    task_id: str,
    type: str = Query(..., description="Image type: 'original' or 'preprocessed'"),
    page: int = Query(1, ge=1, description="Page number"),
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """
    Get a preview image (original or preprocessed).

    - **task_id**: Task UUID
    - **type**: Image type ('original' or 'preprocessed')
    - **page**: Page number
    """
    # Serves a PNG from <result_dir>/<task_id>/preview/ as a file response.
    # Responds 404 when the task or the PNG is missing, 400 for an unknown
    # image type, and 500 (with a logged traceback) for anything unexpected.
    try:
        # The lookup is scoped to the current user's id.
        owned_task = task_service.get_task_by_id(db=db, task_id=task_id, user_id=current_user.id)
        if not owned_task:
            raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Task not found")

        # Only the two known variants may be requested; the value is
        # interpolated into the file name below.
        if type not in ("original", "preprocessed"):
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail="Invalid type. Must be 'original' or 'preprocessed'",
            )

        # Resolve page_<n>_<type>.png inside the task's preview directory.
        png_name = f"page_{page}_{type}.png"
        png_path = Path(settings.result_dir) / task_id / "preview" / png_name
        if not png_path.exists():
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail=f"Preview image not found. Please call preview/preprocessing first.",
            )

        return FileResponse(path=str(png_path), media_type="image/png", filename=png_name)
    except HTTPException:
        # Intentional HTTP errors from the checks above are re-raised as-is.
        raise
    except Exception as exc:
        logger.exception(f"Failed to get preview image for task {task_id}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to get preview image: {str(exc)}",
        )