"""
|
|
Tool_OCR - Task Management Router
|
|
Handles OCR task operations with user isolation
|
|
"""
|
|
|
|
import logging
|
|
from typing import Optional
|
|
from pathlib import Path
|
|
import shutil
|
|
import hashlib
|
|
|
|
from fastapi import APIRouter, Depends, HTTPException, status, Query, UploadFile, File, BackgroundTasks
|
|
from fastapi.responses import FileResponse
|
|
from sqlalchemy.orm import Session
|
|
import json
|
|
from datetime import datetime
|
|
|
|
from app.core.deps import get_db, get_current_user
|
|
from app.core.config import settings
|
|
from app.models.user import User
|
|
from app.models.task import TaskStatus, TaskFile
|
|
from app.schemas.task import (
|
|
TaskCreate,
|
|
TaskUpdate,
|
|
TaskResponse,
|
|
TaskDetailResponse,
|
|
TaskListResponse,
|
|
TaskStatsResponse,
|
|
TaskStatusEnum,
|
|
UploadResponse,
|
|
ProcessingTrackEnum,
|
|
ProcessingOptions,
|
|
AnalyzeRequest,
|
|
DocumentAnalysisResponse,
|
|
ProcessingMetadata,
|
|
TaskResponseWithMetadata,
|
|
ExportOptions,
|
|
PreprocessingModeEnum,
|
|
PreprocessingConfig,
|
|
PreprocessingPreviewRequest,
|
|
PreprocessingPreviewResponse,
|
|
ImageQualityMetrics,
|
|
TableDetectionConfig,
|
|
)
|
|
from app.services.task_service import task_service
|
|
from app.services.file_access_service import file_access_service
|
|
from app.services.ocr_service import OCRService
|
|
from app.services.service_pool import get_service_pool, PoolConfig
|
|
|
|
# Import dual-track components
|
|
try:
|
|
from app.services.document_type_detector import DocumentTypeDetector
|
|
DUAL_TRACK_AVAILABLE = True
|
|
except ImportError:
|
|
DUAL_TRACK_AVAILABLE = False
|
|
|
|
# Service pool availability
|
|
SERVICE_POOL_AVAILABLE = True
|
|
try:
|
|
from app.services.memory_manager import get_model_manager
|
|
except ImportError:
|
|
SERVICE_POOL_AVAILABLE = False
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
router = APIRouter(prefix="/api/v2/tasks", tags=["Tasks"])
|
|
|
|
|
|
def process_task_ocr(
|
|
task_id: str,
|
|
task_db_id: int,
|
|
file_path: str,
|
|
filename: str,
|
|
use_dual_track: bool = True,
|
|
force_track: Optional[str] = None,
|
|
language: str = 'ch',
|
|
layout_model: Optional[str] = "chinese",
|
|
preprocessing_mode: Optional[str] = "auto",
|
|
preprocessing_config: Optional[dict] = None,
|
|
table_detection_config: Optional[dict] = None
|
|
):
|
|
"""
|
|
Background task to process OCR for a task with dual-track support.
|
|
|
|
Uses OCRServicePool to acquire a shared service instance instead of
|
|
creating a new one, preventing GPU memory proliferation.
|
|
|
|
Args:
|
|
task_id: Task UUID string
|
|
task_db_id: Task database ID
|
|
file_path: Path to uploaded file
|
|
filename: Original filename
|
|
use_dual_track: Enable dual-track processing
|
|
force_track: Force specific track ('ocr' or 'direct')
|
|
language: OCR language code
|
|
layout_model: Layout detection model ('chinese', 'default', 'cdla')
|
|
preprocessing_mode: Preprocessing mode ('auto', 'manual', 'disabled')
|
|
preprocessing_config: Manual preprocessing config dict (contrast, sharpen, binarize)
|
|
table_detection_config: Table detection config dict (enable_wired_table, enable_wireless_table, enable_region_detection)
|
|
"""
    from app.core.database import SessionLocal
    from app.models.task import Task

    db = SessionLocal()
    start_time = datetime.now()
    pooled_service = None

    try:
        logger.info(f"Starting OCR processing for task {task_id}, file: {filename}")
        logger.info(f"Processing options: dual_track={use_dual_track}, force_track={force_track}, lang={language}")
        logger.info(f"Preprocessing options: mode={preprocessing_mode}, config={preprocessing_config}")
        logger.info(f"Table detection options: {table_detection_config}")

        # Convert preprocessing parameters to proper types
        preprocess_mode_enum = None
        preprocess_config_obj = None
        if preprocessing_mode:
            preprocess_mode_enum = PreprocessingModeEnum(preprocessing_mode)
        if preprocessing_config:
            from app.schemas.task import PreprocessingContrastEnum
            preprocess_config_obj = PreprocessingConfig(
                contrast=PreprocessingContrastEnum(preprocessing_config.get("contrast", "clahe")),
                contrast_strength=preprocessing_config.get("contrast_strength", 1.0),
                sharpen=preprocessing_config.get("sharpen", True),
                sharpen_strength=preprocessing_config.get("sharpen_strength", 1.0),
                binarize=preprocessing_config.get("binarize", False)
            )

        # Convert table detection config to object
        table_det_config_obj = None
        if table_detection_config:
            table_det_config_obj = TableDetectionConfig(
                enable_wired_table=table_detection_config.get("enable_wired_table", True),
                enable_wireless_table=table_detection_config.get("enable_wireless_table", True),
                enable_region_detection=table_detection_config.get("enable_region_detection", True)
            )

        # Get task directly by database ID (bypass user isolation for background task)
        task = db.query(Task).filter(Task.id == task_db_id).first()
        if not task:
            logger.error(f"Task {task_id} not found in database")
            return

        # Acquire OCR service from pool (or create new if pool disabled)
        ocr_service = None
        if settings.enable_service_pool and SERVICE_POOL_AVAILABLE:
            try:
                service_pool = get_service_pool()
                pooled_service = service_pool.acquire(
                    device="GPU:0",
                    timeout=settings.service_acquire_timeout_seconds,
                    task_id=task_id
                )
                if pooled_service:
                    ocr_service = pooled_service.service
                    logger.info(f"Acquired OCR service from pool for task {task_id}")
                else:
                    logger.warning("Timeout acquiring service from pool, creating new instance")
            except Exception as e:
                logger.warning(f"Failed to acquire service from pool: {e}, creating new instance")

        # Fallback: create new instance if pool acquisition failed
        if ocr_service is None:
            logger.info("Creating new OCRService instance (pool disabled or unavailable)")
            ocr_service = OCRService()

        # Create result directory before OCR processing (needed for saving extracted images)
        result_dir = Path(settings.result_dir) / task_id
        result_dir.mkdir(parents=True, exist_ok=True)

        # Process the file with OCR.
        # Use dual-track processing if:
        # 1. use_dual_track is True (auto-detection)
        # 2. OR force_track is specified (explicit track selection)
        if (use_dual_track or force_track) and hasattr(ocr_service, 'process'):
            # Use new dual-track processing (or forced track)
            ocr_result = ocr_service.process(
                file_path=Path(file_path),
                lang=language,
                detect_layout=True,
                output_dir=result_dir,
                use_dual_track=use_dual_track,
                force_track=force_track,
                layout_model=layout_model,
                preprocessing_mode=preprocess_mode_enum,
                preprocessing_config=preprocess_config_obj,
                table_detection_config=table_det_config_obj
            )
        else:
            # Fall back to traditional processing (no force_track support)
            ocr_result = ocr_service.process_image(
                image_path=Path(file_path),
                lang=language,
                detect_layout=True,
                output_dir=result_dir,
                layout_model=layout_model,
                preprocessing_mode=preprocess_mode_enum,
                preprocessing_config=preprocess_config_obj,
                table_detection_config=table_det_config_obj
            )

        # Calculate processing time
        processing_time_ms = int((datetime.now() - start_time).total_seconds() * 1000)

        # Save results using OCR service (includes JSON, Markdown, and PDF generation)
        json_path, markdown_path, pdf_path = ocr_service.save_results(
            result=ocr_result,
            output_dir=result_dir,
            file_id=Path(filename).stem,
            source_file_path=Path(file_path)
        )

        # Release service back to pool (success case)
        if pooled_service:
            try:
                service_pool = get_service_pool()
                service_pool.release(pooled_service, error=None)
                logger.info(f"Released OCR service back to pool for task {task_id}")
                pooled_service = None  # Prevent double release in finally
            except Exception as e:
                logger.warning(f"Failed to release service to pool: {e}")

        # Close old session and create a fresh one to avoid MySQL timeout
        # (long OCR processing may cause the connection to become stale)
        db.close()
        db = SessionLocal()

        # Re-fetch task with fresh connection
        task = db.query(Task).filter(Task.id == task_db_id).first()
        if not task:
            logger.error(f"Task {task_id} not found after OCR processing")
            return

        # Update task with results (direct database update)
        task.result_json_path = str(json_path) if json_path else None
        task.result_markdown_path = str(markdown_path) if markdown_path else None
        task.result_pdf_path = str(pdf_path) if pdf_path else None
        task.processing_time_ms = processing_time_ms
        task.status = TaskStatus.COMPLETED
        task.completed_at = datetime.utcnow()
        task.updated_at = datetime.utcnow()

        db.commit()

        logger.info(f"OCR processing completed for task {task_id} in {processing_time_ms}ms")

    except Exception as e:
        logger.exception(f"OCR processing failed for task {task_id}")

        # Release service back to pool with the error
        if pooled_service:
            try:
                service_pool = get_service_pool()
                service_pool.release(pooled_service, error=e)
                pooled_service = None
            except Exception as release_error:
                logger.warning(f"Failed to release service to pool: {release_error}")

        # Update task status to failed (direct database update)
        try:
            task = db.query(Task).filter(Task.id == task_db_id).first()
            if task:
                task.status = TaskStatus.FAILED
                task.error_message = str(e)
                task.updated_at = datetime.utcnow()
                db.commit()
        except Exception as update_error:
            logger.error(f"Failed to update task status: {update_error}")

    finally:
        # Ensure the service is released in case of any missed release
        if pooled_service:
            try:
                service_pool = get_service_pool()
                service_pool.release(pooled_service, error=None)
            except Exception:
                pass
        db.close()


@router.post("/", response_model=TaskResponse, status_code=status.HTTP_201_CREATED)
async def create_task(
    task_data: TaskCreate,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """
    Create a new OCR task

    - **filename**: Original filename (optional)
    - **file_type**: File MIME type (optional)
    """
    try:
        task = task_service.create_task(
            db=db,
            user_id=current_user.id,
            filename=task_data.filename,
            file_type=task_data.file_type
        )

        logger.info(f"Created task {task.task_id} for user {current_user.email}")
        return task

    except Exception as e:
        logger.exception(f"Failed to create task for user {current_user.id}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to create task: {str(e)}"
        )


@router.get("/", response_model=TaskListResponse)
async def list_tasks(
    status_filter: Optional[TaskStatusEnum] = Query(None, alias="status"),
    filename_search: Optional[str] = Query(None, alias="filename"),
    date_from: Optional[str] = Query(None, alias="date_from"),
    date_to: Optional[str] = Query(None, alias="date_to"),
    page: int = Query(1, ge=1),
    page_size: int = Query(50, ge=1, le=100),
    order_by: str = Query("created_at"),
    order_desc: bool = Query(True),
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """
    List user's tasks with pagination and filtering

    - **status**: Filter by task status (optional)
    - **filename**: Search by filename (partial match, optional)
    - **date_from**: Filter tasks from this date (YYYY-MM-DD, optional)
    - **date_to**: Filter tasks until this date (YYYY-MM-DD, optional)
    - **page**: Page number (starts from 1)
    - **page_size**: Number of tasks per page (max 100)
    - **order_by**: Sort field (created_at, updated_at, completed_at)
    - **order_desc**: Sort descending (default: true)
    """
    try:
        # Convert enum to model enum if provided
        status_enum = TaskStatus[status_filter.value.upper()] if status_filter else None

        # Parse date strings
        date_from_dt = datetime.fromisoformat(date_from) if date_from else None
        date_to_dt = datetime.fromisoformat(date_to) if date_to else None

        # Calculate offset
        skip = (page - 1) * page_size

        # Get tasks
        tasks, total = task_service.get_user_tasks(
            db=db,
            user_id=current_user.id,
            status=status_enum,
            filename_search=filename_search,
            date_from=date_from_dt,
            date_to=date_to_dt,
            skip=skip,
            limit=page_size,
            order_by=order_by,
            order_desc=order_desc
        )

        # Calculate pagination
        has_more = (skip + len(tasks)) < total

        return {
            "tasks": tasks,
            "total": total,
            "page": page,
            "page_size": page_size,
            "has_more": has_more
        }

    except Exception as e:
        logger.exception(f"Failed to list tasks for user {current_user.id}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to list tasks: {str(e)}"
        )


@router.get("/stats", response_model=TaskStatsResponse)
async def get_task_stats(
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """
    Get task statistics for the current user

    Returns counts by status
    """
    try:
        stats = task_service.get_user_stats(db=db, user_id=current_user.id)
        return stats

    except Exception as e:
        logger.exception(f"Failed to get stats for user {current_user.id}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to get statistics: {str(e)}"
        )


@router.get("/{task_id}", response_model=TaskDetailResponse)
async def get_task(
    task_id: str,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """
    Get task details by ID

    - **task_id**: Task UUID
    """
    task = task_service.get_task_by_id(
        db=db,
        task_id=task_id,
        user_id=current_user.id
    )

    if not task:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Task not found"
        )

    # Extract processing_track from result JSON metadata if available
    processing_track = None
    if task.result_json_path:
        try:
            result_path = Path(task.result_json_path)
            if result_path.exists():
                with open(result_path) as f:
                    result_data = json.load(f)
                metadata = result_data.get("metadata", {})
                track_str = metadata.get("processing_track")
                # Convert string to enum to avoid a Pydantic serialization warning
                if track_str:
                    try:
                        processing_track = ProcessingTrackEnum(track_str)
                    except ValueError:
                        processing_track = None
        except Exception:
            pass  # Silently ignore errors reading the result file

    # Create response with processing_track
    response = TaskDetailResponse.model_validate(task)
    response.processing_track = processing_track
    return response


@router.patch("/{task_id}", response_model=TaskResponse)
async def update_task(
    task_id: str,
    task_update: TaskUpdate,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """
    Update task status and results

    - **task_id**: Task UUID
    - **status**: New task status (optional)
    - **error_message**: Error message if failed (optional)
    - **processing_time_ms**: Processing time in milliseconds (optional)
    - **result_json_path**: Path to JSON result (optional)
    - **result_markdown_path**: Path to Markdown result (optional)
    - **result_pdf_path**: Path to searchable PDF (optional)
    """
    try:
        # Update status if provided
        if task_update.status:
            status_enum = TaskStatus[task_update.status.value.upper()]
            task = task_service.update_task_status(
                db=db,
                task_id=task_id,
                user_id=current_user.id,
                status=status_enum,
                error_message=task_update.error_message,
                processing_time_ms=task_update.processing_time_ms
            )

            if not task:
                raise HTTPException(
                    status_code=status.HTTP_404_NOT_FOUND,
                    detail="Task not found"
                )

        # Update result paths if provided
        if any([
            task_update.result_json_path,
            task_update.result_markdown_path,
            task_update.result_pdf_path
        ]):
            task = task_service.update_task_results(
                db=db,
                task_id=task_id,
                user_id=current_user.id,
                result_json_path=task_update.result_json_path,
                result_markdown_path=task_update.result_markdown_path,
                result_pdf_path=task_update.result_pdf_path
            )

            if not task:
                raise HTTPException(
                    status_code=status.HTTP_404_NOT_FOUND,
                    detail="Task not found"
                )

        return task

    except HTTPException:
        raise
    except Exception as e:
        logger.exception(f"Failed to update task {task_id}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to update task: {str(e)}"
        )


@router.delete("/{task_id}", status_code=status.HTTP_204_NO_CONTENT)
async def delete_task(
    task_id: str,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """
    Delete a task

    - **task_id**: Task UUID
    """
    success = task_service.delete_task(
        db=db,
        task_id=task_id,
        user_id=current_user.id
    )

    if not success:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Task not found"
        )

    logger.info(f"Deleted task {task_id} for user {current_user.email}")
    return None


@router.get("/{task_id}/download/json", summary="Download JSON result")
async def download_json(
    task_id: str,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """
    Download task result as a JSON file

    - **task_id**: Task UUID
    """
    # Get task
    task = task_service.get_task_by_id(
        db=db,
        task_id=task_id,
        user_id=current_user.id
    )

    if not task:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Task not found"
        )

    # Validate file access
    is_valid, error_msg = file_access_service.validate_file_access(
        db=db,
        user_id=current_user.id,
        task_id=task_id,
        file_path=task.result_json_path
    )

    if not is_valid:
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail=error_msg
        )

    # Return file
    filename = f"{task.filename or task_id}_result.json"
    return FileResponse(
        path=task.result_json_path,
        filename=filename,
        media_type="application/json"
    )


@router.get("/{task_id}/download/markdown", summary="Download Markdown result")
async def download_markdown(
    task_id: str,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """
    Download task result as a Markdown file

    - **task_id**: Task UUID
    """
    # Get task
    task = task_service.get_task_by_id(
        db=db,
        task_id=task_id,
        user_id=current_user.id
    )

    if not task:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Task not found"
        )

    # Validate file access
    is_valid, error_msg = file_access_service.validate_file_access(
        db=db,
        user_id=current_user.id,
        task_id=task_id,
        file_path=task.result_markdown_path
    )

    if not is_valid:
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail=error_msg
        )

    # Return file
    filename = f"{task.filename or task_id}_result.md"
    return FileResponse(
        path=task.result_markdown_path,
        filename=filename,
        media_type="text/markdown"
    )


@router.get("/{task_id}/download/pdf", summary="Download PDF result")
async def download_pdf(
    task_id: str,
    format: Optional[str] = Query(
        None,
        description="PDF format: 'layout' (default) preserves original coordinates, 'reflow' provides flowing text with consistent font sizes"
    ),
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """
    Download task result as a PDF file

    - **task_id**: Task UUID
    - **format**: Optional format parameter
      - `layout` (default): Preserves original document layout and coordinates
      - `reflow`: Flowing text with consistent font sizes for better readability

    Returns a PDF generated from OCR JSON data. The PDF is cached for subsequent requests.
    """
    from app.services.pdf_generator_service import pdf_generator_service

    # Get task
    task = task_service.get_task_by_id(
        db=db,
        task_id=task_id,
        user_id=current_user.id
    )

    if not task:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Task not found"
        )

    # Check if task is completed
    if task.status.value != "completed":
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Task is not completed yet. Please wait for OCR processing to finish."
        )

    # Determine format (default to layout)
    use_reflow = format and format.lower() == "reflow"

    # Check if a PDF path is stored in the database (only for layout format, as reflow is always generated)
    if not use_reflow and task.result_pdf_path and Path(task.result_pdf_path).exists():
        pdf_path = Path(task.result_pdf_path)
        logger.info(f"Using pre-generated PDF from database: {pdf_path.name}")
    else:
        # Generate PDF on demand
        result_dir = Path(settings.result_dir) / task_id

        # Use the stored JSON path or construct it
        if task.result_json_path and Path(task.result_json_path).exists():
            json_path = Path(task.result_json_path)
        else:
            # Try to find the JSON file in the result directory
            json_files = list(result_dir.glob("*_result.json"))
            if not json_files:
                raise HTTPException(
                    status_code=status.HTTP_404_NOT_FOUND,
                    detail="OCR result JSON not found"
                )
            json_path = json_files[0]

        # Construct the PDF path based on JSON filename and format
        format_suffix = "_reflow" if use_reflow else "_layout"
        pdf_filename = json_path.stem.replace("_result", format_suffix) + ".pdf"
        pdf_path = result_dir / pdf_filename

        # Generate the PDF if it doesn't exist
        if not pdf_path.exists():
            logger.info(f"Generating {'reflow' if use_reflow else 'layout-preserving'} PDF for task {task_id}")

            # Get source file path if available
            source_file = None
            task_file = db.query(TaskFile).filter(TaskFile.task_id == task.id).first()
            if task_file and task_file.stored_path and Path(task_file.stored_path).exists():
                source_file = Path(task_file.stored_path)

            # Generate PDF based on format
            if use_reflow:
                # For reflow, pass result_dir as source_file_path (contains extracted images)
                success = pdf_generator_service.generate_reflow_pdf(
                    json_path=json_path,
                    output_path=pdf_path,
                    source_file_path=result_dir
                )
            else:
                success = pdf_generator_service.generate_layout_pdf(
                    json_path=json_path,
                    output_path=pdf_path,
                    source_file_path=source_file
                )

            if not success:
                raise HTTPException(
                    status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                    detail="Failed to generate PDF. Please check server logs."
                )

            logger.info(f"PDF generated successfully: {pdf_path.name}")

    # Validate file access
    is_valid, error_msg = file_access_service.validate_file_access(
        db=db,
        user_id=current_user.id,
        task_id=task_id,
        file_path=str(pdf_path)
    )

    if not is_valid:
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail=error_msg
        )

    # Return the file with the format indicated in the filename
    base_name = task.filename or task_id
    format_suffix = "_reflow" if use_reflow else "_layout"
    filename = f"{base_name}{format_suffix}.pdf"
    return FileResponse(
        path=str(pdf_path),
        filename=filename,
        media_type="application/pdf"
    )


@router.post("/{task_id}/start", response_model=TaskResponse, summary="Start task processing")
async def start_task(
    task_id: str,
    background_tasks: BackgroundTasks,
    options: Optional[ProcessingOptions] = None,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """
    Start processing a pending task with dual-track support and layout model selection

    - **task_id**: Task UUID
    - **options**: Processing options (in request body):
      - **use_dual_track**: Enable intelligent track selection (default: true)
      - **force_track**: Force specific processing track ('ocr' or 'direct')
      - **language**: OCR language code (default: 'ch')
      - **layout_model**: Layout detection model ('chinese', 'default', 'cdla')
      - **table_detection**: Table detection config (enable_wired_table, enable_wireless_table, enable_region_detection)
    """
    try:
        # Parse processing options with defaults
        if options is None:
            options = ProcessingOptions()

        use_dual_track = options.use_dual_track
        force_track = options.force_track.value if options.force_track else None
        language = options.language

        # Extract layout model (default to 'chinese' for best Chinese document support)
        layout_model = options.layout_model.value if options.layout_model else "chinese"
        logger.info(f"Using layout model: {layout_model}")

        # Extract preprocessing options
        preprocessing_mode = options.preprocessing_mode.value if options.preprocessing_mode else "auto"
        preprocessing_config = None
        if options.preprocessing_config:
            preprocessing_config = {
                "contrast": options.preprocessing_config.contrast.value,
                "contrast_strength": options.preprocessing_config.contrast_strength,
                "sharpen": options.preprocessing_config.sharpen,
                "sharpen_strength": options.preprocessing_config.sharpen_strength,
                "binarize": options.preprocessing_config.binarize
            }
        logger.info(f"Preprocessing: mode={preprocessing_mode}, config={preprocessing_config}")

        # Extract table detection options
        table_detection_config = None
        if options.table_detection:
            table_detection_config = {
                "enable_wired_table": options.table_detection.enable_wired_table,
                "enable_wireless_table": options.table_detection.enable_wireless_table,
                "enable_region_detection": options.table_detection.enable_region_detection
            }
        logger.info(f"Table detection: {table_detection_config}")

        # Get task details
        task = task_service.get_task_by_id(
            db=db,
            task_id=task_id,
            user_id=current_user.id
        )

        if not task:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail="Task not found"
            )

        # Check if task is in pending status
        if task.status != TaskStatus.PENDING:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail=f"Cannot start task in '{task.status.value}' status"
            )

        # Get task file
        task_file = db.query(TaskFile).filter(TaskFile.task_id == task.id).first()
        if not task_file:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail="Task file not found"
            )

        # Update task status to processing
        task = task_service.update_task_status(
            db=db,
            task_id=task_id,
            user_id=current_user.id,
            status=TaskStatus.PROCESSING
        )

        # Start OCR processing in background with dual-track parameters and layout model
        background_tasks.add_task(
            process_task_ocr,
            task_id=task_id,
            task_db_id=task.id,
            file_path=task_file.stored_path,
            filename=task_file.original_name,
            use_dual_track=use_dual_track,
            force_track=force_track,
            language=language,
            layout_model=layout_model,
            preprocessing_mode=preprocessing_mode,
            preprocessing_config=preprocessing_config,
            table_detection_config=table_detection_config
        )

        logger.info(f"Started OCR processing task {task_id} for user {current_user.email}")
        logger.info(f"Options: dual_track={use_dual_track}, force_track={force_track}, lang={language}, layout_model={layout_model}, preprocessing={preprocessing_mode}, table_detection={table_detection_config}")
        return task

    except HTTPException:
        raise
    except Exception as e:
        logger.exception(f"Failed to start task {task_id}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to start task: {str(e)}"
        )


@router.post("/{task_id}/cancel", response_model=TaskResponse, summary="Cancel task")
async def cancel_task(
    task_id: str,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """
    Cancel a pending or processing task

    - **task_id**: Task UUID
    """
    try:
        # Get current task
        task = task_service.get_task_by_id(
            db=db,
            task_id=task_id,
            user_id=current_user.id
        )

        if not task:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail="Task not found"
            )

        # Only allow canceling pending or processing tasks
        if task.status not in [TaskStatus.PENDING, TaskStatus.PROCESSING]:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail=f"Cannot cancel task in '{task.status.value}' status"
            )

        # Update to failed status with a cancellation message
        task = task_service.update_task_status(
            db=db,
            task_id=task_id,
            user_id=current_user.id,
            status=TaskStatus.FAILED,
            error_message="Task cancelled by user"
        )

        logger.info(f"Cancelled task {task_id} for user {current_user.email}")
        return task

    except HTTPException:
        raise
    except Exception as e:
        logger.exception(f"Failed to cancel task {task_id}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to cancel task: {str(e)}"
        )


@router.post("/{task_id}/retry", response_model=TaskResponse, summary="Retry failed task")
async def retry_task(
    task_id: str,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """
    Retry a failed task

    - **task_id**: Task UUID
    """
    try:
        # Get current task
        task = task_service.get_task_by_id(
            db=db,
            task_id=task_id,
            user_id=current_user.id
        )

        if not task:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail="Task not found"
            )

        # Only allow retrying failed tasks
        if task.status != TaskStatus.FAILED:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail=f"Cannot retry task in '{task.status.value}' status"
            )

        # Reset task to pending status
        task = task_service.update_task_status(
            db=db,
            task_id=task_id,
            user_id=current_user.id,
            status=TaskStatus.PENDING,
            error_message=None
        )

        logger.info(f"Retrying task {task_id} for user {current_user.email}")
        return task

    except HTTPException:
        raise
    except Exception as e:
        logger.exception(f"Failed to retry task {task_id}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to retry task: {str(e)}"
        )


# ===== Document Analysis Endpoints =====

@router.post("/{task_id}/analyze", response_model=DocumentAnalysisResponse, summary="Analyze document type")
async def analyze_document(
    task_id: str,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """
    Analyze a document to determine the recommended processing track

    Returns document type analysis with the recommended processing track
    (OCR for scanned documents, DIRECT for editable PDFs)

    - **task_id**: Task UUID
    """
    try:
        if not DUAL_TRACK_AVAILABLE:
            raise HTTPException(
                status_code=status.HTTP_501_NOT_IMPLEMENTED,
                detail="Dual-track processing not available"
            )

        # Get task details
        task = task_service.get_task_by_id(
            db=db,
            task_id=task_id,
            user_id=current_user.id
        )

        if not task:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail="Task not found"
            )

        # Get task file
        task_file = db.query(TaskFile).filter(TaskFile.task_id == task.id).first()
        if not task_file:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail="Task file not found"
            )

        # Analyze document (using the detect method)
        detector = DocumentTypeDetector()
        recommendation = detector.detect(Path(task_file.stored_path))

        # Build response
        response = DocumentAnalysisResponse(
            task_id=task_id,
            filename=task_file.original_name or "",
            recommended_track=ProcessingTrackEnum(recommendation.track),
            confidence=recommendation.confidence,
            reason=recommendation.reason,
            document_info=recommendation.metadata or {},
            is_editable=recommendation.track == "direct",
            text_coverage=recommendation.metadata.get("text_coverage") if recommendation.metadata else None,
            page_count=recommendation.metadata.get("total_pages") if recommendation.metadata else None
        )

        logger.info(f"Document analysis for task {task_id}: {recommendation.track} (confidence: {recommendation.confidence})")
        return response

    except HTTPException:
        raise
    except Exception as e:
        logger.exception(f"Failed to analyze document for task {task_id}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to analyze document: {str(e)}"
        )


@router.get("/{task_id}/metadata", response_model=ProcessingMetadata, summary="Get processing metadata")
async def get_processing_metadata(
    task_id: str,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """
    Get processing metadata for a completed task

    Returns detailed processing information including the track used,
    element counts, and statistics.

    - **task_id**: Task UUID
    """
    try:
        # Get task details
        task = task_service.get_task_by_id(
            db=db,
            task_id=task_id,
            user_id=current_user.id
        )

        if not task:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail="Task not found"
            )

        if task.status != TaskStatus.COMPLETED:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail="Task not completed"
            )

        # Load the JSON result to get metadata
        if not task.result_json_path:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail="Result JSON not found"
            )

        json_path = Path(task.result_json_path)
        if not json_path.exists():
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail="Result file not found"
            )

        with open(json_path, 'r', encoding='utf-8') as f:
            result_data = json.load(f)

        # Extract metadata
        metadata = result_data.get('metadata', {})
        statistics = result_data.get('statistics', {})

        response = ProcessingMetadata(
            processing_track=ProcessingTrackEnum(metadata.get('processing_track', 'ocr')),
            processing_time_seconds=metadata.get('processing_time', 0),
            language=metadata.get('language', 'ch'),
            page_count=statistics.get('page_count', 1),
            total_elements=statistics.get('total_elements', 0),
            total_text_regions=len(result_data.get('text_regions', [])) if 'text_regions' in result_data else statistics.get('total_elements', 0),
            total_tables=statistics.get('total_tables', 0),
            total_images=statistics.get('total_images', 0),
            average_confidence=result_data.get('average_confidence'),
            unified_format=metadata.get('processing_info', {}).get('export_format') == 'unified_document_v1'
        )

        return response

    except HTTPException:
        raise
    except Exception as e:
        logger.exception(f"Failed to get metadata for task {task_id}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to get metadata: {str(e)}"
        )


@router.get("/{task_id}/download/unified", summary="Download unified format")
async def download_unified(
    task_id: str,
    include_metadata: bool = Query(True, description="Include processing metadata"),
    include_statistics: bool = Query(True, description="Include document statistics"),
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """
    Download results in the unified document format

    Returns JSON with the full UnifiedDocument structure including
    all elements, coordinates, and metadata.

    - **task_id**: Task UUID
    - **include_metadata**: Include processing metadata
    - **include_statistics**: Include document statistics
    """
    try:
        # Get task details
        task = task_service.get_task_by_id(
            db=db,
            task_id=task_id,
            user_id=current_user.id
        )

        if not task:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail="Task not found"
            )

        if task.status != TaskStatus.COMPLETED:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail="Task not completed"
            )

        # Get the JSON result path
        if not task.result_json_path:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail="Result JSON not found"
            )

        json_path = Path(task.result_json_path)
        if not json_path.exists():
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail="Result file not found"
            )

        # Return the unified format JSON
        return FileResponse(
            path=str(json_path),
            filename=f"{task_id}_unified.json",
            media_type="application/json"
        )

    except HTTPException:
        raise
    except Exception as e:
        logger.exception(f"Failed to download unified format for task {task_id}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to download: {str(e)}"
        )


# ===== Preprocessing Preview Endpoints =====

@router.post("/{task_id}/preview/preprocessing", response_model=PreprocessingPreviewResponse, summary="Preview preprocessing effect")
async def preview_preprocessing(
    task_id: str,
    request: PreprocessingPreviewRequest,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """
    Preview the effect of image preprocessing before OCR processing.

    Shows a side-by-side comparison of the original and preprocessed images,
    along with image quality metrics and the auto-detected configuration.

    - **task_id**: Task UUID
    - **page**: Page number to preview (1-based)
    - **mode**: Preprocessing mode ('auto', 'manual', 'disabled')
    - **config**: Manual preprocessing config (only used when mode='manual')
    """
    from pdf2image import convert_from_path
    import base64
    import io
    from PIL import Image
    from app.services.layout_preprocessing_service import get_layout_preprocessing_service

    try:
        # Get task details
        task = task_service.get_task_by_id(
            db=db,
            task_id=task_id,
            user_id=current_user.id
        )

        if not task:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail="Task not found"
            )

        # Get task file
        task_file = db.query(TaskFile).filter(TaskFile.task_id == task.id).first()
        if not task_file:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail="Task file not found"
            )

        file_path = Path(task_file.stored_path)
        if not file_path.exists():
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail="Source file not found"
            )

        # Get the page image
        page_num = request.page
        if file_path.suffix.lower() == '.pdf':
            # Convert the specific page from the PDF
            images = convert_from_path(
                str(file_path),
                first_page=page_num,
                last_page=page_num,
                dpi=150
            )
            if not images:
                raise HTTPException(
                    status_code=status.HTTP_400_BAD_REQUEST,
                    detail=f"Page {page_num} not found in PDF"
                )
            original_image = images[0]
        else:
            # Direct image file
            if page_num != 1:
                raise HTTPException(
                    status_code=status.HTTP_400_BAD_REQUEST,
                    detail="Single image file only has page 1"
                )
            original_image = Image.open(file_path)

        # Get preprocessing service
        preprocessing_service = get_layout_preprocessing_service()

        # Apply preprocessing
        preprocessed_image, preprocess_result = preprocessing_service.preprocess_to_pil(
            original_image,
            mode=request.mode,
            config=request.config
        )

        # Create result directory for preview images
        preview_dir = Path(settings.result_dir) / task_id / "preview"
        preview_dir.mkdir(parents=True, exist_ok=True)

        # Save preview images
        original_filename = f"page_{page_num}_original.png"
        preprocessed_filename = f"page_{page_num}_preprocessed.png"

        original_path = preview_dir / original_filename
        preprocessed_path = preview_dir / preprocessed_filename

        original_image.save(str(original_path), "PNG")
        preprocessed_image.save(str(preprocessed_path), "PNG")

        # Build URLs (relative paths that can be served)
        base_url = f"/api/v2/tasks/{task_id}/preview/image"
        original_url = f"{base_url}?type=original&page={page_num}"
        preprocessed_url = f"{base_url}?type=preprocessed&page={page_num}"

        return PreprocessingPreviewResponse(
            original_url=original_url,
            preprocessed_url=preprocessed_url,
            quality_metrics=preprocess_result.quality_metrics,
            auto_config=preprocess_result.config_used,
            mode_used=request.mode
        )

    except HTTPException:
        raise
    except Exception as e:
        logger.exception(f"Failed to preview preprocessing for task {task_id}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to preview preprocessing: {str(e)}"
        )


@router.get("/{task_id}/preview/image", summary="Get preview image")
async def get_preview_image(
    task_id: str,
    type: str = Query(..., description="Image type: 'original' or 'preprocessed'"),
    page: int = Query(1, ge=1, description="Page number"),
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """
    Get a preview image (original or preprocessed).

    - **task_id**: Task UUID
    - **type**: Image type ('original' or 'preprocessed')
    - **page**: Page number
    """
    try:
        # Verify task ownership
        task = task_service.get_task_by_id(
            db=db,
            task_id=task_id,
            user_id=current_user.id
        )

        if not task:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail="Task not found"
            )

        # Validate type parameter
        if type not in ['original', 'preprocessed']:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail="Invalid type. Must be 'original' or 'preprocessed'"
            )

        # Build image path
        preview_dir = Path(settings.result_dir) / task_id / "preview"
        image_filename = f"page_{page}_{type}.png"
        image_path = preview_dir / image_filename

        if not image_path.exists():
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail="Preview image not found. Please call preview/preprocessing first."
            )

        return FileResponse(
            path=str(image_path),
            media_type="image/png",
            filename=image_filename
        )

    except HTTPException:
        raise
    except Exception as e:
        logger.exception(f"Failed to get preview image for task {task_id}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to get preview image: {str(e)}"
        )