""" Tool_OCR - Task Management Service Handles OCR task CRUD operations with user isolation """ from typing import List, Optional, Tuple from sqlalchemy.orm import Session from sqlalchemy import and_, or_, desc from datetime import datetime, timedelta import uuid import logging from app.models.task import Task, TaskFile, TaskStatus from app.core.config import settings logger = logging.getLogger(__name__) class TaskService: """Service for task management with user isolation""" def create_task( self, db: Session, user_id: int, filename: Optional[str] = None, file_type: Optional[str] = None, ) -> Task: """ Create a new task for a user Args: db: Database session user_id: User ID (for isolation) filename: Original filename file_type: File MIME type Returns: Created Task object """ # Generate unique task ID task_id = str(uuid.uuid4()) # Check user's task limit (excluding soft-deleted tasks) if settings.max_tasks_per_user > 0: user_task_count = db.query(Task).filter( and_(Task.user_id == user_id, Task.deleted_at.is_(None)) ).count() if user_task_count >= settings.max_tasks_per_user: # Auto-delete oldest completed tasks to make room self._cleanup_old_tasks(db, user_id, limit=10) # Create task task = Task( user_id=user_id, task_id=task_id, filename=filename, file_type=file_type, status=TaskStatus.PENDING, ) db.add(task) db.commit() db.refresh(task) logger.info(f"Created task {task_id} for user {user_id}") return task def get_task_by_id( self, db: Session, task_id: str, user_id: int, include_deleted: bool = False ) -> Optional[Task]: """ Get task by ID with user isolation Args: db: Database session task_id: Task ID (UUID) user_id: User ID (for isolation) include_deleted: If True, include soft-deleted tasks Returns: Task object or None if not found/unauthorized """ query = db.query(Task).filter( and_(Task.task_id == task_id, Task.user_id == user_id) ) # Filter out soft-deleted tasks by default if not include_deleted: query = query.filter(Task.deleted_at.is_(None)) return query.first() def get_user_tasks( self, db: Session, user_id: int, status: Optional[TaskStatus] = None, filename_search: Optional[str] = None, date_from: Optional[datetime] = None, date_to: Optional[datetime] = None, skip: int = 0, limit: int = 50, order_by: str = "created_at", order_desc: bool = True, include_deleted: bool = False, ) -> Tuple[List[Task], int]: """ Get user's tasks with pagination and filtering Args: db: Database session user_id: User ID (for isolation) status: Filter by status (optional) filename_search: Search by filename (partial match, optional) date_from: Filter tasks created from this date (optional) date_to: Filter tasks created until this date (optional) skip: Pagination offset limit: Pagination limit order_by: Sort field (created_at, updated_at, completed_at) order_desc: Sort descending include_deleted: If True, include soft-deleted tasks Returns: Tuple of (tasks list, total count) """ # Base query with user isolation query = db.query(Task).filter(Task.user_id == user_id) # Filter out soft-deleted tasks by default if not include_deleted: query = query.filter(Task.deleted_at.is_(None)) # Apply status filter if status: query = query.filter(Task.status == status) # Apply filename search (case-insensitive partial match) if filename_search: query = query.filter(Task.filename.ilike(f"%{filename_search}%")) # Apply date range filter if date_from: query = query.filter(Task.created_at >= date_from) if date_to: # Add one day to include the entire end date date_to_end = date_to + timedelta(days=1) query = query.filter(Task.created_at < date_to_end) # Get total count total = query.count() # Apply sorting sort_column = getattr(Task, order_by, Task.created_at) if order_desc: query = query.order_by(desc(sort_column)) else: query = query.order_by(sort_column) # Apply pagination tasks = query.offset(skip).limit(limit).all() return tasks, total def update_task_status( self, db: Session, task_id: str, user_id: int, status: TaskStatus, error_message: Optional[str] = None, processing_time_ms: Optional[int] = None, ) -> Optional[Task]: """ Update task status with user isolation Args: db: Database session task_id: Task ID (UUID) user_id: User ID (for isolation) status: New status error_message: Error message if failed processing_time_ms: Processing time in milliseconds Returns: Updated Task object or None if not found/unauthorized """ task = self.get_task_by_id(db, task_id, user_id) if not task: logger.warning( f"Task {task_id} not found for user {user_id} during status update" ) return None task.status = status task.updated_at = datetime.utcnow() if status == TaskStatus.COMPLETED: task.completed_at = datetime.utcnow() if error_message: task.error_message = error_message if processing_time_ms is not None: task.processing_time_ms = processing_time_ms db.commit() db.refresh(task) logger.info(f"Updated task {task_id} status to {status.value}") return task def update_task_results( self, db: Session, task_id: str, user_id: int, result_json_path: Optional[str] = None, result_markdown_path: Optional[str] = None, result_pdf_path: Optional[str] = None, ) -> Optional[Task]: """ Update task result file paths Args: db: Database session task_id: Task ID (UUID) user_id: User ID (for isolation) result_json_path: Path to JSON result result_markdown_path: Path to Markdown result result_pdf_path: Path to searchable PDF Returns: Updated Task object or None if not found/unauthorized """ task = self.get_task_by_id(db, task_id, user_id) if not task: return None if result_json_path: task.result_json_path = result_json_path if result_markdown_path: task.result_markdown_path = result_markdown_path if result_pdf_path: task.result_pdf_path = result_pdf_path task.updated_at = datetime.utcnow() db.commit() db.refresh(task) logger.info(f"Updated task {task_id} result paths") return task def delete_task( self, db: Session, task_id: str, user_id: int ) -> bool: """ Soft delete task with user isolation. Sets deleted_at timestamp instead of removing record. Database records are preserved for statistics tracking. Args: db: Database session task_id: Task ID (UUID) user_id: User ID (for isolation) Returns: True if soft deleted, False if not found/unauthorized """ task = self.get_task_by_id(db, task_id, user_id) if not task: return False # Soft delete: set deleted_at timestamp task.deleted_at = datetime.utcnow() task.updated_at = datetime.utcnow() db.commit() logger.info(f"Soft deleted task {task_id} for user {user_id}") return True def _cleanup_old_tasks( self, db: Session, user_id: int, limit: int = 10 ) -> int: """ Clean up old completed tasks for a user Args: db: Database session user_id: User ID limit: Number of tasks to delete Returns: Number of tasks deleted """ # Find oldest completed tasks old_tasks = ( db.query(Task) .filter( and_( Task.user_id == user_id, Task.status == TaskStatus.COMPLETED, ) ) .order_by(Task.completed_at) .limit(limit) .all() ) count = 0 for task in old_tasks: db.delete(task) count += 1 if count > 0: db.commit() logger.info(f"Cleaned up {count} old tasks for user {user_id}") return count def auto_cleanup_expired_tasks(self, db: Session) -> int: """ Auto-cleanup tasks older than TASK_RETENTION_DAYS Args: db: Database session Returns: Number of tasks deleted """ if settings.task_retention_days <= 0: return 0 cutoff_date = datetime.utcnow() - timedelta(days=settings.task_retention_days) # Find expired tasks expired_tasks = ( db.query(Task) .filter( and_( Task.status == TaskStatus.COMPLETED, Task.completed_at < cutoff_date, ) ) .all() ) count = 0 for task in expired_tasks: task.file_deleted = True # TODO: Delete actual files from disk db.delete(task) count += 1 if count > 0: db.commit() logger.info(f"Auto-cleaned up {count} expired tasks") return count def get_user_stats(self, db: Session, user_id: int) -> dict: """ Get statistics for a user's tasks (excluding soft-deleted tasks) Args: db: Database session user_id: User ID Returns: Dictionary with task statistics """ # Base filter: user's non-deleted tasks base_filter = and_(Task.user_id == user_id, Task.deleted_at.is_(None)) total = db.query(Task).filter(base_filter).count() pending = ( db.query(Task) .filter(and_(base_filter, Task.status == TaskStatus.PENDING)) .count() ) processing = ( db.query(Task) .filter(and_(base_filter, Task.status == TaskStatus.PROCESSING)) .count() ) completed = ( db.query(Task) .filter(and_(base_filter, Task.status == TaskStatus.COMPLETED)) .count() ) failed = ( db.query(Task) .filter(and_(base_filter, Task.status == TaskStatus.FAILED)) .count() ) return { "total": total, "pending": pending, "processing": processing, "completed": completed, "failed": failed, } def get_all_tasks_admin( self, db: Session, user_id: Optional[int] = None, status: Optional[TaskStatus] = None, include_deleted: bool = True, include_files_deleted: bool = True, skip: int = 0, limit: int = 50, order_by: str = "created_at", order_desc: bool = True, ) -> Tuple[List[Task], int]: """ Get all tasks for admin view (no user isolation). Includes soft-deleted tasks by default. Args: db: Database session user_id: Filter by user ID (optional) status: Filter by status (optional) include_deleted: Include soft-deleted tasks (default True) include_files_deleted: Include tasks with deleted files (default True) skip: Pagination offset limit: Pagination limit order_by: Sort field order_desc: Sort descending Returns: Tuple of (tasks list, total count) """ query = db.query(Task) # Optional user filter if user_id is not None: query = query.filter(Task.user_id == user_id) # Filter soft-deleted if requested if not include_deleted: query = query.filter(Task.deleted_at.is_(None)) # Filter file-deleted if requested if not include_files_deleted: query = query.filter(Task.file_deleted == False) # Apply status filter if status: query = query.filter(Task.status == status) # Get total count total = query.count() # Apply sorting sort_column = getattr(Task, order_by, Task.created_at) if order_desc: query = query.order_by(desc(sort_column)) else: query = query.order_by(sort_column) # Apply pagination tasks = query.offset(skip).limit(limit).all() return tasks, total def get_task_by_id_admin(self, db: Session, task_id: str) -> Optional[Task]: """ Get task by ID for admin (no user isolation, includes deleted). Args: db: Database session task_id: Task ID (UUID) Returns: Task object or None if not found """ return db.query(Task).filter(Task.task_id == task_id).first() # Global service instance task_service = TaskService()