- Fix duplicate logging in multi-worker mode with file lock for cleanup scheduler - Add Pydantic V2 model_config to suppress protected_namespaces warning - Suppress PaddlePaddle ccache warnings - Fix admin.py using non-existent User.username (now uses email) - Fix get_user_stats to exclude soft-deleted tasks from statistics - Fix create_task to exclude soft-deleted tasks from user limit check - Change LOG_LEVEL default to INFO 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
489 lines
14 KiB
Python
489 lines
14 KiB
Python
"""
|
|
Tool_OCR - Task Management Service
|
|
Handles OCR task CRUD operations with user isolation
|
|
"""
|
|
|
|
import logging
import uuid
from datetime import datetime, timedelta
from typing import List, Optional, Tuple

from sqlalchemy import and_, or_, desc
from sqlalchemy import inspect as sa_inspect
from sqlalchemy.orm import Session

from app.core.config import settings
from app.models.task import Task, TaskFile, TaskStatus
|
|
|
|
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class TaskService:
    """Service for task management with user isolation.

    User-facing methods take a ``user_id`` and only read or modify rows whose
    ``Task.user_id`` matches it; the ``*_admin`` methods skip that check.
    ``delete_task`` soft-deletes (sets ``deleted_at``), and user-facing reads
    hide soft-deleted rows unless ``include_deleted`` is passed.
    """

    # Escape character for LIKE patterns built from user input. "/" is used
    # instead of a backslash so the pattern does not depend on how a backend
    # treats backslashes inside string literals.
    _LIKE_ESCAPE = "/"

    def create_task(
        self,
        db: Session,
        user_id: int,
        filename: Optional[str] = None,
        file_type: Optional[str] = None,
    ) -> Task:
        """
        Create a new task for a user

        If ``settings.max_tasks_per_user`` is positive and the user already
        has that many non-deleted tasks, up to 10 of the oldest completed
        tasks are removed first. The new task is created regardless of how
        many rows that cleanup actually freed.

        Args:
            db: Database session
            user_id: User ID (for isolation)
            filename: Original filename
            file_type: File MIME type

        Returns:
            Created Task object
        """
        # Generate unique task ID
        task_id = str(uuid.uuid4())

        # Check user's task limit (excluding soft-deleted tasks).
        # A non-positive limit disables the check entirely.
        if settings.max_tasks_per_user > 0:
            active_count = (
                db.query(Task)
                .filter(and_(Task.user_id == user_id, Task.deleted_at.is_(None)))
                .count()
            )
            if active_count >= settings.max_tasks_per_user:
                # Auto-delete oldest completed tasks to make room
                self._cleanup_old_tasks(db, user_id, limit=10)

        task = Task(
            user_id=user_id,
            task_id=task_id,
            filename=filename,
            file_type=file_type,
            status=TaskStatus.PENDING,
        )

        db.add(task)
        db.commit()
        # Reload so database-generated values are populated on the instance.
        db.refresh(task)

        logger.info(f"Created task {task_id} for user {user_id}")
        return task

    def get_task_by_id(
        self, db: Session, task_id: str, user_id: int, include_deleted: bool = False
    ) -> Optional[Task]:
        """
        Get task by ID with user isolation

        Args:
            db: Database session
            task_id: Task ID (UUID)
            user_id: User ID (for isolation)
            include_deleted: If True, include soft-deleted tasks

        Returns:
            Task object or None if not found/unauthorized
        """
        query = db.query(Task).filter(
            and_(Task.task_id == task_id, Task.user_id == user_id)
        )

        # Filter out soft-deleted tasks by default
        if not include_deleted:
            query = query.filter(Task.deleted_at.is_(None))

        return query.first()

    def get_user_tasks(
        self,
        db: Session,
        user_id: int,
        status: Optional[TaskStatus] = None,
        filename_search: Optional[str] = None,
        date_from: Optional[datetime] = None,
        date_to: Optional[datetime] = None,
        skip: int = 0,
        limit: int = 50,
        order_by: str = "created_at",
        order_desc: bool = True,
        include_deleted: bool = False,
    ) -> Tuple[List[Task], int]:
        """
        Get user's tasks with pagination and filtering

        Args:
            db: Database session
            user_id: User ID (for isolation)
            status: Filter by status (optional)
            filename_search: Search by filename (case-insensitive partial
                match, optional). ``%`` and ``_`` are matched literally.
            date_from: Filter tasks created from this date (optional)
            date_to: Filter tasks created until this date (optional)
            skip: Pagination offset
            limit: Pagination limit
            order_by: Sort field (created_at, updated_at, completed_at).
                Names that are not mapped columns of Task fall back to
                created_at.
            order_desc: Sort descending
            include_deleted: If True, include soft-deleted tasks

        Returns:
            Tuple of (tasks list, total count); the total is the number of
            rows matching the filters before pagination is applied.
        """
        # Base query with user isolation
        query = db.query(Task).filter(Task.user_id == user_id)

        # Filter out soft-deleted tasks by default
        if not include_deleted:
            query = query.filter(Task.deleted_at.is_(None))

        # Explicit None check so the filter does not depend on enum truthiness.
        if status is not None:
            query = query.filter(Task.status == status)

        # Case-insensitive partial match. The user's text is escaped so that
        # "%" and "_" are searched for literally instead of acting as
        # wildcards.
        if filename_search:
            pattern = f"%{self._escape_like(filename_search)}%"
            query = query.filter(
                Task.filename.ilike(pattern, escape=self._LIKE_ESCAPE)
            )

        # Apply date range filter
        if date_from:
            query = query.filter(Task.created_at >= date_from)
        if date_to:
            # Add one day to include the entire end date
            date_to_end = date_to + timedelta(days=1)
            query = query.filter(Task.created_at < date_to_end)

        return self._sorted_page(query, order_by, order_desc, skip, limit)

    def update_task_status(
        self,
        db: Session,
        task_id: str,
        user_id: int,
        status: TaskStatus,
        error_message: Optional[str] = None,
        processing_time_ms: Optional[int] = None,
    ) -> Optional[Task]:
        """
        Update task status with user isolation

        Args:
            db: Database session
            task_id: Task ID (UUID)
            user_id: User ID (for isolation)
            status: New status
            error_message: Error message if failed (ignored when empty)
            processing_time_ms: Processing time in milliseconds

        Returns:
            Updated Task object or None if not found/unauthorized
        """
        task = self.get_task_by_id(db, task_id, user_id)
        if not task:
            logger.warning(
                f"Task {task_id} not found for user {user_id} during status update"
            )
            return None

        # One timestamp for the whole update so updated_at and completed_at
        # agree exactly when both are written.
        now = datetime.utcnow()
        task.status = status
        task.updated_at = now

        if status == TaskStatus.COMPLETED:
            task.completed_at = now

        if error_message:
            task.error_message = error_message

        # 0 ms is a valid measurement, hence the explicit None check.
        if processing_time_ms is not None:
            task.processing_time_ms = processing_time_ms

        db.commit()
        db.refresh(task)

        logger.info(f"Updated task {task_id} status to {status.value}")
        return task

    def update_task_results(
        self,
        db: Session,
        task_id: str,
        user_id: int,
        result_json_path: Optional[str] = None,
        result_markdown_path: Optional[str] = None,
        result_pdf_path: Optional[str] = None,
    ) -> Optional[Task]:
        """
        Update task result file paths

        Only paths that are passed as non-empty values are written; the
        others keep their current value.

        Args:
            db: Database session
            task_id: Task ID (UUID)
            user_id: User ID (for isolation)
            result_json_path: Path to JSON result
            result_markdown_path: Path to Markdown result
            result_pdf_path: Path to searchable PDF

        Returns:
            Updated Task object or None if not found/unauthorized
        """
        task = self.get_task_by_id(db, task_id, user_id)
        if not task:
            return None

        if result_json_path:
            task.result_json_path = result_json_path
        if result_markdown_path:
            task.result_markdown_path = result_markdown_path
        if result_pdf_path:
            task.result_pdf_path = result_pdf_path

        task.updated_at = datetime.utcnow()

        db.commit()
        db.refresh(task)

        logger.info(f"Updated task {task_id} result paths")
        return task

    def delete_task(self, db: Session, task_id: str, user_id: int) -> bool:
        """
        Soft delete task with user isolation.
        Sets deleted_at timestamp instead of removing record.
        Database records are preserved for statistics tracking.

        Args:
            db: Database session
            task_id: Task ID (UUID)
            user_id: User ID (for isolation)

        Returns:
            True if soft deleted, False if not found/unauthorized
        """
        task = self.get_task_by_id(db, task_id, user_id)
        if not task:
            return False

        # Soft delete: set deleted_at timestamp (same instant as updated_at)
        now = datetime.utcnow()
        task.deleted_at = now
        task.updated_at = now
        db.commit()

        logger.info(f"Soft deleted task {task_id} for user {user_id}")
        return True

    def _cleanup_old_tasks(self, db: Session, user_id: int, limit: int = 10) -> int:
        """
        Clean up old completed tasks for a user

        Only non-deleted tasks are considered. The limit check in
        ``create_task`` counts non-deleted rows, so removing rows that are
        already soft-deleted would free no room and would destroy records
        that ``delete_task`` keeps on purpose.

        Args:
            db: Database session
            user_id: User ID
            limit: Number of tasks to delete

        Returns:
            Number of tasks deleted
        """
        # Find oldest completed, still-active tasks
        stale_tasks = (
            db.query(Task)
            .filter(
                and_(
                    Task.user_id == user_id,
                    Task.status == TaskStatus.COMPLETED,
                    Task.deleted_at.is_(None),
                )
            )
            .order_by(Task.completed_at)
            .limit(limit)
            .all()
        )

        # NOTE(review): this removes the rows outright, whereas delete_task
        # soft-deletes. Confirm whether cleanup should soft-delete as well.
        for task in stale_tasks:
            db.delete(task)

        count = len(stale_tasks)
        if count > 0:
            db.commit()
            logger.info(f"Cleaned up {count} old tasks for user {user_id}")

        return count

    def auto_cleanup_expired_tasks(self, db: Session) -> int:
        """
        Auto-cleanup tasks older than TASK_RETENTION_DAYS

        Applies to completed tasks of all users whose ``completed_at`` is
        older than ``settings.task_retention_days`` days. A non-positive
        retention setting disables the cleanup.

        Args:
            db: Database session

        Returns:
            Number of tasks deleted
        """
        if settings.task_retention_days <= 0:
            return 0

        cutoff_date = datetime.utcnow() - timedelta(days=settings.task_retention_days)

        # Find expired tasks
        expired_tasks = (
            db.query(Task)
            .filter(
                and_(
                    Task.status == TaskStatus.COMPLETED,
                    Task.completed_at < cutoff_date,
                )
            )
            .all()
        )

        for task in expired_tasks:
            # NOTE(review): the row is deleted right after this flag is set,
            # so the flag is presumably never observable. Confirm whether the
            # record was meant to be kept with file_deleted=True instead.
            task.file_deleted = True
            # TODO: Delete actual files from disk
            db.delete(task)

        count = len(expired_tasks)
        if count > 0:
            db.commit()
            logger.info(f"Auto-cleaned up {count} expired tasks")

        return count

    def get_user_stats(self, db: Session, user_id: int) -> dict:
        """
        Get statistics for a user's tasks (excluding soft-deleted tasks)

        Args:
            db: Database session
            user_id: User ID

        Returns:
            Dictionary with the keys "total", "pending", "processing",
            "completed" and "failed", each mapped to a row count.
        """
        # Base filter: user's non-deleted tasks
        base_filter = and_(Task.user_id == user_id, Task.deleted_at.is_(None))

        def count_with_status(wanted: TaskStatus) -> int:
            # Count the user's active tasks currently in the given status.
            return (
                db.query(Task)
                .filter(and_(base_filter, Task.status == wanted))
                .count()
            )

        return {
            "total": db.query(Task).filter(base_filter).count(),
            "pending": count_with_status(TaskStatus.PENDING),
            "processing": count_with_status(TaskStatus.PROCESSING),
            "completed": count_with_status(TaskStatus.COMPLETED),
            "failed": count_with_status(TaskStatus.FAILED),
        }

    def get_all_tasks_admin(
        self,
        db: Session,
        user_id: Optional[int] = None,
        status: Optional[TaskStatus] = None,
        include_deleted: bool = True,
        include_files_deleted: bool = True,
        skip: int = 0,
        limit: int = 50,
        order_by: str = "created_at",
        order_desc: bool = True,
    ) -> Tuple[List[Task], int]:
        """
        Get all tasks for admin view (no user isolation).
        Includes soft-deleted tasks by default.

        Args:
            db: Database session
            user_id: Filter by user ID (optional)
            status: Filter by status (optional)
            include_deleted: Include soft-deleted tasks (default True)
            include_files_deleted: Include tasks with deleted files (default True)
            skip: Pagination offset
            limit: Pagination limit
            order_by: Sort field. Names that are not mapped columns of Task
                fall back to created_at.
            order_desc: Sort descending

        Returns:
            Tuple of (tasks list, total count); the total is the number of
            rows matching the filters before pagination is applied.
        """
        query = db.query(Task)

        # Optional user filter
        if user_id is not None:
            query = query.filter(Task.user_id == user_id)

        # Filter soft-deleted if requested
        if not include_deleted:
            query = query.filter(Task.deleted_at.is_(None))

        # Filter file-deleted if requested. "== False" is intentional: it
        # builds a SQL comparison, it is not a Python boolean test.
        if not include_files_deleted:
            query = query.filter(Task.file_deleted == False)  # noqa: E712

        # Explicit None check so the filter does not depend on enum truthiness.
        if status is not None:
            query = query.filter(Task.status == status)

        return self._sorted_page(query, order_by, order_desc, skip, limit)

    def get_task_by_id_admin(self, db: Session, task_id: str) -> Optional[Task]:
        """
        Get task by ID for admin (no user isolation, includes deleted).

        Args:
            db: Database session
            task_id: Task ID (UUID)

        Returns:
            Task object or None if not found
        """
        return db.query(Task).filter(Task.task_id == task_id).first()

    @staticmethod
    def _escape_like(term: str) -> str:
        """Escape LIKE wildcards in ``term`` so it is matched literally.

        The escape character itself is doubled first, then ``%`` and ``_``
        are prefixed with it. The result must be used together with
        ``escape=TaskService._LIKE_ESCAPE`` on the LIKE/ILIKE operator.
        """
        esc = TaskService._LIKE_ESCAPE
        return (
            term.replace(esc, esc + esc)
            .replace("%", esc + "%")
            .replace("_", esc + "_")
        )

    @staticmethod
    def _resolve_sort_column(order_by: str):
        """Return the Task column attribute named ``order_by``.

        Only mapped column attributes are accepted. Any other name (unknown,
        a relationship, a method, ...) falls back to ``Task.created_at`` so a
        caller-supplied sort field can never put a non-column into ORDER BY.
        """
        if order_by in sa_inspect(Task).column_attrs.keys():
            return getattr(Task, order_by)
        return Task.created_at

    @staticmethod
    def _sorted_page(
        query, order_by: str, order_desc: bool, skip: int, limit: int
    ) -> Tuple[List[Task], int]:
        """Count ``query``, then return one sorted page of it with the total."""
        # Count before ordering/pagination so the total reflects every match.
        total = query.count()

        sort_column = TaskService._resolve_sort_column(order_by)
        ordering = desc(sort_column) if order_desc else sort_column
        tasks = query.order_by(ordering).offset(skip).limit(limit).all()

        return tasks, total


# Global service instance
task_service = TaskService()
|