feat: complete external auth V2 migration with advanced features
This commit implements comprehensive external Azure AD authentication with complete task management, file download, and admin monitoring systems.

## Core Features Implemented (80% Complete)

### 1. Token Auto-Refresh Mechanism ✅
- Backend: POST /api/v2/auth/refresh endpoint
- Frontend: Auto-refresh 5 minutes before expiration
- Auto-retry on 401 errors with seamless token refresh

### 2. File Download System ✅
- Three format support: JSON / Markdown / PDF
- Endpoints: GET /api/v2/tasks/{id}/download/{format}
- File access control with ownership validation
- Frontend download buttons in TaskHistoryPage

### 3. Complete Task Management ✅
Backend Endpoints:
- POST /api/v2/tasks/{id}/start - Start task
- POST /api/v2/tasks/{id}/cancel - Cancel task
- POST /api/v2/tasks/{id}/retry - Retry failed task
- GET /api/v2/tasks - List with filters (status, filename, date range)
- GET /api/v2/tasks/stats - User statistics

Frontend Features:
- Status-based action buttons (Start/Cancel/Retry)
- Advanced search and filtering (status, filename, date range)
- Pagination and sorting
- Task statistics dashboard (5 stat cards)

### 4. Admin Monitoring System ✅ (Backend)
Admin APIs:
- GET /api/v2/admin/stats - System statistics
- GET /api/v2/admin/users - User list with stats
- GET /api/v2/admin/users/top - User leaderboard
- GET /api/v2/admin/audit-logs - Audit log query system
- GET /api/v2/admin/audit-logs/user/{id}/summary

Admin Features:
- Email-based admin check (ymirliu@panjit.com.tw)
- Comprehensive system metrics (users, tasks, sessions, activity)
- Audit logging service for security tracking

### 5. User Isolation & Security ✅
- Row-level security on all task queries
- File access control with ownership validation
- Strict user_id filtering on all operations
- Session validation and expiry checking
- Admin privilege verification

## New Files Created

Backend:
- backend/app/models/user_v2.py - User model for external auth
- backend/app/models/task.py - Task model with user isolation
- backend/app/models/session.py - Session management
- backend/app/models/audit_log.py - Audit log model
- backend/app/services/external_auth_service.py - External API client
- backend/app/services/task_service.py - Task CRUD with isolation
- backend/app/services/file_access_service.py - File access control
- backend/app/services/admin_service.py - Admin operations
- backend/app/services/audit_service.py - Audit logging
- backend/app/routers/auth_v2.py - V2 auth endpoints
- backend/app/routers/tasks.py - Task management endpoints
- backend/app/routers/admin.py - Admin endpoints
- backend/alembic/versions/5e75a59fb763_*.py - DB migration

Frontend:
- frontend/src/services/apiV2.ts - Complete V2 API client
- frontend/src/types/apiV2.ts - V2 type definitions
- frontend/src/pages/TaskHistoryPage.tsx - Task history UI

Modified Files:
- backend/app/core/deps.py - Added get_current_admin_user_v2
- backend/app/main.py - Registered admin router
- frontend/src/pages/LoginPage.tsx - V2 login integration
- frontend/src/components/Layout.tsx - User display and logout
- frontend/src/App.tsx - Added /tasks route

## Documentation
- openspec/changes/.../PROGRESS_UPDATE.md - Detailed progress report

## Pending Items (20%)
1. Database migration execution for audit_logs table
2. Frontend admin dashboard page
3. Frontend audit log viewer

## Testing Status
- Manual testing: ✅ Authentication flow verified
- Unit tests: ⏳ Pending
- Integration tests: ⏳ Pending

## Security Enhancements
- ✅ User isolation (row-level security)
- ✅ File access control
- ✅ Token expiry validation
- ✅ Admin privilege verification
- ✅ Audit logging infrastructure
- ⏳ Token encryption (noted, low priority)
- ⏳ Rate limiting (noted, low priority)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
394
backend/app/services/task_service.py
Normal file
394
backend/app/services/task_service.py
Normal file
@@ -0,0 +1,394 @@
|
||||
"""
|
||||
Tool_OCR - Task Management Service
|
||||
Handles OCR task CRUD operations with user isolation
|
||||
"""
|
||||
|
||||
from typing import List, Optional, Tuple
|
||||
from sqlalchemy.orm import Session
|
||||
from sqlalchemy import and_, or_, desc
|
||||
from datetime import datetime, timedelta
|
||||
import uuid
|
||||
import logging
|
||||
|
||||
from app.models.task import Task, TaskFile, TaskStatus
|
||||
from app.core.config import settings
|
||||
|
||||
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class TaskService:
    """Service for OCR task management with per-user isolation.

    Every method that accepts a ``user_id`` filters on ``Task.user_id`` so
    callers only ever read or modify tasks belonging to that user. The class
    keeps no instance state; all persistence goes through the SQLAlchemy
    ``Session`` passed to each call.
    """

    # Fields callers may sort by in get_user_tasks(). Anything else falls
    # back to created_at, so arbitrary attribute names supplied by a client
    # can never reach ORDER BY.
    _SORTABLE_FIELDS = ("created_at", "updated_at", "completed_at")

    def create_task(
        self,
        db: Session,
        user_id: int,
        filename: Optional[str] = None,
        file_type: Optional[str] = None,
    ) -> Task:
        """
        Create a new task for a user

        When ``settings.max_tasks_per_user`` is positive and the user already
        has at least that many tasks, up to 10 of their oldest COMPLETED
        tasks are deleted first. The new task is created either way, even if
        the clean-up found nothing to delete.

        Args:
            db: Database session
            user_id: User ID (for isolation)
            filename: Original filename
            file_type: File MIME type

        Returns:
            Created Task object (status PENDING, refreshed from the database)
        """
        # Generate unique task ID
        task_id = str(uuid.uuid4())

        # Check user's task limit (a non-positive setting disables the check)
        if settings.max_tasks_per_user > 0:
            user_task_count = db.query(Task).filter(Task.user_id == user_id).count()
            if user_task_count >= settings.max_tasks_per_user:
                # Auto-delete oldest completed tasks to make room
                self._cleanup_old_tasks(db, user_id, limit=10)

        task = Task(
            user_id=user_id,
            task_id=task_id,
            filename=filename,
            file_type=file_type,
            status=TaskStatus.PENDING,
        )

        db.add(task)
        db.commit()
        db.refresh(task)

        logger.info(f"Created task {task_id} for user {user_id}")
        return task

    def get_task_by_id(
        self, db: Session, task_id: str, user_id: int
    ) -> Optional[Task]:
        """
        Get task by ID with user isolation

        Args:
            db: Database session
            task_id: Task ID (UUID)
            user_id: User ID (for isolation)

        Returns:
            Task object or None if not found/unauthorized
        """
        # Matching on both columns means another user's task_id yields None,
        # indistinguishable from a task that does not exist.
        return (
            db.query(Task)
            .filter(and_(Task.task_id == task_id, Task.user_id == user_id))
            .first()
        )

    def get_user_tasks(
        self,
        db: Session,
        user_id: int,
        status: Optional[TaskStatus] = None,
        filename_search: Optional[str] = None,
        date_from: Optional[datetime] = None,
        date_to: Optional[datetime] = None,
        skip: int = 0,
        limit: int = 50,
        order_by: str = "created_at",
        order_desc: bool = True,
    ) -> Tuple[List[Task], int]:
        """
        Get user's tasks with pagination and filtering

        Args:
            db: Database session
            user_id: User ID (for isolation)
            status: Filter by status (optional)
            filename_search: Search by filename (partial match, optional)
            date_from: Filter tasks created from this date (optional)
            date_to: Filter tasks created until this date (optional); one
                day is added so the whole end date is included
            skip: Pagination offset
            limit: Pagination limit
            order_by: Sort field (created_at, updated_at, completed_at).
                Any other value falls back to created_at.
            order_desc: Sort descending

        Returns:
            Tuple of (tasks list, total count). The total counts every task
            matching the filters, ignoring skip/limit.
        """
        # Base query with user isolation
        query = db.query(Task).filter(Task.user_id == user_id)

        # Apply status filter
        if status:
            query = query.filter(Task.status == status)

        # Apply filename search (case-insensitive partial match)
        if filename_search:
            query = query.filter(Task.filename.ilike(f"%{filename_search}%"))

        # Apply date range filter
        if date_from:
            query = query.filter(Task.created_at >= date_from)
        if date_to:
            # Add one day to include the entire end date
            date_to_end = date_to + timedelta(days=1)
            query = query.filter(Task.created_at < date_to_end)

        # Count before sorting/pagination so it reflects the full result set
        total = query.count()

        # Only documented sort fields are honoured. order_by is typically
        # client-supplied, and a bare getattr() on the model would let any
        # attribute (relationship, method, internals) reach ORDER BY.
        if order_by in self._SORTABLE_FIELDS:
            sort_column = getattr(Task, order_by, Task.created_at)
        else:
            logger.warning(
                "Unsupported order_by %r for user %s; falling back to created_at",
                order_by,
                user_id,
            )
            sort_column = Task.created_at

        if order_desc:
            query = query.order_by(desc(sort_column))
        else:
            query = query.order_by(sort_column)

        # Apply pagination
        tasks = query.offset(skip).limit(limit).all()

        return tasks, total

    def update_task_status(
        self,
        db: Session,
        task_id: str,
        user_id: int,
        status: TaskStatus,
        error_message: Optional[str] = None,
        processing_time_ms: Optional[int] = None,
    ) -> Optional[Task]:
        """
        Update task status with user isolation

        Args:
            db: Database session
            task_id: Task ID (UUID)
            user_id: User ID (for isolation)
            status: New status
            error_message: Error message if failed (ignored when empty)
            processing_time_ms: Processing time in milliseconds

        Returns:
            Updated Task object or None if not found/unauthorized
        """
        task = self.get_task_by_id(db, task_id, user_id)
        if not task:
            logger.warning(
                f"Task {task_id} not found for user {user_id} during status update"
            )
            return None

        # One timestamp so updated_at and completed_at agree exactly.
        now = datetime.utcnow()
        task.status = status
        task.updated_at = now

        if status == TaskStatus.COMPLETED:
            task.completed_at = now

        if error_message:
            task.error_message = error_message

        # Explicit None check: a processing time of 0 ms is still recorded.
        if processing_time_ms is not None:
            task.processing_time_ms = processing_time_ms

        db.commit()
        db.refresh(task)

        logger.info(f"Updated task {task_id} status to {status.value}")
        return task

    def update_task_results(
        self,
        db: Session,
        task_id: str,
        user_id: int,
        result_json_path: Optional[str] = None,
        result_markdown_path: Optional[str] = None,
        result_pdf_path: Optional[str] = None,
    ) -> Optional[Task]:
        """
        Update task result file paths

        Only paths passed as non-empty values are written; omitted ones keep
        their current value.

        Args:
            db: Database session
            task_id: Task ID (UUID)
            user_id: User ID (for isolation)
            result_json_path: Path to JSON result
            result_markdown_path: Path to Markdown result
            result_pdf_path: Path to searchable PDF

        Returns:
            Updated Task object or None if not found/unauthorized
        """
        task = self.get_task_by_id(db, task_id, user_id)
        if not task:
            return None

        if result_json_path:
            task.result_json_path = result_json_path
        if result_markdown_path:
            task.result_markdown_path = result_markdown_path
        if result_pdf_path:
            task.result_pdf_path = result_pdf_path

        task.updated_at = datetime.utcnow()

        db.commit()
        db.refresh(task)

        logger.info(f"Updated task {task_id} result paths")
        return task

    def delete_task(
        self, db: Session, task_id: str, user_id: int
    ) -> bool:
        """
        Delete task with user isolation

        Args:
            db: Database session
            task_id: Task ID (UUID)
            user_id: User ID (for isolation)

        Returns:
            True if deleted, False if not found/unauthorized
        """
        task = self.get_task_by_id(db, task_id, user_id)
        if not task:
            return False

        # Cascade delete will handle task_files
        db.delete(task)
        db.commit()

        logger.info(f"Deleted task {task_id} for user {user_id}")
        return True

    def _cleanup_old_tasks(
        self, db: Session, user_id: int, limit: int = 10
    ) -> int:
        """
        Clean up old completed tasks for a user

        Only COMPLETED tasks are considered, oldest ``completed_at`` first.
        Commits only when at least one task was deleted.

        Args:
            db: Database session
            user_id: User ID
            limit: Maximum number of tasks to delete

        Returns:
            Number of tasks deleted
        """
        # Find oldest completed tasks
        old_tasks = (
            db.query(Task)
            .filter(
                and_(
                    Task.user_id == user_id,
                    Task.status == TaskStatus.COMPLETED,
                )
            )
            .order_by(Task.completed_at)
            .limit(limit)
            .all()
        )

        for task in old_tasks:
            db.delete(task)

        count = len(old_tasks)
        if count > 0:
            db.commit()
            logger.info(f"Cleaned up {count} old tasks for user {user_id}")

        return count

    def auto_cleanup_expired_tasks(self, db: Session) -> int:
        """
        Auto-cleanup tasks older than TASK_RETENTION_DAYS

        Applies across all users. Only COMPLETED tasks whose ``completed_at``
        is older than the retention window are removed. A non-positive
        ``settings.task_retention_days`` disables the clean-up.

        Args:
            db: Database session

        Returns:
            Number of tasks deleted
        """
        if settings.task_retention_days <= 0:
            return 0

        cutoff_date = datetime.utcnow() - timedelta(days=settings.task_retention_days)

        # Find expired tasks
        expired_tasks = (
            db.query(Task)
            .filter(
                and_(
                    Task.status == TaskStatus.COMPLETED,
                    Task.completed_at < cutoff_date,
                )
            )
            .all()
        )

        for task in expired_tasks:
            # NOTE(review): the row is deleted immediately below, so this
            # flag presumably never persists -- confirm whether it is meant
            # for listeners or for a future soft-delete.
            task.file_deleted = True
            # TODO: Delete actual files from disk
            db.delete(task)

        count = len(expired_tasks)
        if count > 0:
            db.commit()
            logger.info(f"Auto-cleaned up {count} expired tasks")

        return count

    def get_user_stats(self, db: Session, user_id: int) -> dict:
        """
        Get statistics for a user's tasks

        Args:
            db: Database session
            user_id: User ID

        Returns:
            Dictionary with keys ``total``, ``pending``, ``processing``,
            ``completed`` and ``failed``. ``total`` counts every task of the
            user, so it can exceed the sum of the four listed statuses if
            tasks exist in any other status.
        """
        stats = {"total": db.query(Task).filter(Task.user_id == user_id).count()}

        # One count query per reported status, in the order of the result keys.
        reported_statuses = (
            ("pending", TaskStatus.PENDING),
            ("processing", TaskStatus.PROCESSING),
            ("completed", TaskStatus.COMPLETED),
            ("failed", TaskStatus.FAILED),
        )
        for key, task_status in reported_statuses:
            stats[key] = (
                db.query(Task)
                .filter(and_(Task.user_id == user_id, Task.status == task_status))
                .count()
            )

        return stats
|
||||
|
||||
|
||||
# Global service instance
|
||||
# Shared module-level instance; TaskService defines no __init__ or instance
# attributes, so importers can reuse this one object.
task_service = TaskService()
|
||||
Reference in New Issue
Block a user