feat: add storage cleanup mechanism with soft delete and auto scheduler
- Add soft delete (deleted_at column) to preserve task records for statistics
- Implement cleanup service to delete old files while keeping DB records
- Add automatic cleanup scheduler (configurable interval, default 24h)
- Add admin endpoints: storage stats, cleanup trigger, scheduler status
- Update task service with admin views (include deleted/files_deleted)
- Add frontend storage management UI in admin dashboard
- Add i18n translations for storage management

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
@@ -0,0 +1,34 @@
+"""add_deleted_at_to_tasks
+
+Revision ID: f3d499f5d0cf
+Revises: g2b3c4d5e6f7
+Create Date: 2025-12-14 12:17:25.176482
+
+"""
+from typing import Sequence, Union
+
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision: str = 'f3d499f5d0cf'
+down_revision: Union[str, None] = 'g2b3c4d5e6f7'
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+    """Add deleted_at column for soft delete support."""
+    op.add_column(
+        'tool_ocr_tasks',
+        sa.Column('deleted_at', sa.DateTime(), nullable=True,
+                  comment='Soft delete timestamp - NULL means not deleted')
+    )
+    op.create_index('ix_tool_ocr_tasks_deleted_at', 'tool_ocr_tasks', ['deleted_at'])
+
+
+def downgrade() -> None:
+    """Remove deleted_at column."""
+    op.drop_index('ix_tool_ocr_tasks_deleted_at', table_name='tool_ocr_tasks')
+    op.drop_column('tool_ocr_tasks', 'deleted_at')
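For reference, the revision applies like any other Alembic migration. A minimal sketch using Alembic's programmatic entry point (equivalent to running `alembic upgrade head` from the backend directory, and assuming an `alembic.ini` in the working directory):

from alembic.config import main as alembic_main

# Apply all pending revisions, including f3d499f5d0cf
alembic_main(argv=["upgrade", "head"])

# Roll back just this revision if it needs to be undone
alembic_main(argv=["downgrade", "-1"])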
@@ -55,6 +55,11 @@ class Settings(BaseSettings):
     task_retention_days: int = Field(default=30)
     max_tasks_per_user: int = Field(default=1000)

+    # ===== Storage Cleanup Configuration =====
+    cleanup_enabled: bool = Field(default=True, description="Enable automatic file cleanup")
+    cleanup_interval_hours: int = Field(default=24, description="Hours between cleanup runs")
+    max_files_per_user: int = Field(default=50, description="Max task files to keep per user")
+
     # ===== OCR Configuration =====
     # Note: PaddleOCR models are stored in ~/.paddleocr/ and ~/.paddlex/ by default
     ocr_languages: str = Field(default="ch,en,japan,korean")
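Because `Settings` extends pydantic's `BaseSettings`, the new fields should be overridable from the environment with no code changes. A sketch, assuming the project keeps pydantic's default (prefix-less, case-insensitive) field-to-env mapping:

import os

# Hypothetical overrides; the variable names assume pydantic's default mapping
os.environ["CLEANUP_ENABLED"] = "true"
os.environ["CLEANUP_INTERVAL_HOURS"] = "12"  # run twice a day
os.environ["MAX_FILES_PER_USER"] = "100"

from app.core.config import Settings
print(Settings().cleanup_interval_hours)  # -> 12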
@@ -216,6 +216,15 @@ async def lifespan(app: FastAPI):
     except Exception as e:
         logger.warning(f"Failed to initialize prediction semaphore: {e}")

+    # Initialize cleanup scheduler if enabled
+    if settings.cleanup_enabled:
+        try:
+            from app.services.cleanup_scheduler import start_cleanup_scheduler
+            await start_cleanup_scheduler()
+            logger.info("Cleanup scheduler initialized")
+        except Exception as e:
+            logger.warning(f"Failed to initialize cleanup scheduler: {e}")
+
     logger.info("Application startup complete")

     yield
@@ -223,6 +232,15 @@ async def lifespan(app: FastAPI):
     # Shutdown
     logger.info("Shutting down Tool_OCR application...")

+    # Stop cleanup scheduler
+    if settings.cleanup_enabled:
+        try:
+            from app.services.cleanup_scheduler import stop_cleanup_scheduler
+            await stop_cleanup_scheduler()
+            logger.info("Cleanup scheduler stopped")
+        except Exception as e:
+            logger.warning(f"Error stopping cleanup scheduler: {e}")
+
     # Connection draining - wait for active requests to complete
     await drain_connections(timeout=30.0)
@@ -55,6 +55,8 @@ class Task(Base):
     completed_at = Column(DateTime, nullable=True)
     file_deleted = Column(Boolean, default=False, nullable=False,
                           comment="Track if files were auto-deleted")
+    deleted_at = Column(DateTime, nullable=True, index=True,
+                        comment="Soft delete timestamp - NULL means not deleted")

     # Relationships
     user = relationship("User", back_populates="tasks")
@@ -79,7 +81,8 @@ class Task(Base):
             "created_at": self.created_at.isoformat() if self.created_at else None,
             "updated_at": self.updated_at.isoformat() if self.updated_at else None,
             "completed_at": self.completed_at.isoformat() if self.completed_at else None,
-            "file_deleted": self.file_deleted
+            "file_deleted": self.file_deleted,
+            "deleted_at": self.deleted_at.isoformat() if self.deleted_at else None
         }

@@ -11,9 +11,14 @@ from fastapi import APIRouter, Depends, HTTPException, status, Query
 from sqlalchemy.orm import Session

 from app.core.deps import get_db, get_current_admin_user
+from app.core.config import settings
 from app.models.user import User
+from app.models.task import TaskStatus
 from app.services.admin_service import admin_service
 from app.services.audit_service import audit_service
+from app.services.task_service import task_service
+from app.services.cleanup_service import cleanup_service
+from app.services.cleanup_scheduler import get_cleanup_scheduler

 logger = logging.getLogger(__name__)
@@ -217,3 +222,198 @@ async def get_translation_stats(
             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
             detail=f"Failed to get translation statistics: {str(e)}"
         )
+
+
+@router.get("/tasks", summary="List all tasks (admin)")
+async def list_all_tasks(
+    user_id: Optional[int] = Query(None, description="Filter by user ID"),
+    status_filter: Optional[str] = Query(None, description="Filter by status"),
+    include_deleted: bool = Query(True, description="Include soft-deleted tasks"),
+    include_files_deleted: bool = Query(True, description="Include tasks with deleted files"),
+    page: int = Query(1, ge=1),
+    page_size: int = Query(50, ge=1, le=100),
+    db: Session = Depends(get_db),
+    admin_user: User = Depends(get_current_admin_user)
+):
+    """
+    Get list of all tasks across all users.
+    Includes soft-deleted tasks and tasks with deleted files by default.
+
+    - **user_id**: Filter by user ID (optional)
+    - **status_filter**: Filter by status (pending, processing, completed, failed)
+    - **include_deleted**: Include soft-deleted tasks (default: true)
+    - **include_files_deleted**: Include tasks with deleted files (default: true)
+
+    Requires admin privileges.
+    """
+    try:
+        # Parse status filter
+        task_status = None
+        if status_filter:
+            try:
+                task_status = TaskStatus(status_filter)
+            except ValueError:
+                raise HTTPException(
+                    status_code=status.HTTP_400_BAD_REQUEST,
+                    detail=f"Invalid status: {status_filter}"
+                )
+
+        skip = (page - 1) * page_size
+
+        tasks, total = task_service.get_all_tasks_admin(
+            db=db,
+            user_id=user_id,
+            status=task_status,
+            include_deleted=include_deleted,
+            include_files_deleted=include_files_deleted,
+            skip=skip,
+            limit=page_size
+        )
+
+        return {
+            "tasks": [task.to_dict() for task in tasks],
+            "total": total,
+            "page": page,
+            "page_size": page_size,
+            "has_more": (skip + len(tasks)) < total
+        }
+
+    except HTTPException:
+        raise
+    except Exception as e:
+        logger.exception("Failed to list tasks")
+        raise HTTPException(
+            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+            detail=f"Failed to list tasks: {str(e)}"
+        )
+
+
+@router.get("/tasks/{task_id}", summary="Get task details (admin)")
+async def get_task_admin(
+    task_id: str,
+    db: Session = Depends(get_db),
+    admin_user: User = Depends(get_current_admin_user)
+):
+    """
+    Get detailed information about a specific task (admin view).
+    Can access any task regardless of ownership or deletion status.
+
+    Requires admin privileges.
+    """
+    try:
+        task = task_service.get_task_by_id_admin(db, task_id)
+        if not task:
+            raise HTTPException(
+                status_code=status.HTTP_404_NOT_FOUND,
+                detail=f"Task not found: {task_id}"
+            )
+
+        return task.to_dict()
+
+    except HTTPException:
+        raise
+    except Exception as e:
+        logger.exception(f"Failed to get task {task_id}")
+        raise HTTPException(
+            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+            detail=f"Failed to get task: {str(e)}"
+        )
+
+
+@router.get("/storage/stats", summary="Get storage statistics")
+async def get_storage_stats(
+    db: Session = Depends(get_db),
+    admin_user: User = Depends(get_current_admin_user)
+):
+    """
+    Get storage usage statistics.
+
+    Returns:
+    - total_tasks: Total number of tasks
+    - tasks_with_files: Tasks that still have files on disk
+    - tasks_files_deleted: Tasks where files have been cleaned up
+    - soft_deleted_tasks: Tasks that have been soft-deleted
+    - disk_usage: Actual disk usage in bytes and MB
+    - per_user: Breakdown by user
+
+    Requires admin privileges.
+    """
+    try:
+        stats = cleanup_service.get_storage_stats(db)
+        return stats
+
+    except Exception as e:
+        logger.exception("Failed to get storage stats")
+        raise HTTPException(
+            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+            detail=f"Failed to get storage stats: {str(e)}"
+        )
+
+
+@router.get("/cleanup/status", summary="Get cleanup scheduler status")
+async def get_cleanup_status(
+    admin_user: User = Depends(get_current_admin_user)
+):
+    """
+    Get the status of the automatic cleanup scheduler.
+
+    Returns:
+    - enabled: Whether cleanup is enabled in configuration
+    - running: Whether scheduler is currently running
+    - interval_hours: Hours between cleanup runs
+    - max_files_per_user: Files to keep per user
+    - last_run: Timestamp of last cleanup
+    - next_run: Estimated next cleanup time
+    - last_result: Result of last cleanup
+
+    Requires admin privileges.
+    """
+    try:
+        scheduler = get_cleanup_scheduler()
+        return scheduler.status
+
+    except Exception as e:
+        logger.exception("Failed to get cleanup status")
+        raise HTTPException(
+            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+            detail=f"Failed to get cleanup status: {str(e)}"
+        )
+
+
+@router.post("/cleanup/trigger", summary="Trigger file cleanup")
+async def trigger_cleanup(
+    max_files_per_user: Optional[int] = Query(None, description="Override max files per user"),
+    db: Session = Depends(get_db),
+    admin_user: User = Depends(get_current_admin_user)
+):
+    """
+    Manually trigger file cleanup process.
+    Deletes old files while preserving database records.
+
+    - **max_files_per_user**: Override the default retention count (optional)
+
+    Returns cleanup statistics including files deleted and space freed.
+
+    Requires admin privileges.
+    """
+    try:
+        files_to_keep = max_files_per_user or settings.max_files_per_user
+        result = cleanup_service.cleanup_all_users(db, max_files_per_user=files_to_keep)
+
+        logger.info(
+            f"Manual cleanup triggered by admin {admin_user.username}: "
+            f"{result['total_files_deleted']} files, {result['total_bytes_freed']} bytes"
+        )
+
+        return {
+            "success": True,
+            "message": "Cleanup completed successfully",
+            **result
+        }
+
+    except Exception as e:
+        logger.exception("Failed to trigger cleanup")
+        raise HTTPException(
+            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+            detail=f"Failed to trigger cleanup: {str(e)}"
+        )
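A quick smoke test of the new endpoints; the `/api/admin` mount prefix and the `token` value are assumptions, not confirmed by this diff:

import requests

token = "<admin JWT>"  # placeholder
base = "http://localhost:8000/api/admin"  # assumed mount point for this router
headers = {"Authorization": f"Bearer {token}"}

print(requests.get(f"{base}/storage/stats", headers=headers).json())
print(requests.get(f"{base}/cleanup/status", headers=headers).json())

# Trigger a cleanup that keeps only each user's 10 newest tasks' files
resp = requests.post(f"{base}/cleanup/trigger",
                     params={"max_files_per_user": 10}, headers=headers)
print(resp.json())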
backend/app/services/cleanup_scheduler.py (new file, +173 lines)
@@ -0,0 +1,173 @@
+"""
+Tool_OCR - Cleanup Scheduler
+Background scheduler for periodic file cleanup
+"""
+
+import asyncio
+import logging
+from datetime import datetime, timedelta
+from typing import Optional
+
+from sqlalchemy.orm import Session
+
+from app.core.config import settings
+from app.core.database import SessionLocal
+from app.services.cleanup_service import cleanup_service
+
+logger = logging.getLogger(__name__)
+
+
+class CleanupScheduler:
+    """
+    Background scheduler for periodic file cleanup.
+    Uses asyncio for non-blocking background execution.
+    """
+
+    def __init__(self):
+        self._task: Optional[asyncio.Task] = None
+        self._running: bool = False
+        self._last_run: Optional[datetime] = None
+        self._next_run: Optional[datetime] = None
+        self._last_result: Optional[dict] = None
+
+    @property
+    def is_running(self) -> bool:
+        """Check if scheduler is running"""
+        return self._running and self._task is not None and not self._task.done()
+
+    @property
+    def status(self) -> dict:
+        """Get scheduler status"""
+        return {
+            "enabled": settings.cleanup_enabled,
+            "running": self.is_running,
+            "interval_hours": settings.cleanup_interval_hours,
+            "max_files_per_user": settings.max_files_per_user,
+            "last_run": self._last_run.isoformat() if self._last_run else None,
+            "next_run": self._next_run.isoformat() if self._next_run else None,
+            "last_result": self._last_result
+        }
+
+    async def start(self):
+        """Start the cleanup scheduler"""
+        if not settings.cleanup_enabled:
+            logger.info("Cleanup scheduler is disabled in configuration")
+            return
+
+        if self.is_running:
+            logger.warning("Cleanup scheduler is already running")
+            return
+
+        self._running = True
+        self._task = asyncio.create_task(self._run_loop())
+        logger.info(
+            f"Cleanup scheduler started (interval: {settings.cleanup_interval_hours}h, "
+            f"max_files_per_user: {settings.max_files_per_user})"
+        )
+
+    async def stop(self):
+        """Stop the cleanup scheduler"""
+        self._running = False
+
+        if self._task is not None:
+            self._task.cancel()
+            try:
+                await self._task
+            except asyncio.CancelledError:
+                pass
+            self._task = None
+
+        logger.info("Cleanup scheduler stopped")
+
+    async def _run_loop(self):
+        """Main scheduler loop"""
+        interval_seconds = settings.cleanup_interval_hours * 3600
+
+        while self._running:
+            try:
+                # Run cleanup
+                await self._execute_cleanup()
+
+                # Schedule the next run one full interval from now
+                # (a plain timedelta; wrapping the hour field would break
+                # for intervals crossing midnight or >= 24h)
+                self._next_run = datetime.utcnow() + timedelta(
+                    hours=settings.cleanup_interval_hours
+                )
+
+                # Wait for next interval
+                logger.debug(f"Cleanup scheduler sleeping for {interval_seconds} seconds")
+                await asyncio.sleep(interval_seconds)
+
+            except asyncio.CancelledError:
+                logger.info("Cleanup scheduler loop cancelled")
+                break
+            except Exception as e:
+                logger.exception(f"Error in cleanup scheduler loop: {e}")
+                # Wait a bit before retrying to avoid tight error loops
+                await asyncio.sleep(60)
+
+    async def _execute_cleanup(self):
+        """Execute the cleanup task"""
+        logger.info("Starting scheduled cleanup...")
+        self._last_run = datetime.utcnow()
+
+        # Run cleanup in thread pool to avoid blocking the event loop
+        loop = asyncio.get_running_loop()
+        result = await loop.run_in_executor(None, self._run_cleanup_sync)
+
+        self._last_result = result
+        logger.info(
+            f"Scheduled cleanup completed: {result.get('total_files_deleted', 0)} files deleted, "
+            f"{result.get('total_bytes_freed', 0)} bytes freed"
+        )
+
+    def _run_cleanup_sync(self) -> dict:
+        """Synchronous cleanup execution (runs in thread pool)"""
+        db: Session = SessionLocal()
+        try:
+            result = cleanup_service.cleanup_all_users(
+                db=db,
+                max_files_per_user=settings.max_files_per_user
+            )
+            return result
+        except Exception as e:
+            logger.exception(f"Cleanup execution failed: {e}")
+            return {
+                "error": str(e),
+                "timestamp": datetime.utcnow().isoformat()
+            }
+        finally:
+            db.close()
+
+    async def run_now(self) -> dict:
+        """Trigger immediate cleanup (outside of scheduled interval)"""
+        logger.info("Manual cleanup triggered")
+        await self._execute_cleanup()
+        return self._last_result or {}
+
+
+# Global scheduler instance
+_scheduler: Optional[CleanupScheduler] = None
+
+
+def get_cleanup_scheduler() -> CleanupScheduler:
+    """Get the global cleanup scheduler instance"""
+    global _scheduler
+    if _scheduler is None:
+        _scheduler = CleanupScheduler()
+    return _scheduler
+
+
+async def start_cleanup_scheduler():
+    """Start the global cleanup scheduler"""
+    scheduler = get_cleanup_scheduler()
+    await scheduler.start()
+
+
+async def stop_cleanup_scheduler():
+    """Stop the global cleanup scheduler"""
+    scheduler = get_cleanup_scheduler()
+    await scheduler.stop()
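Besides the background loop, the scheduler exposes `run_now()` for an immediate pass; a minimal sketch of driving it from a one-off script:

import asyncio

from app.services.cleanup_scheduler import get_cleanup_scheduler

async def main():
    scheduler = get_cleanup_scheduler()
    result = await scheduler.run_now()  # runs one cleanup pass immediately
    print(result.get("total_files_deleted", 0), "files deleted")

asyncio.run(main())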
backend/app/services/cleanup_service.py (new file, +246 lines)
@@ -0,0 +1,246 @@
+"""
+Tool_OCR - Cleanup Service
+Handles file cleanup while preserving database records for statistics
+"""
+
+import os
+import shutil
+import logging
+from typing import Dict, List, Tuple
+from datetime import datetime
+from sqlalchemy.orm import Session
+from sqlalchemy import and_, case, func
+
+from app.models.task import Task, TaskFile, TaskStatus
+from app.core.config import settings
+
+logger = logging.getLogger(__name__)
+
+
+class CleanupService:
+    """Service for cleaning up files while preserving database records"""
+
+    def cleanup_user_files(
+        self,
+        db: Session,
+        user_id: int,
+        max_files_to_keep: int = 50
+    ) -> Dict:
+        """
+        Clean up old files for a user, keeping only the newest N tasks' files.
+        Database records are preserved for statistics.
+
+        Args:
+            db: Database session
+            user_id: User ID
+            max_files_to_keep: Number of newest tasks to keep files for
+
+        Returns:
+            Dict with cleanup statistics
+        """
+        # Get all completed tasks with files (not yet deleted)
+        tasks_with_files = (
+            db.query(Task)
+            .filter(
+                and_(
+                    Task.user_id == user_id,
+                    Task.status == TaskStatus.COMPLETED,
+                    Task.file_deleted == False,
+                    Task.deleted_at.is_(None)  # Don't process already soft-deleted
+                )
+            )
+            .order_by(Task.created_at.desc())
+            .all()
+        )
+
+        # Keep newest N tasks, clean files from older ones
+        tasks_to_clean = tasks_with_files[max_files_to_keep:]
+
+        files_deleted = 0
+        bytes_freed = 0
+        tasks_cleaned = 0
+
+        for task in tasks_to_clean:
+            task_bytes, task_files = self._delete_task_files(task)
+            if task_files > 0:
+                task.file_deleted = True
+                task.updated_at = datetime.utcnow()
+                files_deleted += task_files
+                bytes_freed += task_bytes
+                tasks_cleaned += 1
+
+        if tasks_cleaned > 0:
+            db.commit()
+            logger.info(
+                f"Cleaned up {files_deleted} files ({bytes_freed} bytes) "
+                f"from {tasks_cleaned} tasks for user {user_id}"
+            )
+
+        return {
+            "user_id": user_id,
+            "tasks_cleaned": tasks_cleaned,
+            "files_deleted": files_deleted,
+            "bytes_freed": bytes_freed,
+            "tasks_with_files_remaining": min(len(tasks_with_files), max_files_to_keep)
+        }
+
+    def cleanup_all_users(
+        self,
+        db: Session,
+        max_files_per_user: int = 50
+    ) -> Dict:
+        """
+        Run cleanup for all users.
+
+        Args:
+            db: Database session
+            max_files_per_user: Number of newest tasks to keep files for per user
+
+        Returns:
+            Dict with overall cleanup statistics
+        """
+        # Get all distinct user IDs with tasks
+        user_ids = (
+            db.query(Task.user_id)
+            .filter(Task.file_deleted == False)
+            .distinct()
+            .all()
+        )
+
+        total_tasks_cleaned = 0
+        total_files_deleted = 0
+        total_bytes_freed = 0
+        users_processed = 0
+
+        for (user_id,) in user_ids:
+            result = self.cleanup_user_files(db, user_id, max_files_per_user)
+            total_tasks_cleaned += result["tasks_cleaned"]
+            total_files_deleted += result["files_deleted"]
+            total_bytes_freed += result["bytes_freed"]
+            users_processed += 1
+
+        logger.info(
+            f"Cleanup completed: {users_processed} users, "
+            f"{total_tasks_cleaned} tasks, {total_files_deleted} files, "
+            f"{total_bytes_freed} bytes freed"
+        )
+
+        return {
+            "users_processed": users_processed,
+            "total_tasks_cleaned": total_tasks_cleaned,
+            "total_files_deleted": total_files_deleted,
+            "total_bytes_freed": total_bytes_freed,
+            "timestamp": datetime.utcnow().isoformat()
+        }
+
+    def _delete_task_files(self, task: Task) -> Tuple[int, int]:
+        """
+        Delete actual files for a task from disk.
+
+        Args:
+            task: Task object
+
+        Returns:
+            Tuple of (bytes_deleted, files_deleted)
+        """
+        bytes_deleted = 0
+        files_deleted = 0
+
+        # Delete result directory (counted as a single entry in files_deleted)
+        result_dir = os.path.join(settings.result_dir, task.task_id)
+        if os.path.exists(result_dir):
+            try:
+                dir_size = self._get_dir_size(result_dir)
+                shutil.rmtree(result_dir)
+                bytes_deleted += dir_size
+                files_deleted += 1
+                logger.debug(f"Deleted result directory: {result_dir}")
+            except Exception as e:
+                logger.error(f"Failed to delete result directory {result_dir}: {e}")
+
+        # Delete uploaded files from task_files
+        for task_file in task.files:
+            if task_file.stored_path and os.path.exists(task_file.stored_path):
+                try:
+                    file_size = os.path.getsize(task_file.stored_path)
+                    os.remove(task_file.stored_path)
+                    bytes_deleted += file_size
+                    files_deleted += 1
+                    logger.debug(f"Deleted uploaded file: {task_file.stored_path}")
+                except Exception as e:
+                    logger.error(f"Failed to delete file {task_file.stored_path}: {e}")
+
+        return bytes_deleted, files_deleted
+
+    def _get_dir_size(self, path: str) -> int:
+        """Get total size of a directory in bytes."""
+        total = 0
+        try:
+            for entry in os.scandir(path):
+                if entry.is_file():
+                    total += entry.stat().st_size
+                elif entry.is_dir():
+                    total += self._get_dir_size(entry.path)
+        except Exception:
+            pass
+        return total
+
+    def get_storage_stats(self, db: Session) -> Dict:
+        """
+        Get storage statistics for admin dashboard.
+
+        Args:
+            db: Database session
+
+        Returns:
+            Dict with storage statistics
+        """
+        # Count tasks by file_deleted status
+        total_tasks = db.query(Task).count()
+        tasks_with_files = db.query(Task).filter(Task.file_deleted == False).count()
+        tasks_files_deleted = db.query(Task).filter(Task.file_deleted == True).count()
+        soft_deleted_tasks = db.query(Task).filter(Task.deleted_at.isnot(None)).count()
+
+        # Get per-user statistics (portable conditional counts via CASE)
+        user_stats = (
+            db.query(
+                Task.user_id,
+                func.count(Task.id).label("total_tasks"),
+                func.sum(case((Task.file_deleted == False, 1), else_=0)).label("tasks_with_files"),
+                func.sum(case((Task.deleted_at.isnot(None), 1), else_=0)).label("deleted_tasks")
+            )
+            .group_by(Task.user_id)
+            .all()
+        )
+
+        # Calculate actual disk usage
+        uploads_size = self._get_dir_size(settings.upload_dir)
+        results_size = self._get_dir_size(settings.result_dir)
+
+        return {
+            "total_tasks": total_tasks,
+            "tasks_with_files": tasks_with_files,
+            "tasks_files_deleted": tasks_files_deleted,
+            "soft_deleted_tasks": soft_deleted_tasks,
+            "disk_usage": {
+                "uploads_bytes": uploads_size,
+                "results_bytes": results_size,
+                "total_bytes": uploads_size + results_size,
+                "uploads_mb": round(uploads_size / (1024 * 1024), 2),
+                "results_mb": round(results_size / (1024 * 1024), 2),
+                "total_mb": round((uploads_size + results_size) / (1024 * 1024), 2)
+            },
+            "per_user": [
+                {
+                    "user_id": stat.user_id,
+                    "total_tasks": stat.total_tasks,
+                    "tasks_with_files": int(stat.tasks_with_files or 0),
+                    "deleted_tasks": int(stat.deleted_tasks or 0)
+                }
+                for stat in user_stats
+            ]
+        }
+
+
+# Global service instance
+cleanup_service = CleanupService()
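The service can also be driven directly, which is what both the scheduler and the admin endpoint do; a sketch using the session factory the scheduler already imports:

from app.core.database import SessionLocal
from app.services.cleanup_service import cleanup_service

db = SessionLocal()
try:
    stats = cleanup_service.get_storage_stats(db)
    print(f"{stats['tasks_with_files']} tasks still have files on disk")

    # Keep only each user's 50 newest completed tasks' files
    result = cleanup_service.cleanup_all_users(db, max_files_per_user=50)
    print(f"freed {result['total_bytes_freed']} bytes")
finally:
    db.close()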
@@ -65,7 +65,7 @@ class TaskService:
         return task

     def get_task_by_id(
-        self, db: Session, task_id: str, user_id: int
+        self, db: Session, task_id: str, user_id: int, include_deleted: bool = False
     ) -> Optional[Task]:
         """
         Get task by ID with user isolation
@@ -74,16 +74,20 @@ class TaskService:
             db: Database session
             task_id: Task ID (UUID)
             user_id: User ID (for isolation)
+            include_deleted: If True, include soft-deleted tasks

         Returns:
             Task object or None if not found/unauthorized
         """
-        task = (
-            db.query(Task)
-            .filter(and_(Task.task_id == task_id, Task.user_id == user_id))
-            .first()
-        )
-        return task
+        query = db.query(Task).filter(
+            and_(Task.task_id == task_id, Task.user_id == user_id)
+        )
+
+        # Filter out soft-deleted tasks by default
+        if not include_deleted:
+            query = query.filter(Task.deleted_at.is_(None))
+
+        return query.first()

     def get_user_tasks(
         self,
@@ -97,6 +101,7 @@ class TaskService:
         limit: int = 50,
         order_by: str = "created_at",
         order_desc: bool = True,
+        include_deleted: bool = False,
     ) -> Tuple[List[Task], int]:
         """
         Get user's tasks with pagination and filtering
@@ -112,6 +117,7 @@ class TaskService:
             limit: Pagination limit
             order_by: Sort field (created_at, updated_at, completed_at)
             order_desc: Sort descending
+            include_deleted: If True, include soft-deleted tasks

         Returns:
             Tuple of (tasks list, total count)
@@ -119,6 +125,10 @@ class TaskService:
         # Base query with user isolation
         query = db.query(Task).filter(Task.user_id == user_id)

+        # Filter out soft-deleted tasks by default
+        if not include_deleted:
+            query = query.filter(Task.deleted_at.is_(None))
+
         # Apply status filter
         if status:
             query = query.filter(Task.status == status)
@@ -244,7 +254,9 @@ class TaskService:
         self, db: Session, task_id: str, user_id: int
     ) -> bool:
         """
-        Delete task with user isolation
+        Soft delete task with user isolation.
+        Sets deleted_at timestamp instead of removing record.
+        Database records are preserved for statistics tracking.

         Args:
             db: Database session
@@ -252,17 +264,18 @@ class TaskService:
             user_id: User ID (for isolation)

         Returns:
-            True if deleted, False if not found/unauthorized
+            True if soft deleted, False if not found/unauthorized
         """
         task = self.get_task_by_id(db, task_id, user_id)
         if not task:
             return False

-        # Cascade delete will handle task_files
-        db.delete(task)
+        # Soft delete: set deleted_at timestamp
+        task.deleted_at = datetime.utcnow()
+        task.updated_at = datetime.utcnow()
         db.commit()

-        logger.info(f"Deleted task {task_id} for user {user_id}")
+        logger.info(f"Soft deleted task {task_id} for user {user_id}")
         return True

     def _cleanup_old_tasks(
@@ -389,6 +402,82 @@ class TaskService:
             "failed": failed,
         }

+    def get_all_tasks_admin(
+        self,
+        db: Session,
+        user_id: Optional[int] = None,
+        status: Optional[TaskStatus] = None,
+        include_deleted: bool = True,
+        include_files_deleted: bool = True,
+        skip: int = 0,
+        limit: int = 50,
+        order_by: str = "created_at",
+        order_desc: bool = True,
+    ) -> Tuple[List[Task], int]:
+        """
+        Get all tasks for admin view (no user isolation).
+        Includes soft-deleted tasks by default.
+
+        Args:
+            db: Database session
+            user_id: Filter by user ID (optional)
+            status: Filter by status (optional)
+            include_deleted: Include soft-deleted tasks (default True)
+            include_files_deleted: Include tasks with deleted files (default True)
+            skip: Pagination offset
+            limit: Pagination limit
+            order_by: Sort field
+            order_desc: Sort descending
+
+        Returns:
+            Tuple of (tasks list, total count)
+        """
+        query = db.query(Task)
+
+        # Optional user filter
+        if user_id is not None:
+            query = query.filter(Task.user_id == user_id)
+
+        # Filter soft-deleted if requested
+        if not include_deleted:
+            query = query.filter(Task.deleted_at.is_(None))
+
+        # Filter file-deleted if requested
+        if not include_files_deleted:
+            query = query.filter(Task.file_deleted == False)
+
+        # Apply status filter
+        if status:
+            query = query.filter(Task.status == status)
+
+        # Get total count
+        total = query.count()
+
+        # Apply sorting
+        sort_column = getattr(Task, order_by, Task.created_at)
+        if order_desc:
+            query = query.order_by(desc(sort_column))
+        else:
+            query = query.order_by(sort_column)
+
+        # Apply pagination
+        tasks = query.offset(skip).limit(limit).all()
+
+        return tasks, total
+
+    def get_task_by_id_admin(self, db: Session, task_id: str) -> Optional[Task]:
+        """
+        Get task by ID for admin (no user isolation, includes deleted).
+
+        Args:
+            db: Database session
+            task_id: Task ID (UUID)
+
+        Returns:
+            Task object or None if not found
+        """
+        return db.query(Task).filter(Task.task_id == task_id).first()

 # Global service instance
 task_service = TaskService()
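The net effect of the soft-delete change, sketched below; the method name `delete_task` and the placeholder identifiers are assumptions inferred from the hunks above, not confirmed by the diff:

from app.core.database import SessionLocal
from app.services.task_service import task_service

db = SessionLocal()
try:
    task_id, user_id = "some-task-uuid", 1  # placeholders

    task_service.delete_task(db, task_id, user_id)  # sets deleted_at, keeps the row

    # Hidden from normal reads...
    assert task_service.get_task_by_id(db, task_id, user_id) is None

    # ...but still available for statistics and admin views
    ghost = task_service.get_task_by_id(db, task_id, user_id, include_deleted=True)
    assert ghost is not None and ghost.deleted_at is not None
finally:
    db.close()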