feat: add storage cleanup mechanism with soft delete and auto scheduler

- Add soft delete (deleted_at column) to preserve task records for statistics
- Implement cleanup service to delete old files while keeping DB records
- Add automatic cleanup scheduler (configurable interval, default 24h)
- Add admin endpoints: storage stats, cleanup trigger, scheduler status
- Update task service with admin views (include deleted/files_deleted)
- Add frontend storage management UI in admin dashboard
- Add i18n translations for storage management

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
egg
2025-12-14 12:41:01 +08:00
parent 81a0a3ab0f
commit 73112db055
23 changed files with 1359 additions and 634 deletions

View File

@@ -11,9 +11,14 @@ from fastapi import APIRouter, Depends, HTTPException, status, Query
from sqlalchemy.orm import Session
from app.core.deps import get_db, get_current_admin_user
from app.core.config import settings
from app.models.user import User
from app.models.task import TaskStatus
from app.services.admin_service import admin_service
from app.services.audit_service import audit_service
from app.services.task_service import task_service
from app.services.cleanup_service import cleanup_service
from app.services.cleanup_scheduler import get_cleanup_scheduler
logger = logging.getLogger(__name__)
@@ -217,3 +222,198 @@ async def get_translation_stats(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail=f"Failed to get translation statistics: {str(e)}"
)
@router.get("/tasks", summary="List all tasks (admin)")
async def list_all_tasks(
    user_id: Optional[int] = Query(None, description="Filter by user ID"),
    status_filter: Optional[str] = Query(None, description="Filter by status"),
    include_deleted: bool = Query(True, description="Include soft-deleted tasks"),
    include_files_deleted: bool = Query(True, description="Include tasks with deleted files"),
    page: int = Query(1, ge=1),
    page_size: int = Query(50, ge=1, le=100),
    db: Session = Depends(get_db),
    admin_user: User = Depends(get_current_admin_user)
):
    """
    Return one page of tasks across every user (admin view).

    Soft-deleted tasks and tasks whose files were removed are part of the
    listing unless the corresponding flag is switched off.

    - **user_id**: Restrict the listing to one user (optional)
    - **status_filter**: Restrict by status (pending, processing, completed, failed)
    - **include_deleted**: Keep soft-deleted tasks in the result (default: true)
    - **include_files_deleted**: Keep tasks with deleted files in the result (default: true)
    - **page** / **page_size**: 1-based page number and page length

    The response carries the serialized tasks, the total match count, the
    paging parameters echoed back, and a `has_more` flag.

    Raises a 400 error when `status_filter` is not a valid task status and a
    500 error on any other failure.

    Requires admin privileges.
    """
    try:
        # An empty or missing filter means "any status".
        try:
            parsed_status = TaskStatus(status_filter) if status_filter else None
        except ValueError:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail=f"Invalid status: {status_filter}"
            )

        # Translate the 1-based page number into a row offset.
        offset = (page - 1) * page_size

        rows, match_count = task_service.get_all_tasks_admin(
            db=db,
            user_id=user_id,
            status=parsed_status,
            include_deleted=include_deleted,
            include_files_deleted=include_files_deleted,
            skip=offset,
            limit=page_size
        )

        serialized = [row.to_dict() for row in rows]
        # More pages remain while the rows seen so far fall short of the total.
        more_available = (offset + len(rows)) < match_count

        return {
            "tasks": serialized,
            "total": match_count,
            "page": page,
            "page_size": page_size,
            "has_more": more_available
        }
    except HTTPException:
        # Deliberate HTTP errors (e.g. the 400 above) pass through untouched.
        raise
    except Exception as exc:
        logger.exception("Failed to list tasks")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to list tasks: {exc!s}"
        )
@router.get("/tasks/{task_id}", summary="Get task details (admin)")
async def get_task_admin(
    task_id: str,
    db: Session = Depends(get_db),
    admin_user: User = Depends(get_current_admin_user)
):
    """
    Return the full record of a single task (admin view).

    The lookup goes through the admin accessor of the task service, so the
    task is reachable regardless of ownership or deletion status.

    Raises a 404 error when no task matches `task_id` and a 500 error on any
    other failure.

    Requires admin privileges.
    """
    try:
        record = task_service.get_task_by_id_admin(db, task_id)
        if record:
            return record.to_dict()

        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Task not found: {task_id}"
        )
    except HTTPException:
        # Keep the 404 (and any other deliberate HTTP error) as-is.
        raise
    except Exception as exc:
        logger.exception(f"Failed to get task {task_id}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to get task: {exc!s}"
        )
@router.get("/storage/stats", summary="Get storage statistics")
async def get_storage_stats(
    db: Session = Depends(get_db),
    admin_user: User = Depends(get_current_admin_user)
):
    """
    Report storage usage as computed by the cleanup service.

    Returns:
    - total_tasks: Total number of tasks
    - tasks_with_files: Tasks that still have files on disk
    - tasks_files_deleted: Tasks where files have been cleaned up
    - soft_deleted_tasks: Tasks that have been soft-deleted
    - disk_usage: Actual disk usage in bytes and MB
    - per_user: Breakdown by user

    Any failure is logged and surfaced as a 500 error.

    Requires admin privileges.
    """
    try:
        # The service result is passed through to the client unchanged.
        return cleanup_service.get_storage_stats(db)
    except Exception as exc:
        logger.exception("Failed to get storage stats")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to get storage stats: {exc!s}"
        )
@router.get("/cleanup/status", summary="Get cleanup scheduler status")
async def get_cleanup_status(
    admin_user: User = Depends(get_current_admin_user)
):
    """
    Report the state of the automatic cleanup scheduler.

    Returns:
    - enabled: Whether cleanup is enabled in configuration
    - running: Whether scheduler is currently running
    - interval_hours: Hours between cleanup runs
    - max_files_per_user: Files to keep per user
    - last_run: Timestamp of last cleanup
    - next_run: Estimated next cleanup time
    - last_result: Result of last cleanup

    Any failure is logged and surfaced as a 500 error.

    Requires admin privileges.
    """
    try:
        # The scheduler's `status` attribute is returned as the response body.
        return get_cleanup_scheduler().status
    except Exception as exc:
        logger.exception("Failed to get cleanup status")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to get cleanup status: {exc!s}"
        )
@router.post("/cleanup/trigger", summary="Trigger file cleanup")
async def trigger_cleanup(
    max_files_per_user: Optional[int] = Query(
        None,
        ge=1,
        description="Override max files per user"
    ),
    db: Session = Depends(get_db),
    admin_user: User = Depends(get_current_admin_user)
):
    """
    Manually trigger the file cleanup process.

    Deletes old files while preserving database records.

    - **max_files_per_user**: Override the default retention count (optional,
      must be at least 1). When omitted, `settings.max_files_per_user` is used.

    Returns `success`, `message`, and the cleanup statistics produced by the
    cleanup service (including files deleted and space freed).

    Zero or negative overrides are rejected by request validation instead of
    being silently replaced by the default or forwarded to a destructive
    operation. Any failure during cleanup is logged and surfaced as a 500
    error.

    Requires admin privileges.
    """
    try:
        # Fall back to the configured default only when no override was sent;
        # a truthiness test would also discard an explicit falsy value.
        if max_files_per_user is None:
            files_to_keep = settings.max_files_per_user
        else:
            files_to_keep = max_files_per_user

        result = cleanup_service.cleanup_all_users(db, max_files_per_user=files_to_keep)

        logger.info(
            f"Manual cleanup triggered by admin {admin_user.username}: "
            f"{result['total_files_deleted']} files, {result['total_bytes_freed']} bytes"
        )

        return {
            "success": True,
            "message": "Cleanup completed successfully",
            **result
        }
    except Exception as e:
        logger.exception("Failed to trigger cleanup")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to trigger cleanup: {str(e)}"
        )