feat: add storage cleanup mechanism with soft delete and auto scheduler
- Add soft delete (deleted_at column) to preserve task records for statistics
- Implement cleanup service to delete old files while keeping DB records
- Add automatic cleanup scheduler (configurable interval, default 24h)
- Add admin endpoints: storage stats, cleanup trigger, scheduler status
- Update task service with admin views (include deleted/files_deleted)
- Add frontend storage management UI in admin dashboard
- Add i18n translations for storage management

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
173
backend/app/services/cleanup_scheduler.py
Normal file
173
backend/app/services/cleanup_scheduler.py
Normal file
@@ -0,0 +1,173 @@
|
||||
"""
|
||||
Tool_OCR - Cleanup Scheduler
|
||||
Background scheduler for periodic file cleanup
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Optional
|
||||
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.core.config import settings
|
||||
from app.core.database import SessionLocal
|
||||
from app.services.cleanup_service import cleanup_service
|
||||
|
||||
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class CleanupScheduler:
    """
    Background scheduler for periodic file cleanup.

    Runs ``cleanup_service.cleanup_all_users`` once per
    ``settings.cleanup_interval_hours`` from an asyncio task. The blocking
    cleanup call itself is handed to the event loop's default executor so
    the loop is not blocked while it runs.

    All timestamps kept by the scheduler are naive datetimes produced by
    ``datetime.utcnow()``.
    """

    # Seconds to wait before retrying after an unexpected error in the loop,
    # so a persistent failure does not turn into a tight error loop.
    _ERROR_RETRY_SECONDS = 60

    def __init__(self):
        """Create an idle scheduler; nothing runs until ``start()`` is awaited."""
        self._task: Optional[asyncio.Task] = None
        self._running: bool = False
        self._last_run: Optional[datetime] = None
        self._next_run: Optional[datetime] = None
        self._last_result: Optional[dict] = None

    @property
    def is_running(self) -> bool:
        """Check if scheduler is running (flag set and loop task still alive)."""
        return self._running and self._task is not None and not self._task.done()

    @property
    def status(self) -> dict:
        """
        Get scheduler status.

        Returns:
            A dict with the configuration values read from ``settings``
            (``enabled``, ``interval_hours``, ``max_files_per_user``), the
            current ``running`` flag, ISO-formatted ``last_run`` /
            ``next_run`` timestamps (``None`` when not set yet) and the
            ``last_result`` dict of the most recent cleanup.
        """
        return {
            "enabled": settings.cleanup_enabled,
            "running": self.is_running,
            "interval_hours": settings.cleanup_interval_hours,
            "max_files_per_user": settings.max_files_per_user,
            "last_run": self._last_run.isoformat() if self._last_run else None,
            "next_run": self._next_run.isoformat() if self._next_run else None,
            "last_result": self._last_result,
        }

    @staticmethod
    def _compute_next_run(now: datetime, interval_hours: float) -> datetime:
        """
        Return the time of the next scheduled run.

        Uses ``timedelta`` arithmetic so the result rolls over midnight and
        stays correct for intervals of 24 hours or more.
        """
        return now + timedelta(hours=interval_hours)

    async def start(self):
        """
        Start the cleanup scheduler.

        Does nothing (apart from logging) when cleanup is disabled in the
        configuration or when the scheduler is already running.
        """
        if not settings.cleanup_enabled:
            logger.info("Cleanup scheduler is disabled in configuration")
            return

        if self.is_running:
            logger.warning("Cleanup scheduler is already running")
            return

        self._running = True
        self._task = asyncio.create_task(self._run_loop())
        logger.info(
            "Cleanup scheduler started (interval: %sh, max_files_per_user: %s)",
            settings.cleanup_interval_hours,
            settings.max_files_per_user,
        )

    async def stop(self):
        """Stop the cleanup scheduler and wait for the loop task to finish."""
        self._running = False

        if self._task is not None:
            self._task.cancel()
            try:
                await self._task
            except asyncio.CancelledError:
                # Expected: we cancelled the task ourselves just above.
                pass
            self._task = None

        logger.info("Cleanup scheduler stopped")

    async def _run_loop(self):
        """
        Main scheduler loop.

        Runs a cleanup immediately, then sleeps for the configured interval
        and repeats while ``self._running`` is set. Unexpected errors are
        logged and retried after ``_ERROR_RETRY_SECONDS``. Cancellation at
        any point (including during the retry wait) ends the loop.
        """
        # The interval is read once, when the loop starts.
        interval_hours = settings.cleanup_interval_hours
        interval_seconds = interval_hours * 3600

        try:
            while self._running:
                try:
                    # A run is starting right now.
                    self._next_run = datetime.utcnow()

                    # Run cleanup
                    await self._execute_cleanup()

                    # Publish when the following run is due.
                    self._next_run = self._compute_next_run(
                        datetime.utcnow(), interval_hours
                    )

                    # Wait for next interval
                    logger.debug(
                        "Cleanup scheduler sleeping for %s seconds", interval_seconds
                    )
                    await asyncio.sleep(interval_seconds)

                except asyncio.CancelledError:
                    # Listed before ``Exception`` so cancellation is never
                    # mistaken for a retryable error.
                    raise
                except Exception as e:
                    logger.exception("Error in cleanup scheduler loop: %s", e)
                    # Wait a bit before retrying to avoid tight error loops
                    self._next_run = datetime.utcnow() + timedelta(
                        seconds=self._ERROR_RETRY_SECONDS
                    )
                    await asyncio.sleep(self._ERROR_RETRY_SECONDS)
        except asyncio.CancelledError:
            logger.info("Cleanup scheduler loop cancelled")

    async def _execute_cleanup(self):
        """
        Execute the cleanup task.

        Records the start time in ``_last_run``, runs the synchronous
        cleanup in the default executor and stores its result dict in
        ``_last_result``.
        """
        logger.info("Starting scheduled cleanup...")
        self._last_run = datetime.utcnow()

        # Run cleanup in thread pool to avoid blocking the event loop.
        loop = asyncio.get_running_loop()
        result = await loop.run_in_executor(None, self._run_cleanup_sync)

        self._last_result = result
        logger.info(
            "Scheduled cleanup completed: %s files deleted, %s bytes freed",
            result.get("total_files_deleted", 0),
            result.get("total_bytes_freed", 0),
        )

    def _run_cleanup_sync(self) -> dict:
        """
        Synchronous cleanup execution (runs in thread pool).

        Opens its own database session, calls
        ``cleanup_service.cleanup_all_users`` and always closes the session.

        Returns:
            The dict returned by the cleanup service, or a dict with
            ``error`` and ``timestamp`` keys if the cleanup raised.
        """
        db: Session = SessionLocal()
        try:
            return cleanup_service.cleanup_all_users(
                db=db,
                max_files_per_user=settings.max_files_per_user,
            )
        except Exception as e:
            # Failures are reported through the result dict, not re-raised.
            logger.exception("Cleanup execution failed: %s", e)
            return {
                "error": str(e),
                "timestamp": datetime.utcnow().isoformat(),
            }
        finally:
            db.close()

    async def run_now(self) -> dict:
        """
        Trigger immediate cleanup (outside of scheduled interval).

        Returns:
            The result dict of the cleanup that was just executed, or an
            empty dict if no result was stored.
        """
        logger.info("Manual cleanup triggered")
        await self._execute_cleanup()
        return self._last_result or {}
|
||||
|
||||
|
||||
# Process-wide scheduler instance; stays None until first requested
# through get_cleanup_scheduler().
_scheduler: Optional[CleanupScheduler] = None


def get_cleanup_scheduler() -> CleanupScheduler:
    """Return the global cleanup scheduler, creating it on first use."""
    global _scheduler
    if _scheduler is not None:
        return _scheduler
    _scheduler = CleanupScheduler()
    return _scheduler
|
||||
|
||||
|
||||
async def start_cleanup_scheduler():
    """Fetch the global cleanup scheduler and await its ``start()``."""
    await get_cleanup_scheduler().start()
|
||||
|
||||
|
||||
async def stop_cleanup_scheduler():
    """Fetch the global cleanup scheduler and await its ``stop()``."""
    await get_cleanup_scheduler().stop()
|
||||
Reference in New Issue
Block a user