- Add soft delete (deleted_at column) to preserve task records for statistics - Implement cleanup service to delete old files while keeping DB records - Add automatic cleanup scheduler (configurable interval, default 24h) - Add admin endpoints: storage stats, cleanup trigger, scheduler status - Update task service with admin views (include deleted/files_deleted) - Add frontend storage management UI in admin dashboard - Add i18n translations for storage management 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
174 lines
5.5 KiB
Python
174 lines
5.5 KiB
Python
"""
|
|
Tool_OCR - Cleanup Scheduler
|
|
Background scheduler for periodic file cleanup
|
|
"""
|
|
|
|
import asyncio
|
|
import logging
|
|
from datetime import datetime
|
|
from typing import Optional
|
|
|
|
from sqlalchemy.orm import Session
|
|
|
|
from app.core.config import settings
|
|
from app.core.database import SessionLocal
|
|
from app.services.cleanup_service import cleanup_service
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class CleanupScheduler:
    """
    Background scheduler for periodic file cleanup.

    Uses asyncio for non-blocking background execution: a single
    ``asyncio.Task`` runs the scheduling loop, and the synchronous cleanup
    work itself is pushed to the default thread-pool executor so the event
    loop is never blocked by it.
    """

    def __init__(self):
        # Handle of the background loop task; None until start() is called.
        self._task: Optional[asyncio.Task] = None
        # Cooperative run flag checked by the loop on every iteration.
        self._running: bool = False
        # Naive UTC timestamps (datetime.utcnow()) of the last/next cleanup.
        self._last_run: Optional[datetime] = None
        self._next_run: Optional[datetime] = None
        # Result dict of the most recent cleanup (or an error dict on failure).
        self._last_result: Optional[dict] = None

    @property
    def is_running(self) -> bool:
        """Check if scheduler is running (flag set and task still alive)."""
        return self._running and self._task is not None and not self._task.done()

    @property
    def status(self) -> dict:
        """
        Get scheduler status.

        Returns:
            A dict with the configuration values read from ``settings``, the
            running flag, ISO-formatted last/next run timestamps (or None),
            and the last cleanup result.
        """
        return {
            "enabled": settings.cleanup_enabled,
            "running": self.is_running,
            "interval_hours": settings.cleanup_interval_hours,
            "max_files_per_user": settings.max_files_per_user,
            "last_run": self._last_run.isoformat() if self._last_run else None,
            "next_run": self._next_run.isoformat() if self._next_run else None,
            "last_result": self._last_result,
        }

    async def start(self):
        """
        Start the cleanup scheduler.

        Does nothing (beyond logging) when cleanup is disabled in the
        configuration or when the scheduler is already running.
        """
        if not settings.cleanup_enabled:
            logger.info("Cleanup scheduler is disabled in configuration")
            return

        if self.is_running:
            logger.warning("Cleanup scheduler is already running")
            return

        self._running = True
        self._task = asyncio.create_task(self._run_loop())
        logger.info(
            f"Cleanup scheduler started (interval: {settings.cleanup_interval_hours}h, "
            f"max_files_per_user: {settings.max_files_per_user})"
        )

    async def stop(self):
        """Stop the cleanup scheduler and wait for the loop task to finish."""
        self._running = False

        if self._task is not None:
            self._task.cancel()
            try:
                await self._task
            except asyncio.CancelledError:
                # Expected: we cancelled the task ourselves just above.
                pass
            self._task = None

        logger.info("Cleanup scheduler stopped")

    @staticmethod
    def _compute_next_run(now: datetime, interval_hours: float) -> datetime:
        """
        Return the timestamp of the next scheduled run.

        Uses timedelta arithmetic so the result rolls over days, months and
        years correctly and supports intervals of 24 hours or more.

        Args:
            now: Reference time the interval is counted from.
            interval_hours: Length of the interval in hours.

        Returns:
            ``now`` shifted forward by ``interval_hours``.
        """
        # Local import: the module only imports `datetime` from `datetime`.
        from datetime import timedelta

        return now + timedelta(hours=interval_hours)

    async def _run_loop(self):
        """
        Main scheduler loop.

        Runs a cleanup immediately, then sleeps for the configured interval
        and repeats until the run flag is cleared or the task is cancelled.
        Unexpected errors are logged and retried after a short pause.
        """
        interval_seconds = settings.cleanup_interval_hours * 3600

        while self._running:
            try:
                # A run is due right now.
                self._next_run = datetime.utcnow()

                # Run cleanup
                await self._execute_cleanup()

                # Publish when the following run will happen. The previous
                # implementation used replace(hour=(hour + interval) % 24),
                # which never advanced the date and so reported times in the
                # past (or "now" for a 24h interval).
                self._next_run = self._compute_next_run(
                    datetime.utcnow(), settings.cleanup_interval_hours
                )

                # Wait for next interval
                logger.debug(f"Cleanup scheduler sleeping for {interval_seconds} seconds")
                await asyncio.sleep(interval_seconds)

            except asyncio.CancelledError:
                logger.info("Cleanup scheduler loop cancelled")
                break
            except Exception as e:
                logger.exception(f"Error in cleanup scheduler loop: {e}")
                # Wait a bit before retrying to avoid tight error loops
                await asyncio.sleep(60)

    async def _execute_cleanup(self):
        """
        Execute the cleanup task.

        Records the start time in ``_last_run``, runs the synchronous cleanup
        in the default executor, and stores its result in ``_last_result``.
        """
        logger.info("Starting scheduled cleanup...")
        self._last_run = datetime.utcnow()

        # Run cleanup in thread pool to avoid blocking the event loop.
        # get_running_loop() is the supported way to obtain the loop from
        # inside a coroutine.
        loop = asyncio.get_running_loop()
        result = await loop.run_in_executor(None, self._run_cleanup_sync)

        self._last_result = result
        if "error" in result:
            # _run_cleanup_sync returns an {"error", "timestamp"} dict when
            # the cleanup raised; do not report that as a successful run.
            logger.warning(f"Scheduled cleanup finished with error: {result['error']}")
        else:
            logger.info(
                f"Scheduled cleanup completed: {result.get('total_files_deleted', 0)} files deleted, "
                f"{result.get('total_bytes_freed', 0)} bytes freed"
            )

    def _run_cleanup_sync(self) -> dict:
        """
        Synchronous cleanup execution (runs in thread pool).

        Opens its own database session and always closes it.

        Returns:
            Whatever ``cleanup_service.cleanup_all_users`` returns, or a dict
            with ``error`` and ``timestamp`` keys if it raised.
        """
        db: Session = SessionLocal()
        try:
            return cleanup_service.cleanup_all_users(
                db=db,
                max_files_per_user=settings.max_files_per_user,
            )
        except Exception as e:
            # Deliberate best-effort boundary: a failed cleanup must not kill
            # the scheduler; the failure is surfaced through the result dict.
            logger.exception(f"Cleanup execution failed: {e}")
            return {
                "error": str(e),
                "timestamp": datetime.utcnow().isoformat(),
            }
        finally:
            db.close()

    async def run_now(self) -> dict:
        """
        Trigger immediate cleanup (outside of scheduled interval).

        Returns:
            The result of the cleanup that was just executed, or an empty
            dict if no result was recorded.
        """
        logger.info("Manual cleanup triggered")
        await self._execute_cleanup()
        return self._last_result or {}
|
|
|
|
|
|
# Module-level scheduler singleton; stays None until first requested.
_scheduler: Optional[CleanupScheduler] = None


def get_cleanup_scheduler() -> CleanupScheduler:
    """Return the shared cleanup scheduler, creating it on first access."""
    global _scheduler
    # Fast path: the instance already exists.
    if _scheduler is not None:
        return _scheduler
    _scheduler = CleanupScheduler()
    return _scheduler
|
|
|
|
|
|
async def start_cleanup_scheduler():
    """Fetch the global cleanup scheduler and start it."""
    await get_cleanup_scheduler().start()
|
|
|
|
|
|
async def stop_cleanup_scheduler():
    """Fetch the global cleanup scheduler and stop it."""
    await get_cleanup_scheduler().stop()
|