"""
Tool_OCR - Cleanup Scheduler
Background scheduler for periodic file cleanup
"""

import asyncio
import logging
from datetime import datetime, timedelta
from typing import Optional

from sqlalchemy.orm import Session

from app.core.config import settings
from app.core.database import SessionLocal
from app.services.cleanup_service import cleanup_service

logger = logging.getLogger(__name__)

# Back-off applied after an unexpected error in the scheduler loop, so that a
# persistent failure does not turn into a tight retry loop.
_ERROR_RETRY_SECONDS = 60


class CleanupScheduler:
    """
    Background scheduler for periodic file cleanup.

    Uses asyncio for non-blocking background execution: the loop itself runs
    as an asyncio task, while the synchronous cleanup work is pushed to the
    event loop's default executor.
    """

    def __init__(self) -> None:
        # Handle of the background loop task; None while stopped.
        self._task: Optional[asyncio.Task] = None
        # Cooperative stop flag checked by the loop on every iteration.
        self._running: bool = False
        # Naive UTC timestamps (datetime.utcnow) exposed through `status`.
        self._last_run: Optional[datetime] = None
        self._next_run: Optional[datetime] = None
        # Result dict of the most recent cleanup (scheduled or manual).
        self._last_result: Optional[dict] = None

    @property
    def is_running(self) -> bool:
        """Return True when the loop task exists and has not finished."""
        return self._running and self._task is not None and not self._task.done()

    @property
    def status(self) -> dict:
        """Return a snapshot of the scheduler configuration and run state.

        Timestamps are rendered as ISO-8601 strings, or None when the
        corresponding event has not happened yet.
        """
        return {
            "enabled": settings.cleanup_enabled,
            "running": self.is_running,
            "interval_hours": settings.cleanup_interval_hours,
            "max_files_per_user": settings.max_files_per_user,
            "last_run": self._last_run.isoformat() if self._last_run else None,
            "next_run": self._next_run.isoformat() if self._next_run else None,
            "last_result": self._last_result,
        }

    async def start(self) -> None:
        """Start the cleanup scheduler.

        Does nothing (apart from logging) when cleanup is disabled in the
        settings or when the scheduler is already running.
        """
        if not settings.cleanup_enabled:
            logger.info("Cleanup scheduler is disabled in configuration")
            return

        if self.is_running:
            logger.warning("Cleanup scheduler is already running")
            return

        self._running = True
        self._task = asyncio.create_task(self._run_loop())
        logger.info(
            "Cleanup scheduler started (interval: %sh, max_files_per_user: %s)",
            settings.cleanup_interval_hours,
            settings.max_files_per_user,
        )

    async def stop(self) -> None:
        """Stop the cleanup scheduler and wait for the loop task to finish."""
        self._running = False

        if self._task is not None:
            self._task.cancel()
            try:
                await self._task
            except asyncio.CancelledError:
                pass
            self._task = None

        logger.info("Cleanup scheduler stopped")

    async def _run_loop(self) -> None:
        """Main scheduler loop.

        Runs a cleanup immediately, then sleeps for the configured interval
        and repeats until the scheduler is stopped or the task is cancelled.
        Unexpected errors are logged and retried after a short back-off.
        """
        interval_seconds = settings.cleanup_interval_hours * 3600

        try:
            while self._running:
                try:
                    # A run is starting right now.
                    self._next_run = datetime.utcnow()

                    await self._execute_cleanup()

                    # Use timedelta arithmetic so the next-run time rolls over
                    # day/month boundaries correctly and works for any
                    # interval length (including > 24h or fractional hours).
                    self._next_run = datetime.utcnow() + timedelta(
                        seconds=interval_seconds
                    )

                    logger.debug(
                        "Cleanup scheduler sleeping for %s seconds",
                        interval_seconds,
                    )
                    await asyncio.sleep(interval_seconds)

                except asyncio.CancelledError:
                    # Handled once by the outer handler below.
                    raise
                except Exception as e:
                    logger.exception("Error in cleanup scheduler loop: %s", e)
                    # Wait a bit before retrying to avoid tight error loops,
                    # and report the retry time as the next run.
                    self._next_run = datetime.utcnow() + timedelta(
                        seconds=_ERROR_RETRY_SECONDS
                    )
                    await asyncio.sleep(_ERROR_RETRY_SECONDS)
        except asyncio.CancelledError:
            # Covers cancellation during the cleanup, the regular sleep and
            # the error back-off sleep alike.
            logger.info("Cleanup scheduler loop cancelled")

    async def _execute_cleanup(self) -> None:
        """Execute one cleanup pass and record its timestamp and result."""
        logger.info("Starting scheduled cleanup...")
        self._last_run = datetime.utcnow()

        # Run cleanup in the default executor to avoid blocking the event loop.
        loop = asyncio.get_running_loop()
        result = await loop.run_in_executor(None, self._run_cleanup_sync)

        self._last_result = result
        logger.info(
            "Scheduled cleanup completed: %s files deleted, %s bytes freed",
            result.get("total_files_deleted", 0),
            result.get("total_bytes_freed", 0),
        )

    def _run_cleanup_sync(self) -> dict:
        """Synchronous cleanup execution (runs in the executor).

        Opens a dedicated database session for the run and always closes it.

        Returns:
            The dict returned by ``cleanup_service.cleanup_all_users``, or a
            dict with ``error`` and ``timestamp`` keys when that call raises.
        """
        db: Session = SessionLocal()
        try:
            return cleanup_service.cleanup_all_users(
                db=db,
                max_files_per_user=settings.max_files_per_user,
            )
        except Exception as e:
            logger.exception("Cleanup execution failed: %s", e)
            return {
                "error": str(e),
                "timestamp": datetime.utcnow().isoformat(),
            }
        finally:
            db.close()

    async def run_now(self) -> dict:
        """Trigger immediate cleanup (outside of scheduled interval).

        Returns:
            The result dict of the cleanup that was just executed, or an
            empty dict when no result was recorded.
        """
        logger.info("Manual cleanup triggered")
        await self._execute_cleanup()
        return self._last_result or {}


# Global scheduler instance, created lazily by get_cleanup_scheduler().
_scheduler: Optional[CleanupScheduler] = None


def get_cleanup_scheduler() -> CleanupScheduler:
    """Get the global cleanup scheduler instance, creating it on first use."""
    global _scheduler
    if _scheduler is None:
        _scheduler = CleanupScheduler()
    return _scheduler


async def start_cleanup_scheduler() -> None:
    """Start the global cleanup scheduler."""
    scheduler = get_cleanup_scheduler()
    await scheduler.start()


async def stop_cleanup_scheduler() -> None:
    """Stop the global cleanup scheduler."""
    scheduler = get_cleanup_scheduler()
    await scheduler.stop()