feat: add storage cleanup mechanism with soft delete and auto scheduler

- Add soft delete (deleted_at column) to preserve task records for statistics
- Implement cleanup service to delete old files while keeping DB records
- Add automatic cleanup scheduler (configurable interval, default 24h)
- Add admin endpoints: storage stats, cleanup trigger, scheduler status
- Update task service with admin views (include deleted/files_deleted)
- Add frontend storage management UI in admin dashboard
- Add i18n translations for storage management

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
egg
2025-12-14 12:41:01 +08:00
parent 81a0a3ab0f
commit 73112db055
23 changed files with 1359 additions and 634 deletions

View File

@@ -0,0 +1,173 @@
"""
Tool_OCR - Cleanup Scheduler
Background scheduler for periodic file cleanup
"""
import asyncio
import logging
from datetime import datetime, timedelta
from typing import Optional

from sqlalchemy.orm import Session

from app.core.config import settings
from app.core.database import SessionLocal
from app.services.cleanup_service import cleanup_service
# Module-level logger, named after this module via __name__
logger = logging.getLogger(__name__)
class CleanupScheduler:
    """
    Background scheduler for periodic file cleanup.

    Runs one asyncio task that executes a cleanup pass and then sleeps for
    ``settings.cleanup_interval_hours`` hours. The synchronous cleanup call
    is handed to the event loop's default executor so the loop is not
    blocked while files are being removed.
    """

    # Seconds to wait before retrying after an unexpected error in the loop.
    _ERROR_RETRY_SECONDS = 60

    def __init__(self):
        """Create an idle scheduler; nothing runs until ``start()`` is awaited."""
        self._task: Optional[asyncio.Task] = None
        self._running: bool = False
        self._last_run: Optional[datetime] = None
        self._next_run: Optional[datetime] = None
        self._last_result: Optional[dict] = None

    @staticmethod
    def _next_run_after(start: datetime, delay_seconds: float) -> datetime:
        """Return the moment ``delay_seconds`` after ``start``.

        Uses ``timedelta`` arithmetic so the date rolls over correctly and
        intervals of 24 hours or more (or fractional hours) are handled.
        """
        return start + timedelta(seconds=delay_seconds)

    @property
    def is_running(self) -> bool:
        """Check if scheduler is running (flag set and task still alive)."""
        return self._running and self._task is not None and not self._task.done()

    @property
    def status(self) -> dict:
        """Get scheduler status as a JSON-friendly dict.

        Timestamps are ISO-8601 strings (or ``None`` when not yet set);
        ``last_result`` is whatever the most recent cleanup returned.
        """
        return {
            "enabled": settings.cleanup_enabled,
            "running": self.is_running,
            "interval_hours": settings.cleanup_interval_hours,
            "max_files_per_user": settings.max_files_per_user,
            "last_run": self._last_run.isoformat() if self._last_run else None,
            "next_run": self._next_run.isoformat() if self._next_run else None,
            "last_result": self._last_result,
        }

    async def start(self):
        """Start the cleanup scheduler.

        Does nothing when cleanup is disabled in settings or when the
        scheduler task is already running.
        """
        if not settings.cleanup_enabled:
            logger.info("Cleanup scheduler is disabled in configuration")
            return
        if self.is_running:
            logger.warning("Cleanup scheduler is already running")
            return
        self._running = True
        self._task = asyncio.create_task(self._run_loop())
        logger.info(
            f"Cleanup scheduler started (interval: {settings.cleanup_interval_hours}h, "
            f"max_files_per_user: {settings.max_files_per_user})"
        )

    async def stop(self):
        """Stop the cleanup scheduler and wait for its task to finish."""
        self._running = False
        if self._task is not None:
            self._task.cancel()
            try:
                await self._task
            except asyncio.CancelledError:
                # Expected when the task was cancelled outside the loop's own handler.
                pass
            self._task = None
        logger.info("Cleanup scheduler stopped")

    async def _run_loop(self):
        """Main scheduler loop: run cleanup, publish next run time, sleep, repeat."""
        interval_seconds = settings.cleanup_interval_hours * 3600
        while self._running:
            try:
                # A run is starting right now.
                self._next_run = datetime.utcnow()
                # Run cleanup
                await self._execute_cleanup()
                # Publish the time the upcoming sleep will end. The previous
                # hour-replace arithmetic wrapped within the same day and
                # reported "now" for a 24h interval.
                self._next_run = self._next_run_after(
                    datetime.utcnow(), interval_seconds
                )
                # Wait for next interval
                logger.debug(f"Cleanup scheduler sleeping for {interval_seconds} seconds")
                await asyncio.sleep(interval_seconds)
            except asyncio.CancelledError:
                logger.info("Cleanup scheduler loop cancelled")
                break
            except Exception as e:
                logger.exception(f"Error in cleanup scheduler loop: {e}")
                # Keep the reported next run in step with the retry delay.
                self._next_run = self._next_run_after(
                    datetime.utcnow(), self._ERROR_RETRY_SECONDS
                )
                # Wait a bit before retrying to avoid tight error loops
                await asyncio.sleep(self._ERROR_RETRY_SECONDS)

    async def _execute_cleanup(self):
        """Execute one cleanup pass and record its time and result."""
        logger.info("Starting scheduled cleanup...")
        self._last_run = datetime.utcnow()
        # Run cleanup in thread pool to avoid blocking. We are inside a
        # coroutine, so the running loop is the right one to use.
        loop = asyncio.get_running_loop()
        result = await loop.run_in_executor(None, self._run_cleanup_sync)
        self._last_result = result
        logger.info(
            f"Scheduled cleanup completed: {result.get('total_files_deleted', 0)} files deleted, "
            f"{result.get('total_bytes_freed', 0)} bytes freed"
        )

    def _run_cleanup_sync(self) -> dict:
        """Synchronous cleanup execution (runs in thread pool).

        Opens its own database session and always closes it. On failure the
        exception is logged and a dict with ``error`` and ``timestamp`` keys
        is returned instead of raising.
        """
        db: Session = SessionLocal()
        try:
            result = cleanup_service.cleanup_all_users(
                db=db,
                max_files_per_user=settings.max_files_per_user,
            )
            return result
        except Exception as e:
            logger.exception(f"Cleanup execution failed: {e}")
            return {
                "error": str(e),
                "timestamp": datetime.utcnow().isoformat(),
            }
        finally:
            db.close()

    async def run_now(self) -> dict:
        """Trigger immediate cleanup (outside of scheduled interval).

        Returns the result of that cleanup, or an empty dict if none was
        recorded.
        """
        logger.info("Manual cleanup triggered")
        await self._execute_cleanup()
        return self._last_result or {}
# Module-wide scheduler singleton, created lazily on first request
_scheduler: Optional[CleanupScheduler] = None


def get_cleanup_scheduler() -> CleanupScheduler:
    """Return the shared cleanup scheduler, constructing it on first use."""
    global _scheduler
    if _scheduler is not None:
        return _scheduler
    _scheduler = CleanupScheduler()
    return _scheduler
async def start_cleanup_scheduler():
    """Look up the global cleanup scheduler and await its ``start()``."""
    await get_cleanup_scheduler().start()
async def stop_cleanup_scheduler():
    """Look up the global cleanup scheduler and await its ``stop()``."""
    await get_cleanup_scheduler().stop()