From 7233e9cb7b90b634a5c33f7430a181becb315a5a Mon Sep 17 00:00:00 2001
From: egg
Date: Sun, 14 Dec 2025 15:40:31 +0800
Subject: [PATCH] fix: logging, warnings, and soft-delete consistency
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Fix duplicate logging in multi-worker mode (handlers on the root logger only; file lock so a single worker runs the cleanup scheduler)
- Add Pydantic V2 model_config to suppress protected_namespaces warning
- Suppress PaddlePaddle ccache warnings
- Fix admin.py using non-existent User.username (now uses email)
- Fix get_user_stats to exclude soft-deleted tasks from statistics
- Fix create_task to exclude soft-deleted tasks from user limit check
- Change LOG_LEVEL default to INFO

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5
---
 .env                                       |  2 +-
 backend/app/core/config.py                 | 24 ++++----
 backend/app/main.py                        | 69 +++++++++++++++++------
 backend/app/routers/admin.py               |  2 +-
 backend/app/services/cleanup_scheduler.py  | 57 ++++++++++++++++++-
 backend/app/services/task_service.py       | 21 ++++---
 6 files changed, 132 insertions(+), 43 deletions(-)

diff --git a/.env b/.env
index 563ed95..d791031 100644
--- a/.env
+++ b/.env
@@ -112,5 +112,5 @@ TASK_QUEUE_TYPE=memory
 CORS_ORIGINS=http://localhost:5173,http://127.0.0.1:5173
 
 # ===== Logging Configuration =====
-LOG_LEVEL=DEBUG
+LOG_LEVEL=INFO
 LOG_FILE=./logs/app.log
diff --git a/backend/app/core/config.py b/backend/app/core/config.py
index eb03da1..90c540e 100644
--- a/backend/app/core/config.py
+++ b/backend/app/core/config.py
@@ -38,6 +38,17 @@ def _default_font_dir() -> str:
 class Settings(BaseSettings):
     """Application settings loaded from environment variables"""
 
+    # Pydantic V2 configuration
+    model_config = {
+        # Look for .env file in project root (one level up from backend/)
+        "env_file": str(PROJECT_ROOT / ".env"),
+        "env_file_encoding": "utf-8",
+        "case_sensitive": False,
+        "extra": "ignore",
+        # Allow field names starting with "model_" without conflict
+        "protected_namespaces": (),
+    }
+
     # ===== Database Configuration =====
     mysql_host: str = Field(default="localhost")
     mysql_port: int = Field(default=3306)
@@ -552,19 +563,6 @@ class Settings(BaseSettings):
 
         return self
 
-    class Config:
-        # Look for .env files in project root (one level up from backend/)
-        # .env.local has higher priority and overrides .env
-        env_file = (
-            str(PROJECT_ROOT / ".env"),
-            str(PROJECT_ROOT / ".env.local"),
-        )
-        env_file_encoding = "utf-8"
-        case_sensitive = False
-        # Ignore extra environment variables not defined in Settings
-        # This allows backwards compatibility with old .env files (e.g., Docker)
-        extra = "ignore"
-
     def _resolve_path(self, path_value: str) -> Path:
         """
         Convert relative paths to backend-rooted absolute paths.
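Note on the config.py hunk above: pydantic-settings V2 reads its options from model_config instead of the old nested "class Config". Below is a minimal standalone sketch of that pattern (class and field names are illustrative, not the project's Settings); it also shows that env_file accepts a tuple, with later files taking priority, which is how the previous ".env overridden by .env.local" behaviour could be preserved if it is still wanted:

    from pathlib import Path

    from pydantic import Field
    from pydantic_settings import BaseSettings, SettingsConfigDict

    PROJECT_ROOT = Path(__file__).resolve().parent  # assumption: adjust to the real layout


    class ExampleSettings(BaseSettings):
        model_config = SettingsConfigDict(
            # Later entries win, so .env.local would override .env (optional).
            env_file=(str(PROJECT_ROOT / ".env"), str(PROJECT_ROOT / ".env.local")),
            env_file_encoding="utf-8",
            case_sensitive=False,
            extra="ignore",
            # Empty tuple disables the "model_" protected-namespace warning.
            protected_namespaces=(),
        )

        # A field whose name starts with "model_" no longer triggers a warning:
        model_path: str = Field(default="./models")
        log_level: str = Field(default="INFO")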
diff --git a/backend/app/main.py b/backend/app/main.py
index 170d4d3..9c78fd8 100644
--- a/backend/app/main.py
+++ b/backend/app/main.py
@@ -3,6 +3,11 @@ Tool_OCR - FastAPI Application Entry Point (V2)
 Main application setup with CORS, routes, and startup/shutdown events
 """
 
+import warnings
+# Suppress noisy third-party warnings
+warnings.filterwarnings("ignore", message=".*ccache.*")
+warnings.filterwarnings("ignore", message=".*No ccache found.*")
+
 from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
 from contextlib import asynccontextmanager
@@ -107,27 +112,55 @@ def setup_signal_handlers():
 
 # Ensure log directory exists before configuring logging
 Path(settings.log_file).parent.mkdir(parents=True, exist_ok=True)
 
-# Configure logging - Force configuration to override uvicorn's settings
-logging.basicConfig(
-    level=getattr(logging, settings.log_level),
-    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
-    handlers=[
-        logging.FileHandler(settings.log_file),
-        logging.StreamHandler(),
-    ],
-    force=True  # Force reconfiguration (Python 3.8+)
-)
+# Configure logging - prevent duplicate handlers
+def setup_logging():
+    """Configure logging with deduplication"""
+    log_level = getattr(logging, settings.log_level, logging.INFO)
+    log_format = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
 
-# Also explicitly configure root logger and app loggers
-root_logger = logging.getLogger()
-root_logger.setLevel(getattr(logging, settings.log_level))
+    # Get root logger
+    root_logger = logging.getLogger()
 
-# Configure app-specific loggers
-for logger_name in ['app', 'app.services', 'app.services.pdf_generator_service', 'app.services.ocr_service']:
-    app_logger = logging.getLogger(logger_name)
-    app_logger.setLevel(getattr(logging, settings.log_level))
-    app_logger.propagate = True  # Ensure logs propagate to root logger
+    # Clear existing handlers to prevent duplicates
+    root_logger.handlers.clear()
+    # Set root level
+    root_logger.setLevel(log_level)
+
+    # Create formatter
+    formatter = logging.Formatter(log_format)
+
+    # File handler
+    file_handler = logging.FileHandler(settings.log_file)
+    file_handler.setLevel(log_level)
+    file_handler.setFormatter(formatter)
+    root_logger.addHandler(file_handler)
+
+    # Stream handler (console)
+    stream_handler = logging.StreamHandler()
+    stream_handler.setLevel(log_level)
+    stream_handler.setFormatter(formatter)
+    root_logger.addHandler(stream_handler)
+
+    # Suppress uvicorn's duplicate access logs
+    logging.getLogger("uvicorn.access").handlers.clear()
+    logging.getLogger("uvicorn.error").handlers.clear()
+
+    # Configure uvicorn loggers to use our handlers
+    for uvi_logger_name in ["uvicorn", "uvicorn.access", "uvicorn.error"]:
+        uvi_logger = logging.getLogger(uvi_logger_name)
+        uvi_logger.handlers.clear()
+        uvi_logger.propagate = True
+        uvi_logger.setLevel(log_level)
+
+    # Configure app-specific loggers (no separate handlers, just propagate)
+    for logger_name in ['app', 'app.services', 'app.routers']:
+        app_logger = logging.getLogger(logger_name)
+        app_logger.handlers.clear()  # Remove any existing handlers
+        app_logger.setLevel(log_level)
+        app_logger.propagate = True  # Propagate to root logger
+
+setup_logging()
 
 logger = logging.getLogger(__name__)
 
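Note on the main.py hunk above: duplicate log lines typically appear when both the root logger and a named logger (for example one of uvicorn's) carry their own handlers, so each record is emitted twice. setup_logging() avoids that by keeping handlers on the root logger only and letting every named logger propagate. A minimal standalone sketch of the same pattern (logger names here are examples, not the app's):

    import logging
    import sys

    root = logging.getLogger()
    root.handlers.clear()            # drop whatever basicConfig/uvicorn installed earlier
    root.setLevel(logging.INFO)

    handler = logging.StreamHandler(sys.stdout)
    handler.setFormatter(logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s"))
    root.addHandler(handler)

    for name in ("uvicorn", "uvicorn.access", "uvicorn.error", "app"):
        child = logging.getLogger(name)
        child.handlers.clear()       # no per-logger handlers ...
        child.propagate = True       # ... so each record reaches the root exactly once

    logging.getLogger("app").info("hello")  # printed once, by the root handler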
diff --git a/backend/app/routers/admin.py b/backend/app/routers/admin.py
index d8916f8..f5ae507 100644
--- a/backend/app/routers/admin.py
+++ b/backend/app/routers/admin.py
@@ -401,7 +401,7 @@ async def trigger_cleanup(
     result = cleanup_service.cleanup_all_users(db,
                                                max_files_per_user=files_to_keep)
     logger.info(
-        f"Manual cleanup triggered by admin {admin_user.username}: "
+        f"Manual cleanup triggered by admin {admin_user.email}: "
         f"{result['total_files_deleted']} files, {result['total_bytes_freed']} bytes"
     )
 
diff --git a/backend/app/services/cleanup_scheduler.py b/backend/app/services/cleanup_scheduler.py
index 1414652..820b303 100644
--- a/backend/app/services/cleanup_scheduler.py
+++ b/backend/app/services/cleanup_scheduler.py
@@ -5,7 +5,10 @@ Background scheduler for periodic file cleanup
 
 import asyncio
 import logging
+import os
+import fcntl
 from datetime import datetime
+from pathlib import Path
 from typing import Optional
 
 from sqlalchemy.orm import Session
@@ -16,11 +19,50 @@ from app.services.cleanup_service import cleanup_service
 
 logger = logging.getLogger(__name__)
 
+# Lock file path for multi-worker coordination
+_LOCK_FILE = Path(settings.log_file).parent / ".cleanup_scheduler.lock"
+_lock_fd = None
+
+
+def _try_acquire_lock() -> bool:
+    """
+    Try to acquire exclusive lock for scheduler.
+    Only one worker should run the scheduler.
+    Returns True if lock acquired, False otherwise.
+    """
+    global _lock_fd
+    try:
+        _LOCK_FILE.parent.mkdir(parents=True, exist_ok=True)
+        _lock_fd = open(_LOCK_FILE, 'w')
+        fcntl.flock(_lock_fd.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
+        _lock_fd.write(str(os.getpid()))
+        _lock_fd.flush()
+        return True
+    except (IOError, OSError):
+        # Lock is held by another process
+        if _lock_fd:
+            _lock_fd.close()
+            _lock_fd = None
+        return False
+
+
+def _release_lock():
+    """Release the scheduler lock"""
+    global _lock_fd
+    if _lock_fd:
+        try:
+            fcntl.flock(_lock_fd.fileno(), fcntl.LOCK_UN)
+            _lock_fd.close()
+        except Exception:
+            pass
+        _lock_fd = None
+
 
 class CleanupScheduler:
     """
     Background scheduler for periodic file cleanup.
     Uses asyncio for non-blocking background execution.
+    Uses file lock to ensure only one instance runs across multiple workers.
     """
 
     def __init__(self):
@@ -29,6 +71,7 @@ class CleanupScheduler:
         self._last_run: Optional[datetime] = None
         self._next_run: Optional[datetime] = None
         self._last_result: Optional[dict] = None
+        self._has_lock: bool = False
 
     @property
     def is_running(self) -> bool:
@@ -55,9 +98,15 @@
             return
 
         if self.is_running:
-            logger.warning("Cleanup scheduler is already running")
+            logger.debug("Cleanup scheduler is already running")
             return
 
+        # Try to acquire lock - only one worker should run the scheduler
+        if not _try_acquire_lock():
+            logger.debug("Another worker is running the cleanup scheduler, skipping")
+            return
+
+        self._has_lock = True
         self._running = True
         self._task = asyncio.create_task(self._run_loop())
         logger.info(
@@ -77,7 +126,11 @@
                 pass
             self._task = None
 
-        logger.info("Cleanup scheduler stopped")
+        # Release the lock if we had it
+        if self._has_lock:
+            _release_lock()
+            self._has_lock = False
+        logger.info("Cleanup scheduler stopped")
 
     async def _run_loop(self):
         """Main scheduler loop"""
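Note on the cleanup_scheduler.py hunks above: with several uvicorn workers, each process would otherwise start its own scheduler; an exclusive, non-blocking flock lets exactly one process win. A minimal standalone sketch of that pattern (the lock path and names are illustrative, and fcntl is Unix-only):

    import fcntl
    import os
    from pathlib import Path

    LOCK_FILE = Path("/tmp/example_scheduler.lock")  # assumption: any path shared by the workers
    _lock_fd = None

    def try_acquire_lock() -> bool:
        """Return True in exactly one process; all others get an OSError from flock."""
        global _lock_fd
        try:
            _lock_fd = open(LOCK_FILE, "w")
            # LOCK_NB makes the call non-blocking: losers fail immediately instead of waiting.
            fcntl.flock(_lock_fd.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
            _lock_fd.write(str(os.getpid()))
            _lock_fd.flush()
            return True
        except OSError:
            if _lock_fd:
                _lock_fd.close()
                _lock_fd = None
            return False

    if __name__ == "__main__":
        print("scheduler owner" if try_acquire_lock() else "another worker owns the scheduler")

Because flock is advisory and tied to the open file descriptor, the lock is released automatically when the owning process closes the file or exits, so a crashed worker cannot leave the scheduler permanently disabled.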
""" def __init__(self): @@ -29,6 +71,7 @@ class CleanupScheduler: self._last_run: Optional[datetime] = None self._next_run: Optional[datetime] = None self._last_result: Optional[dict] = None + self._has_lock: bool = False @property def is_running(self) -> bool: @@ -55,9 +98,15 @@ class CleanupScheduler: return if self.is_running: - logger.warning("Cleanup scheduler is already running") + logger.debug("Cleanup scheduler is already running") return + # Try to acquire lock - only one worker should run the scheduler + if not _try_acquire_lock(): + logger.debug("Another worker is running the cleanup scheduler, skipping") + return + + self._has_lock = True self._running = True self._task = asyncio.create_task(self._run_loop()) logger.info( @@ -77,7 +126,11 @@ class CleanupScheduler: pass self._task = None - logger.info("Cleanup scheduler stopped") + # Release the lock if we had it + if self._has_lock: + _release_lock() + self._has_lock = False + logger.info("Cleanup scheduler stopped") async def _run_loop(self): """Main scheduler loop""" diff --git a/backend/app/services/task_service.py b/backend/app/services/task_service.py index 8b2689a..1b00d93 100644 --- a/backend/app/services/task_service.py +++ b/backend/app/services/task_service.py @@ -41,9 +41,11 @@ class TaskService: # Generate unique task ID task_id = str(uuid.uuid4()) - # Check user's task limit + # Check user's task limit (excluding soft-deleted tasks) if settings.max_tasks_per_user > 0: - user_task_count = db.query(Task).filter(Task.user_id == user_id).count() + user_task_count = db.query(Task).filter( + and_(Task.user_id == user_id, Task.deleted_at.is_(None)) + ).count() if user_task_count >= settings.max_tasks_per_user: # Auto-delete oldest completed tasks to make room self._cleanup_old_tasks(db, user_id, limit=10) @@ -359,7 +361,7 @@ class TaskService: def get_user_stats(self, db: Session, user_id: int) -> dict: """ - Get statistics for a user's tasks + Get statistics for a user's tasks (excluding soft-deleted tasks) Args: db: Database session @@ -368,29 +370,32 @@ class TaskService: Returns: Dictionary with task statistics """ - total = db.query(Task).filter(Task.user_id == user_id).count() + # Base filter: user's non-deleted tasks + base_filter = and_(Task.user_id == user_id, Task.deleted_at.is_(None)) + + total = db.query(Task).filter(base_filter).count() pending = ( db.query(Task) - .filter(and_(Task.user_id == user_id, Task.status == TaskStatus.PENDING)) + .filter(and_(base_filter, Task.status == TaskStatus.PENDING)) .count() ) processing = ( db.query(Task) - .filter(and_(Task.user_id == user_id, Task.status == TaskStatus.PROCESSING)) + .filter(and_(base_filter, Task.status == TaskStatus.PROCESSING)) .count() ) completed = ( db.query(Task) - .filter(and_(Task.user_id == user_id, Task.status == TaskStatus.COMPLETED)) + .filter(and_(base_filter, Task.status == TaskStatus.COMPLETED)) .count() ) failed = ( db.query(Task) - .filter(and_(Task.user_id == user_id, Task.status == TaskStatus.FAILED)) + .filter(and_(base_filter, Task.status == TaskStatus.FAILED)) .count() )