fix: logging, warnings, and soft-delete consistency
- Fix duplicate logging in multi-worker mode with file lock for cleanup scheduler - Add Pydantic V2 model_config to suppress protected_namespaces warning - Suppress PaddlePaddle ccache warnings - Fix admin.py using non-existent User.username (now uses email) - Fix get_user_stats to exclude soft-deleted tasks from statistics - Fix create_task to exclude soft-deleted tasks from user limit check - Change LOG_LEVEL default to INFO 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2
.env
2
.env
@@ -112,5 +112,5 @@ TASK_QUEUE_TYPE=memory
|
||||
CORS_ORIGINS=http://localhost:5173,http://127.0.0.1:5173
|
||||
|
||||
# ===== Logging Configuration =====
|
||||
LOG_LEVEL=DEBUG
|
||||
LOG_LEVEL=INFO
|
||||
LOG_FILE=./logs/app.log
|
||||
|
||||
@@ -38,6 +38,17 @@ def _default_font_dir() -> str:
|
||||
class Settings(BaseSettings):
|
||||
"""Application settings loaded from environment variables"""
|
||||
|
||||
# Pydantic V2 configuration
|
||||
model_config = {
|
||||
# Look for .env file in project root (one level up from backend/)
|
||||
"env_file": str(PROJECT_ROOT / ".env"),
|
||||
"env_file_encoding": "utf-8",
|
||||
"case_sensitive": False,
|
||||
"extra": "ignore",
|
||||
# Allow field names starting with "model_" without conflict
|
||||
"protected_namespaces": (),
|
||||
}
|
||||
|
||||
# ===== Database Configuration =====
|
||||
mysql_host: str = Field(default="localhost")
|
||||
mysql_port: int = Field(default=3306)
|
||||
@@ -552,19 +563,6 @@ class Settings(BaseSettings):
|
||||
|
||||
return self
|
||||
|
||||
class Config:
|
||||
# Look for .env files in project root (one level up from backend/)
|
||||
# .env.local has higher priority and overrides .env
|
||||
env_file = (
|
||||
str(PROJECT_ROOT / ".env"),
|
||||
str(PROJECT_ROOT / ".env.local"),
|
||||
)
|
||||
env_file_encoding = "utf-8"
|
||||
case_sensitive = False
|
||||
# Ignore extra environment variables not defined in Settings
|
||||
# This allows backwards compatibility with old .env files (e.g., Docker)
|
||||
extra = "ignore"
|
||||
|
||||
def _resolve_path(self, path_value: str) -> Path:
|
||||
"""
|
||||
Convert relative paths to backend-rooted absolute paths.
|
||||
|
||||
@@ -3,6 +3,11 @@ Tool_OCR - FastAPI Application Entry Point (V2)
|
||||
Main application setup with CORS, routes, and startup/shutdown events
|
||||
"""
|
||||
|
||||
import warnings
|
||||
# Suppress noisy third-party warnings
|
||||
warnings.filterwarnings("ignore", message=".*ccache.*")
|
||||
warnings.filterwarnings("ignore", message=".*No ccache found.*")
|
||||
|
||||
from fastapi import FastAPI
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from contextlib import asynccontextmanager
|
||||
@@ -107,27 +112,55 @@ def setup_signal_handlers():
|
||||
# Ensure log directory exists before configuring logging
|
||||
Path(settings.log_file).parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Configure logging - Force configuration to override uvicorn's settings
|
||||
logging.basicConfig(
|
||||
level=getattr(logging, settings.log_level),
|
||||
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
|
||||
handlers=[
|
||||
logging.FileHandler(settings.log_file),
|
||||
logging.StreamHandler(),
|
||||
],
|
||||
force=True # Force reconfiguration (Python 3.8+)
|
||||
)
|
||||
# Configure logging - prevent duplicate handlers
|
||||
def setup_logging():
|
||||
"""Configure logging with deduplication"""
|
||||
log_level = getattr(logging, settings.log_level, logging.INFO)
|
||||
log_format = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
|
||||
|
||||
# Also explicitly configure root logger and app loggers
|
||||
root_logger = logging.getLogger()
|
||||
root_logger.setLevel(getattr(logging, settings.log_level))
|
||||
# Get root logger
|
||||
root_logger = logging.getLogger()
|
||||
|
||||
# Configure app-specific loggers
|
||||
for logger_name in ['app', 'app.services', 'app.services.pdf_generator_service', 'app.services.ocr_service']:
|
||||
# Clear existing handlers to prevent duplicates
|
||||
root_logger.handlers.clear()
|
||||
|
||||
# Set root level
|
||||
root_logger.setLevel(log_level)
|
||||
|
||||
# Create formatter
|
||||
formatter = logging.Formatter(log_format)
|
||||
|
||||
# File handler
|
||||
file_handler = logging.FileHandler(settings.log_file)
|
||||
file_handler.setLevel(log_level)
|
||||
file_handler.setFormatter(formatter)
|
||||
root_logger.addHandler(file_handler)
|
||||
|
||||
# Stream handler (console)
|
||||
stream_handler = logging.StreamHandler()
|
||||
stream_handler.setLevel(log_level)
|
||||
stream_handler.setFormatter(formatter)
|
||||
root_logger.addHandler(stream_handler)
|
||||
|
||||
# Suppress uvicorn's duplicate access logs
|
||||
logging.getLogger("uvicorn.access").handlers.clear()
|
||||
logging.getLogger("uvicorn.error").handlers.clear()
|
||||
|
||||
# Configure uvicorn loggers to use our handlers
|
||||
for uvi_logger_name in ["uvicorn", "uvicorn.access", "uvicorn.error"]:
|
||||
uvi_logger = logging.getLogger(uvi_logger_name)
|
||||
uvi_logger.handlers.clear()
|
||||
uvi_logger.propagate = True
|
||||
uvi_logger.setLevel(log_level)
|
||||
|
||||
# Configure app-specific loggers (no separate handlers, just propagate)
|
||||
for logger_name in ['app', 'app.services', 'app.routers']:
|
||||
app_logger = logging.getLogger(logger_name)
|
||||
app_logger.setLevel(getattr(logging, settings.log_level))
|
||||
app_logger.propagate = True # Ensure logs propagate to root logger
|
||||
app_logger.handlers.clear() # Remove any existing handlers
|
||||
app_logger.setLevel(log_level)
|
||||
app_logger.propagate = True # Propagate to root logger
|
||||
|
||||
setup_logging()
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
|
||||
@@ -401,7 +401,7 @@ async def trigger_cleanup(
|
||||
result = cleanup_service.cleanup_all_users(db, max_files_per_user=files_to_keep)
|
||||
|
||||
logger.info(
|
||||
f"Manual cleanup triggered by admin {admin_user.username}: "
|
||||
f"Manual cleanup triggered by admin {admin_user.email}: "
|
||||
f"{result['total_files_deleted']} files, {result['total_bytes_freed']} bytes"
|
||||
)
|
||||
|
||||
|
||||
@@ -5,7 +5,10 @@ Background scheduler for periodic file cleanup
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import os
|
||||
import fcntl
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from sqlalchemy.orm import Session
|
||||
@@ -16,11 +19,50 @@ from app.services.cleanup_service import cleanup_service
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Lock file path for multi-worker coordination
|
||||
_LOCK_FILE = Path(settings.log_file).parent / ".cleanup_scheduler.lock"
|
||||
_lock_fd = None
|
||||
|
||||
|
||||
def _try_acquire_lock() -> bool:
|
||||
"""
|
||||
Try to acquire exclusive lock for scheduler.
|
||||
Only one worker should run the scheduler.
|
||||
Returns True if lock acquired, False otherwise.
|
||||
"""
|
||||
global _lock_fd
|
||||
try:
|
||||
_LOCK_FILE.parent.mkdir(parents=True, exist_ok=True)
|
||||
_lock_fd = open(_LOCK_FILE, 'w')
|
||||
fcntl.flock(_lock_fd.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
|
||||
_lock_fd.write(str(os.getpid()))
|
||||
_lock_fd.flush()
|
||||
return True
|
||||
except (IOError, OSError):
|
||||
# Lock is held by another process
|
||||
if _lock_fd:
|
||||
_lock_fd.close()
|
||||
_lock_fd = None
|
||||
return False
|
||||
|
||||
|
||||
def _release_lock():
|
||||
"""Release the scheduler lock"""
|
||||
global _lock_fd
|
||||
if _lock_fd:
|
||||
try:
|
||||
fcntl.flock(_lock_fd.fileno(), fcntl.LOCK_UN)
|
||||
_lock_fd.close()
|
||||
except Exception:
|
||||
pass
|
||||
_lock_fd = None
|
||||
|
||||
|
||||
class CleanupScheduler:
|
||||
"""
|
||||
Background scheduler for periodic file cleanup.
|
||||
Uses asyncio for non-blocking background execution.
|
||||
Uses file lock to ensure only one instance runs across multiple workers.
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
@@ -29,6 +71,7 @@ class CleanupScheduler:
|
||||
self._last_run: Optional[datetime] = None
|
||||
self._next_run: Optional[datetime] = None
|
||||
self._last_result: Optional[dict] = None
|
||||
self._has_lock: bool = False
|
||||
|
||||
@property
|
||||
def is_running(self) -> bool:
|
||||
@@ -55,9 +98,15 @@ class CleanupScheduler:
|
||||
return
|
||||
|
||||
if self.is_running:
|
||||
logger.warning("Cleanup scheduler is already running")
|
||||
logger.debug("Cleanup scheduler is already running")
|
||||
return
|
||||
|
||||
# Try to acquire lock - only one worker should run the scheduler
|
||||
if not _try_acquire_lock():
|
||||
logger.debug("Another worker is running the cleanup scheduler, skipping")
|
||||
return
|
||||
|
||||
self._has_lock = True
|
||||
self._running = True
|
||||
self._task = asyncio.create_task(self._run_loop())
|
||||
logger.info(
|
||||
@@ -77,6 +126,10 @@ class CleanupScheduler:
|
||||
pass
|
||||
self._task = None
|
||||
|
||||
# Release the lock if we had it
|
||||
if self._has_lock:
|
||||
_release_lock()
|
||||
self._has_lock = False
|
||||
logger.info("Cleanup scheduler stopped")
|
||||
|
||||
async def _run_loop(self):
|
||||
|
||||
@@ -41,9 +41,11 @@ class TaskService:
|
||||
# Generate unique task ID
|
||||
task_id = str(uuid.uuid4())
|
||||
|
||||
# Check user's task limit
|
||||
# Check user's task limit (excluding soft-deleted tasks)
|
||||
if settings.max_tasks_per_user > 0:
|
||||
user_task_count = db.query(Task).filter(Task.user_id == user_id).count()
|
||||
user_task_count = db.query(Task).filter(
|
||||
and_(Task.user_id == user_id, Task.deleted_at.is_(None))
|
||||
).count()
|
||||
if user_task_count >= settings.max_tasks_per_user:
|
||||
# Auto-delete oldest completed tasks to make room
|
||||
self._cleanup_old_tasks(db, user_id, limit=10)
|
||||
@@ -359,7 +361,7 @@ class TaskService:
|
||||
|
||||
def get_user_stats(self, db: Session, user_id: int) -> dict:
|
||||
"""
|
||||
Get statistics for a user's tasks
|
||||
Get statistics for a user's tasks (excluding soft-deleted tasks)
|
||||
|
||||
Args:
|
||||
db: Database session
|
||||
@@ -368,29 +370,32 @@ class TaskService:
|
||||
Returns:
|
||||
Dictionary with task statistics
|
||||
"""
|
||||
total = db.query(Task).filter(Task.user_id == user_id).count()
|
||||
# Base filter: user's non-deleted tasks
|
||||
base_filter = and_(Task.user_id == user_id, Task.deleted_at.is_(None))
|
||||
|
||||
total = db.query(Task).filter(base_filter).count()
|
||||
|
||||
pending = (
|
||||
db.query(Task)
|
||||
.filter(and_(Task.user_id == user_id, Task.status == TaskStatus.PENDING))
|
||||
.filter(and_(base_filter, Task.status == TaskStatus.PENDING))
|
||||
.count()
|
||||
)
|
||||
|
||||
processing = (
|
||||
db.query(Task)
|
||||
.filter(and_(Task.user_id == user_id, Task.status == TaskStatus.PROCESSING))
|
||||
.filter(and_(base_filter, Task.status == TaskStatus.PROCESSING))
|
||||
.count()
|
||||
)
|
||||
|
||||
completed = (
|
||||
db.query(Task)
|
||||
.filter(and_(Task.user_id == user_id, Task.status == TaskStatus.COMPLETED))
|
||||
.filter(and_(base_filter, Task.status == TaskStatus.COMPLETED))
|
||||
.count()
|
||||
)
|
||||
|
||||
failed = (
|
||||
db.query(Task)
|
||||
.filter(and_(Task.user_id == user_id, Task.status == TaskStatus.FAILED))
|
||||
.filter(and_(base_filter, Task.status == TaskStatus.FAILED))
|
||||
.count()
|
||||
)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user