fix: logging, warnings, and soft-delete consistency
- Fix duplicate logging in multi-worker mode with file lock for cleanup scheduler
- Add Pydantic V2 model_config to suppress protected_namespaces warning
- Suppress PaddlePaddle ccache warnings
- Fix admin.py using non-existent User.username (now uses email)
- Fix get_user_stats to exclude soft-deleted tasks from statistics
- Fix create_task to exclude soft-deleted tasks from user limit check
- Change LOG_LEVEL default to INFO

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
.env
@@ -112,5 +112,5 @@ TASK_QUEUE_TYPE=memory
 CORS_ORIGINS=http://localhost:5173,http://127.0.0.1:5173

 # ===== Logging Configuration =====
-LOG_LEVEL=DEBUG
+LOG_LEVEL=INFO
 LOG_FILE=./logs/app.log
@@ -38,6 +38,17 @@ def _default_font_dir() -> str:
 class Settings(BaseSettings):
     """Application settings loaded from environment variables"""

+    # Pydantic V2 configuration
+    model_config = {
+        # Look for .env file in project root (one level up from backend/)
+        "env_file": str(PROJECT_ROOT / ".env"),
+        "env_file_encoding": "utf-8",
+        "case_sensitive": False,
+        "extra": "ignore",
+        # Allow field names starting with "model_" without conflict
+        "protected_namespaces": (),
+    }
+
     # ===== Database Configuration =====
     mysql_host: str = Field(default="localhost")
     mysql_port: int = Field(default=3306)
@@ -552,19 +563,6 @@ class Settings(BaseSettings):

         return self

-    class Config:
-        # Look for .env files in project root (one level up from backend/)
-        # .env.local has higher priority and overrides .env
-        env_file = (
-            str(PROJECT_ROOT / ".env"),
-            str(PROJECT_ROOT / ".env.local"),
-        )
-        env_file_encoding = "utf-8"
-        case_sensitive = False
-        # Ignore extra environment variables not defined in Settings
-        # This allows backwards compatibility with old .env files (e.g., Docker)
-        extra = "ignore"
-
     def _resolve_path(self, path_value: str) -> Path:
         """
         Convert relative paths to backend-rooted absolute paths.
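Note: the `model_config` dict is the Pydantic V2 replacement for the removed `class Config` above, and the empty `"protected_namespaces": ()` tuple is what silences the warning for settings fields whose names start with `model_`. A minimal, self-contained sketch of the effect, assuming pydantic-settings v2 (the `model_path` field below is illustrative, not a field from this repo):

from pydantic import Field
from pydantic_settings import BaseSettings

class DemoSettings(BaseSettings):
    """Sketch: without protected_namespaces=(), Pydantic V2 emits a UserWarning
    that 'model_path' conflicts with the protected "model_" namespace."""

    model_config = {
        "env_file": ".env",
        "env_file_encoding": "utf-8",
        "case_sensitive": False,
        "extra": "ignore",
        # Empty tuple disables the "model_" protected-namespace check
        "protected_namespaces": (),
    }

    # Would warn at class-definition time without the setting above
    model_path: str = Field(default="./models")

print(DemoSettings().model_path)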
@@ -3,6 +3,11 @@ Tool_OCR - FastAPI Application Entry Point (V2)
 Main application setup with CORS, routes, and startup/shutdown events
 """

+import warnings
+# Suppress noisy third-party warnings
+warnings.filterwarnings("ignore", message=".*ccache.*")
+warnings.filterwarnings("ignore", message=".*No ccache found.*")
+
 from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
 from contextlib import asynccontextmanager
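Note: `warnings.filterwarnings` only affects warnings raised after the filter is registered, which is why these two calls sit at the very top of the module, ahead of the imports that eventually pull in PaddlePaddle. A standalone sketch of the ordering requirement (the paddle import is illustrative; any module that warns during import behaves the same way):

import warnings

# Filters must be in place before the warning is emitted; the message
# arguments are regular expressions matched against the warning text.
warnings.filterwarnings("ignore", message=".*ccache.*")
warnings.filterwarnings("ignore", message=".*No ccache found.*")

# import paddle  # illustrative: the ccache warning fires during this import,
#                # so it is only suppressed because the filters ran first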
@@ -107,27 +112,55 @@ def setup_signal_handlers():
 # Ensure log directory exists before configuring logging
 Path(settings.log_file).parent.mkdir(parents=True, exist_ok=True)

-# Configure logging - Force configuration to override uvicorn's settings
-logging.basicConfig(
-    level=getattr(logging, settings.log_level),
-    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
-    handlers=[
-        logging.FileHandler(settings.log_file),
-        logging.StreamHandler(),
-    ],
-    force=True  # Force reconfiguration (Python 3.8+)
-)
-
-# Also explicitly configure root logger and app loggers
-root_logger = logging.getLogger()
-root_logger.setLevel(getattr(logging, settings.log_level))
-
-# Configure app-specific loggers
-for logger_name in ['app', 'app.services', 'app.services.pdf_generator_service', 'app.services.ocr_service']:
-    app_logger = logging.getLogger(logger_name)
-    app_logger.setLevel(getattr(logging, settings.log_level))
-    app_logger.propagate = True  # Ensure logs propagate to root logger
+# Configure logging - prevent duplicate handlers
+def setup_logging():
+    """Configure logging with deduplication"""
+    log_level = getattr(logging, settings.log_level, logging.INFO)
+    log_format = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+
+    # Get root logger
+    root_logger = logging.getLogger()
+
+    # Clear existing handlers to prevent duplicates
+    root_logger.handlers.clear()
+
+    # Set root level
+    root_logger.setLevel(log_level)
+
+    # Create formatter
+    formatter = logging.Formatter(log_format)
+
+    # File handler
+    file_handler = logging.FileHandler(settings.log_file)
+    file_handler.setLevel(log_level)
+    file_handler.setFormatter(formatter)
+    root_logger.addHandler(file_handler)
+
+    # Stream handler (console)
+    stream_handler = logging.StreamHandler()
+    stream_handler.setLevel(log_level)
+    stream_handler.setFormatter(formatter)
+    root_logger.addHandler(stream_handler)
+
+    # Suppress uvicorn's duplicate access logs
+    logging.getLogger("uvicorn.access").handlers.clear()
+    logging.getLogger("uvicorn.error").handlers.clear()
+
+    # Configure uvicorn loggers to use our handlers
+    for uvi_logger_name in ["uvicorn", "uvicorn.access", "uvicorn.error"]:
+        uvi_logger = logging.getLogger(uvi_logger_name)
+        uvi_logger.handlers.clear()
+        uvi_logger.propagate = True
+        uvi_logger.setLevel(log_level)
+
+    # Configure app-specific loggers (no separate handlers, just propagate)
+    for logger_name in ['app', 'app.services', 'app.routers']:
+        app_logger = logging.getLogger(logger_name)
+        app_logger.handlers.clear()  # Remove any existing handlers
+        app_logger.setLevel(log_level)
+        app_logger.propagate = True  # Propagate to root logger
+
+
+setup_logging()

 logger = logging.getLogger(__name__)
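Note: within a single process, the duplicated log lines come from handlers piling up on the same loggers (uvicorn installs its own, and repeated configuration adds more), so `setup_logging()` clears every handler set and routes all records through exactly one file handler and one stream handler on the root logger. A minimal sketch of the failure mode and the fix, independent of this app:

import logging
import sys

root = logging.getLogger()
root.setLevel(logging.INFO)

# Simulate two rounds of configuration each attaching a console handler.
root.addHandler(logging.StreamHandler(sys.stderr))
root.addHandler(logging.StreamHandler(sys.stderr))
logging.getLogger("app").info("printed twice - one copy per handler")

# The fix used above: clear the stacked handlers, then attach exactly one.
root.handlers.clear()
root.addHandler(logging.StreamHandler(sys.stderr))
logging.getLogger("app").info("printed once")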
@@ -401,7 +401,7 @@ async def trigger_cleanup(
     result = cleanup_service.cleanup_all_users(db, max_files_per_user=files_to_keep)

     logger.info(
-        f"Manual cleanup triggered by admin {admin_user.username}: "
+        f"Manual cleanup triggered by admin {admin_user.email}: "
         f"{result['total_files_deleted']} files, {result['total_bytes_freed']} bytes"
     )
@@ -5,7 +5,10 @@ Background scheduler for periodic file cleanup

 import asyncio
 import logging
+import os
+import fcntl
 from datetime import datetime
+from pathlib import Path
 from typing import Optional

 from sqlalchemy.orm import Session
@@ -16,11 +19,50 @@ from app.services.cleanup_service import cleanup_service

 logger = logging.getLogger(__name__)

+# Lock file path for multi-worker coordination
+_LOCK_FILE = Path(settings.log_file).parent / ".cleanup_scheduler.lock"
+_lock_fd = None
+
+
+def _try_acquire_lock() -> bool:
+    """
+    Try to acquire exclusive lock for scheduler.
+    Only one worker should run the scheduler.
+    Returns True if lock acquired, False otherwise.
+    """
+    global _lock_fd
+    try:
+        _LOCK_FILE.parent.mkdir(parents=True, exist_ok=True)
+        _lock_fd = open(_LOCK_FILE, 'w')
+        fcntl.flock(_lock_fd.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
+        _lock_fd.write(str(os.getpid()))
+        _lock_fd.flush()
+        return True
+    except (IOError, OSError):
+        # Lock is held by another process
+        if _lock_fd:
+            _lock_fd.close()
+            _lock_fd = None
+        return False
+
+
+def _release_lock():
+    """Release the scheduler lock"""
+    global _lock_fd
+    if _lock_fd:
+        try:
+            fcntl.flock(_lock_fd.fileno(), fcntl.LOCK_UN)
+            _lock_fd.close()
+        except Exception:
+            pass
+        _lock_fd = None
+
+
 class CleanupScheduler:
     """
     Background scheduler for periodic file cleanup.
     Uses asyncio for non-blocking background execution.
+    Uses file lock to ensure only one instance runs across multiple workers.
     """

     def __init__(self):
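Note: `fcntl.flock` with `LOCK_EX | LOCK_NB` raises `OSError` immediately when another process already holds the lock, so exactly one uvicorn worker wins and the others skip the scheduler; the advisory lock is released automatically if the holding process exits. A self-contained sketch of the same pattern, assuming a POSIX host (the lock path below is made up for the sketch; the scheduler derives its own from settings.log_file):

import fcntl
import os
from pathlib import Path

LOCK_PATH = Path("/tmp/demo_scheduler.lock")  # illustrative path only

def acquire_single_instance_lock(path: Path):
    """Return an open file holding the lock, or None if another process owns it.
    Keep the returned object referenced for as long as the lock is needed."""
    fd = open(path, "w")
    try:
        fcntl.flock(fd.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
    except OSError:
        fd.close()
        return None
    fd.write(str(os.getpid()))
    fd.flush()
    return fd

lock = acquire_single_instance_lock(LOCK_PATH)
print("scheduler owner" if lock else "another worker holds the lock")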
@@ -29,6 +71,7 @@ class CleanupScheduler:
         self._last_run: Optional[datetime] = None
         self._next_run: Optional[datetime] = None
         self._last_result: Optional[dict] = None
+        self._has_lock: bool = False

     @property
     def is_running(self) -> bool:
@@ -55,9 +98,15 @@ class CleanupScheduler:
             return

         if self.is_running:
-            logger.warning("Cleanup scheduler is already running")
+            logger.debug("Cleanup scheduler is already running")
             return

+        # Try to acquire lock - only one worker should run the scheduler
+        if not _try_acquire_lock():
+            logger.debug("Another worker is running the cleanup scheduler, skipping")
+            return
+
+        self._has_lock = True
         self._running = True
         self._task = asyncio.create_task(self._run_loop())
         logger.info(
@@ -77,7 +126,11 @@ class CleanupScheduler:
                 pass
             self._task = None

-        logger.info("Cleanup scheduler stopped")
+        # Release the lock if we had it
+        if self._has_lock:
+            _release_lock()
+            self._has_lock = False
+        logger.info("Cleanup scheduler stopped")

     async def _run_loop(self):
         """Main scheduler loop"""
@@ -41,9 +41,11 @@ class TaskService:
         # Generate unique task ID
         task_id = str(uuid.uuid4())

-        # Check user's task limit
+        # Check user's task limit (excluding soft-deleted tasks)
         if settings.max_tasks_per_user > 0:
-            user_task_count = db.query(Task).filter(Task.user_id == user_id).count()
+            user_task_count = db.query(Task).filter(
+                and_(Task.user_id == user_id, Task.deleted_at.is_(None))
+            ).count()
             if user_task_count >= settings.max_tasks_per_user:
                 # Auto-delete oldest completed tasks to make room
                 self._cleanup_old_tasks(db, user_id, limit=10)
@@ -359,7 +361,7 @@ class TaskService:

     def get_user_stats(self, db: Session, user_id: int) -> dict:
         """
-        Get statistics for a user's tasks
+        Get statistics for a user's tasks (excluding soft-deleted tasks)

         Args:
             db: Database session
@@ -368,29 +370,32 @@ class TaskService:
         Returns:
             Dictionary with task statistics
         """
-        total = db.query(Task).filter(Task.user_id == user_id).count()
+        # Base filter: user's non-deleted tasks
+        base_filter = and_(Task.user_id == user_id, Task.deleted_at.is_(None))
+
+        total = db.query(Task).filter(base_filter).count()

         pending = (
             db.query(Task)
-            .filter(and_(Task.user_id == user_id, Task.status == TaskStatus.PENDING))
+            .filter(and_(base_filter, Task.status == TaskStatus.PENDING))
             .count()
         )

         processing = (
             db.query(Task)
-            .filter(and_(Task.user_id == user_id, Task.status == TaskStatus.PROCESSING))
+            .filter(and_(base_filter, Task.status == TaskStatus.PROCESSING))
             .count()
         )

         completed = (
             db.query(Task)
-            .filter(and_(Task.user_id == user_id, Task.status == TaskStatus.COMPLETED))
+            .filter(and_(base_filter, Task.status == TaskStatus.COMPLETED))
             .count()
         )

         failed = (
             db.query(Task)
-            .filter(and_(Task.user_id == user_id, Task.status == TaskStatus.FAILED))
+            .filter(and_(base_filter, Task.status == TaskStatus.FAILED))
             .count()
         )
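Note: soft-deleted rows keep their `deleted_at` timestamp instead of being removed, so every user-facing count has to add `Task.deleted_at.is_(None)`; folding that condition into a single `base_filter` keeps the total and the per-status counts consistent with each other. A minimal sketch of the same SQLAlchemy pattern with a cut-down model (the table layout here is illustrative; only `deleted_at` mirrors the real schema):

from datetime import datetime, timezone
from sqlalchemy import Column, DateTime, Integer, String, and_, create_engine
from sqlalchemy.orm import Session, declarative_base

Base = declarative_base()

class Task(Base):
    __tablename__ = "tasks"
    id = Column(Integer, primary_key=True)
    user_id = Column(Integer, nullable=False)
    status = Column(String(20), nullable=False)
    deleted_at = Column(DateTime, nullable=True)  # NULL means "not deleted"

engine = create_engine("sqlite://")
Base.metadata.create_all(engine)

with Session(engine) as db:
    db.add_all([
        Task(user_id=1, status="completed"),
        Task(user_id=1, status="completed",
             deleted_at=datetime.now(timezone.utc)),  # soft-deleted row
    ])
    db.commit()

    base_filter = and_(Task.user_id == 1, Task.deleted_at.is_(None))
    total = db.query(Task).filter(base_filter).count()
    completed = db.query(Task).filter(
        and_(base_filter, Task.status == "completed")
    ).count()
    print(total, completed)  # 1 1 - the soft-deleted task is excluded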