fix: logging, warnings, and soft-delete consistency

- Fix duplicate log output in multi-worker mode; guard the cleanup scheduler with a file lock so only one worker runs it
- Add Pydantic V2 model_config to suppress protected_namespaces warning
- Suppress PaddlePaddle ccache warnings
- Fix admin.py reference to the non-existent User.username attribute (now uses email)
- Fix get_user_stats to exclude soft-deleted tasks from statistics
- Fix create_task to exclude soft-deleted tasks from user limit check
- Change LOG_LEVEL default to INFO

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
egg
2025-12-14 15:40:31 +08:00
parent f46402f6c9
commit 7233e9cb7b
6 changed files with 132 additions and 43 deletions

.env

@@ -112,5 +112,5 @@ TASK_QUEUE_TYPE=memory
 CORS_ORIGINS=http://localhost:5173,http://127.0.0.1:5173

 # ===== Logging Configuration =====
-LOG_LEVEL=DEBUG
+LOG_LEVEL=INFO
 LOG_FILE=./logs/app.log


@@ -38,6 +38,17 @@ def _default_font_dir() -> str:
 class Settings(BaseSettings):
     """Application settings loaded from environment variables"""

+    # Pydantic V2 configuration
+    model_config = {
+        # Look for .env file in project root (one level up from backend/)
+        "env_file": str(PROJECT_ROOT / ".env"),
+        "env_file_encoding": "utf-8",
+        "case_sensitive": False,
+        "extra": "ignore",
+        # Allow field names starting with "model_" without conflict
+        "protected_namespaces": (),
+    }
+
     # ===== Database Configuration =====
     mysql_host: str = Field(default="localhost")
     mysql_port: int = Field(default=3306)

@@ -552,19 +563,6 @@ class Settings(BaseSettings):
         return self

-    class Config:
-        # Look for .env files in project root (one level up from backend/)
-        # .env.local has higher priority and overrides .env
-        env_file = (
-            str(PROJECT_ROOT / ".env"),
-            str(PROJECT_ROOT / ".env.local"),
-        )
-        env_file_encoding = "utf-8"
-        case_sensitive = False
-        # Ignore extra environment variables not defined in Settings
-        # This allows backwards compatibility with old .env files (e.g., Docker)
-        extra = "ignore"
-
     def _resolve_path(self, path_value: str) -> Path:
         """
         Convert relative paths to backend-rooted absolute paths.
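For context, a minimal self-contained sketch of why the `protected_namespaces` entry is needed; the `DemoSettings` class and its `model_path` field are illustrative only (not part of this codebase) and assume pydantic-settings v2 is installed.

```python
from pydantic import Field
from pydantic_settings import BaseSettings


class DemoSettings(BaseSettings):
    # Same shape as the Settings.model_config added above.
    model_config = {
        "extra": "ignore",
        # Without this, Pydantic V2 emits a UserWarning because "model_path"
        # shadows the protected "model_" namespace.
        "protected_namespaces": (),
    }

    model_path: str = Field(default="./models")


print(DemoSettings().model_path)  # -> ./models, with no namespace warning
```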


@@ -3,6 +3,11 @@ Tool_OCR - FastAPI Application Entry Point (V2)
 Main application setup with CORS, routes, and startup/shutdown events
 """
+import warnings
+
+# Suppress noisy third-party warnings
+warnings.filterwarnings("ignore", message=".*ccache.*")
+warnings.filterwarnings("ignore", message=".*No ccache found.*")
+
 from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
 from contextlib import asynccontextmanager
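As a small standalone illustration of the filter above, with a made-up warning message standing in for PaddlePaddle's ccache notice:

```python
import warnings

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")                       # record everything by default
    warnings.filterwarnings("ignore", message=".*ccache.*")
    warnings.warn("No ccache found. Falling back to slower builds.")  # suppressed
    warnings.warn("some other warning")                   # still recorded

print(len(caught))  # -> 1
```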
@@ -107,27 +112,55 @@ def setup_signal_handlers():
 # Ensure log directory exists before configuring logging
 Path(settings.log_file).parent.mkdir(parents=True, exist_ok=True)

-# Configure logging - Force configuration to override uvicorn's settings
-logging.basicConfig(
-    level=getattr(logging, settings.log_level),
-    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
-    handlers=[
-        logging.FileHandler(settings.log_file),
-        logging.StreamHandler(),
-    ],
-    force=True  # Force reconfiguration (Python 3.8+)
-)
-
-# Also explicitly configure root logger and app loggers
-root_logger = logging.getLogger()
-root_logger.setLevel(getattr(logging, settings.log_level))
-
-# Configure app-specific loggers
-for logger_name in ['app', 'app.services', 'app.services.pdf_generator_service', 'app.services.ocr_service']:
-    app_logger = logging.getLogger(logger_name)
-    app_logger.setLevel(getattr(logging, settings.log_level))
-    app_logger.propagate = True  # Ensure logs propagate to root logger
+# Configure logging - prevent duplicate handlers
+def setup_logging():
+    """Configure logging with deduplication"""
+    log_level = getattr(logging, settings.log_level, logging.INFO)
+    log_format = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+
+    # Get root logger
+    root_logger = logging.getLogger()
+
+    # Clear existing handlers to prevent duplicates
+    root_logger.handlers.clear()
+
+    # Set root level
+    root_logger.setLevel(log_level)
+
+    # Create formatter
+    formatter = logging.Formatter(log_format)
+
+    # File handler
+    file_handler = logging.FileHandler(settings.log_file)
+    file_handler.setLevel(log_level)
+    file_handler.setFormatter(formatter)
+    root_logger.addHandler(file_handler)
+
+    # Stream handler (console)
+    stream_handler = logging.StreamHandler()
+    stream_handler.setLevel(log_level)
+    stream_handler.setFormatter(formatter)
+    root_logger.addHandler(stream_handler)
+
+    # Suppress uvicorn's duplicate access logs
+    logging.getLogger("uvicorn.access").handlers.clear()
+    logging.getLogger("uvicorn.error").handlers.clear()
+
+    # Configure uvicorn loggers to use our handlers
+    for uvi_logger_name in ["uvicorn", "uvicorn.access", "uvicorn.error"]:
+        uvi_logger = logging.getLogger(uvi_logger_name)
+        uvi_logger.handlers.clear()
+        uvi_logger.propagate = True
+        uvi_logger.setLevel(log_level)
+
+    # Configure app-specific loggers (no separate handlers, just propagate)
+    for logger_name in ['app', 'app.services', 'app.routers']:
+        app_logger = logging.getLogger(logger_name)
+        app_logger.handlers.clear()  # Remove any existing handlers
+        app_logger.setLevel(log_level)
+        app_logger.propagate = True  # Propagate to root logger
+
+
+setup_logging()

 logger = logging.getLogger(__name__)
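A standalone sketch of the deduplication idea used above: clearing the root logger's handlers before re-adding them makes setup idempotent, so repeated configuration (for example, once per worker) no longer doubles every log line. The file name and logger names below are illustrative.

```python
import logging


def setup_logging(log_file: str = "demo.log", level: int = logging.INFO) -> None:
    root = logging.getLogger()
    root.handlers.clear()          # drop handlers left by any earlier setup
    root.setLevel(level)

    formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
    for handler in (logging.FileHandler(log_file), logging.StreamHandler()):
        handler.setLevel(level)
        handler.setFormatter(formatter)
        root.addHandler(handler)


setup_logging()
setup_logging()  # calling again does not stack handlers
assert len(logging.getLogger().handlers) == 2
logging.getLogger("app.demo").info("emitted exactly once")
```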


@@ -401,7 +401,7 @@ async def trigger_cleanup(
     result = cleanup_service.cleanup_all_users(db, max_files_per_user=files_to_keep)

     logger.info(
-        f"Manual cleanup triggered by admin {admin_user.username}: "
+        f"Manual cleanup triggered by admin {admin_user.email}: "
         f"{result['total_files_deleted']} files, {result['total_bytes_freed']} bytes"
     )


@@ -5,7 +5,10 @@ Background scheduler for periodic file cleanup
 import asyncio
 import logging
+import os
+import fcntl
 from datetime import datetime
+from pathlib import Path
 from typing import Optional

 from sqlalchemy.orm import Session
@@ -16,11 +19,50 @@ from app.services.cleanup_service import cleanup_service

 logger = logging.getLogger(__name__)

+# Lock file path for multi-worker coordination
+_LOCK_FILE = Path(settings.log_file).parent / ".cleanup_scheduler.lock"
+_lock_fd = None
+
+
+def _try_acquire_lock() -> bool:
+    """
+    Try to acquire exclusive lock for scheduler.
+    Only one worker should run the scheduler.
+    Returns True if lock acquired, False otherwise.
+    """
+    global _lock_fd
+    try:
+        _LOCK_FILE.parent.mkdir(parents=True, exist_ok=True)
+        _lock_fd = open(_LOCK_FILE, 'w')
+        fcntl.flock(_lock_fd.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
+        _lock_fd.write(str(os.getpid()))
+        _lock_fd.flush()
+        return True
+    except (IOError, OSError):
+        # Lock is held by another process
+        if _lock_fd:
+            _lock_fd.close()
+            _lock_fd = None
+        return False
+
+
+def _release_lock():
+    """Release the scheduler lock"""
+    global _lock_fd
+    if _lock_fd:
+        try:
+            fcntl.flock(_lock_fd.fileno(), fcntl.LOCK_UN)
+            _lock_fd.close()
+        except Exception:
+            pass
+        _lock_fd = None
+
+
 class CleanupScheduler:
     """
     Background scheduler for periodic file cleanup.
     Uses asyncio for non-blocking background execution.
+    Uses file lock to ensure only one instance runs across multiple workers.
     """

     def __init__(self):
@@ -29,6 +71,7 @@ class CleanupScheduler:
         self._last_run: Optional[datetime] = None
         self._next_run: Optional[datetime] = None
         self._last_result: Optional[dict] = None
+        self._has_lock: bool = False

     @property
     def is_running(self) -> bool:
@@ -55,9 +98,15 @@ class CleanupScheduler:
             return

         if self.is_running:
-            logger.warning("Cleanup scheduler is already running")
+            logger.debug("Cleanup scheduler is already running")
             return

+        # Try to acquire lock - only one worker should run the scheduler
+        if not _try_acquire_lock():
+            logger.debug("Another worker is running the cleanup scheduler, skipping")
+            return
+
+        self._has_lock = True
         self._running = True
         self._task = asyncio.create_task(self._run_loop())
         logger.info(
@@ -77,7 +126,11 @@ class CleanupScheduler:
                 pass
             self._task = None

-        logger.info("Cleanup scheduler stopped")
+        # Release the lock if we had it
+        if self._has_lock:
+            _release_lock()
+            self._has_lock = False
+        logger.info("Cleanup scheduler stopped")

     async def _run_loop(self):
         """Main scheduler loop"""


@@ -41,9 +41,11 @@ class TaskService:
         # Generate unique task ID
         task_id = str(uuid.uuid4())

-        # Check user's task limit
+        # Check user's task limit (excluding soft-deleted tasks)
         if settings.max_tasks_per_user > 0:
-            user_task_count = db.query(Task).filter(Task.user_id == user_id).count()
+            user_task_count = db.query(Task).filter(
+                and_(Task.user_id == user_id, Task.deleted_at.is_(None))
+            ).count()
             if user_task_count >= settings.max_tasks_per_user:
                 # Auto-delete oldest completed tasks to make room
                 self._cleanup_old_tasks(db, user_id, limit=10)
@@ -359,7 +361,7 @@ class TaskService:
     def get_user_stats(self, db: Session, user_id: int) -> dict:
         """
-        Get statistics for a user's tasks
+        Get statistics for a user's tasks (excluding soft-deleted tasks)

         Args:
             db: Database session

@@ -368,29 +370,32 @@ class TaskService:
         Returns:
             Dictionary with task statistics
         """
-        total = db.query(Task).filter(Task.user_id == user_id).count()
+        # Base filter: user's non-deleted tasks
+        base_filter = and_(Task.user_id == user_id, Task.deleted_at.is_(None))
+
+        total = db.query(Task).filter(base_filter).count()
         pending = (
             db.query(Task)
-            .filter(and_(Task.user_id == user_id, Task.status == TaskStatus.PENDING))
+            .filter(and_(base_filter, Task.status == TaskStatus.PENDING))
             .count()
         )
         processing = (
             db.query(Task)
-            .filter(and_(Task.user_id == user_id, Task.status == TaskStatus.PROCESSING))
+            .filter(and_(base_filter, Task.status == TaskStatus.PROCESSING))
             .count()
         )
         completed = (
             db.query(Task)
-            .filter(and_(Task.user_id == user_id, Task.status == TaskStatus.COMPLETED))
+            .filter(and_(base_filter, Task.status == TaskStatus.COMPLETED))
             .count()
        )
         failed = (
             db.query(Task)
-            .filter(and_(Task.user_id == user_id, Task.status == TaskStatus.FAILED))
+            .filter(and_(base_filter, Task.status == TaskStatus.FAILED))
             .count()
         )
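To see the soft-delete filtering in isolation, the sketch below builds an in-memory SQLite database with a cut-down Task model containing only the columns used here; it is an illustrative stand-in, not the project's actual schema.

```python
from datetime import datetime, timezone

from sqlalchemy import Column, DateTime, Integer, String, and_, create_engine
from sqlalchemy.orm import Session, declarative_base

Base = declarative_base()


class Task(Base):
    __tablename__ = "tasks"
    id = Column(Integer, primary_key=True)
    user_id = Column(Integer, nullable=False)
    status = Column(String(20), nullable=False)
    deleted_at = Column(DateTime, nullable=True)  # NULL means the task is live


engine = create_engine("sqlite:///:memory:")
Base.metadata.create_all(engine)

with Session(engine) as db:
    db.add_all([
        Task(user_id=1, status="completed"),
        Task(user_id=1, status="failed", deleted_at=datetime.now(timezone.utc)),
    ])
    db.commit()

    # Reusable base filter: this user's tasks that are not soft-deleted.
    base_filter = and_(Task.user_id == 1, Task.deleted_at.is_(None))
    total = db.query(Task).filter(base_filter).count()
    completed = db.query(Task).filter(and_(base_filter, Task.status == "completed")).count()
    print(total, completed)  # -> 1 1
```

Reusing one base filter keeps the per-status counts consistent with the total, which is the point of the change above.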