feat: Phase 2 連線池化與優雅關閉改進

- NullPool 改為 QueuePool 連線池
  - pool_size=5, max_overflow=10, pool_recycle=1800
  - pool_pre_ping 確保連線有效性
- 新增連線池監控事件 (checkout/checkin/invalidate/connect)
- 新增 keep-alive 機制 (每 5 分鐘 ping)
- 新增 dispose_engine() 清理函數
- Gunicorn worker_exit hook 自動清理連線
- 降低 graceful_timeout 至 10 秒加速重啟
- 降低 tcp_connect_timeout 至 10 秒減少等待

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
beabigegg
2026-01-27 18:34:31 +08:00
parent 99eb77b149
commit fcfa4fb919
2 changed files with 115 additions and 22 deletions

View File

@@ -7,5 +7,18 @@ worker_class = "gthread"
# Timeout settings - critical for dashboard stability
timeout = 60  # Worker timeout: 60 seconds max per request
graceful_timeout = 10  # Graceful shutdown timeout (reduced for faster restart)
keepalive = 5  # Keep-alive connections timeout
# ============================================================
# Worker Lifecycle Hooks
# ============================================================
def worker_exit(server, worker):
    """Clean up database connections when a Gunicorn worker exits.

    Registered as the Gunicorn ``worker_exit`` server hook. Best-effort:
    any failure is logged through the server logger and swallowed so the
    worker shutdown sequence is never blocked.

    Args:
        server: Gunicorn server/arbiter instance (provides ``log``).
        worker: The exiting worker instance (unused here).
    """
    try:
        # Imported lazily so this config file loads even when the app
        # package is not importable at config-parse time.
        from mes_dashboard.core.database import dispose_engine
        dispose_engine()
    except Exception as e:
        server.log.warning(f"Error disposing database engine: {e}")

View File

@@ -5,14 +5,15 @@ from __future__ import annotations
import logging
import re
import threading
import time
from typing import Optional, Dict, Any
import oracledb
import pandas as pd
from flask import g, current_app
from sqlalchemy import create_engine, text
from sqlalchemy.pool import NullPool
from sqlalchemy import create_engine, event, text
from sqlalchemy.pool import QueuePool
from mes_dashboard.config.database import DB_CONFIG, CONNECTION_STRING
from mes_dashboard.config.settings import DevelopmentConfig
@@ -21,37 +22,73 @@ from mes_dashboard.config.settings import DevelopmentConfig
logger = logging.getLogger('mes_dashboard.database')
# ============================================================
# SQLAlchemy Engine (QueuePool - connection pooling)
# ============================================================
# Using QueuePool for better performance and connection reuse.
# pool_pre_ping ensures connections are valid before use.
# pool_recycle prevents stale connections from firewalls/NAT.
_ENGINE = None
def get_engine():
    """Get the shared SQLAlchemy engine with connection pooling.

    Lazily creates a module-wide singleton engine using QueuePool for
    connection reuse and better performance:

    - pool_size: Base number of persistent connections
    - max_overflow: Additional connections during peak load
    - pool_timeout: Max wait time for an available connection
    - pool_recycle: Recycle connections after 30 minutes
    - pool_pre_ping: Validate each connection before checkout

    Returns:
        The process-wide ``sqlalchemy.engine.Engine`` instance.
    """
    global _ENGINE
    if _ENGINE is None:
        _ENGINE = create_engine(
            CONNECTION_STRING,
            poolclass=QueuePool,
            pool_size=5,           # Base connections
            max_overflow=10,       # Peak extra connections (total max: 15)
            pool_timeout=30,       # Wait up to 30s for a connection
            pool_recycle=1800,     # Recycle connections every 30 minutes
            pool_pre_ping=True,    # Validate connection before use
            connect_args={
                "tcp_connect_timeout": 10,  # TCP connect timeout 10s (reduced)
                "retry_count": 1,           # Retry once on connection failure
                "retry_delay": 1,           # 1s delay between retries
            },
        )
        # Register pool event listeners for monitoring.
        _register_pool_events(_ENGINE)
        # NOTE: plain string — the previous f-string had no placeholders.
        logger.info(
            "Database engine created with QueuePool "
            "(pool_size=5, max_overflow=10, pool_recycle=1800)"
        )
    return _ENGINE
def _register_pool_events(engine):
    """Attach connection-pool lifecycle listeners to *engine*.

    Hooks the connect/checkout/checkin/invalidate pool events so pool
    activity is visible in the module logger: routine traffic at DEBUG,
    new physical connections at INFO, invalidations with an exception
    at WARNING.
    """

    @event.listens_for(engine, "connect")
    def _on_connect(dbapi_conn, connection_record):
        logger.info("New database connection established")

    @event.listens_for(engine, "checkout")
    def _on_checkout(dbapi_conn, connection_record, connection_proxy):
        logger.debug("Connection checked out from pool")

    @event.listens_for(engine, "checkin")
    def _on_checkin(dbapi_conn, connection_record):
        logger.debug("Connection returned to pool")

    @event.listens_for(engine, "invalidate")
    def _on_invalidate(dbapi_conn, connection_record, exception):
        if exception:
            logger.warning(f"Connection invalidated due to: {exception}")
        else:
            logger.debug("Connection invalidated (soft)")
# ============================================================
# Request-scoped Connection
# ============================================================
@@ -77,19 +114,62 @@ def init_db(app) -> None:
# ============================================================
# Keep-Alive for Connection Pool
# ============================================================
# Periodic keep-alive prevents idle connections from being dropped
# by firewalls/NAT. Runs every 5 minutes in a background thread.
# Module-level keep-alive state: one daemon thread plus a stop flag.
_KEEPALIVE_THREAD = None
_KEEPALIVE_STOP = threading.Event()
KEEPALIVE_INTERVAL = 300  # 5 minutes


def _keepalive_worker():
    """Ping the database every KEEPALIVE_INTERVAL seconds until stopped.

    Runs in a background daemon thread; any ping failure is logged and
    the loop keeps going.
    """
    # Event.wait() doubles as the sleep between pings and the shutdown
    # signal: it returns True as soon as the stop flag is set.
    while True:
        if _KEEPALIVE_STOP.wait(KEEPALIVE_INTERVAL):
            break
        try:
            with get_engine().connect() as conn:
                conn.execute(text("SELECT 1 FROM DUAL"))
            logger.debug("Keep-alive ping successful")
        except Exception as exc:
            logger.warning(f"Keep-alive ping failed: {exc}")
def start_keepalive():
    """Start the background keep-alive thread for the connection pool.

    Idempotent: does nothing if a keep-alive thread is already running.
    The thread is created as a daemon so it never blocks interpreter
    shutdown.
    """
    global _KEEPALIVE_THREAD
    if _KEEPALIVE_THREAD is None or not _KEEPALIVE_THREAD.is_alive():
        # Clear the stop flag in case a previous thread was stopped.
        _KEEPALIVE_STOP.clear()
        _KEEPALIVE_THREAD = threading.Thread(
            target=_keepalive_worker,
            daemon=True,
            name="db-keepalive",
        )
        _KEEPALIVE_THREAD.start()
        logger.info(f"Keep-alive thread started (interval: {KEEPALIVE_INTERVAL}s)")
def stop_keepalive():
    """Stop the keep-alive background thread.

    Signals the worker via the stop event and waits up to 5 seconds for
    it to exit. Safe to call when no thread is running.
    """
    global _KEEPALIVE_THREAD
    if _KEEPALIVE_THREAD and _KEEPALIVE_THREAD.is_alive():
        _KEEPALIVE_STOP.set()
        _KEEPALIVE_THREAD.join(timeout=5)
        logger.info("Keep-alive thread stopped")
def dispose_engine():
    """Dispose the database engine and all pooled connections.

    Call this during application shutdown (e.g. from the Gunicorn
    ``worker_exit`` hook) to cleanly release resources. The keep-alive
    thread is stopped first so it cannot ping a disposed engine. Safe
    to call when no engine has been created.
    """
    global _ENGINE
    stop_keepalive()
    if _ENGINE is None:
        return
    _ENGINE.dispose()
    logger.info("Database engine disposed, all connections closed")
    _ENGINE = None
# ============================================================