chore: reinitialize project with vite architecture
This commit is contained in:
289
scripts/deploy.sh
Normal file
289
scripts/deploy.sh
Normal file
@@ -0,0 +1,289 @@
|
||||
#!/usr/bin/env bash
|
||||
#
|
||||
# MES Dashboard Deployment Script
|
||||
# Usage: ./deploy.sh [--skip-db-check]
|
||||
#
|
||||
set -euo pipefail
|
||||
|
||||
# ============================================================
|
||||
# Configuration
|
||||
# ============================================================
|
||||
ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
|
||||
CONDA_ENV="mes-dashboard"
|
||||
PYTHON_VERSION="3.11"
|
||||
REDIS_CONF="/etc/redis/redis.conf"
|
||||
|
||||
# Colors for output
|
||||
RED='\033[0;31m'
|
||||
GREEN='\033[0;32m'
|
||||
YELLOW='\033[1;33m'
|
||||
BLUE='\033[0;34m'
|
||||
NC='\033[0m' # No Color
|
||||
|
||||
# ============================================================
|
||||
# Helper Functions
|
||||
# ============================================================
|
||||
# Print an informational message with a blue [INFO] tag.
log_info() {
    echo -e "${BLUE}[INFO]${NC} $1"
}
|
||||
|
||||
# Print a success message with a green [OK] tag.
log_success() {
    echo -e "${GREEN}[OK]${NC} $1"
}
|
||||
|
||||
# Print a warning message with a yellow [WARN] tag.
log_warn() {
    echo -e "${YELLOW}[WARN]${NC} $1"
}
|
||||
|
||||
# Print an error message with a red [ERROR] tag.
log_error() {
    echo -e "${RED}[ERROR]${NC} $1"
}
|
||||
|
||||
# Print a highlighted message requiring operator attention ([IMPORTANT] tag).
log_important() {
    echo -e "${YELLOW}[IMPORTANT]${NC} $1"
}
|
||||
|
||||
# ============================================================
|
||||
# Deployment Functions
|
||||
# ============================================================
|
||||
|
||||
# Verify that conda is installed and load its shell integration.
# Exits the whole script (status 1) when conda is missing; otherwise sources
# conda.sh so that `conda activate` works in this non-interactive shell.
check_prerequisites() {
    log_info "Checking prerequisites..."

    # Check conda (command -v is the portable "is it installed?" test)
    if ! command -v conda &> /dev/null; then
        log_error "Conda not found. Please install Miniconda/Anaconda first."
        log_info "Download from: https://docs.conda.io/en/latest/miniconda.html"
        exit 1
    fi
    log_success "Conda found"

    # Source conda so later `conda activate` calls succeed
    source "$(conda info --base)/etc/profile.d/conda.sh"
}
|
||||
|
||||
# Validate the local Redis installation.
# Hard requirements (exit 1 when missing): redis-server and redis-cli binaries.
# Soft checks (warn only): systemd unit enabled/running, PING connectivity.
check_redis() {
    log_info "Checking Redis installation..."

    # redis-server binary is mandatory for this deployment
    if ! command -v redis-server &> /dev/null; then
        log_error "Redis server not found."
        log_info "Install with: sudo apt install redis-server"
        exit 1
    fi
    log_success "Redis server found"

    # redis-cli is needed below for the connectivity probe
    if ! command -v redis-cli &> /dev/null; then
        log_error "Redis CLI not found."
        exit 1
    fi
    log_success "Redis CLI found"

    # Auto-start at boot is recommended but not required
    if systemctl is-enabled redis-server &>/dev/null; then
        log_success "Redis service is enabled"
    else
        log_warn "Redis service is not enabled for auto-start"
        log_info "Enable with: sudo systemctl enable redis-server"
    fi

    # A stopped service is non-fatal; start_server.sh can start it later
    if systemctl is-active redis-server &>/dev/null; then
        log_success "Redis service is running"
    else
        log_warn "Redis service is not running"
        log_info "Start with: sudo systemctl start redis-server"
    fi

    # End-to-end probe: PING answers PONG when Redis is reachable
    if redis-cli ping &>/dev/null; then
        log_success "Redis connectivity OK (PONG received)"
    else
        log_warn "Cannot connect to Redis (service may need to be started)"
    fi
}
|
||||
|
||||
# Create the project conda environment if absent, then activate it.
# Reads the CONDA_ENV / PYTHON_VERSION globals defined at the top of the file.
# Requires conda.sh to have been sourced (see check_prerequisites).
setup_conda_env() {
    log_info "Setting up conda environment..."

    # `conda env list` prints one env per line, name first — anchor on "name "
    if conda env list | grep -q "^${CONDA_ENV} "; then
        log_success "Environment '${CONDA_ENV}' already exists"
    else
        log_info "Creating conda environment '${CONDA_ENV}' with Python ${PYTHON_VERSION}..."
        conda create -n "$CONDA_ENV" python="$PYTHON_VERSION" -y
        log_success "Environment '${CONDA_ENV}' created"
    fi

    # Activate environment for the remaining deployment steps
    conda activate "$CONDA_ENV"
    log_success "Environment '${CONDA_ENV}' activated"
}
|
||||
|
||||
# Install Python dependencies from requirements.txt into the active env.
# Exits the script with status 1 when the requirements file is missing.
install_dependencies() {
    local reqs="${ROOT}/requirements.txt"

    log_info "Installing dependencies..."

    # Guard clause: nothing to install without a requirements file
    if [ ! -f "$reqs" ]; then
        log_error "requirements.txt not found"
        exit 1
    fi

    pip install -r "$reqs" --quiet
    log_success "Dependencies installed"
}
|
||||
|
||||
# Install npm dependencies and build the Vite bundle.
# Skipped (return 0) when frontend/package.json is absent or npm is not
# installed — the app then serves the Flask fallback templates instead.
install_frontend() {
    if [ ! -f "${ROOT}/frontend/package.json" ]; then
        log_info "No frontend package.json found, skipping Vite setup"
        return 0
    fi

    if ! command -v npm &> /dev/null; then
        log_warn "npm not found. Skip frontend build (Flask fallback mode only)."
        return 0
    fi

    log_info "Installing frontend dependencies..."
    npm --prefix "${ROOT}/frontend" install --no-audit --no-fund

    log_info "Building frontend assets (Vite)..."
    npm --prefix "${ROOT}/frontend" run build
    log_success "Frontend assets built"
}
|
||||
|
||||
# Ensure a .env configuration file exists.
# - If .env already exists, nothing to do.
# - Otherwise copy .env.example into place and pause so the operator can edit.
# Exits with status 1 when neither .env nor .env.example is present.
setup_env_file() {
    log_info "Setting up configuration..."

    if [ -f "${ROOT}/.env" ]; then
        log_success ".env file already exists"
        return 0
    fi

    if [ ! -f "${ROOT}/.env.example" ]; then
        log_error ".env.example not found"
        exit 1
    fi

    log_warn ".env file not found"
    log_info "Copying .env.example to .env"
    cp "${ROOT}/.env.example" "${ROOT}/.env"

    echo ""
    log_important "Please edit .env with your database credentials:"
    echo " nano ${ROOT}/.env"
    echo ""
    echo "Required settings:"
    echo " - DB_USER: Your database username"
    echo " - DB_PASSWORD: Your database password"
    echo " - SECRET_KEY: A secure random key for production"
    echo ""

    # -r: do not mangle backslashes (ShellCheck SC2162).
    # `|| true`: when stdin is not a TTY (CI / piped runs) read fails at EOF;
    # without this guard `set -e` would abort the whole deployment here.
    read -r -p "Press Enter after editing .env to continue..." || true
    echo ""
}
|
||||
|
||||
# Verify database connectivity using the application's own engine factory.
# $1 (optional, default "false"): when "true", skip the check entirely
#    (wired to the --skip-db-check CLI flag).
# A failed connection is a warning only; deployment continues.
verify_database() {
    local skip_db="${1:-false}"

    if [ "$skip_db" = "true" ]; then
        log_warn "Skipping database verification"
        return 0
    fi

    log_info "Verifying database connection..."

    # Load .env so DB_* variables are visible to the Python probe below
    if [ -f "${ROOT}/.env" ]; then
        set -a
        source "${ROOT}/.env"
        set +a
    fi

    export PYTHONPATH="${ROOT}/src:${PYTHONPATH:-}"

    # SELECT 1 FROM DUAL: Oracle-style liveness query through the app engine
    if python -c "
from sqlalchemy import text
from mes_dashboard.core.database import get_engine
engine = get_engine()
with engine.connect() as conn:
    conn.execute(text('SELECT 1 FROM DUAL'))
" 2>/dev/null; then
        log_success "Database connection successful"
    else
        log_warn "Database connection failed"
        log_info "You can still proceed, but the application may not work correctly"
        log_info "Please check your DB_* settings in .env"
    fi
}
|
||||
|
||||
# Print post-deployment instructions: start/stop commands, the access URL
# derived from GUNICORN_BIND in .env, and optional systemd install steps.
show_next_steps() {
    local bind port

    # Read GUNICORN_BIND (e.g. "0.0.0.0:8080") from .env. `|| true` keeps
    # `set -euo pipefail` from aborting when the file or key is missing.
    bind=$(grep -E "^GUNICORN_BIND=" "${ROOT}/.env" 2>/dev/null | head -1 | cut -d= -f2- || true)
    # Everything after the LAST ':' is the port (robust to extra colons);
    # fall back to 8080 when unset or non-numeric.
    port="${bind##*:}"
    case "$port" in
        ''|*[!0-9]*) port="8080" ;;
    esac

    echo ""
    echo "=========================================="
    echo " Deployment Complete!"
    echo "=========================================="
    echo ""
    echo "Start the server:"
    echo " ./scripts/start_server.sh start"
    echo ""
    echo "View logs:"
    echo " ./scripts/start_server.sh logs follow"
    echo ""
    echo "Check status:"
    echo " ./scripts/start_server.sh status"
    echo ""
    echo "Access URL:"
    echo " http://localhost:${port}"
    echo ""
    echo "Optional: install conda+systemd services"
    echo " sudo mkdir -p /etc/mes-dashboard"
    echo " sudo cp .env /etc/mes-dashboard/mes-dashboard.env"
    echo " sudo cp deploy/mes-dashboard.service /etc/systemd/system/"
    echo " sudo cp deploy/mes-dashboard-watchdog.service /etc/systemd/system/"
    echo " sudo systemctl daemon-reload"
    echo " sudo systemctl enable --now mes-dashboard mes-dashboard-watchdog"
    echo ""
    echo "=========================================="
}
|
||||
|
||||
# ============================================================
|
||||
# Main
|
||||
# ============================================================
|
||||
# Orchestrate the full deployment: parse CLI flags, then run each stage in
# dependency order. Recognized flags: --skip-db-check, --help/-h.
main() {
    local skip_db=false

    # Parse arguments (unknown flags are silently ignored)
    for arg in "$@"; do
        case "$arg" in
            --skip-db-check)
                skip_db=true
                ;;
            --help|-h)
                echo "Usage: $0 [--skip-db-check]"
                echo ""
                echo "Options:"
                echo " --skip-db-check Skip database connection verification"
                echo " --help, -h Show this help message"
                exit 0
                ;;
        esac
    done

    echo ""
    echo "=========================================="
    echo " MES Dashboard Deployment"
    echo "=========================================="
    echo ""

    # Stages: environment checks first, then installs, then configuration
    # and verification; failures inside each stage exit the script.
    check_prerequisites
    check_redis
    setup_conda_env
    install_dependencies
    install_frontend
    setup_env_file
    verify_database "$skip_db"
    show_next_steps
}
|
||||
|
||||
# Entry point: forward all CLI arguments to main.
main "$@"
|
||||
195
scripts/run_stress_tests.py
Normal file
195
scripts/run_stress_tests.py
Normal file
@@ -0,0 +1,195 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Stress Test Runner for MES Dashboard
|
||||
|
||||
Runs comprehensive stress tests including:
|
||||
- Backend API load tests
|
||||
- Frontend browser stress tests
|
||||
|
||||
Usage:
|
||||
python scripts/run_stress_tests.py [options]
|
||||
|
||||
Options:
|
||||
--backend-only Run only backend API tests
|
||||
--frontend-only Run only frontend Playwright tests
|
||||
--quick Quick test with minimal load (good for CI)
|
||||
--heavy Heavy load test (10x normal)
|
||||
--url URL Target URL (default: http://127.0.0.1:5000)
|
||||
--report FILE Save report to file
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import subprocess
|
||||
import sys
|
||||
import os
|
||||
import time
|
||||
from datetime import datetime
|
||||
|
||||
|
||||
def run_backend_tests(url: str, config: dict) -> dict:
    """Run the backend API load-test suite via pytest.

    Args:
        url: Base URL of the running dashboard, exported as STRESS_TEST_URL.
        config: Load settings; keys 'concurrent_users', 'requests_per_user'
            and 'timeout' are read with defaults 10 / 20 / 30.

    Returns:
        dict with keys 'name', 'passed' (returncode == 0), 'duration'
        (wall-clock seconds) and 'returncode'.
    """
    env = os.environ.copy()
    env['STRESS_TEST_URL'] = url
    env['STRESS_CONCURRENT_USERS'] = str(config.get('concurrent_users', 10))
    env['STRESS_REQUESTS_PER_USER'] = str(config.get('requests_per_user', 20))
    env['STRESS_TIMEOUT'] = str(config.get('timeout', 30))

    print("\n" + "=" * 60)
    print("Running Backend API Load Tests")
    print("=" * 60)
    print(f" URL: {url}")
    print(f" Concurrent Users: {config.get('concurrent_users', 10)}")
    print(f" Requests/User: {config.get('requests_per_user', 20)}")
    print()

    start_time = time.time()
    # sys.executable guarantees the same interpreter that launched this
    # script; a bare 'python' may resolve to a different env on PATH.
    result = subprocess.run(
        [sys.executable, '-m', 'pytest', 'tests/stress/test_api_load.py', '-v', '-s', '--tb=short'],
        env=env,
        capture_output=False,
        # Repository root (parent of the scripts/ directory)
        cwd=os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    )
    duration = time.time() - start_time

    return {
        'name': 'Backend API Load Tests',
        'passed': result.returncode == 0,
        'duration': duration,
        'returncode': result.returncode
    }
|
||||
|
||||
|
||||
def run_frontend_tests(url: str, config: dict) -> dict:
    """Run the frontend Playwright stress-test suite via pytest.

    Args:
        url: Base URL of the running dashboard, exported as STRESS_TEST_URL.
        config: Accepted for interface symmetry with run_backend_tests;
            not otherwise read here.

    Returns:
        dict with keys 'name', 'passed' (returncode == 0), 'duration'
        (wall-clock seconds) and 'returncode'.
    """
    env = os.environ.copy()
    env['STRESS_TEST_URL'] = url

    print("\n" + "=" * 60)
    print("Running Frontend Playwright Stress Tests")
    print("=" * 60)
    print(f" URL: {url}")
    print()

    start_time = time.time()
    # sys.executable guarantees the same interpreter that launched this
    # script; a bare 'python' may resolve to a different env on PATH.
    result = subprocess.run(
        [sys.executable, '-m', 'pytest', 'tests/stress/test_frontend_stress.py', '-v', '-s', '--tb=short'],
        env=env,
        capture_output=False,
        # Repository root (parent of the scripts/ directory)
        cwd=os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    )
    duration = time.time() - start_time

    return {
        'name': 'Frontend Playwright Stress Tests',
        'passed': result.returncode == 0,
        'duration': duration,
        'returncode': result.returncode
    }
|
||||
|
||||
|
||||
def generate_report(results: list, url: str, config: dict) -> str:
    """Build a plain-text summary of stress-test results.

    Args:
        results: dicts carrying 'name', 'passed' and 'duration' keys.
        url: Target URL the tests ran against.
        config: Load configuration, echoed verbatim into the header.

    Returns:
        The formatted multi-line report as a single string.
    """
    bar = "=" * 60
    rule = "-" * 60
    passed_total = sum(1 for r in results if r['passed'])
    elapsed_total = sum(r['duration'] for r in results)

    lines = [
        bar,
        "MES Dashboard Stress Test Report",
        bar,
        f"Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
        f"Target URL: {url}",
        f"Configuration: {config}",
        "",
        rule,
        "Test Results:",
        rule,
    ]

    # One PASSED/FAILED line plus duration per test suite
    for r in results:
        verdict = "PASSED" if r['passed'] else "FAILED"
        lines.append(f" {r['name']}: {verdict}")
        lines.append(f" Duration: {r['duration']:.2f}s")

    lines += [
        "",
        rule,
        "Summary:",
        rule,
        f" Total Tests: {len(results)}",
        f" Passed: {passed_total}",
        f" Failed: {len(results) - passed_total}",
        f" Total Duration: {elapsed_total:.2f}s",
        bar,
    ]

    return "\n".join(lines)
|
||||
|
||||
|
||||
def main():
    """CLI entry point: parse flags, run the selected stress-test suites,
    print (and optionally save) a report, and exit 0 only if all passed."""
    parser = argparse.ArgumentParser(description='Run MES Dashboard stress tests')
    parser.add_argument('--backend-only', action='store_true', help='Run only backend tests')
    parser.add_argument('--frontend-only', action='store_true', help='Run only frontend tests')
    parser.add_argument('--quick', action='store_true', help='Quick test with minimal load')
    parser.add_argument('--heavy', action='store_true', help='Heavy load test')
    parser.add_argument('--url', default='http://127.0.0.1:5000', help='Target URL')
    parser.add_argument('--report', help='Save report to file')

    args = parser.parse_args()

    # Configure load levels (--quick wins over --heavy when both are given)
    if args.quick:
        config = {
            'concurrent_users': 3,
            'requests_per_user': 5,
            'timeout': 30
        }
    elif args.heavy:
        config = {
            'concurrent_users': 50,
            'requests_per_user': 50,
            'timeout': 60
        }
    else:
        config = {
            'concurrent_users': 10,
            'requests_per_user': 20,
            'timeout': 30
        }

    print("\n" + "=" * 60)
    print("MES Dashboard Stress Test Suite")
    print("=" * 60)
    print(f"Target: {args.url}")
    print(f"Mode: {'Quick' if args.quick else 'Heavy' if args.heavy else 'Normal'}")
    print()

    results = []

    # Run tests based on flags; both suites run when neither *-only is set
    if not args.frontend_only:
        results.append(run_backend_tests(args.url, config))

    if not args.backend_only:
        results.append(run_frontend_tests(args.url, config))

    # Generate report
    report = generate_report(results, args.url, config)
    print("\n" + report)

    # Save report if requested
    if args.report:
        with open(args.report, 'w', encoding='utf-8') as f:
            f.write(report)
        print(f"\nReport saved to: {args.report}")

    # Exit non-zero if any suite failed (CI-friendly)
    all_passed = all(r['passed'] for r in results)
    sys.exit(0 if all_passed else 1)
|
||||
|
||||
|
||||
# Allow running as a script: `python scripts/run_stress_tests.py [options]`
if __name__ == '__main__':
    main()
|
||||
689
scripts/start_server.sh
Normal file
689
scripts/start_server.sh
Normal file
@@ -0,0 +1,689 @@
|
||||
#!/usr/bin/env bash
|
||||
#
|
||||
# MES Dashboard Server Management Script
|
||||
# Usage: ./start_server.sh [start|stop|restart|status|logs]
|
||||
#
|
||||
set -uo pipefail
|
||||
|
||||
# ============================================================
|
||||
# Configuration
|
||||
# ============================================================
|
||||
ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
|
||||
CONDA_ENV="mes-dashboard"
|
||||
APP_NAME="mes-dashboard"
|
||||
PID_FILE_DEFAULT="${ROOT}/tmp/gunicorn.pid"
|
||||
PID_FILE="${WATCHDOG_PID_FILE:-${PID_FILE_DEFAULT}}"
|
||||
LOG_DIR="${ROOT}/logs"
|
||||
ACCESS_LOG="${LOG_DIR}/access.log"
|
||||
ERROR_LOG="${LOG_DIR}/error.log"
|
||||
STARTUP_LOG="${LOG_DIR}/startup.log"
|
||||
DEFAULT_PORT="${GUNICORN_BIND:-0.0.0.0:8080}"
|
||||
PORT=$(echo "$DEFAULT_PORT" | cut -d: -f2)
|
||||
|
||||
# Redis configuration
|
||||
REDIS_ENABLED="${REDIS_ENABLED:-true}"
|
||||
|
||||
# Colors for output
|
||||
RED='\033[0;31m'
|
||||
GREEN='\033[0;32m'
|
||||
YELLOW='\033[1;33m'
|
||||
BLUE='\033[0;34m'
|
||||
NC='\033[0m' # No Color
|
||||
|
||||
# ============================================================
|
||||
# Helper Functions
|
||||
# ============================================================
|
||||
log_info() {
|
||||
echo -e "${BLUE}[INFO]${NC} $1"
|
||||
}
|
||||
|
||||
log_success() {
|
||||
echo -e "${GREEN}[OK]${NC} $1"
|
||||
}
|
||||
|
||||
log_warn() {
|
||||
echo -e "${YELLOW}[WARN]${NC} $1"
|
||||
}
|
||||
|
||||
log_error() {
|
||||
echo -e "${RED}[ERROR]${NC} $1"
|
||||
}
|
||||
|
||||
timestamp() {
|
||||
date '+%Y-%m-%d %H:%M:%S'
|
||||
}
|
||||
|
||||
# Resolve watchdog/runtime file locations, honouring any values already set
# in the environment, and export them for child processes (gunicorn,
# watchdog). Also refreshes PID_FILE from WATCHDOG_PID_FILE so both scripts
# agree on where the master PID lives.
resolve_runtime_paths() {
    WATCHDOG_RUNTIME_DIR="${WATCHDOG_RUNTIME_DIR:-${ROOT}/tmp}"
    WATCHDOG_RESTART_FLAG="${WATCHDOG_RESTART_FLAG:-${WATCHDOG_RUNTIME_DIR}/mes_dashboard_restart.flag}"
    WATCHDOG_PID_FILE="${WATCHDOG_PID_FILE:-${PID_FILE_DEFAULT}}"
    WATCHDOG_STATE_FILE="${WATCHDOG_STATE_FILE:-${WATCHDOG_RUNTIME_DIR}/mes_dashboard_restart_state.json}"
    PID_FILE="${WATCHDOG_PID_FILE}"
    export WATCHDOG_RUNTIME_DIR WATCHDOG_RESTART_FLAG WATCHDOG_PID_FILE WATCHDOG_STATE_FILE
}
|
||||
|
||||
# Load .env file if exists
|
||||
# Export every variable defined in the project's .env file, if present.
# set -a marks all assignments made while sourcing for export.
load_env() {
    [ -f "${ROOT}/.env" ] || return 0

    log_info "Loading environment from .env"
    set -a
    source "${ROOT}/.env"
    set +a
}
|
||||
|
||||
# ============================================================
|
||||
# Environment Check Functions
|
||||
# ============================================================
|
||||
# Ensure conda is installed, load its shell hooks, and verify that the
# project environment exists. Returns 1 (does not exit) on any failure.
check_conda() {
    if ! command -v conda &> /dev/null; then
        log_error "Conda not found. Please install Miniconda/Anaconda."
        return 1
    fi

    # Source conda so `conda activate` works in this non-interactive shell
    source "$(conda info --base)/etc/profile.d/conda.sh"

    # Check if environment exists (env name is the first column of the list)
    if ! conda env list | grep -q "^${CONDA_ENV} "; then
        log_error "Conda environment '${CONDA_ENV}' not found."
        log_info "Create it with: conda create -n ${CONDA_ENV} python=3.11"
        return 1
    fi

    log_success "Conda environment '${CONDA_ENV}' found"
    return 0
}
|
||||
|
||||
# Verify that the critical Python packages are importable inside CONDA_ENV.
# Returns 1 (with an install hint) when any are missing.
check_dependencies() {
    conda activate "$CONDA_ENV"

    local missing=()
    local pkg

    # Probe each critical package with a bare import (loop replaces four
    # copy-pasted one-liners; extend the list here to add requirements)
    for pkg in flask gunicorn pandas oracledb; do
        python -c "import ${pkg}" 2>/dev/null || missing+=("$pkg")
    done

    if [ ${#missing[@]} -gt 0 ]; then
        log_error "Missing dependencies: ${missing[*]}"
        log_info "Install with: pip install ${missing[*]}"
        return 1
    fi

    log_success "All dependencies installed"
    return 0
}
|
||||
|
||||
# Report whether the project .env exists. Always returns 0 — the file is
# recommended but optional.
check_env_file() {
    if [ -f "${ROOT}/.env" ]; then
        log_success ".env file found"
        return 0
    fi

    if [ -f "${ROOT}/.env.example" ]; then
        log_warn ".env file not found, but .env.example exists"
        log_info "Copy and configure: cp .env.example .env"
    else
        log_warn ".env file not found (optional but recommended)"
    fi
    return 0
}
|
||||
|
||||
# Ensure TCP port ${PORT} is free before starting gunicorn.
# Returns 1 (reporting the owning PID) when the port is already bound.
check_port() {
    local pid

    # Without lsof we cannot probe; warn instead of silently claiming the
    # port is free, and let gunicorn's own bind surface any conflict.
    if ! command -v lsof &> /dev/null; then
        log_warn "lsof not found; skipping port availability check"
        return 0
    fi

    if lsof -i ":${PORT}" -sTCP:LISTEN &>/dev/null; then
        pid=$(lsof -t -i ":${PORT}" -sTCP:LISTEN 2>/dev/null | head -1)
        log_error "Port ${PORT} is already in use (PID: ${pid})"
        log_info "Stop the existing process or change GUNICORN_BIND"
        return 1
    fi

    log_success "Port ${PORT} is available"
    return 0
}
|
||||
|
||||
# Probe the database through the application's engine factory.
# Always returns 0: a failed connection only warns, so the server can still
# start (useful when the database comes up later).
check_database() {
    conda activate "$CONDA_ENV"
    export PYTHONPATH="${ROOT}/src:${PYTHONPATH:-}"

    # SELECT 1 FROM DUAL: Oracle-style liveness query through the app engine
    if python -c "
from sqlalchemy import text
from mes_dashboard.core.database import get_engine
engine = get_engine()
with engine.connect() as conn:
    conn.execute(text('SELECT 1 FROM DUAL'))
" 2>/dev/null; then
        log_success "Database connection OK"
        return 0
    else
        log_warn "Database connection failed (service may still start)"
        return 0  # Non-fatal, allow startup
    fi
}
|
||||
|
||||
# Rebuild the Vite frontend bundle when any dist entry is missing or stale.
# Controlled by FRONTEND_BUILD_ON_START (default "true"). A failed build is
# non-fatal: the Flask templates fall back to inline scripts.
build_frontend_assets() {
    if [ "${FRONTEND_BUILD_ON_START:-true}" != "true" ]; then
        log_info "Skip frontend build (FRONTEND_BUILD_ON_START=${FRONTEND_BUILD_ON_START})"
        return 0
    fi

    if [ ! -f "${ROOT}/frontend/package.json" ]; then
        return 0
    fi

    if ! command -v npm &> /dev/null; then
        log_warn "npm not found, skip frontend build"
        return 0
    fi

    # Entry bundles Vite must have produced for each app page
    local required_entries=(
        "portal.js"
        "resource-status.js"
        "resource-history.js"
        "job-query.js"
        "excel-query.js"
        "tables.js"
    )
    local needs_build=false
    local newest_entry=""

    # A missing entry forces a rebuild; otherwise remember the most
    # recently modified dist file for the timestamp comparisons below.
    for entry in "${required_entries[@]}"; do
        local entry_path="${ROOT}/src/mes_dashboard/static/dist/${entry}"
        if [ ! -f "${entry_path}" ]; then
            needs_build=true
            break
        fi
        if [ -z "${newest_entry}" ] || [ "${entry_path}" -nt "${newest_entry}" ]; then
            newest_entry="${entry_path}"
        fi
    done

    # Rebuild when any frontend source file is newer than the newest bundle.
    # NOTE(review): comparing against the NEWEST bundle; a conservative
    # staleness check would compare against the OLDEST — confirm intent.
    if [ "$needs_build" = false ] && find "${ROOT}/frontend/src" -type f -newer "${newest_entry}" | grep -q .; then
        needs_build=true
    fi
    # ...or when the npm/vite configuration changed after the last build
    if [ "$needs_build" = false ] && ([ "${ROOT}/frontend/package.json" -nt "${newest_entry}" ] || [ "${ROOT}/frontend/vite.config.js" -nt "${newest_entry}" ]); then
        needs_build=true
    fi

    if [ "$needs_build" = false ]; then
        log_success "Frontend assets are up to date"
        return 0
    fi

    log_info "Building frontend assets with Vite..."
    if npm --prefix "${ROOT}/frontend" run build >/dev/null 2>&1; then
        log_success "Frontend assets built"
    else
        log_warn "Frontend build failed; continuing with fallback inline scripts"
    fi
}
|
||||
|
||||
# ============================================================
|
||||
# Redis Management Functions
|
||||
# ============================================================
|
||||
# Probe Redis when REDIS_ENABLED=true. A missing redis-cli is tolerated
# (features degrade gracefully). Returns 1 only when Redis is installed but
# does not answer PING — a hint to the caller that a start attempt may help.
check_redis() {
    if [ "$REDIS_ENABLED" != "true" ]; then
        log_info "Redis is disabled (REDIS_ENABLED=${REDIS_ENABLED})"
        return 0
    fi

    if ! command -v redis-cli &> /dev/null; then
        log_warn "Redis CLI not found (Redis features will be disabled)"
        return 0
    fi

    if redis-cli ping &>/dev/null; then
        log_success "Redis connection OK"
        return 0
    else
        log_warn "Redis not responding (will attempt to start)"
        return 1
    fi
}
|
||||
|
||||
# Best-effort Redis startup via systemd. Always returns 0 so a Redis
# failure never blocks the server (the app has a fallback mode).
start_redis() {
    if [ "$REDIS_ENABLED" != "true" ]; then
        return 0
    fi

    if ! command -v redis-cli &> /dev/null; then
        return 0
    fi

    # Check if Redis is already running
    if redis-cli ping &>/dev/null; then
        log_success "Redis is already running"
        return 0
    fi

    # Try to start Redis via systemctl (may require sudo credentials)
    if command -v systemctl &> /dev/null; then
        log_info "Starting Redis service..."
        if sudo systemctl start redis-server 2>/dev/null; then
            sleep 1  # give the service a moment before re-probing
            if redis-cli ping &>/dev/null; then
                log_success "Redis service started"
                return 0
            fi
        fi
    fi

    log_warn "Could not start Redis (fallback mode will be used)"
    return 0
}
|
||||
|
||||
# Best-effort Redis shutdown via systemd. Always returns 0; a failure to
# stop Redis only produces a warning.
stop_redis() {
    if [ "$REDIS_ENABLED" != "true" ]; then
        return 0
    fi

    if ! command -v redis-cli &> /dev/null; then
        return 0
    fi

    # Nothing to do when Redis is already down
    if ! redis-cli ping &>/dev/null; then
        log_info "Redis is not running"
        return 0
    fi

    # Stop Redis via systemctl (may require sudo credentials)
    if command -v systemctl &> /dev/null; then
        log_info "Stopping Redis service..."
        if sudo systemctl stop redis-server 2>/dev/null; then
            log_success "Redis service stopped"
            return 0
        fi
    fi

    log_warn "Could not stop Redis service"
    return 0
}
|
||||
|
||||
# Print a one-line colored Redis status row for the `status` subcommand.
redis_status() {
    if [ "$REDIS_ENABLED" != "true" ]; then
        echo -e " Redis: ${YELLOW}DISABLED${NC}"
        return 0
    fi

    if ! command -v redis-cli &> /dev/null; then
        echo -e " Redis: ${YELLOW}NOT INSTALLED${NC}"
        return 0
    fi

    if redis-cli ping &>/dev/null; then
        # used_memory_human, e.g. "1.05M"; tr strips the CR that redis-cli emits
        local info=$(redis-cli info memory 2>/dev/null | grep "used_memory_human" | cut -d: -f2 | tr -d '\r')
        echo -e " Redis: ${GREEN}RUNNING${NC} (Memory: ${info:-unknown})"
    else
        echo -e " Redis: ${RED}STOPPED${NC}"
    fi
}
|
||||
|
||||
# Run the full pre-start validation suite. Hard failures (conda, deps,
# port) abort with 1; env-file, database and Redis problems only warn.
run_all_checks() {
    log_info "Running environment checks..."
    echo ""

    check_conda || return 1
    check_dependencies || return 1
    check_env_file
    load_env               # must precede check_port: .env may set GUNICORN_BIND
    resolve_runtime_paths
    check_port || return 1
    check_database
    check_redis

    echo ""
    log_success "All checks passed"
    return 0
}
|
||||
|
||||
# ============================================================
|
||||
# Service Management Functions
|
||||
# ============================================================
|
||||
# Create the log/runtime directories required before launching gunicorn.
ensure_dirs() {
    mkdir -p \
        "${LOG_DIR}" \
        "${LOG_DIR}/archive" \
        "$(dirname "${PID_FILE}")" \
        "${WATCHDOG_RUNTIME_DIR}"
}
|
||||
|
||||
# Archive the current access/error logs with a timestamp, prune old
# archives (keep the 10 newest of each kind), and create fresh empty logs.
rotate_logs() {
    local ts
    ts=$(date '+%Y%m%d_%H%M%S')   # split from `local` so a date failure isn't masked

    if [ -f "$ACCESS_LOG" ] && [ -s "$ACCESS_LOG" ]; then
        mv "$ACCESS_LOG" "${LOG_DIR}/archive/access_${ts}.log"
        log_info "Archived access.log -> archive/access_${ts}.log"
    fi

    if [ -f "$ERROR_LOG" ] && [ -s "$ERROR_LOG" ]; then
        mv "$ERROR_LOG" "${LOG_DIR}/archive/error_${ts}.log"
        log_info "Archived error.log -> archive/error_${ts}.log"
    fi

    # Prune inside a subshell so the caller's working directory is never
    # changed (the original cd'd into the archive and back, leaking a cwd
    # change whenever the archive dir was missing).
    (
        cd "${LOG_DIR}/archive" 2>/dev/null || exit 0
        ls -t access_*.log 2>/dev/null | tail -n +11 | xargs -r rm -f
        ls -t error_*.log 2>/dev/null | tail -n +11 | xargs -r rm -f
    )

    # Create fresh log files
    touch "$ACCESS_LOG" "$ERROR_LOG"
}
|
||||
|
||||
# Resolve the PID of the running gunicorn master.
# Primary source: $PID_FILE (must name a live process). Fallback: the
# listener on ${PORT} found via lsof. Prints the PID and returns 0 on
# success; returns 1 when no live process can be found.
get_pid() {
    local pid

    if [ -f "$PID_FILE" ]; then
        pid=$(cat "$PID_FILE" 2>/dev/null)
        # kill -0 only tests liveness; it sends no signal
        if [ -n "$pid" ] && kill -0 "$pid" 2>/dev/null; then
            echo "$pid"
            return 0
        fi
    fi

    # PID file missing or stale: fall back to whoever owns the port
    pid=$(lsof -t -i ":${PORT}" -sTCP:LISTEN 2>/dev/null | head -1)
    if [ -n "$pid" ]; then
        echo "$pid"
        return 0
    fi

    return 1
}
|
||||
|
||||
# Succeed (0) exactly when a live server PID can be determined.
is_running() {
    get_pid >/dev/null 2>&1
}
|
||||
|
||||
# Start the gunicorn server.
# $1 (optional): -f / --foreground to run attached; otherwise daemonize.
# Returns 1 when already running, when pre-start checks fail, or when the
# daemonized server does not come up within ~1s.
do_start() {
    local foreground=false

    if [ "${1:-}" = "-f" ] || [ "${1:-}" = "--foreground" ]; then
        foreground=true
    fi

    load_env
    resolve_runtime_paths

    if is_running; then
        local pid=$(get_pid)
        log_warn "Server is already running (PID: ${pid})"
        return 1
    fi

    # Run checks (conda/deps/port are fatal; db/redis are warnings)
    run_all_checks || return 1

    echo ""

    # Start Redis if enabled (best-effort, never fatal)
    start_redis

    log_info "Starting ${APP_NAME} server..."

    ensure_dirs
    rotate_logs # Archive old logs before starting new session
    conda activate "$CONDA_ENV"
    load_env # Load environment variables from .env file
    resolve_runtime_paths
    # Re-evaluate port after loading .env (GUNICORN_BIND may have changed)
    PORT=$(echo "${GUNICORN_BIND:-0.0.0.0:8080}" | cut -d: -f2)
    export PYTHONPATH="${ROOT}/src:${PYTHONPATH:-}"
    cd "$ROOT"
    build_frontend_assets

    # Log startup
    echo "[$(timestamp)] Starting server" >> "$STARTUP_LOG"

    if [ "$foreground" = true ]; then
        log_info "Running in foreground mode (Ctrl+C to stop)"
        # exec replaces this shell with gunicorn; nothing after it runs
        exec gunicorn \
            --config gunicorn.conf.py \
            --pid "$PID_FILE" \
            --access-logfile "$ACCESS_LOG" \
            --error-logfile "$ERROR_LOG" \
            --capture-output \
            "mes_dashboard:create_app()"
    else
        gunicorn \
            --config gunicorn.conf.py \
            --pid "$PID_FILE" \
            --access-logfile "$ACCESS_LOG" \
            --error-logfile "$ERROR_LOG" \
            --capture-output \
            --daemon \
            "mes_dashboard:create_app()"

        # Brief grace period before verifying the daemon is up
        sleep 1

        if is_running; then
            local pid=$(get_pid)
            log_success "Server started successfully (PID: ${pid})"
            log_info "Access URL: http://localhost:${PORT}"
            log_info "Logs: ${LOG_DIR}/"
            echo "[$(timestamp)] Server started (PID: ${pid})" >> "$STARTUP_LOG"
        else
            log_error "Failed to start server"
            log_info "Check error log: ${ERROR_LOG}"
            echo "[$(timestamp)] Server start failed" >> "$STARTUP_LOG"
            return 1
        fi
    fi
}
|
||||
|
||||
# ------------------------------------------------------------
# do_stop: gracefully stop the server.
# Sends SIGTERM to the master, waits up to 10s, then force-kills
# any remaining gunicorn processes (including orphaned workers).
# Returns: 0 when stopped (or not running), 1 on failure.
# ------------------------------------------------------------
do_stop() {
    # Declared separately so $(get_pid) status is not masked (SC2155).
    local pid
    local count=0

    load_env
    resolve_runtime_paths

    if ! is_running; then
        log_warn "Server is not running"
        return 0
    fi

    pid=$(get_pid)
    log_info "Stopping server (PID: ${pid})..."

    # Graceful shutdown with SIGTERM
    kill -TERM "$pid" 2>/dev/null

    # Wait for graceful shutdown (max 10 seconds)
    while kill -0 "$pid" 2>/dev/null && [ "$count" -lt 10 ]; do
        sleep 1
        count=$((count + 1))
        echo -n "."
    done
    echo ""

    # Force kill if still running (including orphaned workers)
    if kill -0 "$pid" 2>/dev/null || [ -n "$(pgrep -f 'gunicorn.*mes_dashboard' 2>/dev/null)" ]; then
        log_warn "Graceful shutdown timeout, forcing..."
        # Kill all gunicorn processes related to mes_dashboard
        pkill -9 -f "gunicorn.*mes_dashboard" 2>/dev/null
        sleep 1
    fi

    # Cleanup PID file
    rm -f "$PID_FILE"

    # Verify all processes are stopped
    if [ -z "$(pgrep -f 'gunicorn.*mes_dashboard' 2>/dev/null)" ]; then
        log_success "Server stopped"
        echo "[$(timestamp)] Server stopped (PID: ${pid})" >> "$STARTUP_LOG"
    else
        log_error "Failed to stop server"
        return 1
    fi
}
|
||||
|
||||
# do_restart: stop, pause briefly, then start again.
# Extra arguments (e.g. -f) are forwarded to do_start.
do_restart() {
    log_info "Restarting ${APP_NAME} server..."
    do_stop
    sleep 1
    do_start "$@"
}
|
||||
|
||||
# ------------------------------------------------------------
# do_status: print a status report for the server and Redis.
# Shows PID/port/paths when running, plus CPU/MEM/uptime and the
# last few error-log lines.
# ------------------------------------------------------------
do_status() {
    # Declared separately so $(get_pid) status is not masked (SC2155).
    local pid

    # Load environment to get REDIS_ENABLED
    load_env
    resolve_runtime_paths

    echo ""
    echo "=========================================="
    echo " ${APP_NAME} Server Status"
    echo "=========================================="
    echo ""

    if is_running; then
        pid=$(get_pid)
        echo -e " Server: ${GREEN}RUNNING${NC}"
        echo " PID: ${pid}"
        echo " Port: ${PORT}"
        echo " URL: http://localhost:${PORT}"
        echo " PIDFile: ${PID_FILE}"
        echo " Watchdog Runtime: ${WATCHDOG_RUNTIME_DIR}"
    else
        echo -e " Server: ${RED}STOPPED${NC}"
    fi

    # Show Redis status
    redis_status

    if is_running; then
        echo ""

        # Show process info (ps flags are GNU-style; silenced elsewhere)
        pid=$(get_pid)
        if command -v ps &>/dev/null; then
            echo " Process Info:"
            ps -p "$pid" -o pid,ppid,%cpu,%mem,etime,cmd --no-headers 2>/dev/null | \
                awk '{printf " PID: %s | CPU: %s%% | MEM: %s%% | Uptime: %s\n", $1, $3, $4, $5}'
        fi

        # Show recent log entries
        if [ -f "$ERROR_LOG" ]; then
            echo ""
            echo " Recent Errors (last 3):"
            tail -3 "$ERROR_LOG" 2>/dev/null | sed 's/^/ /'
        fi
    else
        echo ""
        echo " Start with: $0 start"
    fi

    echo ""
    echo "=========================================="
}
|
||||
|
||||
# do_logs: display server logs.
#   $1 - log type: access | error | follow | all (default: all)
#   $2 - line count for access/error views (default: 50)
do_logs() {
    local which="${1:-all}"
    local n="${2:-50}"

    case "$which" in
        access)
            if [ -f "$ACCESS_LOG" ]; then
                log_info "Access log (last ${n} lines):"
                tail -n "$n" "$ACCESS_LOG"
            else
                log_warn "Access log not found"
            fi
            ;;
        error)
            if [ -f "$ERROR_LOG" ]; then
                log_info "Error log (last ${n} lines):"
                tail -n "$n" "$ERROR_LOG"
            else
                log_warn "Error log not found"
            fi
            ;;
        follow)
            log_info "Following logs (Ctrl+C to stop)..."
            tail -f "$ACCESS_LOG" "$ERROR_LOG" 2>/dev/null
            ;;
        *)
            # Default view: a short snapshot of both logs
            log_info "=== Error Log (last 20 lines) ==="
            tail -20 "$ERROR_LOG" 2>/dev/null || echo "(empty)"
            echo ""
            log_info "=== Access Log (last 20 lines) ==="
            tail -20 "$ACCESS_LOG" 2>/dev/null || echo "(empty)"
            ;;
    esac
}
|
||||
|
||||
# do_check: run the environment checks without starting anything.
do_check() {
    run_all_checks
}
|
||||
|
||||
# show_help: print CLI usage to stdout.
# A single here-doc replaces the long run of echo calls; $0 is
# expanded, everything else is literal.
show_help() {
    cat <<EOF

Usage: $0 <command> [options]

Commands:
 start [-f] Start the server (-f for foreground mode)
 stop Stop the server gracefully
 restart Restart the server
 status Show server and Redis status
 logs [type] View logs (access|error|follow|all)
 check Run environment checks only
 help Show this help message

Examples:
 $0 start # Start in background (with Redis)
 $0 start -f # Start in foreground
 $0 logs follow # Follow logs in real-time
 $0 logs error 100 # Show last 100 error log lines

Environment Variables:
 GUNICORN_BIND Bind address (default: 0.0.0.0:8080)
 GUNICORN_WORKERS Number of workers (default: 1)
 GUNICORN_THREADS Threads per worker (default: 4)
 REDIS_ENABLED Enable Redis cache (default: true)
 REDIS_URL Redis connection URL

EOF
}
|
||||
|
||||
# ============================================================
# Main
# ============================================================
# Dispatch the first CLI argument to the matching command
# handler; unknown commands print help and exit non-zero.
main() {
    local cmd="${1:-}"
    shift || true

    case "$cmd" in
        start)   do_start "$@" ;;
        stop)    do_stop ;;
        restart) do_restart "$@" ;;
        status)  do_status ;;
        logs)    do_logs "$@" ;;
        check)   do_check ;;
        help|--help|-h) show_help ;;
        "")
            # No command: default to starting the server
            # (kept for backward compatibility).
            do_start
            ;;
        *)
            log_error "Unknown command: ${cmd}"
            show_help
            exit 1
            ;;
    esac
}

main "$@"
|
||||
302
scripts/worker_watchdog.py
Normal file
302
scripts/worker_watchdog.py
Normal file
@@ -0,0 +1,302 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""Worker watchdog for MES Dashboard.
|
||||
|
||||
Monitors a restart flag file and signals Gunicorn master to gracefully
|
||||
reload workers when the flag is detected.
|
||||
|
||||
Usage:
|
||||
python scripts/worker_watchdog.py
|
||||
|
||||
The watchdog:
|
||||
    - Checks for the restart flag file (default: <project>/tmp/mes_dashboard_restart.flag) every WATCHDOG_CHECK_INTERVAL seconds (default: 5)
|
||||
- Sends SIGHUP to Gunicorn master process when flag is detected
|
||||
- Removes the flag file after signaling
|
||||
- Logs all restart events
|
||||
|
||||
Configuration via environment variables:
|
||||
- WATCHDOG_CHECK_INTERVAL: Check interval in seconds (default: 5)
|
||||
- WATCHDOG_RESTART_FLAG: Path to restart flag file
|
||||
- WATCHDOG_PID_FILE: Path to Gunicorn PID file
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import signal
|
||||
import sys
|
||||
import time
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
# Configure logging
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
||||
handlers=[
|
||||
logging.StreamHandler(sys.stdout),
|
||||
]
|
||||
)
|
||||
logger = logging.getLogger('mes_dashboard.watchdog')
|
||||
|
||||
# ============================================================
# Configuration
# ============================================================

def _env_int(name: str, default: int) -> int:
    """Read an integer environment variable.

    Falls back to ``default`` when the variable is unset or does not
    parse as an integer.
    """
    try:
        return int(os.getenv(name, str(default)))
    except (TypeError, ValueError):
        return default


# Seconds between restart-flag checks. Uses _env_int so a malformed
# env value falls back to the default instead of crashing at import.
CHECK_INTERVAL = _env_int('WATCHDOG_CHECK_INTERVAL', 5)

# Repository root (this file lives in <root>/scripts/).
PROJECT_ROOT = Path(__file__).resolve().parents[1]
# Directory holding runtime artifacts (flag, pid, state files).
DEFAULT_RUNTIME_DIR = Path(
    os.getenv('WATCHDOG_RUNTIME_DIR', str(PROJECT_ROOT / 'tmp'))
)
# Flag file whose presence triggers a worker reload.
RESTART_FLAG_PATH = os.getenv(
    'WATCHDOG_RESTART_FLAG',
    str(DEFAULT_RUNTIME_DIR / 'mes_dashboard_restart.flag')
)
# Gunicorn master PID file.
GUNICORN_PID_FILE = os.getenv(
    'WATCHDOG_PID_FILE',
    str(DEFAULT_RUNTIME_DIR / 'gunicorn.pid')
)
# JSON file persisting restart history for status queries.
RESTART_STATE_FILE = os.getenv(
    'WATCHDOG_STATE_FILE',
    str(DEFAULT_RUNTIME_DIR / 'mes_dashboard_restart_state.json')
)
# Maximum number of restart records kept in the history.
RESTART_HISTORY_MAX = _env_int('WATCHDOG_RESTART_HISTORY_MAX', 50)
|
||||
|
||||
|
||||
# ============================================================
|
||||
# Watchdog Implementation
|
||||
# ============================================================
|
||||
|
||||
def get_gunicorn_pid() -> int | None:
    """Return the Gunicorn master PID read from the PID file.

    Returns:
        The master process PID, or None when the PID file is missing,
        malformed, or refers to a process we cannot signal.
    """
    pid_file = Path(GUNICORN_PID_FILE)
    if not pid_file.exists():
        logger.warning(f"PID file not found: {GUNICORN_PID_FILE}")
        return None

    try:
        master_pid = int(pid_file.read_text().strip())
        os.kill(master_pid, 0)  # signal 0: existence check only
    except (ValueError, ProcessLookupError, PermissionError) as exc:
        logger.warning(f"Invalid or stale PID file: {exc}")
        return None
    return master_pid
|
||||
|
||||
|
||||
def read_restart_flag() -> dict | None:
    """Read and parse the restart flag file.

    Returns:
        The metadata dict stored in the flag, a minimal timestamped
        dict when the flag is empty or unreadable, or None when no
        flag file exists.
    """
    flag = Path(RESTART_FLAG_PATH)
    if not flag.exists():
        return None

    try:
        raw = flag.read_text().strip()
        if raw:
            return json.loads(raw)
    except (json.JSONDecodeError, IOError) as exc:
        logger.warning(f"Error reading restart flag: {exc}")
        return {"timestamp": datetime.now().isoformat(), "error": str(exc)}
    # Empty flag file: still a valid request, just without metadata.
    return {"timestamp": datetime.now().isoformat()}
|
||||
|
||||
|
||||
def remove_restart_flag() -> bool:
    """Delete the restart flag file.

    Returns:
        True when a flag file existed and was deleted, False otherwise.
    """
    flag = Path(RESTART_FLAG_PATH)
    if not flag.exists():
        return False
    try:
        flag.unlink()
    except IOError as exc:
        logger.error(f"Failed to remove restart flag: {exc}")
        return False
    return True
|
||||
|
||||
|
||||
def load_restart_state() -> dict:
    """Return the persisted restart state, or {} when missing/corrupt."""
    try:
        # FileNotFoundError is an OSError/IOError subclass, so a missing
        # file takes the same fallback as a corrupt one.
        return json.loads(Path(RESTART_STATE_FILE).read_text())
    except (json.JSONDecodeError, IOError):
        return {}
|
||||
|
||||
|
||||
def save_restart_state(
    requested_by: str | None = None,
    requested_at: str | None = None,
    requested_ip: str | None = None,
    completed_at: str | None = None,
    success: bool = True
) -> None:
    """Persist restart metadata for status queries.

    Records the entry both as ``last_restart`` and as the newest item
    of a history list capped at RESTART_HISTORY_MAX entries.

    Args:
        requested_by: Username who requested the restart.
        requested_at: ISO timestamp when restart was requested.
        requested_ip: IP address of requester.
        completed_at: ISO timestamp when restart was completed.
        success: Whether the restart was successful.
    """
    entry = {
        "requested_by": requested_by,
        "requested_at": requested_at,
        "requested_ip": requested_ip,
        "completed_at": completed_at,
        "success": success
    }

    previous = load_restart_state()
    history = previous.get("history", [])
    if not isinstance(history, list):
        history = []
    history.append(entry)
    # Trim oldest entries beyond the configured cap.
    if len(history) > RESTART_HISTORY_MAX:
        history = history[-RESTART_HISTORY_MAX:]

    payload = {
        "last_restart": entry,
        "history": history,
        "history_limit": RESTART_HISTORY_MAX,
    }

    target = Path(RESTART_STATE_FILE)
    try:
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_text(json.dumps(payload, indent=2))
    except IOError as exc:
        logger.error(f"Failed to save restart state: {exc}")
|
||||
|
||||
|
||||
def send_reload_signal(pid: int) -> bool:
    """Send SIGHUP to the Gunicorn master so it reloads its workers.

    Args:
        pid: PID of the Gunicorn master process.

    Returns:
        True when the signal was delivered, False otherwise.
    """
    try:
        os.kill(pid, signal.SIGHUP)
    except ProcessLookupError:
        logger.error(f"Process {pid} not found")
        return False
    except PermissionError:
        logger.error(f"Permission denied sending signal to PID {pid}")
        return False
    logger.info(f"Sent SIGHUP to Gunicorn master (PID: {pid})")
    return True
|
||||
|
||||
|
||||
def process_restart_request() -> bool:
    """Handle a pending restart request, if one exists.

    Reads the restart flag, signals the Gunicorn master when its PID
    can be resolved, removes the flag, and persists the outcome.
    The flag-removal/state-saving steps previously duplicated across
    both branches are unified on a single path.

    Returns:
        True if a restart request was processed, False if no restart
        was needed.
    """
    flag_data = read_restart_flag()
    if flag_data is None:
        return False

    logger.info(f"Restart flag detected: {flag_data}")

    # Resolve the Gunicorn master PID and attempt the reload.
    pid = get_gunicorn_pid()
    if pid is None:
        logger.error("Cannot restart: Gunicorn master PID not found")
        success = False
    else:
        success = send_reload_signal(pid)

    # Always remove the flag — even on failure — to prevent an
    # infinite restart loop on the next poll.
    remove_restart_flag()

    save_restart_state(
        requested_by=flag_data.get("user"),
        requested_at=flag_data.get("timestamp"),
        requested_ip=flag_data.get("ip"),
        completed_at=datetime.now().isoformat(),
        success=success
    )

    if success:
        logger.info(
            f"Worker restart completed - "
            f"Requested by: {flag_data.get('user', 'unknown')}, "
            f"IP: {flag_data.get('ip', 'unknown')}"
        )

    return True
|
||||
|
||||
|
||||
def run_watchdog() -> None:
    """Poll for restart requests forever (main watchdog loop)."""
    logger.info(
        f"Worker watchdog started - "
        f"Check interval: {CHECK_INTERVAL}s, "
        f"Flag path: {RESTART_FLAG_PATH}, "
        f"PID file: {GUNICORN_PID_FILE}"
    )

    while True:
        try:
            process_restart_request()
        except Exception as exc:  # never let one bad poll kill the loop
            logger.exception(f"Error in watchdog loop: {exc}")
        time.sleep(CHECK_INTERVAL)
|
||||
|
||||
|
||||
def main() -> None:
    """Entry point: run the watchdog until interrupted (Ctrl+C)."""
    try:
        run_watchdog()
    except KeyboardInterrupt:
        logger.info("Watchdog stopped by user")
        sys.exit(0)


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user