chore: reinitialize project with vite architecture

This commit is contained in:
beabigegg
2026-02-08 08:30:48 +08:00
commit b56e80381b
264 changed files with 75752 additions and 0 deletions

289
scripts/deploy.sh Normal file
View File

@@ -0,0 +1,289 @@
#!/usr/bin/env bash
#
# MES Dashboard Deployment Script
# Usage: ./deploy.sh [--skip-db-check]
#
set -euo pipefail

# ============================================================
# Configuration
# ============================================================
# Repository root: parent directory of the scripts/ folder holding this file.
ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
CONDA_ENV="mes-dashboard"
PYTHON_VERSION="3.11"
# NOTE(review): REDIS_CONF is defined but not referenced in this script —
# presumably kept for operator reference; confirm before removing.
REDIS_CONF="/etc/redis/redis.conf"

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color
# ============================================================
# Helper Functions
# ============================================================
log_info() {
    # Informational progress message (blue tag).
    echo -e "${BLUE}[INFO]${NC} $1"
}

log_success() {
    # Success confirmation (green tag).
    echo -e "${GREEN}[OK]${NC} $1"
}

log_warn() {
    # Non-fatal warning (yellow tag); execution continues.
    echo -e "${YELLOW}[WARN]${NC} $1"
}

log_error() {
    # Error message (red tag); the caller decides whether to exit.
    echo -e "${RED}[ERROR]${NC} $1"
}

log_important() {
    # Highlighted action item requiring operator attention.
    echo -e "${YELLOW}[IMPORTANT]${NC} $1"
}
# ============================================================
# Deployment Functions
# ============================================================
check_prerequisites() {
    # Verify conda is installed and make `conda activate` usable in this shell.
    log_info "Checking prerequisites..."

    if command -v conda &> /dev/null; then
        log_success "Conda found"
    else
        log_error "Conda not found. Please install Miniconda/Anaconda first."
        log_info "Download from: https://docs.conda.io/en/latest/miniconda.html"
        exit 1
    fi

    # Source conda's shell hooks so later `conda activate` calls work
    # in this non-interactive script.
    source "$(conda info --base)/etc/profile.d/conda.sh"
}
check_redis() {
    # Verify the Redis toolchain: binaries are hard requirements, while
    # service state and connectivity only produce warnings.
    log_info "Checking Redis installation..."

    if command -v redis-server &> /dev/null; then
        log_success "Redis server found"
    else
        log_error "Redis server not found."
        log_info "Install with: sudo apt install redis-server"
        exit 1
    fi

    if command -v redis-cli &> /dev/null; then
        log_success "Redis CLI found"
    else
        log_error "Redis CLI not found."
        exit 1
    fi

    # Auto-start configuration (warning only).
    if ! systemctl is-enabled redis-server &>/dev/null; then
        log_warn "Redis service is not enabled for auto-start"
        log_info "Enable with: sudo systemctl enable redis-server"
    else
        log_success "Redis service is enabled"
    fi

    # Current service state (warning only).
    if ! systemctl is-active redis-server &>/dev/null; then
        log_warn "Redis service is not running"
        log_info "Start with: sudo systemctl start redis-server"
    else
        log_success "Redis service is running"
    fi

    # Live connectivity probe.
    if ! redis-cli ping &>/dev/null; then
        log_warn "Cannot connect to Redis (service may need to be started)"
    else
        log_success "Redis connectivity OK (PONG received)"
    fi
}
setup_conda_env() {
    # Create the project's conda environment if absent, then activate it.
    log_info "Setting up conda environment..."

    # `conda env list` prints the environment name in the first column;
    # match it exactly to avoid prefix collisions.
    if ! conda env list | grep -q "^${CONDA_ENV} "; then
        log_info "Creating conda environment '${CONDA_ENV}' with Python ${PYTHON_VERSION}..."
        conda create -n "$CONDA_ENV" python="$PYTHON_VERSION" -y
        log_success "Environment '${CONDA_ENV}' created"
    else
        log_success "Environment '${CONDA_ENV}' already exists"
    fi

    conda activate "$CONDA_ENV"
    log_success "Environment '${CONDA_ENV}' activated"
}
install_dependencies() {
    # Install Python dependencies into the active environment.
    log_info "Installing dependencies..."

    local req="${ROOT}/requirements.txt"
    if [ ! -f "$req" ]; then
        log_error "requirements.txt not found"
        exit 1
    fi
    pip install -r "$req" --quiet
    log_success "Dependencies installed"
}
install_frontend() {
    # Install npm dependencies and build the Vite bundle, when applicable.
    local fe="${ROOT}/frontend"

    # No Vite project -> nothing to do (Flask serves fallback assets).
    if [ ! -f "${fe}/package.json" ]; then
        log_info "No frontend package.json found, skipping Vite setup"
        return 0
    fi
    # npm missing -> warn but don't fail the deployment.
    if ! command -v npm &> /dev/null; then
        log_warn "npm not found. Skip frontend build (Flask fallback mode only)."
        return 0
    fi

    log_info "Installing frontend dependencies..."
    npm --prefix "$fe" install --no-audit --no-fund
    log_info "Building frontend assets (Vite)..."
    npm --prefix "$fe" run build
    log_success "Frontend assets built"
}
setup_env_file() {
    # Ensure ${ROOT}/.env exists, seeding it from .env.example and then
    # pausing (interactive `read`) until the operator has edited it.
    log_info "Setting up configuration..."
    if [ -f "${ROOT}/.env" ]; then
        log_success ".env file already exists"
        return 0
    fi
    # A missing template is fatal: there is nothing to copy from.
    if [ ! -f "${ROOT}/.env.example" ]; then
        log_error ".env.example not found"
        exit 1
    fi
    log_warn ".env file not found"
    log_info "Copying .env.example to .env"
    cp "${ROOT}/.env.example" "${ROOT}/.env"
    echo ""
    log_important "Please edit .env with your database credentials:"
    echo " nano ${ROOT}/.env"
    echo ""
    echo "Required settings:"
    echo " - DB_USER: Your database username"
    echo " - DB_PASSWORD: Your database password"
    echo " - SECRET_KEY: A secure random key for production"
    echo ""
    # Blocks the script until the operator confirms the edit is done.
    read -p "Press Enter after editing .env to continue..."
    echo ""
}
verify_database() {
    # Smoke-test the database connection using the application's own
    # engine factory. $1: "true" skips the check (--skip-db-check).
    local skip_db="${1:-false}"
    if [ "$skip_db" = "true" ]; then
        log_warn "Skipping database verification"
        return 0
    fi
    log_info "Verifying database connection..."
    # Load .env so DB_* credentials are visible to the python child process.
    if [ -f "${ROOT}/.env" ]; then
        set -a
        source "${ROOT}/.env"
        set +a
    fi
    export PYTHONPATH="${ROOT}/src:${PYTHONPATH:-}"
    # The embedded script is left-aligned on purpose: python is
    # whitespace-sensitive inside the -c string. 'SELECT 1 FROM DUAL'
    # is the canonical Oracle liveness query.
    if python -c "
from sqlalchemy import text
from mes_dashboard.core.database import get_engine
engine = get_engine()
with engine.connect() as conn:
    conn.execute(text('SELECT 1 FROM DUAL'))
" 2>/dev/null; then
        log_success "Database connection successful"
    else
        # Non-fatal: deployment proceeds so the operator can fix .env later.
        log_warn "Database connection failed"
        log_info "You can still proceed, but the application may not work correctly"
        log_info "Please check your DB_* settings in .env"
    fi
}
show_next_steps() {
    # Final banner: operator runbook for starting and managing the service.
    echo ""
    echo "=========================================="
    echo " Deployment Complete!"
    echo "=========================================="
    echo ""
    echo "Start the server:"
    echo " ./scripts/start_server.sh start"
    echo ""
    echo "View logs:"
    echo " ./scripts/start_server.sh logs follow"
    echo ""
    echo "Check status:"
    echo " ./scripts/start_server.sh status"
    echo ""
    echo "Access URL:"
    # Extract the port half of GUNICORN_BIND=host:port from .env;
    # ${port:-8080} below covers an empty extraction as well.
    local port=$(grep -E "^GUNICORN_BIND=" "${ROOT}/.env" 2>/dev/null | cut -d: -f2 || echo "8080")
    echo " http://localhost:${port:-8080}"
    echo ""
    echo "Optional: install conda+systemd services"
    echo " sudo mkdir -p /etc/mes-dashboard"
    echo " sudo cp .env /etc/mes-dashboard/mes-dashboard.env"
    echo " sudo cp deploy/mes-dashboard.service /etc/systemd/system/"
    echo " sudo cp deploy/mes-dashboard-watchdog.service /etc/systemd/system/"
    echo " sudo systemctl daemon-reload"
    echo " sudo systemctl enable --now mes-dashboard mes-dashboard-watchdog"
    echo ""
    echo "=========================================="
}
# ============================================================
# Main
# ============================================================
main() {
    # Orchestrate the full deployment: argument parsing, environment
    # checks, conda/npm setup, configuration, and DB verification.
    local skip_db=false

    # Consume flags one at a time; unrecognized arguments are ignored,
    # matching the original behavior.
    while [ $# -gt 0 ]; do
        case "$1" in
            --skip-db-check)
                skip_db=true
                ;;
            --help|-h)
                echo "Usage: $0 [--skip-db-check]"
                echo ""
                echo "Options:"
                echo " --skip-db-check Skip database connection verification"
                echo " --help, -h Show this help message"
                exit 0
                ;;
        esac
        shift
    done

    echo ""
    echo "=========================================="
    echo " MES Dashboard Deployment"
    echo "=========================================="
    echo ""

    check_prerequisites
    check_redis
    setup_conda_env
    install_dependencies
    install_frontend
    setup_env_file
    verify_database "$skip_db"
    show_next_steps
}

main "$@"

195
scripts/run_stress_tests.py Normal file
View File

@@ -0,0 +1,195 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Stress Test Runner for MES Dashboard
Runs comprehensive stress tests including:
- Backend API load tests
- Frontend browser stress tests
Usage:
python scripts/run_stress_tests.py [options]
Options:
--backend-only Run only backend API tests
--frontend-only Run only frontend Playwright tests
--quick Quick test with minimal load (good for CI)
--heavy Heavy load test (10x normal)
--url URL Target URL (default: http://127.0.0.1:5000)
--report FILE Save report to file
"""
import argparse
import subprocess
import sys
import os
import time
from datetime import datetime
def run_backend_tests(url: str, config: dict) -> dict:
    """Run the backend API stress-test suite via pytest in a subprocess.

    Args:
        url: Base URL of the running MES Dashboard instance.
        config: Load settings; ``concurrent_users``, ``requests_per_user``
            and ``timeout`` are exported to the child process through
            STRESS_* environment variables.

    Returns:
        Summary dict with ``name``, ``passed``, ``duration`` (seconds)
        and the raw pytest ``returncode``.
    """
    env = os.environ.copy()
    env['STRESS_TEST_URL'] = url
    env['STRESS_CONCURRENT_USERS'] = str(config.get('concurrent_users', 10))
    env['STRESS_REQUESTS_PER_USER'] = str(config.get('requests_per_user', 20))
    env['STRESS_TIMEOUT'] = str(config.get('timeout', 30))

    print("\n" + "=" * 60)
    print("Running Backend API Load Tests")
    print("=" * 60)
    print(f" URL: {url}")
    print(f" Concurrent Users: {config.get('concurrent_users', 10)}")
    print(f" Requests/User: {config.get('requests_per_user', 20)}")
    print()

    start_time = time.time()
    # FIX: use sys.executable instead of bare 'python' so the tests run
    # in the same interpreter/conda environment as this script, not
    # whatever `python` happens to resolve to on PATH.
    result = subprocess.run(
        [sys.executable, '-m', 'pytest', 'tests/stress/test_api_load.py', '-v', '-s', '--tb=short'],
        env=env,
        capture_output=False,
        # Repo root: two levels up from this script file.
        cwd=os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    )
    duration = time.time() - start_time

    return {
        'name': 'Backend API Load Tests',
        'passed': result.returncode == 0,
        'duration': duration,
        'returncode': result.returncode
    }
def run_frontend_tests(url: str, config: dict) -> dict:
    """Run the frontend Playwright stress-test suite via pytest.

    Args:
        url: Base URL of the running MES Dashboard instance, exported to
            the child process as STRESS_TEST_URL.
        config: Accepted for signature symmetry with run_backend_tests;
            currently unused by the frontend suite.

    Returns:
        Summary dict with ``name``, ``passed``, ``duration`` (seconds)
        and the raw pytest ``returncode``.
    """
    env = os.environ.copy()
    env['STRESS_TEST_URL'] = url

    print("\n" + "=" * 60)
    print("Running Frontend Playwright Stress Tests")
    print("=" * 60)
    print(f" URL: {url}")
    print()

    start_time = time.time()
    # FIX: use sys.executable instead of bare 'python' so the tests run
    # in the same interpreter/conda environment as this script.
    result = subprocess.run(
        [sys.executable, '-m', 'pytest', 'tests/stress/test_frontend_stress.py', '-v', '-s', '--tb=short'],
        env=env,
        capture_output=False,
        # Repo root: two levels up from this script file.
        cwd=os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    )
    duration = time.time() - start_time

    return {
        'name': 'Frontend Playwright Stress Tests',
        'passed': result.returncode == 0,
        'duration': duration,
        'returncode': result.returncode
    }
def generate_report(results: list, url: str, config: dict) -> str:
    """Render a plain-text summary report for a stress-test run.

    Args:
        results: Per-suite dicts with ``name``, ``passed`` and ``duration``.
        url: Target URL the tests were run against.
        config: Load configuration used for the run (rendered verbatim).

    Returns:
        The formatted multi-line report as a single string.
    """
    bar = "=" * 60
    rule = "-" * 60
    passed_count = sum(1 for r in results if r['passed'])
    total_duration = sum(r['duration'] for r in results)

    lines = [
        bar,
        "MES Dashboard Stress Test Report",
        bar,
        f"Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
        f"Target URL: {url}",
        f"Configuration: {config}",
        "",
        rule,
        "Test Results:",
        rule,
    ]
    # One status line plus a duration line per suite.
    for result in results:
        status = "PASSED" if result['passed'] else "FAILED"
        lines.append(f" {result['name']}: {status}")
        lines.append(f" Duration: {result['duration']:.2f}s")
    lines += [
        "",
        rule,
        "Summary:",
        rule,
        f" Total Tests: {len(results)}",
        f" Passed: {passed_count}",
        f" Failed: {len(results) - passed_count}",
        f" Total Duration: {total_duration:.2f}s",
        bar,
    ]
    return "\n".join(lines)
def main():
    """CLI entry point: parse flags, run the selected suites, report, exit.

    Exit status is 0 when every executed suite passed, 1 otherwise.
    """
    parser = argparse.ArgumentParser(description='Run MES Dashboard stress tests')
    parser.add_argument('--backend-only', action='store_true', help='Run only backend tests')
    parser.add_argument('--frontend-only', action='store_true', help='Run only frontend tests')
    parser.add_argument('--quick', action='store_true', help='Quick test with minimal load')
    parser.add_argument('--heavy', action='store_true', help='Heavy load test')
    parser.add_argument('--url', default='http://127.0.0.1:5000', help='Target URL')
    parser.add_argument('--report', help='Save report to file')
    args = parser.parse_args()

    # Select the load profile; --quick takes precedence over --heavy,
    # matching the original if/elif ordering.
    if args.quick:
        mode = 'Quick'
        config = {'concurrent_users': 3, 'requests_per_user': 5, 'timeout': 30}
    elif args.heavy:
        mode = 'Heavy'
        config = {'concurrent_users': 50, 'requests_per_user': 50, 'timeout': 60}
    else:
        mode = 'Normal'
        config = {'concurrent_users': 10, 'requests_per_user': 20, 'timeout': 30}

    banner = "=" * 60
    print("\n" + banner)
    print("MES Dashboard Stress Test Suite")
    print(banner)
    print(f"Target: {args.url}")
    print(f"Mode: {mode}")
    print()

    results = []
    # --frontend-only suppresses backend tests and vice versa; with
    # neither flag, both suites run.
    if not args.frontend_only:
        results.append(run_backend_tests(args.url, config))
    if not args.backend_only:
        results.append(run_frontend_tests(args.url, config))

    report = generate_report(results, args.url, config)
    print("\n" + report)

    if args.report:
        with open(args.report, 'w', encoding='utf-8') as f:
            f.write(report)
        print(f"\nReport saved to: {args.report}")

    sys.exit(0 if all(r['passed'] for r in results) else 1)


if __name__ == '__main__':
    main()

689
scripts/start_server.sh Normal file
View File

@@ -0,0 +1,689 @@
#!/usr/bin/env bash
#
# MES Dashboard Server Management Script
# Usage: ./start_server.sh [start|stop|restart|status|logs]
#
# NOTE(review): -e is not set (only -u and pipefail), so individual
# command failures don't abort the script — presumably intentional for
# the check_* functions; confirm.
set -uo pipefail

# ============================================================
# Configuration
# ============================================================
# Repository root: parent directory of the scripts/ folder.
ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
CONDA_ENV="mes-dashboard"
APP_NAME="mes-dashboard"
PID_FILE_DEFAULT="${ROOT}/tmp/gunicorn.pid"
# May be re-resolved later by resolve_runtime_paths() after .env is loaded.
PID_FILE="${WATCHDOG_PID_FILE:-${PID_FILE_DEFAULT}}"
LOG_DIR="${ROOT}/logs"
ACCESS_LOG="${LOG_DIR}/access.log"
ERROR_LOG="${LOG_DIR}/error.log"
STARTUP_LOG="${LOG_DIR}/startup.log"
DEFAULT_PORT="${GUNICORN_BIND:-0.0.0.0:8080}"
# Port half of host:port; do_start re-derives this after loading .env.
PORT=$(echo "$DEFAULT_PORT" | cut -d: -f2)

# Redis configuration
REDIS_ENABLED="${REDIS_ENABLED:-true}"

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color
# ============================================================
# Helper Functions
# ============================================================
log_info() {
    # Informational progress message (blue tag).
    echo -e "${BLUE}[INFO]${NC} $1"
}

log_success() {
    # Success confirmation (green tag).
    echo -e "${GREEN}[OK]${NC} $1"
}

log_warn() {
    # Non-fatal warning (yellow tag); execution continues.
    echo -e "${YELLOW}[WARN]${NC} $1"
}

log_error() {
    # Error message (red tag); the caller decides whether to exit.
    echo -e "${RED}[ERROR]${NC} $1"
}

timestamp() {
    # Wall-clock stamp used for startup.log entries.
    date '+%Y-%m-%d %H:%M:%S'
}
resolve_runtime_paths() {
    # Resolve and export the watchdog runtime file locations, honouring
    # any values already present in the environment (e.g. from .env).
    # PID_FILE is kept in sync so this script and worker_watchdog.py
    # agree on where the gunicorn PID file lives.
    WATCHDOG_RUNTIME_DIR="${WATCHDOG_RUNTIME_DIR:-${ROOT}/tmp}"
    WATCHDOG_RESTART_FLAG="${WATCHDOG_RESTART_FLAG:-${WATCHDOG_RUNTIME_DIR}/mes_dashboard_restart.flag}"
    WATCHDOG_PID_FILE="${WATCHDOG_PID_FILE:-${PID_FILE_DEFAULT}}"
    WATCHDOG_STATE_FILE="${WATCHDOG_STATE_FILE:-${WATCHDOG_RUNTIME_DIR}/mes_dashboard_restart_state.json}"
    PID_FILE="${WATCHDOG_PID_FILE}"
    export WATCHDOG_RUNTIME_DIR WATCHDOG_RESTART_FLAG WATCHDOG_PID_FILE WATCHDOG_STATE_FILE
}
# Load .env file if exists
load_env() {
    # Export every variable defined in ${ROOT}/.env into this shell;
    # silently a no-op when the file is absent.
    local env_file="${ROOT}/.env"
    [ -f "$env_file" ] || return 0
    log_info "Loading environment from .env"
    set -a  # auto-export everything the file assigns
    source "$env_file"
    set +a
}
# ============================================================
# Environment Check Functions
# ============================================================
check_conda() {
    # Conda binary must be on PATH.
    if ! command -v conda &> /dev/null; then
        log_error "Conda not found. Please install Miniconda/Anaconda."
        return 1
    fi

    # Enable `conda activate` for this non-interactive shell.
    source "$(conda info --base)/etc/profile.d/conda.sh"

    # The dedicated environment must already exist (deploy.sh creates it).
    if conda env list | grep -q "^${CONDA_ENV} "; then
        log_success "Conda environment '${CONDA_ENV}' found"
        return 0
    fi
    log_error "Conda environment '${CONDA_ENV}' not found."
    log_info "Create it with: conda create -n ${CONDA_ENV} python=3.11"
    return 1
}
check_dependencies() {
    conda activate "$CONDA_ENV"

    # Probe each critical package with a bare import; collect failures.
    local missing=()
    local pkg
    for pkg in flask gunicorn pandas oracledb; do
        python -c "import ${pkg}" 2>/dev/null || missing+=("$pkg")
    done

    if [ ${#missing[@]} -gt 0 ]; then
        log_error "Missing dependencies: ${missing[*]}"
        log_info "Install with: pip install ${missing[*]}"
        return 1
    fi
    log_success "All dependencies installed"
    return 0
}
check_env_file() {
    # A missing .env is only a warning; built-in defaults still apply.
    if [ -f "${ROOT}/.env" ]; then
        log_success ".env file found"
        return 0
    fi
    if [ -f "${ROOT}/.env.example" ]; then
        log_warn ".env file not found, but .env.example exists"
        log_info "Copy and configure: cp .env.example .env"
    else
        log_warn ".env file not found (optional but recommended)"
    fi
    return 0
}
check_port() {
    # Anything already listening on ${PORT} blocks startup.
    if ! lsof -i ":${PORT}" -sTCP:LISTEN &>/dev/null; then
        log_success "Port ${PORT} is available"
        return 0
    fi
    local pid
    pid=$(lsof -t -i ":${PORT}" -sTCP:LISTEN 2>/dev/null | head -1)
    log_error "Port ${PORT} is already in use (PID: ${pid})"
    log_info "Stop the existing process or change GUNICORN_BIND"
    return 1
}
check_database() {
    # Smoke-test the database using the application's own engine factory.
    # Always returns 0: a failed connection is a warning, not a blocker.
    conda activate "$CONDA_ENV"
    export PYTHONPATH="${ROOT}/src:${PYTHONPATH:-}"
    # Embedded script is left-aligned on purpose: python is
    # whitespace-sensitive inside the -c string.
    if python -c "
from sqlalchemy import text
from mes_dashboard.core.database import get_engine
engine = get_engine()
with engine.connect() as conn:
    conn.execute(text('SELECT 1 FROM DUAL'))
" 2>/dev/null; then
        log_success "Database connection OK"
        return 0
    else
        log_warn "Database connection failed (service may still start)"
        return 0 # Non-fatal, allow startup
    fi
}
build_frontend_assets() {
    # Rebuild the Vite bundle when any required dist entry is missing, or
    # when frontend sources/config are newer than the newest entry file.
    # Build failures are non-fatal: the app falls back to inline scripts.
    if [ "${FRONTEND_BUILD_ON_START:-true}" != "true" ]; then
        log_info "Skip frontend build (FRONTEND_BUILD_ON_START=${FRONTEND_BUILD_ON_START})"
        return 0
    fi
    if [ ! -f "${ROOT}/frontend/package.json" ]; then
        return 0
    fi
    if ! command -v npm &> /dev/null; then
        log_warn "npm not found, skip frontend build"
        return 0
    fi
    # Bundles the backend expects under src/mes_dashboard/static/dist/.
    local required_entries=(
        "portal.js"
        "resource-status.js"
        "resource-history.js"
        "job-query.js"
        "excel-query.js"
        "tables.js"
    )
    local needs_build=false
    local newest_entry=""
    # Any missing entry forces a rebuild; otherwise track the newest
    # entry file as the staleness reference.
    for entry in "${required_entries[@]}"; do
        local entry_path="${ROOT}/src/mes_dashboard/static/dist/${entry}"
        if [ ! -f "${entry_path}" ]; then
            needs_build=true
            break
        fi
        if [ -z "${newest_entry}" ] || [ "${entry_path}" -nt "${newest_entry}" ]; then
            newest_entry="${entry_path}"
        fi
    done
    # Any frontend source file newer than the newest bundle -> rebuild.
    if [ "$needs_build" = false ] && find "${ROOT}/frontend/src" -type f -newer "${newest_entry}" | grep -q .; then
        needs_build=true
    fi
    # Same for changes to package.json or vite.config.js.
    if [ "$needs_build" = false ] && ([ "${ROOT}/frontend/package.json" -nt "${newest_entry}" ] || [ "${ROOT}/frontend/vite.config.js" -nt "${newest_entry}" ]); then
        needs_build=true
    fi
    if [ "$needs_build" = false ]; then
        log_success "Frontend assets are up to date"
        return 0
    fi
    log_info "Building frontend assets with Vite..."
    if npm --prefix "${ROOT}/frontend" run build >/dev/null 2>&1; then
        log_success "Frontend assets built"
    else
        log_warn "Frontend build failed; continuing with fallback inline scripts"
    fi
}
# ============================================================
# Redis Management Functions
# ============================================================
check_redis() {
    # Honour the feature flag first.
    if [ "$REDIS_ENABLED" != "true" ]; then
        log_info "Redis is disabled (REDIS_ENABLED=${REDIS_ENABLED})"
        return 0
    fi
    # A missing CLI is tolerated: the app degrades to fallback mode.
    if ! command -v redis-cli &> /dev/null; then
        log_warn "Redis CLI not found (Redis features will be disabled)"
        return 0
    fi
    if redis-cli ping &>/dev/null; then
        log_success "Redis connection OK"
        return 0
    fi
    # Non-zero tells callers a start attempt may be worthwhile.
    log_warn "Redis not responding (will attempt to start)"
    return 1
}
start_redis() {
    # Best-effort Redis startup; never fails the caller.
    [ "$REDIS_ENABLED" = "true" ] || return 0
    command -v redis-cli &> /dev/null || return 0

    if redis-cli ping &>/dev/null; then
        log_success "Redis is already running"
        return 0
    fi

    # Ask systemd to bring the service up, then re-probe.
    if command -v systemctl &> /dev/null; then
        log_info "Starting Redis service..."
        if sudo systemctl start redis-server 2>/dev/null; then
            sleep 1
            if redis-cli ping &>/dev/null; then
                log_success "Redis service started"
                return 0
            fi
        fi
    fi

    log_warn "Could not start Redis (fallback mode will be used)"
    return 0
}
stop_redis() {
    # Best-effort Redis shutdown; no-ops when disabled, absent, or down.
    [ "$REDIS_ENABLED" = "true" ] || return 0
    command -v redis-cli &> /dev/null || return 0

    if ! redis-cli ping &>/dev/null; then
        log_info "Redis is not running"
        return 0
    fi

    if command -v systemctl &> /dev/null; then
        log_info "Stopping Redis service..."
        if sudo systemctl stop redis-server 2>/dev/null; then
            log_success "Redis service stopped"
            return 0
        fi
    fi

    log_warn "Could not stop Redis service"
    return 0
}
redis_status() {
    # Emit a single Redis status line for the do_status report.
    if [ "$REDIS_ENABLED" != "true" ]; then
        echo -e " Redis: ${YELLOW}DISABLED${NC}"
        return 0
    fi
    if ! command -v redis-cli &> /dev/null; then
        echo -e " Redis: ${YELLOW}NOT INSTALLED${NC}"
        return 0
    fi
    if ! redis-cli ping &>/dev/null; then
        echo -e " Redis: ${RED}STOPPED${NC}"
        return 0
    fi
    # Running: append the human-readable memory usage from INFO.
    local mem
    mem=$(redis-cli info memory 2>/dev/null | grep "used_memory_human" | cut -d: -f2 | tr -d '\r')
    echo -e " Redis: ${GREEN}RUNNING${NC} (Memory: ${mem:-unknown})"
}
run_all_checks() {
    # Run every pre-flight check; returns non-zero on the first hard
    # failure (conda, dependencies, port). DB and Redis are advisory.
    log_info "Running environment checks..."
    echo ""
    check_conda || return 1
    check_dependencies || return 1
    check_env_file
    load_env
    resolve_runtime_paths
    # FIX: re-derive PORT after load_env so check_port validates the
    # port actually configured via GUNICORN_BIND in .env, not the value
    # captured at script start (do_start only re-derived it AFTER these
    # checks had already run).
    PORT=$(echo "${GUNICORN_BIND:-0.0.0.0:8080}" | cut -d: -f2)
    check_port || return 1
    check_database
    check_redis
    echo ""
    log_success "All checks passed"
    return 0
}
# ============================================================
# Service Management Functions
# ============================================================
ensure_dirs() {
    # Create log and runtime directories needed before gunicorn starts.
    mkdir -p "${LOG_DIR}"
    mkdir -p "${LOG_DIR}/archive"
    mkdir -p "$(dirname "${PID_FILE}")"
    # FIX: default guards against `set -u` aborting the script if
    # ensure_dirs is ever called before resolve_runtime_paths has
    # exported WATCHDOG_RUNTIME_DIR.
    mkdir -p "${WATCHDOG_RUNTIME_DIR:-${ROOT}/tmp}"
}
rotate_logs() {
    # Archive the previous session's logs with a timestamp, prune old
    # archives (keep the 10 newest of each kind), and create fresh
    # empty log files for the new session.
    local ts
    ts=$(date '+%Y%m%d_%H%M%S')
    if [ -f "$ACCESS_LOG" ] && [ -s "$ACCESS_LOG" ]; then
        mv "$ACCESS_LOG" "${LOG_DIR}/archive/access_${ts}.log"
        log_info "Archived access.log -> archive/access_${ts}.log"
    fi
    if [ -f "$ERROR_LOG" ] && [ -s "$ERROR_LOG" ]; then
        mv "$ERROR_LOG" "${LOG_DIR}/archive/error_${ts}.log"
        log_info "Archived error.log -> archive/error_${ts}.log"
    fi
    # FIX: prune in a subshell so the caller's working directory is left
    # untouched, and prune each log kind independently — with `pipefail`
    # the original `&&` chain skipped error_*.log cleanup whenever no
    # access_*.log archives existed (ls exits non-zero, failing the
    # pipeline and short-circuiting the chain).
    (
        cd "${LOG_DIR}/archive" 2>/dev/null || exit 0
        ls -t access_*.log 2>/dev/null | tail -n +11 | xargs -r rm -f
        ls -t error_*.log 2>/dev/null | tail -n +11 | xargs -r rm -f
    )
    # Create fresh log files
    touch "$ACCESS_LOG" "$ERROR_LOG"
}
get_pid() {
    # Resolve the running server's PID: prefer the PID file (validated
    # with a 0-signal probe), falling back to whoever is listening on
    # ${PORT}. Prints the PID and returns 0, or returns 1 if none found.
    if [ -f "$PID_FILE" ]; then
        local pid=$(cat "$PID_FILE" 2>/dev/null)
        if [ -n "$pid" ] && kill -0 "$pid" 2>/dev/null; then
            echo "$pid"
            return 0
        fi
    fi
    # Fallback: find by port
    local pid=$(lsof -t -i ":${PORT}" -sTCP:LISTEN 2>/dev/null | head -1)
    if [ -n "$pid" ]; then
        echo "$pid"
        return 0
    fi
    return 1
}
is_running() {
    # Succeeds iff a live server PID can be resolved; output discarded.
    get_pid > /dev/null 2>&1
}
do_start() {
    # Start gunicorn: daemonized by default, foreground with -f/--foreground.
    local foreground=false
    if [ "${1:-}" = "-f" ] || [ "${1:-}" = "--foreground" ]; then
        foreground=true
    fi
    load_env
    resolve_runtime_paths
    if is_running; then
        local pid=$(get_pid)
        log_warn "Server is already running (PID: ${pid})"
        return 1
    fi
    # Run checks
    run_all_checks || return 1
    echo ""
    # Start Redis if enabled
    start_redis
    log_info "Starting ${APP_NAME} server..."
    ensure_dirs
    rotate_logs # Archive old logs before starting new session
    conda activate "$CONDA_ENV"
    load_env # Load environment variables from .env file
    resolve_runtime_paths
    # Re-evaluate port after loading .env (GUNICORN_BIND may have changed)
    PORT=$(echo "${GUNICORN_BIND:-0.0.0.0:8080}" | cut -d: -f2)
    export PYTHONPATH="${ROOT}/src:${PYTHONPATH:-}"
    # gunicorn.conf.py is referenced relative to the repo root.
    cd "$ROOT"
    build_frontend_assets
    # Log startup
    echo "[$(timestamp)] Starting server" >> "$STARTUP_LOG"
    if [ "$foreground" = true ]; then
        log_info "Running in foreground mode (Ctrl+C to stop)"
        # exec replaces this shell so signals (Ctrl+C) reach gunicorn directly.
        exec gunicorn \
            --config gunicorn.conf.py \
            --pid "$PID_FILE" \
            --access-logfile "$ACCESS_LOG" \
            --error-logfile "$ERROR_LOG" \
            --capture-output \
            "mes_dashboard:create_app()"
    else
        gunicorn \
            --config gunicorn.conf.py \
            --pid "$PID_FILE" \
            --access-logfile "$ACCESS_LOG" \
            --error-logfile "$ERROR_LOG" \
            --capture-output \
            --daemon \
            "mes_dashboard:create_app()"
        # Give the daemon a moment to write its PID file before verifying.
        sleep 1
        if is_running; then
            local pid=$(get_pid)
            log_success "Server started successfully (PID: ${pid})"
            log_info "Access URL: http://localhost:${PORT}"
            log_info "Logs: ${LOG_DIR}/"
            echo "[$(timestamp)] Server started (PID: ${pid})" >> "$STARTUP_LOG"
        else
            log_error "Failed to start server"
            log_info "Check error log: ${ERROR_LOG}"
            echo "[$(timestamp)] Server start failed" >> "$STARTUP_LOG"
            return 1
        fi
    fi
}
do_stop() {
    # Stop the gunicorn master gracefully (SIGTERM, up to 10s), then
    # escalate to SIGKILL for all related processes if needed.
    load_env
    resolve_runtime_paths
    if ! is_running; then
        log_warn "Server is not running"
        return 0
    fi
    local pid
    pid=$(get_pid)
    log_info "Stopping server (PID: ${pid})..."
    # FIX: removed the unused `all_pids` local (a pgrep whose result was
    # never read); the force-kill path below re-queries pgrep itself.
    # Graceful shutdown with SIGTERM
    kill -TERM "$pid" 2>/dev/null
    # Wait for graceful shutdown (max 10 seconds)
    local count=0
    while kill -0 "$pid" 2>/dev/null && [ $count -lt 10 ]; do
        sleep 1
        count=$((count + 1))
        echo -n "."
    done
    echo ""
    # Force kill if still running (including orphaned workers)
    if kill -0 "$pid" 2>/dev/null || [ -n "$(pgrep -f 'gunicorn.*mes_dashboard' 2>/dev/null)" ]; then
        log_warn "Graceful shutdown timeout, forcing..."
        # Kill all gunicorn processes related to mes_dashboard
        pkill -9 -f "gunicorn.*mes_dashboard" 2>/dev/null
        sleep 1
    fi
    # Cleanup PID file
    rm -f "$PID_FILE"
    # Verify every related process is gone before declaring success.
    if [ -z "$(pgrep -f 'gunicorn.*mes_dashboard' 2>/dev/null)" ]; then
        log_success "Server stopped"
        echo "[$(timestamp)] Server stopped (PID: ${pid})" >> "$STARTUP_LOG"
    else
        log_error "Failed to stop server"
        return 1
    fi
}
do_restart() {
    # Full stop/start cycle; extra arguments (e.g. -f) are forwarded to
    # do_start.
    log_info "Restarting ${APP_NAME} server..."
    do_stop
    sleep 1
    do_start "$@"
}
do_status() {
    # Print a human-readable status report: server state, Redis state,
    # process resource usage, and the most recent error-log entries.
    # Load environment to get REDIS_ENABLED
    load_env
    resolve_runtime_paths
    echo ""
    echo "=========================================="
    echo " ${APP_NAME} Server Status"
    echo "=========================================="
    echo ""
    if is_running; then
        local pid=$(get_pid)
        echo -e " Server: ${GREEN}RUNNING${NC}"
        echo " PID: ${pid}"
        echo " Port: ${PORT}"
        echo " URL: http://localhost:${PORT}"
        echo " PIDFile: ${PID_FILE}"
        echo " Watchdog Runtime: ${WATCHDOG_RUNTIME_DIR}"
    else
        echo -e " Server: ${RED}STOPPED${NC}"
    fi
    # Show Redis status
    redis_status
    if is_running; then
        echo ""
        # Show process info
        local pid=$(get_pid)
        if command -v ps &>/dev/null; then
            echo " Process Info:"
            # Condense the `ps` columns into one summary line.
            ps -p "$pid" -o pid,ppid,%cpu,%mem,etime,cmd --no-headers 2>/dev/null | \
                awk '{printf " PID: %s | CPU: %s%% | MEM: %s%% | Uptime: %s\n", $1, $3, $4, $5}'
        fi
        # Show recent log entries
        if [ -f "$ERROR_LOG" ]; then
            echo ""
            echo " Recent Errors (last 3):"
            tail -3 "$ERROR_LOG" 2>/dev/null | sed 's/^/ /'
        fi
    else
        echo ""
        echo " Start with: $0 start"
    fi
    echo ""
    echo "=========================================="
}
do_logs() {
    # Show or follow server logs. $1 selects the log (access|error|
    # follow|anything-else=summary), $2 is the tail line count.
    local log_type="${1:-all}"
    local lines="${2:-50}"
    case "$log_type" in
        access|error)
            # Shared branch: pick the file/label for the requested log.
            local file="$ACCESS_LOG"
            local label="Access"
            if [ "$log_type" = "error" ]; then
                file="$ERROR_LOG"
                label="Error"
            fi
            if [ -f "$file" ]; then
                log_info "${label} log (last ${lines} lines):"
                tail -n "$lines" "$file"
            else
                log_warn "${label} log not found"
            fi
            ;;
        follow)
            log_info "Following logs (Ctrl+C to stop)..."
            tail -f "$ACCESS_LOG" "$ERROR_LOG" 2>/dev/null
            ;;
        *)
            # Default: short summary of both logs.
            log_info "=== Error Log (last 20 lines) ==="
            tail -20 "$ERROR_LOG" 2>/dev/null || echo "(empty)"
            echo ""
            log_info "=== Access Log (last 20 lines) ==="
            tail -20 "$ACCESS_LOG" 2>/dev/null || echo "(empty)"
            ;;
    esac
}
do_check() {
    # Run the pre-flight environment checks without starting anything.
    run_all_checks
}
show_help() {
    # Usage text: commands, examples, and supported environment variables.
    echo ""
    echo "Usage: $0 <command> [options]"
    echo ""
    echo "Commands:"
    echo " start [-f] Start the server (-f for foreground mode)"
    echo " stop Stop the server gracefully"
    echo " restart Restart the server"
    echo " status Show server and Redis status"
    echo " logs [type] View logs (access|error|follow|all)"
    echo " check Run environment checks only"
    echo " help Show this help message"
    echo ""
    echo "Examples:"
    echo " $0 start # Start in background (with Redis)"
    echo " $0 start -f # Start in foreground"
    echo " $0 logs follow # Follow logs in real-time"
    echo " $0 logs error 100 # Show last 100 error log lines"
    echo ""
    echo "Environment Variables:"
    echo " GUNICORN_BIND Bind address (default: 0.0.0.0:8080)"
    echo " GUNICORN_WORKERS Number of workers (default: 1)"
    echo " GUNICORN_THREADS Threads per worker (default: 4)"
    echo " REDIS_ENABLED Enable Redis cache (default: true)"
    echo " REDIS_URL Redis connection URL"
    echo ""
}
# ============================================================
# Main
# ============================================================
main() {
    # Dispatch the first CLI argument to the matching action; remaining
    # arguments are forwarded to the handler.
    local command="${1:-}"
    shift || true
    case "$command" in
        start)
            do_start "$@"
            ;;
        stop)
            do_stop
            ;;
        restart)
            do_restart "$@"
            ;;
        status)
            do_status
            ;;
        logs)
            do_logs "$@"
            ;;
        check)
            do_check
            ;;
        help|--help|-h)
            show_help
            ;;
        "")
            # Default (no command): plain start. NOTE(review): the
            # original comment claimed "foreground", but do_start without
            # -f daemonizes — confirm the intended default.
            do_start
            ;;
        *)
            log_error "Unknown command: ${command}"
            show_help
            exit 1
            ;;
    esac
}
main "$@"

302
scripts/worker_watchdog.py Normal file
View File

@@ -0,0 +1,302 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Worker watchdog for MES Dashboard.
Monitors a restart flag file and signals Gunicorn master to gracefully
reload workers when the flag is detected.
Usage:
python scripts/worker_watchdog.py
The watchdog:
- Checks for /tmp/mes_dashboard_restart.flag every 5 seconds
- Sends SIGHUP to Gunicorn master process when flag is detected
- Removes the flag file after signaling
- Logs all restart events
Configuration via environment variables:
- WATCHDOG_CHECK_INTERVAL: Check interval in seconds (default: 5)
- WATCHDOG_RESTART_FLAG: Path to restart flag file
- WATCHDOG_PID_FILE: Path to Gunicorn PID file
"""
from __future__ import annotations
import json
import logging
import os
import signal
import sys
import time
from datetime import datetime
from pathlib import Path
# Configure logging.
# The watchdog is a long-lived daemon (typically run under systemd), so
# log to stdout and let the supervisor capture/rotate the output.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.StreamHandler(sys.stdout),
    ]
)
logger = logging.getLogger('mes_dashboard.watchdog')
# ============================================================
# Configuration
# ============================================================
CHECK_INTERVAL = int(os.getenv('WATCHDOG_CHECK_INTERVAL', '5'))
def _env_int(name: str, default: int) -> int:
try:
return int(os.getenv(name, str(default)))
except (TypeError, ValueError):
return default
PROJECT_ROOT = Path(__file__).resolve().parents[1]
DEFAULT_RUNTIME_DIR = Path(
os.getenv('WATCHDOG_RUNTIME_DIR', str(PROJECT_ROOT / 'tmp'))
)
RESTART_FLAG_PATH = os.getenv(
'WATCHDOG_RESTART_FLAG',
str(DEFAULT_RUNTIME_DIR / 'mes_dashboard_restart.flag')
)
GUNICORN_PID_FILE = os.getenv(
'WATCHDOG_PID_FILE',
str(DEFAULT_RUNTIME_DIR / 'gunicorn.pid')
)
RESTART_STATE_FILE = os.getenv(
'WATCHDOG_STATE_FILE',
str(DEFAULT_RUNTIME_DIR / 'mes_dashboard_restart_state.json')
)
RESTART_HISTORY_MAX = _env_int('WATCHDOG_RESTART_HISTORY_MAX', 50)
# ============================================================
# Watchdog Implementation
# ============================================================
def get_gunicorn_pid() -> int | None:
    """Return the Gunicorn master PID from the PID file, or None.

    A PID is returned only when the file exists, its content parses as
    an integer, and the process can be signalled (0-signal probe).
    """
    pid_path = Path(GUNICORN_PID_FILE)
    if not pid_path.exists():
        logger.warning(f"PID file not found: {GUNICORN_PID_FILE}")
        return None
    try:
        master_pid = int(pid_path.read_text().strip())
        os.kill(master_pid, 0)  # probe only: raises if the process is gone
    except (ValueError, ProcessLookupError, PermissionError) as e:
        logger.warning(f"Invalid or stale PID file: {e}")
        return None
    return master_pid
def read_restart_flag() -> dict | None:
    """Parse the restart flag file into a metadata dict.

    Returns None when no flag exists. An empty flag yields a dict with
    only the current timestamp. A corrupt/unreadable flag is still
    reported (with the error embedded) so the restart proceeds.
    """
    flag_path = Path(RESTART_FLAG_PATH)
    if not flag_path.exists():
        return None
    try:
        payload = flag_path.read_text().strip()
        if not payload:
            return {"timestamp": datetime.now().isoformat()}
        return json.loads(payload)
    except (json.JSONDecodeError, IOError) as e:
        logger.warning(f"Error reading restart flag: {e}")
        return {"timestamp": datetime.now().isoformat(), "error": str(e)}
def remove_restart_flag() -> bool:
    """Delete the restart flag file.

    Returns True only when a file actually existed and was removed;
    False when absent or when removal failed (failure is logged).
    """
    flag = Path(RESTART_FLAG_PATH)
    try:
        if not flag.exists():
            return False
        flag.unlink()
        return True
    except IOError as e:
        logger.error(f"Failed to remove restart flag: {e}")
        return False
def load_restart_state() -> dict:
    """Return the persisted restart state, or {} when absent or corrupt."""
    # EAFP: FileNotFoundError is an IOError subclass, so a missing file
    # falls through to the same empty-dict result as a corrupt one.
    try:
        return json.loads(Path(RESTART_STATE_FILE).read_text())
    except (json.JSONDecodeError, IOError):
        return {}
def save_restart_state(
    requested_by: str | None = None,
    requested_at: str | None = None,
    requested_ip: str | None = None,
    completed_at: str | None = None,
    success: bool = True
) -> None:
    """Persist the latest restart record and a bounded history.

    Args:
        requested_by: Username who requested the restart.
        requested_at: ISO timestamp when the restart was requested.
        requested_ip: IP address of the requester.
        completed_at: ISO timestamp when the restart was completed.
        success: Whether the restart succeeded.
    """
    entry = {
        "requested_by": requested_by,
        "requested_at": requested_at,
        "requested_ip": requested_ip,
        "completed_at": completed_at,
        "success": success,
    }

    # Append to the existing history, tolerating a corrupt/non-list
    # value, and keep only the most recent RESTART_HISTORY_MAX records.
    previous = load_restart_state()
    history = previous.get("history", [])
    if not isinstance(history, list):
        history = []
    history = (history + [entry])[-RESTART_HISTORY_MAX:]

    state = {
        "last_restart": entry,
        "history": history,
        "history_limit": RESTART_HISTORY_MAX,
    }
    state_path = Path(RESTART_STATE_FILE)
    try:
        state_path.parent.mkdir(parents=True, exist_ok=True)
        state_path.write_text(json.dumps(state, indent=2))
    except IOError as e:
        logger.error(f"Failed to save restart state: {e}")
def send_reload_signal(pid: int) -> bool:
    """Send SIGHUP to the Gunicorn master so it reloads its workers.

    Args:
        pid: PID of the Gunicorn master process.

    Returns:
        True when the signal was delivered; False when the process is
        gone or we lack permission (both are logged).
    """
    try:
        os.kill(pid, signal.SIGHUP)
    except ProcessLookupError:
        logger.error(f"Process {pid} not found")
        return False
    except PermissionError:
        logger.error(f"Permission denied sending signal to PID {pid}")
        return False
    logger.info(f"Sent SIGHUP to Gunicorn master (PID: {pid})")
    return True
def process_restart_request() -> bool:
    """Handle one restart request if a flag file is present.

    Returns:
        True when a flag was found and processed (whether or not the
        reload succeeded); False when there was nothing to do.
    """
    flag_data = read_restart_flag()
    if flag_data is None:
        return False
    logger.info(f"Restart flag detected: {flag_data}")
    # Get Gunicorn master PID
    pid = get_gunicorn_pid()
    if pid is None:
        logger.error("Cannot restart: Gunicorn master PID not found")
        # Still remove flag to prevent infinite loop
        remove_restart_flag()
        save_restart_state(
            requested_by=flag_data.get("user"),
            requested_at=flag_data.get("timestamp"),
            requested_ip=flag_data.get("ip"),
            completed_at=datetime.now().isoformat(),
            success=False
        )
        return True
    # Send reload signal
    success = send_reload_signal(pid)
    # Remove the flag BEFORE the next poll regardless of outcome, so a
    # failed reload cannot retrigger forever.
    remove_restart_flag()
    # Save state
    save_restart_state(
        requested_by=flag_data.get("user"),
        requested_at=flag_data.get("timestamp"),
        requested_ip=flag_data.get("ip"),
        completed_at=datetime.now().isoformat(),
        success=success
    )
    if success:
        logger.info(
            f"Worker restart completed - "
            f"Requested by: {flag_data.get('user', 'unknown')}, "
            f"IP: {flag_data.get('ip', 'unknown')}"
        )
    return True
def run_watchdog() -> None:
    """Main watchdog loop: poll for the restart flag forever.

    Any exception from a single iteration is logged (with traceback via
    logger.exception) and swallowed so a transient error cannot kill
    the daemon.
    """
    logger.info(
        f"Worker watchdog started - "
        f"Check interval: {CHECK_INTERVAL}s, "
        f"Flag path: {RESTART_FLAG_PATH}, "
        f"PID file: {GUNICORN_PID_FILE}"
    )
    while True:
        try:
            process_restart_request()
        except Exception as e:
            logger.exception(f"Error in watchdog loop: {e}")
        time.sleep(CHECK_INTERVAL)
def main() -> None:
    """Entry point: run the watchdog until interrupted.

    Ctrl+C (KeyboardInterrupt) is treated as a clean shutdown (exit 0).
    """
    try:
        run_watchdog()
    except KeyboardInterrupt:
        logger.info("Watchdog stopped by user")
        sys.exit(0)


if __name__ == "__main__":
    main()