# ============================================================
# MES Dashboard Environment Configuration
# ============================================================
# Copy this file to .env and fill in your actual values:
#   cp .env.example .env
#   nano .env
#
# Format: one KEY=value pair per line. Keep every comment on
# its own line; not every env-file loader strips trailing
# "# ..." text from a value.
# ============================================================

# ============================================================
# Database Configuration (REQUIRED)
# ============================================================
# Oracle Database connection settings
DB_HOST=your_database_host
DB_PORT=1521
DB_SERVICE=your_service_name
DB_USER=your_username
DB_PASSWORD=your_password

# Database Pool Settings (optional, has defaults)
# Adjust based on expected load
# Pool size. Default: 5 (dev: 2, prod: 10)
DB_POOL_SIZE=5
# Max overflow. Default: 10 (dev: 3, prod: 20)
DB_MAX_OVERFLOW=10
# Seconds to wait when pool is exhausted
DB_POOL_TIMEOUT=30
# Recycle connection after N seconds
DB_POOL_RECYCLE=1800
DB_TCP_CONNECT_TIMEOUT=10
DB_CONNECT_RETRY_COUNT=1
DB_CONNECT_RETRY_DELAY=1.0
# Must stay below worker timeout
DB_CALL_TIMEOUT_MS=55000

# ============================================================
# Flask Configuration
# ============================================================
# Environment mode: development | production | testing
# If omitted, runtime defaults to production (fail-safe)
FLASK_ENV=development

# Debug mode: 0 for production, 1 for development
FLASK_DEBUG=0

# Session Security (REQUIRED for production!)
# Generate with: python -c "import secrets; print(secrets.token_hex(32))"
SECRET_KEY=your-secret-key-change-in-production

# Session timeout in seconds (default: 28800 = 8 hours)
SESSION_LIFETIME=28800

# JSON request payload upper bound in bytes (default: 262144 = 256KB)
MAX_JSON_BODY_BYTES=262144

# Route input-budget guardrails
QUERY_TOOL_MAX_CONTAINER_IDS=200
RESOURCE_DETAIL_DEFAULT_LIMIT=500
RESOURCE_DETAIL_MAX_LIMIT=500

# Trust boundary for forwarded headers (safe default: false)
# Direct-exposure deployment (no reverse proxy): keep this false
TRUST_PROXY_HEADERS=false
# Required when TRUST_PROXY_HEADERS=true. Supports comma-separated IP/CIDR entries.
# Example: TRUSTED_PROXY_IPS=127.0.0.1,10.0.0.0/24
TRUSTED_PROXY_IPS=

# CSP opt-in compatibility flag (default false = safer)
CSP_ALLOW_UNSAFE_EVAL=false

# ============================================================
# Authentication Configuration
# ============================================================
# LDAP API endpoint for user authentication
LDAP_API_URL=https://your-ldap-api-endpoint.example.com

# Admin email addresses (comma-separated for multiple)
ADMIN_EMAILS=admin@example.com

# Local Authentication (for development/testing)
# When enabled, uses local credentials instead of LDAP
# Set LOCAL_AUTH_ENABLED=true to bypass LDAP authentication
# Production safety: when FLASK_ENV=production, local auth is forcibly disabled
LOCAL_AUTH_ENABLED=false
LOCAL_AUTH_USERNAME=
LOCAL_AUTH_PASSWORD=

# ============================================================
# Gunicorn Configuration
# ============================================================
# Server bind address and port
GUNICORN_BIND=0.0.0.0:8080

# Number of worker processes
# Recommend: 2 for ≤ 8GB RAM (trace queries consume 2-3 GB peak per worker)
# Recommend: 4 for ≥ 16GB RAM
GUNICORN_WORKERS=2

# Threads per worker
GUNICORN_THREADS=4

# Worker timeout (seconds): should stay above DB/query-tool slow paths
GUNICORN_TIMEOUT=130

# Graceful shutdown timeout for worker reloads (seconds)
GUNICORN_GRACEFUL_TIMEOUT=60

# Worker recycle policy (set 0 to disable)
GUNICORN_MAX_REQUESTS=5000
GUNICORN_MAX_REQUESTS_JITTER=500

# ============================================================
# Redis Configuration (for WIP cache)
# ============================================================
# Redis connection URL
REDIS_URL=redis://localhost:6379/0

# Enable/disable Redis cache (set to false to fallback to Oracle)
REDIS_ENABLED=true

# Redis key prefix (to separate from other applications)
REDIS_KEY_PREFIX=mes_wip

# Cache check interval in seconds (default: 600 = 10 minutes)
CACHE_CHECK_INTERVAL=600

# Optional explicit TTL for WIP Redis keys (seconds)
# If unset/0, TTL defaults to 3 * CACHE_CHECK_INTERVAL
# Example: CACHE_CHECK_INTERVAL=600 -> default TTL=1800
WIP_CACHE_TTL_SECONDS=1800

# ============================================================
# Resource Cache Configuration
# ============================================================
# Enable/disable Resource cache (DW_MES_RESOURCE)
# When disabled, queries will fallback to Oracle directly
RESOURCE_CACHE_ENABLED=true

# Resource cache sync interval in seconds (default: 14400 = 4 hours)
# The cache will check for updates at this interval using MAX(LASTCHANGEDATE)
RESOURCE_SYNC_INTERVAL=14400

# Resource index version check interval in seconds (default: 5)
RESOURCE_INDEX_VERSION_CHECK_INTERVAL=5

# Realtime equipment status cache toggle and sync interval
REALTIME_EQUIPMENT_CACHE_ENABLED=true
EQUIPMENT_STATUS_SYNC_INTERVAL=300

# Filter cache SQL view overrides
FILTER_CACHE_WIP_VIEW=DWH.DW_MES_LOT_V
FILTER_CACHE_SPEC_WORKCENTER_VIEW=DWH.DW_MES_SPEC_WORKCENTER_V

# ============================================================
# Circuit Breaker Configuration
# ============================================================
# Enable/disable circuit breaker for database protection
CIRCUIT_BREAKER_ENABLED=true

# Minimum failures before circuit can open
CIRCUIT_BREAKER_FAILURE_THRESHOLD=5

# Failure rate threshold (0.0 - 1.0)
CIRCUIT_BREAKER_FAILURE_RATE=0.5

# Seconds to wait in OPEN state before trying HALF_OPEN
CIRCUIT_BREAKER_RECOVERY_TIMEOUT=30

# Sliding window size for counting successes/failures
CIRCUIT_BREAKER_WINDOW_SIZE=10

# ============================================================
# Trace Pipeline Configuration
# ============================================================
# Slow query warning threshold (seconds) — logs warning when stage exceeds this
TRACE_SLOW_THRESHOLD_SECONDS=15

# Max parallel workers for events domain fetching (per request)
# Recommend: 2 (each worker × EVENT_FETCHER_MAX_WORKERS = peak slow query slots)
TRACE_EVENTS_MAX_WORKERS=2

# Max parallel workers for EventFetcher batch queries (per domain)
# Recommend: 2 (peak concurrent slow queries = TRACE_EVENTS_MAX_WORKERS × this)
EVENT_FETCHER_MAX_WORKERS=2

# Max parallel workers for forward pipeline WIP+rejects fetching
FORWARD_PIPELINE_MAX_WORKERS=2

# --- Admission Control (Proposal 1: trace-events-memory-triage) ---
# Max container IDs per synchronous events request.
# Requests exceeding this limit return HTTP 413 (or HTTP 202 when async job queue is enabled).
# Set based on available RAM: 50K CIDs ≈ 2-3 GB peak memory per request.
TRACE_EVENTS_CID_LIMIT=50000

# Cursor fetchmany batch size for slow query iterator mode.
# Smaller = less peak memory; larger = fewer Oracle round-trips.
DB_SLOW_FETCHMANY_SIZE=5000

# Domain-level cache skip threshold (CID count).
# When CID count exceeds this, per-domain and route-level cache writes are skipped.
EVENT_FETCHER_CACHE_SKIP_CID_THRESHOLD=10000

# --- Async Job Queue (Proposal 2: trace-async-job-queue) ---
# Enable RQ trace worker for async large query processing
# Set to true and start the worker: rq worker trace-events
TRACE_WORKER_ENABLED=false

# CID threshold for automatic async job routing (requires RQ worker).
# Requests with CID count > threshold are queued instead of processed synchronously.
TRACE_ASYNC_CID_THRESHOLD=20000

# Job result retention time in seconds (default: 3600 = 1 hour)
TRACE_JOB_TTL_SECONDS=3600

# Job execution timeout in seconds (default: 1800 = 30 minutes)
TRACE_JOB_TIMEOUT_SECONDS=1800

# Number of RQ worker processes for trace jobs
TRACE_WORKER_COUNT=1

# RQ queue name for trace jobs
TRACE_WORKER_QUEUE=trace-events

# --- Streaming Response (Proposal 3: trace-streaming-response) ---
# NDJSON stream batch size (records per NDJSON line)
TRACE_STREAM_BATCH_SIZE=5000

# ============================================================
# Performance Metrics Configuration
# ============================================================
# Slow query threshold in seconds (default: 5.0)
# Note: Real-time Oracle views may take 2-5s per query, set threshold accordingly
SLOW_QUERY_THRESHOLD=5.0

# In-memory query metrics sliding window size
METRICS_WINDOW_SIZE=1000

# ============================================================
# SQLite Log Store Configuration
# ============================================================
# Enable/disable SQLite log store for admin dashboard
LOG_STORE_ENABLED=true

# SQLite database path
LOG_SQLITE_PATH=logs/admin_logs.sqlite

# Log retention period in days (default: 7)
LOG_SQLITE_RETENTION_DAYS=7

# Maximum log rows (default: 100000)
LOG_SQLITE_MAX_ROWS=100000

# ============================================================
# Worker Watchdog Configuration
# ============================================================
# Runtime directory for restart flag/pid/state files
WATCHDOG_RUNTIME_DIR=./tmp

# Path to restart flag file (watchdog monitors this file)
WATCHDOG_RESTART_FLAG=./tmp/mes_dashboard_restart.flag

# Gunicorn PID file path (must match start script / systemd config)
WATCHDOG_PID_FILE=./tmp/gunicorn.pid

# Path to restart state file (stores last restart info)
WATCHDOG_STATE_FILE=./tmp/mes_dashboard_restart_state.json

# Max entries persisted in restart history (bounded to avoid state growth)
WATCHDOG_RESTART_HISTORY_MAX=50

# Cooldown period between restart requests in seconds (default: 60)
WORKER_RESTART_COOLDOWN=60

# Watchdog loop check interval in seconds
WATCHDOG_CHECK_INTERVAL=5

# ============================================================
# Portal Shell / Full Modernization Flags
# ============================================================
# Frontend build mode: auto | always | never
# - auto: build only when dist is missing/stale
# - always: force build on every start/restart
# - never: skip build entirely
FRONTEND_BUILD_MODE=always

# Fail startup when frontend build fails (recommended in production)
FRONTEND_BUILD_FAIL_ON_ERROR=true

# Shell-first SPA navigation
# Local default: false (avoid coupling first boot to shell-first cutover behavior)
# Production recommendation: set true after shell cutover verification is complete
PORTAL_SPA_ENABLED=false

# Fail-fast readiness gate for in-scope dist assets
# Local default: false (allow startup when dist artifacts are intentionally absent)
# Production recommendation: set true to enforce release readiness at boot
MODERNIZATION_ENFORCE_ASSET_READINESS=false

# Retire in-scope runtime fallback behavior
# Local default: false (retain compatibility fallback while developing)
# Production recommendation: set true only after all in-scope dist assets are verified
MODERNIZATION_RETIRE_IN_SCOPE_RUNTIME_FALLBACK=false

# Runtime contract strict validation toggle
# Local default: false (avoid strict conda/runtime checks on onboarding machines)
# Production recommendation: set true to fail fast on contract drift
RUNTIME_CONTRACT_ENFORCE=false

# Health endpoint memo cache TTL in seconds
HEALTH_MEMO_TTL_SECONDS=5

# Reject history options API cache TTL in seconds (default: 14400 = 4 hours)
REJECT_HISTORY_OPTIONS_CACHE_TTL_SECONDS=14400

# ============================================================
# Runtime Resilience Diagnostics Thresholds
# ============================================================
# Alert window for sustained degraded state (seconds)
RESILIENCE_DEGRADED_ALERT_SECONDS=300

# Pool saturation warning / critical levels
RESILIENCE_POOL_SATURATION_WARNING=0.90
RESILIENCE_POOL_SATURATION_CRITICAL=1.0

# Restart churn threshold: N restarts within window triggers churn warning
RESILIENCE_RESTART_CHURN_WINDOW_SECONDS=600
RESILIENCE_RESTART_CHURN_THRESHOLD=3

# ============================================================
# CORS Configuration
# ============================================================
# Comma-separated list of allowed origins for CORS
# Example: https://example.com,https://app.example.com
# Set to * for development (not recommended for production)
CORS_ALLOWED_ORIGINS=