feat: harden long-range batch queries with redis+parquet caching

This commit is contained in:
egg
2026-03-02 21:04:18 +08:00
parent 2568fd836c
commit fb92579331
40 changed files with 5443 additions and 676 deletions

View File

@@ -26,6 +26,13 @@ DB_TCP_CONNECT_TIMEOUT=10
DB_CONNECT_RETRY_COUNT=1
DB_CONNECT_RETRY_DELAY=1.0
# Must stay below worker timeout
DB_CALL_TIMEOUT_MS=55000
DB_SLOW_CALL_TIMEOUT_MS=300000
DB_SLOW_MAX_CONCURRENT=5
DB_SLOW_POOL_ENABLED=true
DB_SLOW_POOL_SIZE=2
DB_SLOW_POOL_MAX_OVERFLOW=1
DB_SLOW_POOL_TIMEOUT=30
DB_SLOW_POOL_RECYCLE=1800
# ============================================================
# Flask Configuration
@@ -115,6 +122,24 @@ REDIS_ENABLED=true
# Redis key prefix (to separate from other applications)
REDIS_KEY_PREFIX=mes_wip
# Redis memory guardrail (prevent unbounded RAM growth)
# Examples: 512mb / 1gb / 2gb. Set to 0 to disable the limit (NOT recommended).
REDIS_MAXMEMORY=512mb
# Eviction policy when maxmemory is reached
# Recommended: allkeys-lru (general cache) or volatile-lru (evict only keys that have a TTL)
REDIS_MAXMEMORY_POLICY=allkeys-lru
# Redis persistence (on-disk storage: append-only file and snapshot settings)
REDIS_PERSISTENCE_ENABLED=true
REDIS_APPENDONLY=yes
REDIS_APPENDFSYNC=everysec
REDIS_SAVE=900 1 300 10 60 10000
# Startup cleanup: remove stale keys that accidentally have no TTL
REDIS_TTL_CLEANUP_ON_START=true
REDIS_TTL_CLEANUP_PATTERNS=batch:*,reject_dataset:*,hold_dataset:*,resource_dataset:*,job_query:*
# Cache check interval in seconds (default: 600 = 10 minutes)
CACHE_CHECK_INTERVAL=600
@@ -306,6 +331,30 @@ HEALTH_MEMO_TTL_SECONDS=5
# Reject history options API cache TTL in seconds (default: 14400 = 4 hours)
REJECT_HISTORY_OPTIONS_CACHE_TTL_SECONDS=14400
# ============================================================
# Reject History Batch/Spill Guardrails
# ============================================================
# Batch chunking controls (for long-range reject queries)
REJECT_ENGINE_GRAIN_DAYS=10
REJECT_ENGINE_PARALLEL=2
REJECT_ENGINE_MAX_ROWS_PER_CHUNK=50000
REJECT_ENGINE_MAX_TOTAL_ROWS=300000
# Large result spill controls
REJECT_ENGINE_SPILL_ENABLED=true
REJECT_ENGINE_MAX_RESULT_MB=64
QUERY_SPOOL_DIR=tmp/query_spool
REJECT_ENGINE_SPOOL_TTL_SECONDS=21600
REJECT_ENGINE_SPOOL_MAX_BYTES=2147483648
REJECT_ENGINE_SPOOL_WARN_RATIO=0.85
REJECT_ENGINE_SPOOL_CLEANUP_INTERVAL_SECONDS=300
REJECT_ENGINE_SPOOL_ORPHAN_GRACE_SECONDS=600
# Batch query engine thresholds
BATCH_QUERY_TIME_THRESHOLD_DAYS=60
BATCH_QUERY_ID_THRESHOLD=1000
BATCH_CHUNK_MAX_MEMORY_MB=256
# ============================================================
# Runtime Resilience Diagnostics Thresholds
# ============================================================