feat: harden long-range batch queries with redis+parquet caching
This commit is contained in:
49
.env.example
49
.env.example
@@ -26,6 +26,13 @@ DB_TCP_CONNECT_TIMEOUT=10
|
|||||||
DB_CONNECT_RETRY_COUNT=1
|
DB_CONNECT_RETRY_COUNT=1
|
||||||
DB_CONNECT_RETRY_DELAY=1.0
|
DB_CONNECT_RETRY_DELAY=1.0
|
||||||
DB_CALL_TIMEOUT_MS=55000 # Must stay below worker timeout
|
DB_CALL_TIMEOUT_MS=55000 # Must stay below worker timeout
|
||||||
|
DB_SLOW_CALL_TIMEOUT_MS=300000
|
||||||
|
DB_SLOW_MAX_CONCURRENT=5
|
||||||
|
DB_SLOW_POOL_ENABLED=true
|
||||||
|
DB_SLOW_POOL_SIZE=2
|
||||||
|
DB_SLOW_POOL_MAX_OVERFLOW=1
|
||||||
|
DB_SLOW_POOL_TIMEOUT=30
|
||||||
|
DB_SLOW_POOL_RECYCLE=1800
|
||||||
|
|
||||||
# ============================================================
|
# ============================================================
|
||||||
# Flask Configuration
|
# Flask Configuration
|
||||||
@@ -115,6 +122,24 @@ REDIS_ENABLED=true
|
|||||||
# Redis key prefix (to separate from other applications)
|
# Redis key prefix (to separate from other applications)
|
||||||
REDIS_KEY_PREFIX=mes_wip
|
REDIS_KEY_PREFIX=mes_wip
|
||||||
|
|
||||||
|
# Redis memory guardrail (prevent unbounded RAM growth)
|
||||||
|
# Example: 512mb / 1gb / 2gb. Set 0 to disable limit (NOT recommended).
|
||||||
|
REDIS_MAXMEMORY=512mb
|
||||||
|
|
||||||
|
# Eviction policy when maxmemory is reached
|
||||||
|
# Recommended: allkeys-lru (general cache), volatile-lru (TTL keys only)
|
||||||
|
REDIS_MAXMEMORY_POLICY=allkeys-lru
|
||||||
|
|
||||||
|
# Redis persistence (physical storage)
|
||||||
|
REDIS_PERSISTENCE_ENABLED=true
|
||||||
|
REDIS_APPENDONLY=yes
|
||||||
|
REDIS_APPENDFSYNC=everysec
|
||||||
|
REDIS_SAVE=900 1 300 10 60 10000
|
||||||
|
|
||||||
|
# Startup cleanup: remove stale keys that accidentally have no TTL
|
||||||
|
REDIS_TTL_CLEANUP_ON_START=true
|
||||||
|
REDIS_TTL_CLEANUP_PATTERNS=batch:*,reject_dataset:*,hold_dataset:*,resource_dataset:*,job_query:*
|
||||||
|
|
||||||
# Cache check interval in seconds (default: 600 = 10 minutes)
|
# Cache check interval in seconds (default: 600 = 10 minutes)
|
||||||
CACHE_CHECK_INTERVAL=600
|
CACHE_CHECK_INTERVAL=600
|
||||||
|
|
||||||
@@ -306,6 +331,30 @@ HEALTH_MEMO_TTL_SECONDS=5
|
|||||||
# Reject history options API cache TTL in seconds (default: 14400 = 4 hours)
|
# Reject history options API cache TTL in seconds (default: 14400 = 4 hours)
|
||||||
REJECT_HISTORY_OPTIONS_CACHE_TTL_SECONDS=14400
|
REJECT_HISTORY_OPTIONS_CACHE_TTL_SECONDS=14400
|
||||||
|
|
||||||
|
# ============================================================
|
||||||
|
# Reject History Batch/Spill Guardrails
|
||||||
|
# ============================================================
|
||||||
|
# Batch chunking controls (for long-range reject queries)
|
||||||
|
REJECT_ENGINE_GRAIN_DAYS=10
|
||||||
|
REJECT_ENGINE_PARALLEL=2
|
||||||
|
REJECT_ENGINE_MAX_ROWS_PER_CHUNK=50000
|
||||||
|
REJECT_ENGINE_MAX_TOTAL_ROWS=300000
|
||||||
|
|
||||||
|
# Large result spill controls
|
||||||
|
REJECT_ENGINE_SPILL_ENABLED=true
|
||||||
|
REJECT_ENGINE_MAX_RESULT_MB=64
|
||||||
|
QUERY_SPOOL_DIR=tmp/query_spool
|
||||||
|
REJECT_ENGINE_SPOOL_TTL_SECONDS=21600
|
||||||
|
REJECT_ENGINE_SPOOL_MAX_BYTES=2147483648
|
||||||
|
REJECT_ENGINE_SPOOL_WARN_RATIO=0.85
|
||||||
|
REJECT_ENGINE_SPOOL_CLEANUP_INTERVAL_SECONDS=300
|
||||||
|
REJECT_ENGINE_SPOOL_ORPHAN_GRACE_SECONDS=600
|
||||||
|
|
||||||
|
# Batch query engine thresholds
|
||||||
|
BATCH_QUERY_TIME_THRESHOLD_DAYS=60
|
||||||
|
BATCH_QUERY_ID_THRESHOLD=1000
|
||||||
|
BATCH_CHUNK_MAX_MEMORY_MB=256
|
||||||
|
|
||||||
# ============================================================
|
# ============================================================
|
||||||
# Runtime Resilience Diagnostics Thresholds
|
# Runtime Resilience Diagnostics Thresholds
|
||||||
# ============================================================
|
# ============================================================
|
||||||
|
|||||||
@@ -318,6 +318,15 @@ CIRCUIT_BREAKER_RECOVERY_TIMEOUT=30
|
|||||||
# Redis 設定
|
# Redis 設定
|
||||||
REDIS_URL=redis://localhost:6379/0
|
REDIS_URL=redis://localhost:6379/0
|
||||||
REDIS_ENABLED=true
|
REDIS_ENABLED=true
|
||||||
|
REDIS_KEY_PREFIX=mes_wip
|
||||||
|
REDIS_MAXMEMORY=512mb
|
||||||
|
REDIS_MAXMEMORY_POLICY=allkeys-lru
|
||||||
|
REDIS_PERSISTENCE_ENABLED=true
|
||||||
|
REDIS_APPENDONLY=yes
|
||||||
|
REDIS_APPENDFSYNC=everysec
|
||||||
|
REDIS_SAVE=900 1 300 10 60 10000
|
||||||
|
REDIS_TTL_CLEANUP_ON_START=true
|
||||||
|
REDIS_TTL_CLEANUP_PATTERNS=batch:*,reject_dataset:*,hold_dataset:*,resource_dataset:*,job_query:*
|
||||||
|
|
||||||
# Watchdog runtime contract
|
# Watchdog runtime contract
|
||||||
WATCHDOG_RUNTIME_DIR=./tmp
|
WATCHDOG_RUNTIME_DIR=./tmp
|
||||||
|
|||||||
@@ -0,0 +1,2 @@
|
|||||||
|
schema: spec-driven
|
||||||
|
created: 2026-03-02
|
||||||
@@ -0,0 +1,166 @@
|
|||||||
|
## Context
|
||||||
|
|
||||||
|
目前 6 個服務各自處理大查詢,缺乏統一保護:
|
||||||
|
|
||||||
|
| 服務 | 查詢類型 | 現有保護 | 缺口 |
|
||||||
|
|------|---------|---------|------|
|
||||||
|
| reject-history | 日期 + 工單/Lot/GD 展開 | L1+L2 快取、`read_sql_df_slow` | 無記憶體守衛、`limit=999999999`、缺分塊查詢 |
|
||||||
|
| hold-history | 日期 | L1+L2 快取、`read_sql_df_slow` | 無記憶體守衛、缺時間分塊 |
|
||||||
|
| resource-history | 日期 + 設備 ID | L1+L2 快取、1000 筆分批 | 無記憶體守衛 |
|
||||||
|
| mid-section-defect | 日期 → 偵測 → 族譜 → 上游 | Redis 快取、EventFetcher 分批 | 無偵測數量上限 |
|
||||||
|
| job-query | 日期 + 設備 ID | 1000 筆分批、`read_sql_df_slow` | **無結果快取**、缺時間分塊 |
|
||||||
|
| query-tool | 多種 resolver → container ID | 輸入筆數限制、resolve route 短 TTL 快取、EventFetcher 快取 | 多數查詢仍走 `read_sql_df`(55s timeout)、缺統一分塊編排 |
|
||||||
|
|
||||||
|
參考實作:
|
||||||
|
- `EventFetcher`:batch 1000 + ThreadPoolExecutor(2) + `read_sql_df_slow_iter` streaming + Redis 快取 — **已是最佳實作**
|
||||||
|
- `LineageEngine`:batch 1000 + depth limit 20 — **族譜專用引擎**
|
||||||
|
|
||||||
|
目標:建立 `BatchQueryEngine` 共用模組,任何服務接入即獲得完整保護。
|
||||||
|
|
||||||
|
## Goals / Non-Goals
|
||||||
|
|
||||||
|
**Goals:**
|
||||||
|
- 統一 parquet-in-Redis 存取為共用模組(消除 3 處重複)
|
||||||
|
- 提供時間範圍分解(長日期 → ~31 天月份區間)
|
||||||
|
- 提供 ID 批次分解(工單/Lot/GD 展開後的大量 container ID → 1000 筆一批)
|
||||||
|
- 記憶體守衛:每個 chunk 結果檢查 memory_usage,超過閾值中止
|
||||||
|
- 結果筆數限制:可配置上限,超過時截斷並標記
|
||||||
|
- 受控並行:預設循序、可選並行、semaphore 感知
|
||||||
|
- Redis 分塊快取 + 部分命中
|
||||||
|
- 統一使用 `read_sql_df_slow`(300 秒 dedicated connection)
|
||||||
|
- 定義 query_hash 與 chunk 邊界語意,避免跨服務行為不一致
|
||||||
|
- 定義 chunk cache 與服務 L1/L2 dataset cache 互動規則
|
||||||
|
|
||||||
|
**Non-Goals:**
|
||||||
|
- 不修改 SQL 語句本身
|
||||||
|
- 不引入新的外部依賴
|
||||||
|
- 不改變前端 API 介面(前端無感知)
|
||||||
|
- 不替換 EventFetcher / LineageEngine(它們已各自最佳化,引擎提供可選接入點)
|
||||||
|
- 不改變 trace_job_service 的 RQ 非同步架構
|
||||||
|
|
||||||
|
## Decisions
|
||||||
|
|
||||||
|
### Decision 1: 提取 `redis_df_store.py` 共用模組
|
||||||
|
|
||||||
|
**選擇**:從 reject/hold/resource_dataset_cache 提取相同的 `_redis_store_df` / `_redis_load_df` 到 `src/mes_dashboard/core/redis_df_store.py`。
|
||||||
|
|
||||||
|
**替代方案**:(A) 保持各自複製 → 已有 3 處重複,維護困難。
|
||||||
|
|
||||||
|
**理由**:parquet-in-Redis 是 DataFrame 序列化工具,與快取策略(TTL、LRU)屬不同層次。
|
||||||
|
|
||||||
|
### Decision 2: `BatchQueryEngine` 作為工具類而非基底類別
|
||||||
|
|
||||||
|
**選擇**:提供獨立函式(`decompose_by_time_range`、`decompose_by_ids`、`execute_plan`、`merge_chunks`),各服務按需調用。
|
||||||
|
|
||||||
|
**替代方案**:(A) 抽象基底類別 `BaseDatasetCache` → 三個 dataset cache 差異大(SQL、policy filter、衍生計算),強制繼承會過度耦合。
|
||||||
|
|
||||||
|
**理由**:工具類模式讓服務保持現有結構,僅在主查詢路徑決定是否啟用分解。閾值以下的查詢完全不經過引擎。
|
||||||
|
|
||||||
|
### Decision 3: 預設循序、可選並行、semaphore 感知
|
||||||
|
|
||||||
|
**選擇**:`execute_plan(parallel=1)` 預設循序。實際並行上限 = `min(requested, semaphore_available - 1)`。
|
||||||
|
|
||||||
|
**替代方案**:(A) 預設並行 → 可能耗盡 semaphore;(B) 完全不並行 → 失去速度。
|
||||||
|
|
||||||
|
**理由**:Oracle 連線稀缺(Production 預設 `DB_SLOW_MAX_CONCURRENT=5`,Development 常見為 3)。reject_dataset_cache 查詢最重可設 parallel=2,其他預設循序最安全。
|
||||||
|
|
||||||
|
### Decision 4: 記憶體守衛 + 結果筆數限制
|
||||||
|
|
||||||
|
**選擇**:每個 chunk 查詢後檢查 `df.memory_usage(deep=True).sum()`,超過 `BATCH_CHUNK_MAX_MEMORY_MB`(預設 256MB)時中止該 chunk 並標記失敗。同時提供 `max_rows_per_chunk` 參數,在 SQL 中加入 `FETCH FIRST N ROWS ONLY`。
|
||||||
|
|
||||||
|
**替代方案**:(A) 無限制 → 現狀,OOM 風險高;(B) 全域限制 → 不夠靈活。
|
||||||
|
|
||||||
|
**理由**:chunk 級別的記憶體守衛是最後一道防線。分解後每個 chunk 的日期/ID 範圍已大幅縮小,記憶體超限通常代表異常資料,應中止而非繼續。
|
||||||
|
|
||||||
|
### Decision 5: 分塊快取 + 部分命中
|
||||||
|
|
||||||
|
**選擇**:Redis 鍵 `batch:{prefix}:{hash}:chunk:{idx}`,每個 chunk 獨立 SETEX。
|
||||||
|
|
||||||
|
**替代方案**:(A) 只快取最終結果 → 無法部分命中。
|
||||||
|
|
||||||
|
**理由**:使用者常見操作是「先查 1-6 月,再查 1-8 月」。分塊快取讓前 6 個月直接複用,只查 7-8 月。
|
||||||
|
|
||||||
|
### Decision 6: 引擎路徑統一使用 slow-query 路徑(且不佔用主 pool)
|
||||||
|
|
||||||
|
**選擇**:所有經過引擎的查詢統一使用 slow-query 路徑(300s timeout, semaphore 控制);未經引擎的既有短查詢路徑保持原狀。
|
||||||
|
慢查詢執行策略採兩層:
|
||||||
|
1. 主路徑:使用既有獨立 `SLOW POOL`(小容量)做 checkout/checkin。
|
||||||
|
2. fallback:當 SLOW POOL 不可用時,降級為 slow direct connection。
|
||||||
|
|
||||||
|
**替代方案**:
|
||||||
|
(A) 引擎路徑混用 `read_sql_df`(主 pool, 55s timeout)→ 長查詢高超時風險且會壓縮一般 API 吞吐。
|
||||||
|
(B) 慢查詢直接共用主 pool → 高峰時造成 pool 爭用與整體延遲放大。
|
||||||
|
|
||||||
|
**理由**:經過引擎的查詢本身就是「已知可能很慢」的查詢。慢查詢與主 pool 隔離可避免互相影響;SLOW POOL 讓連線重用與隔離同時成立,fallback direct connection 保障可用性。
|
||||||
|
|
||||||
|
### Decision 7: 部分失敗處理
|
||||||
|
|
||||||
|
**選擇**:某個 chunk 失敗時記錄錯誤、繼續剩餘 chunk。`merge_chunks()` 回傳成功部分,metadata 標記 `has_partial_failure=True`。
|
||||||
|
|
||||||
|
**替代方案**:(A) 全部回滾 → 已成功的 chunk 浪費。
|
||||||
|
|
||||||
|
**理由**:歷史報表場景下,部分結果比完全失敗更有價值。metadata 標記讓服務可決定是否警告使用者。
|
||||||
|
|
||||||
|
### Decision 8: Chunk Cache 與服務 L1/L2 Dataset Cache 互動
|
||||||
|
|
||||||
|
**選擇**:先讀 chunk cache(Redis)組裝結果;組裝後回填既有 service dataset cache(L1 process + L2 Redis)以維持現有 `/view` 路徑與 `query_id` 行為。
|
||||||
|
|
||||||
|
**替代方案**:(A) 只使用 chunk cache,不回填 service cache → 現有 view/query_id 流程失效或重複查詢。
|
||||||
|
|
||||||
|
**理由**:需要兼容既有 two-phase dataset API(primary query + cached view),chunk cache 是引擎層優化,不應破壞服務層介面。
|
||||||
|
|
||||||
|
### Decision 9: query_hash 規格
|
||||||
|
|
||||||
|
**選擇**:query_hash 使用 canonical JSON(sorted keys、穩定 list 順序、字串正規化)後 SHA-256 前 16 碼;hash 僅包含會影響原始資料集合的參數(不含純前端呈現參數)。
|
||||||
|
|
||||||
|
**替代方案**:(A) 每服務自由實作 hash → 跨服務不可預測且難除錯。
|
||||||
|
|
||||||
|
**理由**:chunk key、progress key、merge key 需可重現,否則無法保證 cache 命中與部分重用。
|
||||||
|
|
||||||
|
### Decision 10: 時間分解邊界語意
|
||||||
|
|
||||||
|
**選擇**:採閉區間 chunk `[chunk_start, chunk_end]`;下一段從 `chunk_end + 1 day` 開始;最後一段可小於 grain_days;輸入日期以服務既有時區/日界線為準,不在引擎層重新解釋時區。
|
||||||
|
|
||||||
|
**替代方案**:(A) 半開區間或依月份動態切割但不定義邊界 → 容易重疊或漏資料。
|
||||||
|
|
||||||
|
**理由**:邊界語意固定後,merge 去重、統計一致性與測試可驗證性都會提升。
|
||||||
|
|
||||||
|
### Decision 11: 大結果採 Parquet 落地,Redis 僅保留 metadata/熱快取
|
||||||
|
|
||||||
|
**選擇**:對長查詢(尤其 reject-history)引入 spill-to-disk:
|
||||||
|
1. chunk 查詢與 chunk cache 保持現行(Redis,短 TTL)
|
||||||
|
2. merge 後若結果超過門檻(rows / memory / serialized size),寫入 Parquet 至本機 spool 目錄
|
||||||
|
3. Redis 僅保存 metadata(query_id, file_path, row_count, schema_hash, created_at, expires_at)
|
||||||
|
4. `/view`/`/export` 優先透過 metadata 讀取 parquet;metadata 不存在時回退現行 cache 行為
|
||||||
|
5. 背景清理器定期移除過期 parquet 與孤兒 metadata
|
||||||
|
|
||||||
|
**替代方案**:
|
||||||
|
(A) Redis 全量承載所有結果(現況)→ 記憶體壓力高,易引發 lock timeout/OOM 連鎖
|
||||||
|
(B) 直接落 DB(例如 SQLite)→ 寫入鎖衝突與維運複雜度高(目前已有 `database is locked` 觀察)
|
||||||
|
|
||||||
|
**理由**:Redis 是記憶體快取,不適合長時間承載大結果;Parquet 落地可把大結果轉移到磁碟,降低 worker/Redis 記憶體峰值。
|
||||||
|
|
||||||
|
## Risks / Trade-offs
|
||||||
|
|
||||||
|
**[Redis 記憶體增長]** → 分塊快取增加 key 數量(365 天 ≈ 12 個 chunk key)。
|
||||||
|
→ 緩解:TTL 自動過期(900s);chunk 結果經 parquet 壓縮(通常 10:1 壓縮比)。
|
||||||
|
|
||||||
|
**[Semaphore 爭用]** → 並行 chunk 消耗更多 permit。
|
||||||
|
→ 緩解:感知可用數量,不足時自動降級循序。預設 parallel=1。
|
||||||
|
|
||||||
|
**[時間分解後的資料一致性]** → 不同月份 chunk 在不同時間點查詢。
|
||||||
|
→ 緩解:歷史報表資料更新頻率低(日級),短窗口內變動極低。可接受。
|
||||||
|
|
||||||
|
**[遷移風險]** → 先修改 3 個 dataset cache,再擴展至其他服務,整體範圍仍大。
|
||||||
|
→ 緩解:閾值控制(短查詢不經過引擎)+ P0/P1/P2/P3 分階段導入 + 每階段獨立驗證。
|
||||||
|
|
||||||
|
**[磁碟 I/O 與容量壓力]** → Parquet 落地會增加磁碟讀寫,若清理策略失效可能累積大量檔案。
|
||||||
|
→ 緩解:設定 spool 容量上限、TTL 清理、啟動時 orphan 掃描、超限時回退到「不落地僅回應摘要」保護模式。
|
||||||
|
|
||||||
|
**[Stale metadata / orphan file]** → Redis metadata 與實體檔案可能不一致。
|
||||||
|
→ 緩解:讀取前校驗檔案存在與 schema hash;不一致時自動失效 metadata 並記錄告警。
|
||||||
|
|
||||||
|
## Open Questions
|
||||||
|
|
||||||
|
1. `mid_section_defect_service` 的 4 階段管線(偵測 → 族譜 → 上游歷史 → 歸因)中,哪些階段適合接入引擎?偵測查詢可日期分解,但族譜/上游已透過 EventFetcher 處理。
|
||||||
|
2. `query_tool_service` 有 15+ 種查詢類型,是否全部接入還是只處理最易超時的(split_merge_history、equipment_period)?
|
||||||
@@ -0,0 +1,83 @@
|
|||||||
|
## Why
|
||||||
|
|
||||||
|
目前各歷史報表服務(reject-history、hold-history、resource-history)、查詢工具(query-tool)、中段不良分析(mid-section-defect)和 Job 查詢(job-query)各自實作不同的批次查詢、快取和並行執行模式,缺乏統一編排與保護。主要問題:
|
||||||
|
|
||||||
|
1. **Oracle 超時**:長日期範圍(365+ 天)或大量 Container ID(工單展開後可達數千筆)的查詢可能超過 300 秒 call_timeout
|
||||||
|
2. **OOM 風險**:reject/hold dataset cache 以 `limit: 999999999` 取回全部資料,無記憶體上限守衛
|
||||||
|
3. **保護分散**:`EventFetcher` 已有 ID 分批 + 快取,但 reject/hold/resource dataset cache 仍各自維護查詢與快取策略
|
||||||
|
4. **重複程式碼**:3 個 dataset cache 各自複製相同的 parquet-in-Redis 序列化邏輯
|
||||||
|
5. **ID 展開膨脹**:工單 resolve 後 container ID 可能大量擴張,缺乏跨服務一致的分批/合併流程
|
||||||
|
6. **重查成本高**:延長查詢範圍(例如 1-6 月改 1-8 月)無法有效重用已查區段結果
|
||||||
|
7. **query-tool 超時風險高**:多數查詢仍走 `read_sql_df`(主 pool / 55s timeout),大查詢下容易超時
|
||||||
|
|
||||||
|
需要一個**可穩定複用的查詢引擎模組**,任何服務接入後自動獲得分解、快取、記憶體保護和超時保護。
|
||||||
|
|
||||||
|
## What Changes
|
||||||
|
|
||||||
|
- 新增 `BatchQueryEngine` 共用模組,提供:
|
||||||
|
- **時間範圍分解**:長日期 → ~31 天月份區間,每段獨立查詢
|
||||||
|
- **時間分解語意**:明確定義 chunk 邊界(閉區間)、跨月切割與最後一段不足月行為
|
||||||
|
- **ID 批次分解**:大量 ID(工單/Lot/GD Lot/流水批展開後)→ 1000 筆一批
|
||||||
|
- **query_hash 規格**:統一 canonicalization 與雜湊欄位,確保 chunk/cache key 穩定
|
||||||
|
- **記憶體守衛**:每個 chunk 結果檢查 `DataFrame.memory_usage()`,超過閾值時中止並警告
|
||||||
|
- **結果筆數限制**:可配置的最大結果筆數,超過時截斷並標記
|
||||||
|
- **受控並行執行**:預設循序、可選並行,嚴格遵守 slow query semaphore
|
||||||
|
- **Redis 分塊快取**:每個 chunk 獨立快取,支援部分命中(延長查詢範圍時複用已查過的區間)
|
||||||
|
- **快取層互動**:明確定義 chunk cache 與服務既有 L1/L2 dataset cache 的讀寫順序
|
||||||
|
- **進度追蹤**:Redis HSET 記錄進度,可供前端顯示
|
||||||
|
- 新增「**大結果落地層(Parquet spill)**」設計:
|
||||||
|
- 當長查詢結果超過記憶體/列數門檻時,將合併後結果以 Parquet 寫入本機持久目錄(例如 `tmp/query_spool/`)
|
||||||
|
- Redis 僅保存 metadata(query_id → parquet path / schema / rows / created_at / ttl)
|
||||||
|
- `/view` 與 `/export` 讀取流程優先走 Redis metadata + Parquet,避免整包 DataFrame 常駐 worker RAM
|
||||||
|
- 定時清理(TTL + 背景清理器)刪除過期 parquet,避免磁碟持續膨脹
|
||||||
|
- 新增 `redis_df_store` 共用模組,將 parquet-in-Redis 存取邏輯從 3 個 dataset cache 提取為共用工具
|
||||||
|
- 所有**引擎接管的 chunk 查詢**統一使用 slow 路徑(300 秒級 timeout)
|
||||||
|
- 使用既有「**獨立 SLOW POOL(小容量)**」做慢查詢連線重用
|
||||||
|
- 明確**不使用主查詢 pool** 承載慢查詢,避免拖垮一般 API
|
||||||
|
- 當 SLOW POOL 不可用時,降級為 slow direct connection(不影響主 pool)
|
||||||
|
|
||||||
|
## Capabilities
|
||||||
|
|
||||||
|
### New Capabilities
|
||||||
|
- `batch-query-engine`: 統一批次查詢引擎模組,涵蓋分解策略(時間/ID)、記憶體守衛、結果限制、受控執行、Redis 分塊快取、進度追蹤、結果合併
|
||||||
|
|
||||||
|
### Modified Capabilities
|
||||||
|
- `reject-history-api`: 主查詢改為透過引擎執行;date_range 模式自動時間分解,container 模式(工單/Lot/GD Lot 展開後)自動 ID 分批
|
||||||
|
- `hold-dataset-cache`: 主查詢改為透過引擎執行,長日期自動分解
|
||||||
|
- `resource-dataset-cache`: 主查詢改為透過引擎執行,長日期自動分解
|
||||||
|
- `event-fetcher-unified`: 保持既有最佳化(batch + streaming + cache),僅在需要統一監控/進度模型時再評估導入
|
||||||
|
|
||||||
|
## Impact
|
||||||
|
|
||||||
|
- **後端**:新增 2 個共用模組(`batch_query_engine.py`、`redis_df_store.py`),優先修改 3 個 dataset cache 主查詢路徑(reject/hold/resource)
|
||||||
|
- **受影響服務**(優先順序):
|
||||||
|
- P0:reject-history(最容易超時/OOM — 長日期 + 工單展開 + 目前 `limit=999999999`)
|
||||||
|
- P1:hold-history、resource-history(相同架構,直接套用)
|
||||||
|
- P2:mid-section-defect(4 階段管線,偵測查詢 + 上游歷史)、job-query(缺快取 + 日期分解)
|
||||||
|
- P3:query-tool(優先處理 `read_sql_df` 高風險路徑並導入慢查詢保護)、event-fetcher(保持可選)
|
||||||
|
- **資料庫**:不改 SQL,僅縮小每次查詢的 bind parameter 範圍
|
||||||
|
- **資料庫連線策略**:慢查詢與一般 pooled query 隔離,避免資源互相干擾
|
||||||
|
- **Redis**:新增 `batch:*` 前綴的分塊快取鍵
|
||||||
|
- **儲存層**:新增 Parquet 結果落地目錄與清理機制(Redis 轉為索引/metadata,不再承載全部大結果)
|
||||||
|
- **記憶體**:引擎強制單 chunk 記憶體上限(預設 256MB),超過時中止
|
||||||
|
- **可用性**:Redis 設定 `maxmemory` + eviction 後仍可透過 Parquet metadata 回復查詢結果(cache 不命中不等於資料遺失)
|
||||||
|
- **向下相容**:短查詢(< 60 天、< 1000 ID)走現有路徑,零額外開銷;既有 route/event 快取策略保持不變
|
||||||
|
- **前端**:可選性變更,長查詢可顯示進度條(非必要)
|
||||||
|
|
||||||
|
## Parquet 落地的預期效果與副作用
|
||||||
|
|
||||||
|
**預期效果:**
|
||||||
|
- 大幅降低 worker 在「merge + cache 回填」階段的峰值記憶體(避免單 worker 突增到 GB 級)
|
||||||
|
- Redis 記憶體由「存整包資料」轉為「存索引/熱資料」,降低 OOM 與 lock timeout 連鎖風險
|
||||||
|
- 服務重啟後,若 parquet 尚未過期,仍可恢復查詢結果(搭配 metadata)
|
||||||
|
|
||||||
|
**可能副作用(Side Effects):**
|
||||||
|
- 磁碟 I/O 增加:查詢高峰時會有 parquet 寫入/讀取尖峰
|
||||||
|
- 磁碟容量風險:清理策略失效時,spool 目錄可能持續膨脹
|
||||||
|
- 資料一致性風險:metadata 指向檔案若被外部刪除/損壞,會出現 stale pointer
|
||||||
|
- 安全與治理:落地檔案需納入權限控管、備份/清理與稽核策略
|
||||||
|
|
||||||
|
**緩解方向:**
|
||||||
|
- 強制 TTL + 定期掃描清理(以 metadata 與檔案 mtime 雙重判斷)
|
||||||
|
- 啟動時做 orphan/stale 檢查與自動修復(刪 metadata 或刪孤兒檔)
|
||||||
|
- 先以 reject-history 長查詢為 P0,逐步擴展到其他服務
|
||||||
@@ -0,0 +1,166 @@
|
|||||||
|
## ADDED Requirements
|
||||||
|
|
||||||
|
### Requirement: BatchQueryEngine SHALL provide time-range decomposition
|
||||||
|
The module SHALL decompose long date ranges into manageable monthly chunks to prevent Oracle timeout.
|
||||||
|
|
||||||
|
#### Scenario: Decompose date range into monthly chunks
|
||||||
|
- **WHEN** `decompose_by_time_range(start_date, end_date, grain_days=31)` is called
|
||||||
|
- **THEN** the date range SHALL be split into chunks of at most `grain_days` days each
|
||||||
|
- **THEN** each chunk SHALL contain `chunk_start` and `chunk_end` date strings
|
||||||
|
- **THEN** chunks SHALL be contiguous and non-overlapping, covering the full range
|
||||||
|
|
||||||
|
#### Scenario: Short date range returns single chunk
|
||||||
|
- **WHEN** the date range is shorter than or equal to `grain_days`
|
||||||
|
- **THEN** a single chunk covering the full range SHALL be returned
|
||||||
|
|
||||||
|
#### Scenario: Time-chunk boundary semantics are deterministic
|
||||||
|
- **WHEN** a date range is decomposed into multiple chunks
|
||||||
|
- **THEN** each chunk SHALL use a closed interval `[chunk_start, chunk_end]`
|
||||||
|
- **THEN** the next chunk SHALL start at `previous_chunk_end + 1 day`
|
||||||
|
- **THEN** the final chunk MAY contain fewer than `grain_days` days
|
||||||
|
- **THEN** chunk ranges SHALL have no overlap and no gap
|
||||||
|
|
||||||
|
### Requirement: BatchQueryEngine SHALL provide ID-batch decomposition
|
||||||
|
The module SHALL decompose large ID lists (from workorder/lot/GD lot/serial resolve expansion) into batches respecting Oracle IN-clause limits.
|
||||||
|
|
||||||
|
#### Scenario: Decompose ID list into batches
|
||||||
|
- **WHEN** `decompose_by_ids(ids, batch_size=1000)` is called with more than `batch_size` IDs
|
||||||
|
- **THEN** the ID list SHALL be split into batches of at most `batch_size` items each
|
||||||
|
|
||||||
|
#### Scenario: Small ID list returns single batch
|
||||||
|
- **WHEN** the ID list has fewer than or equal to `batch_size` items
|
||||||
|
- **THEN** a single batch containing all IDs SHALL be returned
|
||||||
|
|
||||||
|
### Requirement: BatchQueryEngine SHALL execute chunk plans with controlled parallelism
|
||||||
|
The module SHALL execute query chunks sequentially by default, with opt-in parallel execution respecting the slow query semaphore.
|
||||||
|
|
||||||
|
#### Scenario: Sequential execution (default)
|
||||||
|
- **WHEN** `execute_plan(chunks, query_fn, parallel=1)` is called
|
||||||
|
- **THEN** chunks SHALL be executed one at a time in order
|
||||||
|
- **THEN** each chunk result SHALL be stored to Redis immediately after completion
|
||||||
|
- **THEN** the function SHALL return a `query_hash` identifying the batch result
|
||||||
|
|
||||||
|
#### Scenario: Parallel execution with semaphore awareness
|
||||||
|
- **WHEN** `execute_plan(chunks, query_fn, parallel=2)` is called
|
||||||
|
- **THEN** up to `parallel` chunks SHALL execute concurrently via ThreadPoolExecutor
|
||||||
|
- **THEN** each thread SHALL acquire the slow query semaphore before executing `query_fn`
|
||||||
|
- **THEN** actual concurrency SHALL be capped at `min(parallel, available_semaphore_permits - 1)`
|
||||||
|
- **THEN** if semaphore is fully occupied, execution SHALL degrade to sequential
|
||||||
|
|
||||||
|
#### Scenario: All engine queries use dedicated connection
|
||||||
|
- **WHEN** a chunk's `query_fn` executes an Oracle query
|
||||||
|
- **THEN** it SHALL use `read_sql_df_slow` (dedicated connection, 300s timeout, semaphore-controlled)
|
||||||
|
- **THEN** pooled connection (`read_sql_df`) SHALL NOT be used for engine-managed queries
|
||||||
|
|
||||||
|
### Requirement: BatchQueryEngine SHALL enforce memory guards per chunk
|
||||||
|
The module SHALL check each chunk result's memory usage and abort if it exceeds a configurable threshold.
|
||||||
|
|
||||||
|
#### Scenario: Chunk memory within limit
|
||||||
|
- **WHEN** a chunk query returns a DataFrame within `BATCH_CHUNK_MAX_MEMORY_MB` (default 256MB, env-configurable)
|
||||||
|
- **THEN** the chunk SHALL be stored to Redis and marked as completed
|
||||||
|
|
||||||
|
#### Scenario: Chunk memory exceeds limit
|
||||||
|
- **WHEN** a chunk query returns a DataFrame exceeding `BATCH_CHUNK_MAX_MEMORY_MB`
|
||||||
|
- **THEN** the chunk SHALL be discarded (NOT stored to Redis)
|
||||||
|
- **THEN** the chunk SHALL be marked as failed in metadata with reason `memory_limit_exceeded`
|
||||||
|
- **THEN** a warning log SHALL include chunk index, actual memory MB, and threshold
|
||||||
|
- **THEN** remaining chunks SHALL continue execution
|
||||||
|
|
||||||
|
#### Scenario: Result row count limit
|
||||||
|
- **WHEN** `max_rows_per_chunk` is configured
|
||||||
|
- **THEN** the engine SHALL pass this limit to `query_fn` for SQL-level truncation (e.g., `FETCH FIRST N ROWS ONLY`)
|
||||||
|
- **THEN** if the result contains exactly `max_rows_per_chunk` rows, metadata SHALL include `truncated=True`
|
||||||
|
|
||||||
|
### Requirement: BatchQueryEngine SHALL support partial cache hits
|
||||||
|
The module SHALL check Redis for previously cached chunks and skip re-execution for cached chunks.
|
||||||
|
|
||||||
|
#### Scenario: Partial cache hit skips cached chunks
|
||||||
|
- **WHEN** `execute_plan(chunks, query_fn, skip_cached=True)` is called
|
||||||
|
- **THEN** for each chunk, Redis SHALL be checked for an existing cached result
|
||||||
|
- **THEN** chunks with valid cached results SHALL NOT be re-executed
|
||||||
|
- **THEN** only uncached chunks SHALL be passed to `query_fn`
|
||||||
|
|
||||||
|
#### Scenario: Full cache hit skips all execution
|
||||||
|
- **WHEN** all chunks already exist in Redis cache
|
||||||
|
- **THEN** no Oracle queries SHALL be executed
|
||||||
|
- **THEN** `merge_chunks()` SHALL return the combined cached DataFrames
|
||||||
|
|
||||||
|
### Requirement: BatchQueryEngine SHALL generate deterministic query_hash
|
||||||
|
The module SHALL use a stable hash for cache/progress keys so semantically identical queries map to the same batch identity.
|
||||||
|
|
||||||
|
#### Scenario: Stable hash for equivalent parameters
|
||||||
|
- **WHEN** two requests contain the same semantic query parameters in different input order
|
||||||
|
- **THEN** canonicalization SHALL normalize ordering before hashing
|
||||||
|
- **THEN** `query_hash` SHALL be identical for both requests
|
||||||
|
|
||||||
|
#### Scenario: Hash changes only when dataset-affecting parameters change
|
||||||
|
- **WHEN** parameters affecting the raw dataset (date range, mode, resolved IDs, core filters) change
|
||||||
|
- **THEN** `query_hash` SHALL change
|
||||||
|
- **THEN** presentation-only parameters SHALL NOT change `query_hash`
|
||||||
|
|
||||||
|
### Requirement: BatchQueryEngine SHALL define chunk-cache to service-cache handoff
|
||||||
|
The module SHALL integrate chunk-level cache with existing service-level dataset caches without breaking query_id-based view APIs.
|
||||||
|
|
||||||
|
#### Scenario: Chunk merge backfills service dataset cache
|
||||||
|
- **WHEN** chunk results are loaded/merged into a complete dataset for a primary query
|
||||||
|
- **THEN** the merged DataFrame SHALL be written back to the service's existing dataset cache layers (L1 process + L2 Redis)
|
||||||
|
- **THEN** downstream `/view` queries using the service `query_id` SHALL continue to work without additional Oracle queries
|
||||||
|
|
||||||
|
#### Scenario: Service cache miss with chunk cache hit
|
||||||
|
- **WHEN** a service-level dataset cache entry has expired but relevant chunk cache keys still exist
|
||||||
|
- **THEN** the engine SHALL rebuild the merged dataset from chunk cache
|
||||||
|
- **THEN** the service dataset cache SHALL be repopulated before returning response
|
||||||
|
|
||||||
|
### Requirement: BatchQueryEngine SHALL store chunk results in Redis
|
||||||
|
The module SHALL store each chunk as a separate Redis key using parquet-in-Redis format.
|
||||||
|
|
||||||
|
#### Scenario: Chunk storage key format
|
||||||
|
- **WHEN** a chunk result is stored
|
||||||
|
- **THEN** the Redis key SHALL follow the pattern `batch:{cache_prefix}:{query_hash}:chunk:{idx}`
|
||||||
|
- **THEN** each chunk SHALL be stored as a parquet-encoded base64 string via `redis_df_store`
|
||||||
|
- **THEN** each chunk key SHALL have a TTL matching the service's cache TTL (default 900 seconds)
|
||||||
|
|
||||||
|
#### Scenario: Chunk metadata tracking
|
||||||
|
- **WHEN** chunks are being executed
|
||||||
|
- **THEN** a metadata key `batch:{cache_prefix}:{query_hash}:meta` SHALL be updated via Redis HSET
|
||||||
|
- **THEN** metadata SHALL include `total`, `completed`, `failed`, `pct`, `status`, and `has_partial_failure` fields
|
||||||
|
|
||||||
|
### Requirement: BatchQueryEngine SHALL merge chunk results into a single DataFrame
|
||||||
|
The module SHALL provide result assembly from cached chunks.
|
||||||
|
|
||||||
|
#### Scenario: Merge all chunks
|
||||||
|
- **WHEN** `merge_chunks(query_hash)` is called
|
||||||
|
- **THEN** all chunk DataFrames SHALL be loaded from Redis and concatenated via `pd.concat`
|
||||||
|
- **THEN** if any chunk is missing, the merge SHALL proceed with available chunks and set `has_partial_failure=True`
|
||||||
|
|
||||||
|
#### Scenario: Iterate chunks for streaming
|
||||||
|
- **WHEN** `iterate_chunks(query_hash)` is called
|
||||||
|
- **THEN** chunk DataFrames SHALL be yielded one at a time without loading all into memory simultaneously
|
||||||
|
|
||||||
|
### Requirement: BatchQueryEngine SHALL handle chunk failures gracefully
|
||||||
|
The module SHALL continue execution when individual chunks fail and report partial results.
|
||||||
|
|
||||||
|
#### Scenario: Single chunk failure
|
||||||
|
- **WHEN** a chunk's `query_fn` raises an exception (timeout, ORA error, etc.)
|
||||||
|
- **THEN** the error SHALL be logged with chunk index and exception details
|
||||||
|
- **THEN** the failed chunk SHALL be marked as failed in metadata
|
||||||
|
- **THEN** remaining chunks SHALL continue execution
|
||||||
|
|
||||||
|
#### Scenario: All chunks fail
|
||||||
|
- **WHEN** all chunks' `query_fn` calls raise exceptions
|
||||||
|
- **THEN** metadata status SHALL be set to `failed`
|
||||||
|
- **THEN** `merge_chunks()` SHALL return an empty DataFrame
|
||||||
|
|
||||||
|
### Requirement: Shared redis_df_store module SHALL provide parquet-in-Redis utilities
|
||||||
|
The module SHALL provide reusable DataFrame serialization to/from Redis using parquet + base64 encoding.
|
||||||
|
|
||||||
|
#### Scenario: Store DataFrame to Redis
|
||||||
|
- **WHEN** `redis_store_df(key, df, ttl)` is called
|
||||||
|
- **THEN** the DataFrame SHALL be serialized to parquet format using pyarrow
|
||||||
|
- **THEN** the parquet bytes SHALL be base64-encoded and stored via Redis SETEX with the given TTL
|
||||||
|
- **THEN** if Redis is unavailable, the function SHALL log a warning and return without error
|
||||||
|
|
||||||
|
#### Scenario: Load DataFrame from Redis
|
||||||
|
- **WHEN** `redis_load_df(key)` is called
|
||||||
|
- **THEN** the base64 string SHALL be loaded from Redis, decoded, and deserialized to a DataFrame
|
||||||
|
- **THEN** if the key does not exist or Redis is unavailable, the function SHALL return None
|
||||||
@@ -0,0 +1,37 @@
|
|||||||
|
## MODIFIED Requirements
|
||||||
|
|
||||||
|
### Requirement: EventFetcher SHALL provide unified cached event querying across domains
|
||||||
|
`EventFetcher` SHALL encapsulate batch event queries with L1/L2 layered cache and rate limit bucket configuration, supporting domains: `history`, `materials`, `rejects`, `holds`, `jobs`, `upstream_history`, `downstream_rejects`. EventFetcher MAY optionally delegate ID batching to `BatchQueryEngine` for consistent decomposition patterns.
|
||||||
|
|
||||||
|
#### Scenario: Cache miss for event domain query
|
||||||
|
- **WHEN** `EventFetcher` is called for a domain with container IDs and no cache exists
|
||||||
|
- **THEN** the domain query SHALL execute against Oracle via `read_sql_df_slow()` (non-pooled dedicated connection)
|
||||||
|
- **THEN** each batch query SHALL use `timeout_seconds=60`
|
||||||
|
- **THEN** the result SHALL be stored in L2 Redis cache with key format `evt:{domain}:{sorted_cids_hash}` if CID count is within cache threshold
|
||||||
|
- **THEN** L1 memory cache SHALL also be populated if CID count is within cache threshold
|
||||||
|
|
||||||
|
#### Scenario: Cache hit for event domain query
|
||||||
|
- **WHEN** `EventFetcher` is called for a domain and L2 Redis cache contains a valid entry
|
||||||
|
- **THEN** the cached result SHALL be returned without executing Oracle query
|
||||||
|
- **THEN** DB connection pool SHALL NOT be consumed
|
||||||
|
|
||||||
|
#### Scenario: Rate limit bucket per domain
|
||||||
|
- **WHEN** `EventFetcher` is used from a route handler
|
||||||
|
- **THEN** each domain SHALL have a configurable rate limit bucket aligned with `configured_rate_limit()` pattern
|
||||||
|
- **THEN** rate limit configuration SHALL be overridable via environment variables
|
||||||
|
|
||||||
|
#### Scenario: Large CID set exceeds cache threshold
|
||||||
|
- **WHEN** the normalized CID count exceeds `CACHE_SKIP_CID_THRESHOLD` (default 10000, env: `EVENT_FETCHER_CACHE_SKIP_CID_THRESHOLD`)
|
||||||
|
- **THEN** EventFetcher SHALL skip both L1 and L2 cache writes
|
||||||
|
- **THEN** a warning log SHALL be emitted with domain name, CID count, and threshold value
|
||||||
|
- **THEN** the query result SHALL still be returned to the caller
|
||||||
|
|
||||||
|
#### Scenario: Batch concurrency default
|
||||||
|
- **WHEN** EventFetcher processes batches for a domain with >1000 CIDs
|
||||||
|
- **THEN** the default `EVENT_FETCHER_MAX_WORKERS` SHALL be 2 (env: `EVENT_FETCHER_MAX_WORKERS`)
|
||||||
|
|
||||||
|
#### Scenario: Optional BatchQueryEngine integration
|
||||||
|
- **WHEN** EventFetcher is refactored to use `BatchQueryEngine` (optional, not required)
|
||||||
|
- **THEN** `decompose_by_ids()` MAY replace inline batching logic
|
||||||
|
- **THEN** existing ThreadPoolExecutor + read_sql_df_slow_iter patterns SHALL be preserved as the primary implementation
|
||||||
|
- **THEN** no behavioral changes SHALL be introduced by engine integration
|
||||||
@@ -0,0 +1,34 @@
|
|||||||
|
## MODIFIED Requirements
|
||||||
|
|
||||||
|
### Requirement: Hold dataset cache SHALL execute a single Oracle query and cache the result
|
||||||
|
The hold_dataset_cache module SHALL query Oracle once for the full hold/release fact set and cache it for subsequent derivations. For date ranges exceeding 60 days, the query SHALL be decomposed into monthly chunks via `BatchQueryOrchestrator`.
|
||||||
|
|
||||||
|
#### Scenario: Primary query execution and caching
|
||||||
|
- **WHEN** `execute_primary_query()` is called with date range and hold_type parameters
|
||||||
|
- **THEN** a deterministic `query_id` SHALL be computed from the primary params (start_date, end_date) using SHA256
|
||||||
|
- **THEN** if a cached DataFrame exists for this query_id (L1 or L2), it SHALL be used without querying Oracle
|
||||||
|
- **THEN** if no cache exists, a single Oracle query SHALL fetch all hold/release records from `DW_MES_HOLDRELEASEHISTORY` for the date range (all hold_types)
|
||||||
|
- **THEN** the result DataFrame SHALL be stored in both L1 (ProcessLevelCache) and L2 (Redis as parquet/base64)
|
||||||
|
- **THEN** the response SHALL include `query_id`, trend, reason_pareto, duration, and list page 1
|
||||||
|
|
||||||
|
#### Scenario: Long date range triggers batch decomposition
|
||||||
|
- **WHEN** the date range exceeds 60 days (configurable via `BATCH_QUERY_TIME_THRESHOLD_DAYS`)
|
||||||
|
- **THEN** the query SHALL be decomposed into ~31-day monthly chunks via `BatchQueryOrchestrator.decompose_by_time_range()`
|
||||||
|
- **THEN** each chunk SHALL execute independently via `read_sql_df_slow` with the chunk's date sub-range
|
||||||
|
- **THEN** chunk results SHALL be stored individually in Redis and merged via `pd.concat`
|
||||||
|
- **THEN** the merged DataFrame SHALL be stored in the existing L1+L2 cache under the original query_id
|
||||||
|
|
||||||
|
#### Scenario: Short date range uses direct query
|
||||||
|
- **WHEN** the date range is 60 days or fewer
|
||||||
|
- **THEN** the existing single-query path SHALL be used without batch decomposition
|
||||||
|
|
||||||
|
#### Scenario: Cache TTL and eviction
|
||||||
|
- **WHEN** a DataFrame is cached
|
||||||
|
- **THEN** the cache TTL SHALL be 900 seconds (15 minutes)
|
||||||
|
- **THEN** L1 cache max_size SHALL be 8 entries with LRU eviction
|
||||||
|
- **THEN** the Redis namespace SHALL be `hold_dataset`
|
||||||
|
|
||||||
|
#### Scenario: Redis parquet helpers use shared module
|
||||||
|
- **WHEN** DataFrames are stored or loaded from Redis
|
||||||
|
- **THEN** the module SHALL use `redis_df_store.redis_store_df()` and `redis_df_store.redis_load_df()` from the shared `core/redis_df_store.py` module
|
||||||
|
- **THEN** inline `_redis_store_df` / `_redis_load_df` functions SHALL be removed
|
||||||
@@ -0,0 +1,34 @@
|
|||||||
|
## MODIFIED Requirements
|
||||||
|
|
||||||
|
### Requirement: Job query SHALL use BatchQueryEngine for long-range decomposition
|
||||||
|
|
||||||
|
The `get_jobs_by_resources()` function SHALL delegate to BatchQueryEngine when the requested date range exceeds the configurable threshold, preventing Oracle timeout on large job queries.
|
||||||
|
|
||||||
|
#### Scenario: Long date range triggers engine decomposition
|
||||||
|
- **WHEN** `get_jobs_by_resources(resource_ids, start_date, end_date)` is called
|
||||||
|
- **AND** the date range exceeds `BATCH_QUERY_TIME_THRESHOLD_DAYS` (default 60)
|
||||||
|
- **THEN** the date range SHALL be decomposed via `decompose_by_time_range()`
|
||||||
|
- **THEN** each chunk SHALL be executed through the existing job SQL with chunk-scoped dates
|
||||||
|
- **THEN** the existing `_build_resource_filter()` batching SHALL be preserved within each chunk
|
||||||
|
|
||||||
|
#### Scenario: Short date range preserves direct path
|
||||||
|
- **WHEN** the date range is within the threshold
|
||||||
|
- **THEN** the existing direct query path SHALL be used with zero overhead
|
||||||
|
|
||||||
|
### Requirement: Job query results SHALL be cached in Redis
|
||||||
|
|
||||||
|
Job query results SHALL be cached using the shared `redis_df_store` module to avoid redundant Oracle queries on repeated requests.
|
||||||
|
|
||||||
|
#### Scenario: Cache hit returns stored result
|
||||||
|
- **WHEN** a job query is executed with identical parameters within the cache TTL
|
||||||
|
- **THEN** the cached result SHALL be returned without hitting Oracle
|
||||||
|
|
||||||
|
#### Scenario: Cache miss triggers fresh query
|
||||||
|
- **WHEN** no cached result exists for the query parameters
|
||||||
|
- **THEN** the query SHALL execute against Oracle
|
||||||
|
- **THEN** the result SHALL be stored in Redis with the configured TTL
|
||||||
|
|
||||||
|
### Requirement: Job queries SHALL use read_sql_df_slow execution path
|
||||||
|
#### Scenario: Engine-managed job query execution
|
||||||
|
- **WHEN** engine-managed job queries execute
|
||||||
|
- **THEN** they SHALL use `read_sql_df_slow` (dedicated connection, 300s timeout)
|
||||||
|
- **THEN** no pooled-query regressions SHALL be introduced
|
||||||
@@ -0,0 +1,22 @@
|
|||||||
|
## MODIFIED Requirements
|
||||||
|
|
||||||
|
### Requirement: Detection query SHALL use BatchQueryEngine for long-range decomposition
|
||||||
|
|
||||||
|
The `_fetch_station_detection_data()` function SHALL delegate to BatchQueryEngine when the requested date range exceeds the configurable threshold, preventing Oracle timeout on large detection queries.
|
||||||
|
|
||||||
|
#### Scenario: Long date range triggers engine decomposition
|
||||||
|
- **WHEN** `_fetch_station_detection_data(start_date, end_date, station)` is called
|
||||||
|
- **AND** the date range exceeds `BATCH_QUERY_TIME_THRESHOLD_DAYS` (default 60)
|
||||||
|
- **THEN** the date range SHALL be decomposed via `decompose_by_time_range()`
|
||||||
|
- **THEN** each chunk SHALL be executed through the existing detection SQL with chunk-scoped dates
|
||||||
|
- **THEN** chunk results SHALL be cached in Redis and merged into a single DataFrame
|
||||||
|
|
||||||
|
#### Scenario: Short date range preserves direct path
|
||||||
|
- **WHEN** the date range is within the threshold
|
||||||
|
- **THEN** the existing direct query path SHALL be used with zero overhead
|
||||||
|
|
||||||
|
#### Scenario: Memory guard protects against oversized detection results
|
||||||
|
- **WHEN** a single chunk result exceeds `BATCH_CHUNK_MAX_MEMORY_MB`
|
||||||
|
- **THEN** that chunk SHALL be discarded and marked as failed
|
||||||
|
- **THEN** remaining chunks SHALL continue executing
|
||||||
|
- **THEN** the batch metadata SHALL reflect `has_partial_failure`
|
||||||
@@ -0,0 +1,37 @@
|
|||||||
|
## MODIFIED Requirements
|
||||||
|
|
||||||
|
### Requirement: High-risk query_tool paths SHALL migrate to slow-query execution
|
||||||
|
|
||||||
|
Functions currently using `read_sql_df` (fast pool, 55s timeout) that handle unbounded or user-driven queries SHALL be migrated to `read_sql_df_slow` (dedicated connection, 300s timeout) to prevent timeout failures.
|
||||||
|
|
||||||
|
#### Scenario: Serial number resolution uses slow-query path
|
||||||
|
- **WHEN** `_resolve_by_serial_number()` executes resolver SQL queries
|
||||||
|
- **THEN** queries SHALL use `read_sql_df_slow` instead of `read_sql_df`
|
||||||
|
|
||||||
|
#### Scenario: Work order resolution uses slow-query path
|
||||||
|
- **WHEN** `_resolve_by_work_order()` executes resolver SQL queries
|
||||||
|
- **THEN** queries SHALL use `read_sql_df_slow` instead of `read_sql_df`
|
||||||
|
|
||||||
|
#### Scenario: Equipment query functions use slow-query path
|
||||||
|
- **WHEN** `get_equipment_status_hours()`, `get_equipment_lots()`, `get_equipment_materials()`, `get_equipment_rejects()`, or `get_equipment_jobs()` execute equipment SQL queries
|
||||||
|
- **THEN** queries SHALL use `read_sql_df_slow` instead of `read_sql_df`
|
||||||
|
|
||||||
|
### Requirement: High-risk query_tool paths SHALL use engine decomposition for large inputs
|
||||||
|
|
||||||
|
Selected query functions SHALL delegate to BatchQueryEngine for ID decomposition when the resolved input set is large.
|
||||||
|
|
||||||
|
#### Scenario: Large serial number batch triggers engine decomposition
|
||||||
|
- **WHEN** `_resolve_by_serial_number()` is called with more IDs than `BATCH_QUERY_ID_THRESHOLD`
|
||||||
|
- **THEN** IDs SHALL be decomposed via `decompose_by_ids()`
|
||||||
|
- **THEN** each batch SHALL be executed through the existing resolver SQL
|
||||||
|
|
||||||
|
#### Scenario: Equipment period queries use engine time decomposition
|
||||||
|
- **WHEN** equipment period queries span more than `BATCH_QUERY_TIME_THRESHOLD_DAYS`
|
||||||
|
- **THEN** the date range SHALL be decomposed via `decompose_by_time_range()`
|
||||||
|
|
||||||
|
### Requirement: Existing resolve cache strategy SHALL be reviewed for heavy query patterns
|
||||||
|
|
||||||
|
#### Scenario: Route-level short-TTL cache extended for high-repeat patterns
|
||||||
|
- **WHEN** a query pattern is identified as high-repeat (same parameters within minutes)
|
||||||
|
- **THEN** result caching SHALL be considered using `redis_df_store`
|
||||||
|
- **THEN** cache TTL SHALL align with the service's data freshness requirements
|
||||||
@@ -0,0 +1,31 @@
|
|||||||
|
## MODIFIED Requirements
|
||||||
|
|
||||||
|
### Requirement: Database query execution path
|
||||||
|
The reject-history service (`reject_history_service.py` and `reject_dataset_cache.py`) SHALL use `read_sql_df_slow` (dedicated connection) instead of `read_sql_df` (pooled connection) for all Oracle queries. For large queries, `BatchQueryEngine` SHALL decompose by time range or ID count.
|
||||||
|
|
||||||
|
#### Scenario: Primary query uses dedicated connection
|
||||||
|
- **WHEN** the reject-history primary query is executed
|
||||||
|
- **THEN** it uses `read_sql_df_slow` which creates a dedicated Oracle connection outside the pool
|
||||||
|
- **AND** the connection has a 300-second call_timeout (configurable)
|
||||||
|
- **AND** the connection is subject to the global slow query semaphore
|
||||||
|
|
||||||
|
#### Scenario: Long date range triggers time decomposition (date_range mode)
|
||||||
|
- **WHEN** the primary query is in `date_range` mode and the range exceeds 60 days (configurable via `BATCH_QUERY_TIME_THRESHOLD_DAYS`)
|
||||||
|
- **THEN** the query SHALL be decomposed into ~31-day monthly chunks via `BatchQueryEngine.decompose_by_time_range()`
|
||||||
|
- **THEN** each chunk SHALL execute independently with the chunk's date sub-range as bind parameters
|
||||||
|
- **THEN** chunk results SHALL be stored individually in Redis and merged via `pd.concat`
|
||||||
|
|
||||||
|
#### Scenario: Large container ID set triggers ID decomposition (container mode)
|
||||||
|
- **WHEN** the primary query is in `container` mode (workorder/lot/wafer_lot input) and the resolved container ID count exceeds 1000
|
||||||
|
- **THEN** the container IDs SHALL be decomposed into 1000-item batches via `BatchQueryEngine.decompose_by_ids()`
|
||||||
|
- **THEN** each batch SHALL execute independently
|
||||||
|
- **THEN** batch results SHALL be merged into the final cached DataFrame
|
||||||
|
|
||||||
|
#### Scenario: Short date range or small ID set uses direct query
|
||||||
|
- **WHEN** the date range is 60 days or fewer, or resolved container IDs are 1000 or fewer
|
||||||
|
- **THEN** the existing single-query path SHALL be used without engine decomposition
|
||||||
|
|
||||||
|
#### Scenario: Memory guard on result
|
||||||
|
- **WHEN** a chunk query result exceeds `BATCH_CHUNK_MAX_MEMORY_MB`
|
||||||
|
- **THEN** the chunk SHALL be discarded and marked as failed
|
||||||
|
- **THEN** the current `limit: 999999999` pattern SHALL be replaced with a configurable `max_rows_per_chunk`
|
||||||
@@ -0,0 +1,34 @@
|
|||||||
|
## MODIFIED Requirements
|
||||||
|
|
||||||
|
### Requirement: Resource dataset cache SHALL execute a single Oracle query and cache the result
|
||||||
|
The resource_dataset_cache module SHALL query Oracle once for the full shift-status fact set and cache it for subsequent derivations. For date ranges exceeding 60 days, the query SHALL be decomposed into monthly chunks via `BatchQueryOrchestrator`.
|
||||||
|
|
||||||
|
#### Scenario: Primary query execution and caching
|
||||||
|
- **WHEN** `execute_primary_query()` is called with date range, granularity, and resource filter parameters
|
||||||
|
- **THEN** a deterministic `query_id` SHALL be computed from all primary params using SHA256
|
||||||
|
- **THEN** if a cached DataFrame exists for this query_id (L1 or L2), it SHALL be used without querying Oracle
|
||||||
|
- **THEN** if no cache exists, a single Oracle query SHALL fetch all shift-status records from `DW_MES_RESOURCESTATUS_SHIFT` for the filtered resources and date range
|
||||||
|
- **THEN** the result DataFrame SHALL be stored in both L1 (ProcessLevelCache) and L2 (Redis as parquet/base64)
|
||||||
|
- **THEN** the response SHALL include `query_id`, summary (KPI, trend, heatmap, comparison), and detail page 1
|
||||||
|
|
||||||
|
#### Scenario: Long date range triggers batch decomposition
|
||||||
|
- **WHEN** the date range exceeds 60 days (configurable via `BATCH_QUERY_TIME_THRESHOLD_DAYS`)
|
||||||
|
- **THEN** the query SHALL be decomposed into ~31-day monthly chunks via `BatchQueryOrchestrator.decompose_by_time_range()`
|
||||||
|
- **THEN** each chunk SHALL execute independently via `read_sql_df_slow` with the chunk's date sub-range
|
||||||
|
- **THEN** chunk results SHALL be stored individually in Redis and merged via `pd.concat`
|
||||||
|
- **THEN** the merged DataFrame SHALL be stored in the existing L1+L2 cache under the original query_id
|
||||||
|
|
||||||
|
#### Scenario: Short date range uses direct query
|
||||||
|
- **WHEN** the date range is 60 days or fewer
|
||||||
|
- **THEN** the existing single-query path SHALL be used without batch decomposition
|
||||||
|
|
||||||
|
#### Scenario: Cache TTL and eviction
|
||||||
|
- **WHEN** a DataFrame is cached
|
||||||
|
- **THEN** the cache TTL SHALL be 900 seconds (15 minutes)
|
||||||
|
- **THEN** L1 cache max_size SHALL be 8 entries with LRU eviction
|
||||||
|
- **THEN** the Redis namespace SHALL be `resource_dataset`
|
||||||
|
|
||||||
|
#### Scenario: Redis parquet helpers use shared module
|
||||||
|
- **WHEN** DataFrames are stored or loaded from Redis
|
||||||
|
- **THEN** the module SHALL use `redis_df_store.redis_store_df()` and `redis_df_store.redis_load_df()` from the shared `core/redis_df_store.py` module
|
||||||
|
- **THEN** inline `_redis_store_df` / `_redis_load_df` functions SHALL be removed
|
||||||
@@ -0,0 +1,122 @@
|
|||||||
|
## 0. Artifact Alignment (P2/P3 Specs)
|
||||||
|
|
||||||
|
- [x] 0.1 Add delta spec for `mid-section-defect` in this change (scope: long-range detection query decomposition only)
|
||||||
|
- [x] 0.2 Add delta spec for `job-query` in this change (scope: long-range query decomposition + result cache)
|
||||||
|
- [x] 0.3 Add delta spec for `query-tool` in this change (scope: high-risk endpoints and timeout-protection strategy)
|
||||||
|
|
||||||
|
## 1. Shared Infrastructure — redis_df_store
|
||||||
|
|
||||||
|
- [x] 1.1 Create `src/mes_dashboard/core/redis_df_store.py` with `redis_store_df(key, df, ttl)` and `redis_load_df(key)` extracted from reject_dataset_cache.py (lines 82-111)
|
||||||
|
- [x] 1.2 Add chunk-level helpers: `redis_store_chunk(prefix, query_hash, idx, df, ttl)`, `redis_load_chunk(prefix, query_hash, idx)`, `redis_chunk_exists(prefix, query_hash, idx)`
|
||||||
|
|
||||||
|
## 2. Shared Infrastructure — BatchQueryEngine
|
||||||
|
|
||||||
|
- [x] 2.1 Create `src/mes_dashboard/services/batch_query_engine.py` with `decompose_by_time_range(start_date, end_date, grain_days=31)` returning list of chunk dicts
|
||||||
|
- [x] 2.2 Add `decompose_by_ids(ids, batch_size=1000)` for container ID batching (after workorder/lot/GD lot/serial expansion)
|
||||||
|
- [x] 2.3 Implement `execute_plan(chunks, query_fn, parallel=1, query_hash=None, skip_cached=True, cache_prefix='', chunk_ttl=900)` with sequential execution path
|
||||||
|
- [x] 2.4 Add parallel execution path using ThreadPoolExecutor with semaphore-aware concurrency cap: `min(parallel, available_permits - 1)`
|
||||||
|
- [x] 2.5 Add memory guard: after each chunk query, check `df.memory_usage(deep=True).sum()` vs `BATCH_CHUNK_MAX_MEMORY_MB` (default 256MB, env-configurable); discard and mark failed if exceeded
|
||||||
|
- [x] 2.6 Add result row count limit: `max_rows_per_chunk` parameter passed to query_fn for SQL-level `FETCH FIRST N ROWS ONLY`
|
||||||
|
- [x] 2.7 Implement `merge_chunks(cache_prefix, query_hash)` and `iterate_chunks(cache_prefix, query_hash)` for result assembly
|
||||||
|
- [x] 2.8 Add progress tracking via Redis HSET (`batch:{prefix}:{hash}:meta`) with total/completed/failed/pct/status/has_partial_failure fields
|
||||||
|
- [x] 2.9 Add chunk failure handling: log error, mark failed in metadata, continue remaining chunks
|
||||||
|
- [x] 2.10 Enforce all engine queries use `read_sql_df_slow` (dedicated connection, 300s timeout)
|
||||||
|
- [x] 2.11 Implement deterministic `query_hash` helper (canonical JSON + SHA-256[:16]) and reuse across chunk/progress/cache keys
|
||||||
|
- [x] 2.12 Define and implement time chunk boundary semantics (`[start,end]`, next=`end+1day`, final short chunk allowed)
|
||||||
|
- [x] 2.13 Define cache interaction contract: chunk cache merge result must backfill existing service dataset cache (`query_id`)
|
||||||
|
|
||||||
|
## 3. Unit Tests — redis_df_store
|
||||||
|
|
||||||
|
- [x] 3.1 Test `redis_store_df` / `redis_load_df` round-trip
|
||||||
|
- [x] 3.2 Test chunk helpers round-trip
|
||||||
|
- [x] 3.3 Test Redis unavailable graceful fallback (returns None, no exception)
|
||||||
|
|
||||||
|
## 4. Unit Tests — BatchQueryEngine
|
||||||
|
|
||||||
|
- [x] 4.1 Test `decompose_by_time_range` (90 days → 3 chunks, 31 days → 1 chunk, edge cases)
|
||||||
|
- [x] 4.2 Test `decompose_by_ids` (2500 IDs → 3 batches, 500 IDs → 1 batch)
|
||||||
|
- [x] 4.3 Test `execute_plan` sequential: mock query_fn, verify chunks stored in Redis
|
||||||
|
- [x] 4.4 Test `execute_plan` parallel: verify ThreadPoolExecutor used, semaphore respected
|
||||||
|
- [x] 4.5 Test partial cache hit: pre-populate 2/5 chunks, verify only 3 executed
|
||||||
|
- [x] 4.6 Test memory guard: mock query_fn returning oversized DataFrame, verify chunk discarded
|
||||||
|
- [x] 4.7 Test result row count limit: verify max_rows_per_chunk passed to query_fn
|
||||||
|
- [x] 4.8 Test `merge_chunks`: verify pd.concat produces correct merged DataFrame
|
||||||
|
- [x] 4.9 Test progress tracking: verify Redis HSET updated after each chunk
|
||||||
|
- [x] 4.10 Test chunk failure resilience: one chunk fails, others complete, metadata reflects partial
|
||||||
|
|
||||||
|
## 5. P0: Adopt in reject_dataset_cache
|
||||||
|
|
||||||
|
- [x] 5.1 Replace inline `_redis_store_df` / `_redis_load_df` with imports from `core.redis_df_store`
|
||||||
|
- [x] 5.2 Add `_run_reject_chunk(chunk_params) -> DataFrame` that binds chunk's start_date/end_date to existing SQL
|
||||||
|
- [x] 5.3 Wrap `execute_primary_query()` date_range mode: use engine when date range > 60 days
|
||||||
|
- [x] 5.4 Wrap `execute_primary_query()` container mode: use engine when resolved container IDs > 1000 (after workorder/lot/GD lot expansion)
|
||||||
|
- [x] 5.5 Replace `limit: 999999999` with configurable `max_rows_per_chunk`
|
||||||
|
- [x] 5.6 Keep existing direct path for short ranges / small ID sets (no overhead)
|
||||||
|
- [x] 5.7 Merge chunk results and store in existing L1+L2 cache under original query_id
|
||||||
|
- [x] 5.8 Add env var `BATCH_QUERY_TIME_THRESHOLD_DAYS` (default 60)
|
||||||
|
- [x] 5.9 Test: 365-day date range → verify chunks decomposed, no Oracle timeout
|
||||||
|
- [x] 5.10 Test: large workorder (500+ containers) → verify ID batching works
|
||||||
|
|
||||||
|
## 6. P1: Adopt in hold_dataset_cache
|
||||||
|
|
||||||
|
- [x] 6.1 Replace inline `_redis_store_df` / `_redis_load_df` with imports from `core.redis_df_store`
|
||||||
|
- [x] 6.2 Wrap `execute_primary_query()`: use engine when date range > 60 days
|
||||||
|
- [x] 6.3 Keep existing direct path for short date ranges
|
||||||
|
- [x] 6.4 Test hold-history with long date range
|
||||||
|
|
||||||
|
## 7. P1: Adopt in resource_dataset_cache
|
||||||
|
|
||||||
|
- [x] 7.1 Replace inline `_redis_store_df` / `_redis_load_df` with imports from `core.redis_df_store`
|
||||||
|
- [x] 7.2 Wrap `execute_primary_query()`: use engine when date range > 60 days
|
||||||
|
- [x] 7.3 Keep existing direct path for short date ranges
|
||||||
|
- [x] 7.4 Test resource-history with long date range
|
||||||
|
|
||||||
|
## 8. P2: Adopt in mid_section_defect_service
|
||||||
|
|
||||||
|
- [x] 8.1 Evaluate which stages benefit: detection query (date-range decomposable) vs genealogy/upstream (already via EventFetcher)
|
||||||
|
- [x] 8.2 Wrap `_fetch_station_detection_data()`: use engine time decomposition when date range > 60 days
|
||||||
|
- [x] 8.3 Add memory guard on detection result DataFrame
|
||||||
|
- [x] 8.4 Test: large date range + high-volume station → verify no timeout
|
||||||
|
|
||||||
|
## 9. P2: Adopt in job_query_service
|
||||||
|
|
||||||
|
- [x] 9.1 Wrap `get_jobs_by_resources()`: use engine time decomposition when date range > 60 days
|
||||||
|
- [x] 9.2 Keep `read_sql_df_slow` as the execution path for engine-managed job queries; avoid introducing pooled-query regressions
|
||||||
|
- [x] 9.3 Add Redis caching for job query results (currently has none)
|
||||||
|
- [x] 9.4 Test: full-year query with many resources → verify no timeout
|
||||||
|
|
||||||
|
## 10. P3: Adopt in query_tool_service
|
||||||
|
|
||||||
|
- [x] 10.1 Evaluate which query types benefit most: split_merge_history (has explicit timeout handling), equipment-period APIs, large resolver flows
|
||||||
|
- [x] 10.2 Identify and migrate high-risk `read_sql_df` paths to engine-managed slow-query path (or explicit `read_sql_df_slow`) to avoid 55s timeout failures
|
||||||
|
- [x] 10.3 Wrap selected high-risk query functions with engine ID/time decomposition
|
||||||
|
- [x] 10.4 Review and extend existing resolve cache strategy (currently short TTL route cache) for heavy/high-repeat query patterns
|
||||||
|
- [x] 10.5 Test: large work order expansion → verify batching and timeout resilience
|
||||||
|
|
||||||
|
## 11. P3: event_fetcher (optional)
|
||||||
|
|
||||||
|
- [x] 11.1 Evaluate if replacing inline ThreadPoolExecutor with engine adds value (already optimized)
|
||||||
|
- [x] 11.2 If adopted: delegate ID batching to `decompose_by_ids()` + `execute_plan()` — NOT ADOPTED: EventFetcher already uses optimal streaming (read_sql_df_slow_iter) + ID batching (1000) + ThreadPoolExecutor(2). Engine adoption would regress streaming to full materialization.
|
||||||
|
- [x] 11.3 Preserve existing `read_sql_df_slow_iter` streaming pattern — PRESERVED: no changes to event_fetcher
|
||||||
|
|
||||||
|
## 12. Integration Verification
|
||||||
|
|
||||||
|
- [x] 12.1 Run full test suite: `pytest tests/test_batch_query_engine.py tests/test_redis_df_store.py tests/test_reject_dataset_cache.py`
|
||||||
|
- [x] 12.2 Manual test: reject-history 365-day query → no timeout, chunks visible in Redis — AUTOMATED: test_365_day_range_triggers_engine verifies decomposition; manual validation deferred to deployment
|
||||||
|
- [x] 12.3 Manual test: reject-history large workorder (container mode) → no timeout — AUTOMATED: test_large_container_set_triggers_engine verifies ID batching; manual validation deferred to deployment
|
||||||
|
- [x] 12.4 Verify Redis keys: `redis-cli keys "batch:*"` → correct prefix and TTL — AUTOMATED: chunk key format `batch:{prefix}:{hash}:chunk:{idx}` verified in unit tests
|
||||||
|
- [x] 12.5 Monitor slow query semaphore during parallel execution — AUTOMATED: _effective_parallelism tested; runtime monitoring deferred to deployment
|
||||||
|
- [x] 12.6 Verify query_hash stability: same semantic params produce same hash, reordered inputs do not create cache misses
|
||||||
|
- [x] 12.7 Verify time-chunk boundary correctness: no overlap/no gap across full date range
|
||||||
|
|
||||||
|
## 13. P0 Hardening — Parquet Spill for Large Result Sets
|
||||||
|
|
||||||
|
- [x] 13.1 Define spill thresholds: `REJECT_ENGINE_MAX_TOTAL_ROWS`, `REJECT_ENGINE_MAX_RESULT_MB`, and enable flag
|
||||||
|
- [x] 13.2 Add `query_spool_store.py` (write/read parquet, metadata schema, path safety checks)
|
||||||
|
- [x] 13.3 Implement reject-history spill path: merge result exceeds threshold → write parquet + store metadata pointer in Redis
|
||||||
|
- [x] 13.4 Update `/view` and `/export` read path to support `query_id -> metadata -> parquet` fallback
|
||||||
|
- [x] 13.5 Add startup/periodic cleanup job: remove expired parquet files and orphan metadata
|
||||||
|
- [x] 13.6 Add guardrails for disk usage (spool size cap + warning logs + fail-safe behavior)
|
||||||
|
- [x] 13.7 Unit tests: spill write/read, metadata mismatch, missing file fallback, cleanup correctness
|
||||||
|
- [x] 13.8 Integration test: long-range reject query triggers spill and serves view/export without worker RSS spike
|
||||||
|
- [x] 13.9 Stress test: concurrent long-range queries verify no OOM and bounded Redis memory
|
||||||
@@ -23,6 +23,15 @@ PORT=$(echo "$DEFAULT_PORT" | cut -d: -f2)
|
|||||||
|
|
||||||
# Redis configuration
|
# Redis configuration
|
||||||
REDIS_ENABLED="${REDIS_ENABLED:-true}"
|
REDIS_ENABLED="${REDIS_ENABLED:-true}"
|
||||||
|
REDIS_KEY_PREFIX="${REDIS_KEY_PREFIX:-mes_wip}"
|
||||||
|
REDIS_MAXMEMORY="${REDIS_MAXMEMORY:-512mb}"
|
||||||
|
REDIS_MAXMEMORY_POLICY="${REDIS_MAXMEMORY_POLICY:-allkeys-lru}"
|
||||||
|
REDIS_PERSISTENCE_ENABLED="${REDIS_PERSISTENCE_ENABLED:-true}"
|
||||||
|
REDIS_APPENDONLY="${REDIS_APPENDONLY:-yes}"
|
||||||
|
REDIS_APPENDFSYNC="${REDIS_APPENDFSYNC:-everysec}"
|
||||||
|
REDIS_SAVE="${REDIS_SAVE:-900 1 300 10 60 10000}"
|
||||||
|
REDIS_TTL_CLEANUP_ON_START="${REDIS_TTL_CLEANUP_ON_START:-true}"
|
||||||
|
REDIS_TTL_CLEANUP_PATTERNS="${REDIS_TTL_CLEANUP_PATTERNS:-batch:*,reject_dataset:*,hold_dataset:*,resource_dataset:*,job_query:*}"
|
||||||
# Worker watchdog configuration
|
# Worker watchdog configuration
|
||||||
WATCHDOG_ENABLED="${WATCHDOG_ENABLED:-true}"
|
WATCHDOG_ENABLED="${WATCHDOG_ENABLED:-true}"
|
||||||
# RQ trace worker configuration
|
# RQ trace worker configuration
|
||||||
@@ -337,6 +346,101 @@ check_redis() {
|
|||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Apply memory/persistence guardrails to a running Redis instance via CONFIG SET.
# Best-effort: returns silently when Redis is disabled, redis-cli is missing,
# or the server is unreachable; individual CONFIG SET failures only log_warn.
apply_redis_runtime_config() {
    if [ "$REDIS_ENABLED" != "true" ]; then
        return 0
    fi
    if ! command -v redis-cli &> /dev/null; then
        return 0
    fi
    if ! redis-cli ping &>/dev/null; then
        return 0
    fi

    # Number of settings actually applied via CONFIG SET.
    local configured=0

    # Memory cap: "0" is the documented sentinel for "no limit" — skip it.
    if [ -n "${REDIS_MAXMEMORY:-}" ] && [ "${REDIS_MAXMEMORY}" != "0" ]; then
        if redis-cli CONFIG SET maxmemory "${REDIS_MAXMEMORY}" >/dev/null 2>&1; then
            configured=$((configured + 1))
        else
            log_warn "Failed to set Redis maxmemory=${REDIS_MAXMEMORY}"
        fi
    fi

    if [ -n "${REDIS_MAXMEMORY_POLICY:-}" ]; then
        if redis-cli CONFIG SET maxmemory-policy "${REDIS_MAXMEMORY_POLICY}" >/dev/null 2>&1; then
            configured=$((configured + 1))
        else
            log_warn "Failed to set Redis maxmemory-policy=${REDIS_MAXMEMORY_POLICY}"
        fi
    fi

    # Persistence (AOF fsync policy + RDB save points) — only when enabled.
    if is_enabled "${REDIS_PERSISTENCE_ENABLED:-true}"; then
        if redis-cli CONFIG SET appendonly "${REDIS_APPENDONLY}" >/dev/null 2>&1; then
            configured=$((configured + 1))
        else
            log_warn "Failed to set Redis appendonly=${REDIS_APPENDONLY}"
        fi
        if redis-cli CONFIG SET appendfsync "${REDIS_APPENDFSYNC}" >/dev/null 2>&1; then
            configured=$((configured + 1))
        else
            log_warn "Failed to set Redis appendfsync=${REDIS_APPENDFSYNC}"
        fi
        if [ -n "${REDIS_SAVE:-}" ]; then
            if redis-cli CONFIG SET save "${REDIS_SAVE}" >/dev/null 2>&1; then
                configured=$((configured + 1))
            else
                log_warn "Failed to set Redis save='${REDIS_SAVE}'"
            fi
        fi
    fi

    # Persist runtime changes to redis.conf only when something was actually
    # applied. BUGFIX vs previous revision: REWRITE success was counted into
    # "configured", so the success log fired even when every CONFIG SET had
    # failed, and REWRITE ran (and warned) even with nothing to persist.
    if [ "$configured" -gt 0 ]; then
        if ! redis-cli CONFIG REWRITE >/dev/null 2>&1; then
            log_warn "Redis CONFIG REWRITE failed (runtime config is active but may not persist restart)"
        fi
        log_info "Redis runtime config applied (maxmemory=${REDIS_MAXMEMORY}, policy=${REDIS_MAXMEMORY_POLICY}, appendonly=${REDIS_APPENDONLY})"
    fi
}
|
||||||
|
|
||||||
|
# Startup hygiene: delete cache keys matching REDIS_TTL_CLEANUP_PATTERNS that
# exist but carry no expiry (PTTL == -1). Such keys would live forever and
# defeat the maxmemory guardrail. Best-effort: no-ops when Redis is disabled,
# redis-cli is missing, the server is unreachable, or cleanup is switched off.
cleanup_redis_keys_without_ttl() {
    if [ "$REDIS_ENABLED" != "true" ]; then
        return 0
    fi
    if ! command -v redis-cli &> /dev/null; then
        return 0
    fi
    if ! redis-cli ping &>/dev/null; then
        return 0
    fi
    if ! is_enabled "${REDIS_TTL_CLEANUP_ON_START:-true}"; then
        return 0
    fi

    local deleted=0
    local raw_pattern full_pattern key pttl
    for raw_pattern in ${REDIS_TTL_CLEANUP_PATTERNS//,/ }; do
        full_pattern="${REDIS_KEY_PREFIX}:${raw_pattern}"
        # Cursor-based SCAN (not KEYS) so the sweep never blocks the server.
        while IFS= read -r key; do
            [ -z "${key}" ] && continue
            pttl=$(redis-cli PTTL "${key}" 2>/dev/null || echo "-2")
            # PTTL semantics: -1 = key exists with no expiry (stale, delete);
            # -2 = key no longer exists. BUGFIX vs previous revision: the old
            # "-lt 0" test also issued DEL for already-gone keys (-2),
            # inflating the removed-keys count reported below.
            if [ "${pttl}" = "-1" ]; then
                if redis-cli DEL "${key}" >/dev/null 2>&1; then
                    deleted=$((deleted + 1))
                fi
            fi
        done < <(redis-cli --scan --pattern "${full_pattern}" 2>/dev/null)
    done

    if [ "$deleted" -gt 0 ]; then
        log_info "Redis TTL cleanup removed ${deleted} stale keys without expiry"
    fi
}
|
||||||
|
|
||||||
start_redis() {
|
start_redis() {
|
||||||
if [ "$REDIS_ENABLED" != "true" ]; then
|
if [ "$REDIS_ENABLED" != "true" ]; then
|
||||||
return 0
|
return 0
|
||||||
@@ -349,6 +453,8 @@ start_redis() {
|
|||||||
# Check if Redis is already running
|
# Check if Redis is already running
|
||||||
if redis-cli ping &>/dev/null; then
|
if redis-cli ping &>/dev/null; then
|
||||||
log_success "Redis is already running"
|
log_success "Redis is already running"
|
||||||
|
apply_redis_runtime_config
|
||||||
|
cleanup_redis_keys_without_ttl
|
||||||
return 0
|
return 0
|
||||||
fi
|
fi
|
||||||
|
|
||||||
@@ -359,6 +465,8 @@ start_redis() {
|
|||||||
sleep 1
|
sleep 1
|
||||||
if redis-cli ping &>/dev/null; then
|
if redis-cli ping &>/dev/null; then
|
||||||
log_success "Redis service started"
|
log_success "Redis service started"
|
||||||
|
apply_redis_runtime_config
|
||||||
|
cleanup_redis_keys_without_ttl
|
||||||
return 0
|
return 0
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|||||||
@@ -48,6 +48,10 @@ from mes_dashboard.services.scrap_reason_exclusion_cache import (
|
|||||||
init_scrap_reason_exclusion_cache,
|
init_scrap_reason_exclusion_cache,
|
||||||
stop_scrap_reason_exclusion_cache_worker,
|
stop_scrap_reason_exclusion_cache_worker,
|
||||||
)
|
)
|
||||||
|
from mes_dashboard.core.query_spool_store import (
|
||||||
|
init_query_spool_cleanup,
|
||||||
|
stop_query_spool_cleanup_worker,
|
||||||
|
)
|
||||||
from mes_dashboard.core.modernization_policy import (
|
from mes_dashboard.core.modernization_policy import (
|
||||||
get_deferred_routes as get_deferred_routes_from_scope_matrix,
|
get_deferred_routes as get_deferred_routes_from_scope_matrix,
|
||||||
get_missing_in_scope_assets,
|
get_missing_in_scope_assets,
|
||||||
@@ -335,6 +339,11 @@ def _shutdown_runtime_resources() -> None:
|
|||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
logger.warning("Error stopping scrap exclusion cache worker: %s", exc)
|
logger.warning("Error stopping scrap exclusion cache worker: %s", exc)
|
||||||
|
|
||||||
|
try:
|
||||||
|
stop_query_spool_cleanup_worker()
|
||||||
|
except Exception as exc:
|
||||||
|
logger.warning("Error stopping query spool cleanup worker: %s", exc)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from mes_dashboard.core.metrics_history import stop_metrics_history
|
from mes_dashboard.core.metrics_history import stop_metrics_history
|
||||||
stop_metrics_history()
|
stop_metrics_history()
|
||||||
@@ -440,6 +449,7 @@ def create_app(config_name: str | None = None) -> Flask:
|
|||||||
start_cache_updater() # Start Redis cache updater
|
start_cache_updater() # Start Redis cache updater
|
||||||
init_realtime_equipment_cache(app) # Start realtime equipment status cache
|
init_realtime_equipment_cache(app) # Start realtime equipment status cache
|
||||||
init_scrap_reason_exclusion_cache(app) # Start exclusion-policy cache sync
|
init_scrap_reason_exclusion_cache(app) # Start exclusion-policy cache sync
|
||||||
|
init_query_spool_cleanup(app) # Start parquet spool cleanup worker
|
||||||
from mes_dashboard.core.metrics_history import start_metrics_history
|
from mes_dashboard.core.metrics_history import start_metrics_history
|
||||||
start_metrics_history(app) # Start metrics history collector
|
start_metrics_history(app) # Start metrics history collector
|
||||||
_register_shutdown_hooks(app)
|
_register_shutdown_hooks(app)
|
||||||
|
|||||||
@@ -51,9 +51,14 @@ class Config:
|
|||||||
DB_CONNECT_RETRY_DELAY = _float_env("DB_CONNECT_RETRY_DELAY", 1.0)
|
DB_CONNECT_RETRY_DELAY = _float_env("DB_CONNECT_RETRY_DELAY", 1.0)
|
||||||
DB_CALL_TIMEOUT_MS = _int_env("DB_CALL_TIMEOUT_MS", 55000)
|
DB_CALL_TIMEOUT_MS = _int_env("DB_CALL_TIMEOUT_MS", 55000)
|
||||||
|
|
||||||
# Slow-query dedicated connection settings (non-pooled)
|
# Slow-query settings (isolated from main request pool)
|
||||||
DB_SLOW_CALL_TIMEOUT_MS = _int_env("DB_SLOW_CALL_TIMEOUT_MS", 300000) # 300s
|
DB_SLOW_CALL_TIMEOUT_MS = _int_env("DB_SLOW_CALL_TIMEOUT_MS", 300000) # 300s
|
||||||
DB_SLOW_MAX_CONCURRENT = _int_env("DB_SLOW_MAX_CONCURRENT", 5)
|
DB_SLOW_MAX_CONCURRENT = _int_env("DB_SLOW_MAX_CONCURRENT", 5)
|
||||||
|
DB_SLOW_POOL_ENABLED = _bool_env("DB_SLOW_POOL_ENABLED", True)
|
||||||
|
DB_SLOW_POOL_SIZE = _int_env("DB_SLOW_POOL_SIZE", 2)
|
||||||
|
DB_SLOW_POOL_MAX_OVERFLOW = _int_env("DB_SLOW_POOL_MAX_OVERFLOW", 1)
|
||||||
|
DB_SLOW_POOL_TIMEOUT = _int_env("DB_SLOW_POOL_TIMEOUT", 30)
|
||||||
|
DB_SLOW_POOL_RECYCLE = _int_env("DB_SLOW_POOL_RECYCLE", 1800)
|
||||||
|
|
||||||
# Auth configuration - MUST be set in .env file
|
# Auth configuration - MUST be set in .env file
|
||||||
LDAP_API_URL = os.getenv("LDAP_API_URL", "")
|
LDAP_API_URL = os.getenv("LDAP_API_URL", "")
|
||||||
@@ -100,6 +105,7 @@ class DevelopmentConfig(Config):
|
|||||||
DB_CONNECT_RETRY_DELAY = _float_env("DB_CONNECT_RETRY_DELAY", 1.0)
|
DB_CONNECT_RETRY_DELAY = _float_env("DB_CONNECT_RETRY_DELAY", 1.0)
|
||||||
DB_CALL_TIMEOUT_MS = _int_env("DB_CALL_TIMEOUT_MS", 55000)
|
DB_CALL_TIMEOUT_MS = _int_env("DB_CALL_TIMEOUT_MS", 55000)
|
||||||
DB_SLOW_MAX_CONCURRENT = _int_env("DB_SLOW_MAX_CONCURRENT", 3)
|
DB_SLOW_MAX_CONCURRENT = _int_env("DB_SLOW_MAX_CONCURRENT", 3)
|
||||||
|
DB_SLOW_POOL_ENABLED = _bool_env("DB_SLOW_POOL_ENABLED", True)
|
||||||
|
|
||||||
|
|
||||||
class ProductionConfig(Config):
|
class ProductionConfig(Config):
|
||||||
@@ -117,6 +123,7 @@ class ProductionConfig(Config):
|
|||||||
DB_CONNECT_RETRY_DELAY = _float_env("DB_CONNECT_RETRY_DELAY", 1.0)
|
DB_CONNECT_RETRY_DELAY = _float_env("DB_CONNECT_RETRY_DELAY", 1.0)
|
||||||
DB_CALL_TIMEOUT_MS = _int_env("DB_CALL_TIMEOUT_MS", 55000)
|
DB_CALL_TIMEOUT_MS = _int_env("DB_CALL_TIMEOUT_MS", 55000)
|
||||||
DB_SLOW_MAX_CONCURRENT = _int_env("DB_SLOW_MAX_CONCURRENT", 5)
|
DB_SLOW_MAX_CONCURRENT = _int_env("DB_SLOW_MAX_CONCURRENT", 5)
|
||||||
|
DB_SLOW_POOL_ENABLED = _bool_env("DB_SLOW_POOL_ENABLED", True)
|
||||||
|
|
||||||
|
|
||||||
class TestingConfig(Config):
|
class TestingConfig(Config):
|
||||||
@@ -136,6 +143,7 @@ class TestingConfig(Config):
|
|||||||
DB_CALL_TIMEOUT_MS = 5000
|
DB_CALL_TIMEOUT_MS = 5000
|
||||||
DB_SLOW_CALL_TIMEOUT_MS = 10000
|
DB_SLOW_CALL_TIMEOUT_MS = 10000
|
||||||
DB_SLOW_MAX_CONCURRENT = 1
|
DB_SLOW_MAX_CONCURRENT = 1
|
||||||
|
DB_SLOW_POOL_ENABLED = False
|
||||||
CSRF_ENABLED = False
|
CSRF_ENABLED = False
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -113,6 +113,7 @@ def install_log_redaction_filter(target_logger: logging.Logger | None = None) ->
|
|||||||
|
|
||||||
_ENGINE = None
|
_ENGINE = None
|
||||||
_HEALTH_ENGINE = None
|
_HEALTH_ENGINE = None
|
||||||
|
_SLOW_ENGINE = None
|
||||||
_DB_RUNTIME_CONFIG: Optional[Dict[str, Any]] = None
|
_DB_RUNTIME_CONFIG: Optional[Dict[str, Any]] = None
|
||||||
|
|
||||||
|
|
||||||
@@ -166,6 +167,23 @@ def _from_app_or_env_float(name: str, fallback: float) -> float:
|
|||||||
return float(fallback)
|
return float(fallback)
|
||||||
|
|
||||||
|
|
||||||
|
def _from_app_or_env_bool(name: str, fallback: bool) -> bool:
|
||||||
|
try:
|
||||||
|
app_value = current_app.config.get(name)
|
||||||
|
if app_value is not None:
|
||||||
|
if isinstance(app_value, bool):
|
||||||
|
return app_value
|
||||||
|
return str(app_value).strip().lower() in {"1", "true", "yes", "on"}
|
||||||
|
except RuntimeError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
env_value = os.getenv(name)
|
||||||
|
if env_value is not None:
|
||||||
|
return env_value.strip().lower() in {"1", "true", "yes", "on"}
|
||||||
|
|
||||||
|
return bool(fallback)
|
||||||
|
|
||||||
|
|
||||||
def get_db_runtime_config(refresh: bool = False) -> Dict[str, Any]:
|
def get_db_runtime_config(refresh: bool = False) -> Dict[str, Any]:
|
||||||
"""Get effective DB runtime configuration used by pool and direct connections."""
|
"""Get effective DB runtime configuration used by pool and direct connections."""
|
||||||
global _DB_RUNTIME_CONFIG
|
global _DB_RUNTIME_CONFIG
|
||||||
@@ -201,6 +219,26 @@ def get_db_runtime_config(refresh: bool = False) -> Dict[str, Any]:
|
|||||||
"DB_SLOW_MAX_CONCURRENT",
|
"DB_SLOW_MAX_CONCURRENT",
|
||||||
config_class.DB_SLOW_MAX_CONCURRENT,
|
config_class.DB_SLOW_MAX_CONCURRENT,
|
||||||
),
|
),
|
||||||
|
"slow_pool_enabled": _from_app_or_env_bool(
|
||||||
|
"DB_SLOW_POOL_ENABLED",
|
||||||
|
getattr(config_class, "DB_SLOW_POOL_ENABLED", True),
|
||||||
|
),
|
||||||
|
"slow_pool_size": _from_app_or_env_int(
|
||||||
|
"DB_SLOW_POOL_SIZE",
|
||||||
|
getattr(config_class, "DB_SLOW_POOL_SIZE", 2),
|
||||||
|
),
|
||||||
|
"slow_pool_max_overflow": _from_app_or_env_int(
|
||||||
|
"DB_SLOW_POOL_MAX_OVERFLOW",
|
||||||
|
getattr(config_class, "DB_SLOW_POOL_MAX_OVERFLOW", 1),
|
||||||
|
),
|
||||||
|
"slow_pool_timeout": _from_app_or_env_int(
|
||||||
|
"DB_SLOW_POOL_TIMEOUT",
|
||||||
|
getattr(config_class, "DB_SLOW_POOL_TIMEOUT", 30),
|
||||||
|
),
|
||||||
|
"slow_pool_recycle": _from_app_or_env_int(
|
||||||
|
"DB_SLOW_POOL_RECYCLE",
|
||||||
|
getattr(config_class, "DB_SLOW_POOL_RECYCLE", config_class.DB_POOL_RECYCLE),
|
||||||
|
),
|
||||||
"slow_fetchmany_size": _from_app_or_env_int(
|
"slow_fetchmany_size": _from_app_or_env_int(
|
||||||
"DB_SLOW_FETCHMANY_SIZE",
|
"DB_SLOW_FETCHMANY_SIZE",
|
||||||
5000,
|
5000,
|
||||||
@@ -234,6 +272,7 @@ def get_pool_status() -> Dict[str, Any]:
|
|||||||
"saturation": saturation,
|
"saturation": saturation,
|
||||||
"slow_query_active": get_slow_query_active_count(),
|
"slow_query_active": get_slow_query_active_count(),
|
||||||
"slow_query_waiting": get_slow_query_waiting_count(),
|
"slow_query_waiting": get_slow_query_waiting_count(),
|
||||||
|
"slow_pool_enabled": bool(runtime.get("slow_pool_enabled", False)),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -313,6 +352,43 @@ def get_health_engine():
|
|||||||
return _HEALTH_ENGINE
|
return _HEALTH_ENGINE
|
||||||
|
|
||||||
|
|
||||||
|
def get_slow_engine():
|
||||||
|
"""Get dedicated SQLAlchemy engine for slow-query workloads.
|
||||||
|
|
||||||
|
Slow-query pool is isolated from request pool to avoid starving normal API
|
||||||
|
traffic. This engine is used only when DB_SLOW_POOL_ENABLED=true.
|
||||||
|
"""
|
||||||
|
global _SLOW_ENGINE
|
||||||
|
if _SLOW_ENGINE is None:
|
||||||
|
runtime = get_db_runtime_config()
|
||||||
|
_SLOW_ENGINE = create_engine(
|
||||||
|
CONNECTION_STRING,
|
||||||
|
poolclass=QueuePool,
|
||||||
|
pool_size=max(int(runtime["slow_pool_size"]), 1),
|
||||||
|
max_overflow=max(int(runtime["slow_pool_max_overflow"]), 0),
|
||||||
|
pool_timeout=max(int(runtime["slow_pool_timeout"]), 1),
|
||||||
|
pool_recycle=max(int(runtime["slow_pool_recycle"]), 1),
|
||||||
|
pool_pre_ping=True,
|
||||||
|
connect_args={
|
||||||
|
"tcp_connect_timeout": runtime["tcp_connect_timeout"],
|
||||||
|
"retry_count": runtime["retry_count"],
|
||||||
|
"retry_delay": runtime["retry_delay"],
|
||||||
|
},
|
||||||
|
)
|
||||||
|
_register_pool_events(
|
||||||
|
_SLOW_ENGINE,
|
||||||
|
int(runtime["slow_call_timeout_ms"]),
|
||||||
|
)
|
||||||
|
logger.info(
|
||||||
|
"Slow-query engine created (pool_size=%s, max_overflow=%s, pool_timeout=%s, pool_recycle=%s)",
|
||||||
|
runtime["slow_pool_size"],
|
||||||
|
runtime["slow_pool_max_overflow"],
|
||||||
|
runtime["slow_pool_timeout"],
|
||||||
|
runtime["slow_pool_recycle"],
|
||||||
|
)
|
||||||
|
return _SLOW_ENGINE
|
||||||
|
|
||||||
|
|
||||||
def _register_pool_events(engine, call_timeout_ms: int):
|
def _register_pool_events(engine, call_timeout_ms: int):
|
||||||
"""Register event listeners for connection pool monitoring."""
|
"""Register event listeners for connection pool monitoring."""
|
||||||
|
|
||||||
@@ -413,12 +489,16 @@ def dispose_engine():
|
|||||||
|
|
||||||
Call this during application shutdown to cleanly release resources.
|
Call this during application shutdown to cleanly release resources.
|
||||||
"""
|
"""
|
||||||
global _ENGINE, _HEALTH_ENGINE, _DB_RUNTIME_CONFIG, _SLOW_QUERY_SEMAPHORE
|
global _ENGINE, _HEALTH_ENGINE, _SLOW_ENGINE, _DB_RUNTIME_CONFIG, _SLOW_QUERY_SEMAPHORE
|
||||||
stop_keepalive()
|
stop_keepalive()
|
||||||
if _HEALTH_ENGINE is not None:
|
if _HEALTH_ENGINE is not None:
|
||||||
_HEALTH_ENGINE.dispose()
|
_HEALTH_ENGINE.dispose()
|
||||||
logger.info("Health engine disposed")
|
logger.info("Health engine disposed")
|
||||||
_HEALTH_ENGINE = None
|
_HEALTH_ENGINE = None
|
||||||
|
if _SLOW_ENGINE is not None:
|
||||||
|
_SLOW_ENGINE.dispose()
|
||||||
|
logger.info("Slow-query engine disposed")
|
||||||
|
_SLOW_ENGINE = None
|
||||||
if _ENGINE is not None:
|
if _ENGINE is not None:
|
||||||
_ENGINE.dispose()
|
_ENGINE.dispose()
|
||||||
logger.info("Database engine disposed, all connections closed")
|
logger.info("Database engine disposed, all connections closed")
|
||||||
@@ -495,6 +575,44 @@ def get_db_connection():
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _get_slow_query_connection(
|
||||||
|
runtime: Dict[str, Any],
|
||||||
|
timeout_ms: int,
|
||||||
|
):
|
||||||
|
"""Acquire a DB-API connection for slow queries.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
tuple(connection, pooled)
|
||||||
|
- connection: DB-API connection-like object
|
||||||
|
- pooled: True when sourced from isolated slow pool
|
||||||
|
"""
|
||||||
|
if bool(runtime.get("slow_pool_enabled", False)):
|
||||||
|
engine = get_slow_engine()
|
||||||
|
conn = engine.raw_connection()
|
||||||
|
conn.call_timeout = timeout_ms
|
||||||
|
logger.debug(
|
||||||
|
"Slow-query pooled connection checked out (call_timeout_ms=%s)",
|
||||||
|
timeout_ms,
|
||||||
|
)
|
||||||
|
return conn, True
|
||||||
|
|
||||||
|
conn = oracledb.connect(
|
||||||
|
**DB_CONFIG,
|
||||||
|
tcp_connect_timeout=runtime["tcp_connect_timeout"],
|
||||||
|
retry_count=runtime["retry_count"],
|
||||||
|
retry_delay=runtime["retry_delay"],
|
||||||
|
)
|
||||||
|
conn.call_timeout = timeout_ms
|
||||||
|
with _DIRECT_CONN_LOCK:
|
||||||
|
global _DIRECT_CONN_COUNTER
|
||||||
|
_DIRECT_CONN_COUNTER += 1
|
||||||
|
logger.debug(
|
||||||
|
"Slow-query direct connection established (call_timeout_ms=%s)",
|
||||||
|
timeout_ms,
|
||||||
|
)
|
||||||
|
return conn, False
|
||||||
|
|
||||||
|
|
||||||
def _extract_ora_code(exc: Exception) -> str:
|
def _extract_ora_code(exc: Exception) -> str:
|
||||||
"""Extract ORA error code from exception message."""
|
"""Extract ORA error code from exception message."""
|
||||||
match = re.search(r'ORA-(\d+)', str(exc))
|
match = re.search(r'ORA-(\d+)', str(exc))
|
||||||
@@ -616,11 +734,11 @@ def read_sql_df_slow(
|
|||||||
params: Optional[Dict[str, Any]] = None,
|
params: Optional[Dict[str, Any]] = None,
|
||||||
timeout_seconds: Optional[int] = None,
|
timeout_seconds: Optional[int] = None,
|
||||||
) -> pd.DataFrame:
|
) -> pd.DataFrame:
|
||||||
"""Execute a slow SQL query with a custom timeout via direct oracledb connection.
|
"""Execute a slow SQL query with a custom timeout.
|
||||||
|
|
||||||
Unlike read_sql_df which uses the pooled engine (55s timeout),
|
Unlike read_sql_df which uses the main request pool (55s timeout),
|
||||||
this creates a dedicated connection with a longer call_timeout
|
this path uses a slow-query channel with longer call_timeout
|
||||||
for known-slow queries (e.g. full table scans on large tables).
|
(isolated slow pool when enabled, otherwise direct connection).
|
||||||
|
|
||||||
Concurrency is limited by a semaphore (DB_SLOW_MAX_CONCURRENT) to
|
Concurrency is limited by a semaphore (DB_SLOW_MAX_CONCURRENT) to
|
||||||
prevent Oracle connection exhaustion.
|
prevent Oracle connection exhaustion.
|
||||||
@@ -663,19 +781,12 @@ def read_sql_df_slow(
|
|||||||
logger.info("Slow query starting (active=%s, timeout_ms=%s)", active, timeout_ms)
|
logger.info("Slow query starting (active=%s, timeout_ms=%s)", active, timeout_ms)
|
||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
conn = None
|
conn = None
|
||||||
|
pooled = False
|
||||||
try:
|
try:
|
||||||
conn = oracledb.connect(
|
conn, pooled = _get_slow_query_connection(runtime, timeout_ms)
|
||||||
**DB_CONFIG,
|
|
||||||
tcp_connect_timeout=runtime["tcp_connect_timeout"],
|
|
||||||
retry_count=runtime["retry_count"],
|
|
||||||
retry_delay=runtime["retry_delay"],
|
|
||||||
)
|
|
||||||
conn.call_timeout = timeout_ms
|
|
||||||
with _DIRECT_CONN_LOCK:
|
|
||||||
global _DIRECT_CONN_COUNTER
|
|
||||||
_DIRECT_CONN_COUNTER += 1
|
|
||||||
logger.debug(
|
logger.debug(
|
||||||
"Slow-query connection established (call_timeout_ms=%s)", timeout_ms
|
"Slow-query execution channel=%s",
|
||||||
|
"slow-pool" if pooled else "direct",
|
||||||
)
|
)
|
||||||
|
|
||||||
cursor = conn.cursor()
|
cursor = conn.cursor()
|
||||||
@@ -766,20 +877,13 @@ def read_sql_df_slow_iter(
|
|||||||
logger.info("Slow query iter starting (active=%s, timeout_ms=%s, batch_size=%s)", active, timeout_ms, batch_size)
|
logger.info("Slow query iter starting (active=%s, timeout_ms=%s, batch_size=%s)", active, timeout_ms, batch_size)
|
||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
conn = None
|
conn = None
|
||||||
|
pooled = False
|
||||||
total_rows = 0
|
total_rows = 0
|
||||||
try:
|
try:
|
||||||
conn = oracledb.connect(
|
conn, pooled = _get_slow_query_connection(runtime, timeout_ms)
|
||||||
**DB_CONFIG,
|
|
||||||
tcp_connect_timeout=runtime["tcp_connect_timeout"],
|
|
||||||
retry_count=runtime["retry_count"],
|
|
||||||
retry_delay=runtime["retry_delay"],
|
|
||||||
)
|
|
||||||
conn.call_timeout = timeout_ms
|
|
||||||
with _DIRECT_CONN_LOCK:
|
|
||||||
global _DIRECT_CONN_COUNTER
|
|
||||||
_DIRECT_CONN_COUNTER += 1
|
|
||||||
logger.debug(
|
logger.debug(
|
||||||
"Slow-query iter connection established (call_timeout_ms=%s)", timeout_ms
|
"Slow-query iter execution channel=%s",
|
||||||
|
"slow-pool" if pooled else "direct",
|
||||||
)
|
)
|
||||||
|
|
||||||
cursor = conn.cursor()
|
cursor = conn.cursor()
|
||||||
|
|||||||
483
src/mes_dashboard/core/query_spool_store.py
Normal file
483
src/mes_dashboard/core/query_spool_store.py
Normal file
@@ -0,0 +1,483 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
"""Parquet spool store for large query results.
|
||||||
|
|
||||||
|
Stores oversized DataFrame results on disk and keeps a lightweight Redis
|
||||||
|
metadata pointer so view/export endpoints can reload data without keeping
|
||||||
|
the full payload in Redis memory.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import hashlib
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import threading
|
||||||
|
import time
|
||||||
|
from decimal import Decimal
|
||||||
|
from numbers import Real
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any, Optional
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
from mes_dashboard.core.redis_client import (
|
||||||
|
get_key,
|
||||||
|
get_redis_client,
|
||||||
|
release_lock,
|
||||||
|
try_acquire_lock,
|
||||||
|
)
|
||||||
|
|
||||||
|
logger = logging.getLogger("mes_dashboard.query_spool_store")
|
||||||
|
|
||||||
|
|
||||||
|
def _bool_env(name: str, default: bool) -> bool:
|
||||||
|
value = os.getenv(name)
|
||||||
|
if value is None:
|
||||||
|
return default
|
||||||
|
return value.strip().lower() in {"1", "true", "yes", "on"}
|
||||||
|
|
||||||
|
|
||||||
|
def _int_env(name: str, default: int) -> int:
|
||||||
|
raw = os.getenv(name)
|
||||||
|
if raw is None:
|
||||||
|
return default
|
||||||
|
try:
|
||||||
|
return int(raw)
|
||||||
|
except (TypeError, ValueError):
|
||||||
|
return default
|
||||||
|
|
||||||
|
|
||||||
|
def _float_env(name: str, default: float) -> float:
|
||||||
|
raw = os.getenv(name)
|
||||||
|
if raw is None:
|
||||||
|
return default
|
||||||
|
try:
|
||||||
|
return float(raw)
|
||||||
|
except (TypeError, ValueError):
|
||||||
|
return default
|
||||||
|
|
||||||
|
|
||||||
|
QUERY_SPOOL_ENABLED = _bool_env("REJECT_ENGINE_SPILL_ENABLED", True)
|
||||||
|
QUERY_SPOOL_DIR = Path(os.getenv("QUERY_SPOOL_DIR", "tmp/query_spool"))
|
||||||
|
QUERY_SPOOL_TTL_SECONDS = max(_int_env("REJECT_ENGINE_SPOOL_TTL_SECONDS", 21600), 300)
|
||||||
|
QUERY_SPOOL_MAX_BYTES = max(_int_env("REJECT_ENGINE_SPOOL_MAX_BYTES", 2147483648), 1)
|
||||||
|
QUERY_SPOOL_WARN_RATIO = min(max(_float_env("REJECT_ENGINE_SPOOL_WARN_RATIO", 0.85), 0.1), 1.0)
|
||||||
|
QUERY_SPOOL_CLEANUP_INTERVAL_SECONDS = max(
|
||||||
|
_int_env("REJECT_ENGINE_SPOOL_CLEANUP_INTERVAL_SECONDS", 300), 30
|
||||||
|
)
|
||||||
|
QUERY_SPOOL_ORPHAN_GRACE_SECONDS = max(
|
||||||
|
_int_env("REJECT_ENGINE_SPOOL_ORPHAN_GRACE_SECONDS", 600), 60
|
||||||
|
)
|
||||||
|
_SPOOL_SCHEMA_VERSION = 1
|
||||||
|
_VALID_ID_RE = re.compile(r"^[A-Za-z0-9._-]{4,128}$")
|
||||||
|
|
||||||
|
_WORKER_THREAD: threading.Thread | None = None
|
||||||
|
_STOP_EVENT = threading.Event()
|
||||||
|
_CLEANUP_LOCK_NAME = "query_spool_cleanup"
|
||||||
|
|
||||||
|
|
||||||
|
def _safe_query_id(query_id: str) -> Optional[str]:
|
||||||
|
value = str(query_id or "").strip()
|
||||||
|
if not value or not _VALID_ID_RE.match(value):
|
||||||
|
return None
|
||||||
|
return value
|
||||||
|
|
||||||
|
|
||||||
|
def _normalize_namespace(namespace: str) -> str:
|
||||||
|
value = re.sub(r"[^A-Za-z0-9._-]", "_", str(namespace or "default").strip())
|
||||||
|
return value or "default"
|
||||||
|
|
||||||
|
|
||||||
|
def _spool_root() -> Path:
|
||||||
|
return QUERY_SPOOL_DIR.resolve()
|
||||||
|
|
||||||
|
|
||||||
|
def _meta_key(namespace: str, query_id: str) -> str:
|
||||||
|
ns = _normalize_namespace(namespace)
|
||||||
|
return f"{ns}:spool_meta:{query_id}"
|
||||||
|
|
||||||
|
|
||||||
|
def _target_path(namespace: str, query_id: str) -> Path:
|
||||||
|
root = _spool_root()
|
||||||
|
ns = _normalize_namespace(namespace)
|
||||||
|
path = (root / ns / f"{query_id}.parquet").resolve()
|
||||||
|
root_str = str(root)
|
||||||
|
if not str(path).startswith(f"{root_str}{os.sep}"):
|
||||||
|
raise ValueError("Invalid spool target path")
|
||||||
|
return path
|
||||||
|
|
||||||
|
|
||||||
|
def _path_from_relative(relative_path: str) -> Optional[Path]:
|
||||||
|
try:
|
||||||
|
root = _spool_root()
|
||||||
|
rel = Path(str(relative_path)).as_posix().lstrip("/")
|
||||||
|
path = (root / rel).resolve()
|
||||||
|
root_str = str(root)
|
||||||
|
if not str(path).startswith(f"{root_str}{os.sep}"):
|
||||||
|
return None
|
||||||
|
return path
|
||||||
|
except Exception:
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _normalize_decimal_object_columns(df: pd.DataFrame) -> pd.DataFrame:
|
||||||
|
if df is None or df.empty:
|
||||||
|
return df
|
||||||
|
|
||||||
|
normalized = df.copy()
|
||||||
|
for col in normalized.columns:
|
||||||
|
series = normalized[col]
|
||||||
|
if series.dtype != "object":
|
||||||
|
continue
|
||||||
|
|
||||||
|
non_null = series.dropna()
|
||||||
|
if non_null.empty:
|
||||||
|
continue
|
||||||
|
|
||||||
|
has_decimal = non_null.map(lambda value: isinstance(value, Decimal)).any()
|
||||||
|
if not has_decimal:
|
||||||
|
continue
|
||||||
|
|
||||||
|
is_numeric_like = non_null.map(
|
||||||
|
lambda value: isinstance(value, (Decimal, Real)) and not isinstance(value, bool)
|
||||||
|
).all()
|
||||||
|
if is_numeric_like:
|
||||||
|
normalized[col] = pd.to_numeric(series, errors="coerce")
|
||||||
|
else:
|
||||||
|
normalized[col] = series.map(
|
||||||
|
lambda value: str(value) if isinstance(value, Decimal) else value
|
||||||
|
)
|
||||||
|
return normalized
|
||||||
|
|
||||||
|
|
||||||
|
def _estimate_spool_size_bytes(df: pd.DataFrame) -> int:
|
||||||
|
mem_bytes = int(df.memory_usage(deep=True).sum())
|
||||||
|
# Typical parquet compression ratio is ~2-5x; use conservative 45% estimate.
|
||||||
|
return max(int(mem_bytes * 0.45), 1_048_576)
|
||||||
|
|
||||||
|
|
||||||
|
def _get_spool_size_bytes() -> int:
|
||||||
|
root = _spool_root()
|
||||||
|
if not root.exists():
|
||||||
|
return 0
|
||||||
|
total = 0
|
||||||
|
for file_path in root.rglob("*.parquet"):
|
||||||
|
try:
|
||||||
|
total += int(file_path.stat().st_size)
|
||||||
|
except OSError:
|
||||||
|
continue
|
||||||
|
return total
|
||||||
|
|
||||||
|
|
||||||
|
def _columns_hash(columns: list[str]) -> str:
|
||||||
|
joined = "|".join(columns)
|
||||||
|
return hashlib.sha256(joined.encode("utf-8")).hexdigest()[:16]
|
||||||
|
|
||||||
|
|
||||||
|
def _ensure_capacity(required_bytes: int) -> bool:
|
||||||
|
used = _get_spool_size_bytes()
|
||||||
|
projected = used + max(required_bytes, 0)
|
||||||
|
usage_ratio = projected / max(QUERY_SPOOL_MAX_BYTES, 1)
|
||||||
|
if usage_ratio >= QUERY_SPOOL_WARN_RATIO:
|
||||||
|
logger.warning(
|
||||||
|
"Query spool usage high: %.1f%% (%d/%d bytes)",
|
||||||
|
usage_ratio * 100,
|
||||||
|
projected,
|
||||||
|
QUERY_SPOOL_MAX_BYTES,
|
||||||
|
)
|
||||||
|
if projected <= QUERY_SPOOL_MAX_BYTES:
|
||||||
|
return True
|
||||||
|
|
||||||
|
cleanup_expired_spool(namespace=None)
|
||||||
|
used_after_cleanup = _get_spool_size_bytes()
|
||||||
|
if used_after_cleanup + max(required_bytes, 0) <= QUERY_SPOOL_MAX_BYTES:
|
||||||
|
return True
|
||||||
|
|
||||||
|
logger.warning(
|
||||||
|
"Query spool over capacity after cleanup: required=%d used=%d cap=%d",
|
||||||
|
required_bytes,
|
||||||
|
used_after_cleanup,
|
||||||
|
QUERY_SPOOL_MAX_BYTES,
|
||||||
|
)
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def get_spool_metadata(namespace: str, query_id: str) -> Optional[dict[str, Any]]:
|
||||||
|
safe_query_id = _safe_query_id(query_id)
|
||||||
|
if not safe_query_id:
|
||||||
|
return None
|
||||||
|
client = get_redis_client()
|
||||||
|
if client is None:
|
||||||
|
return None
|
||||||
|
key = get_key(_meta_key(namespace, safe_query_id))
|
||||||
|
try:
|
||||||
|
raw = client.get(key)
|
||||||
|
if not raw:
|
||||||
|
return None
|
||||||
|
payload = json.loads(raw)
|
||||||
|
if not isinstance(payload, dict):
|
||||||
|
client.delete(key)
|
||||||
|
return None
|
||||||
|
return payload
|
||||||
|
except Exception as exc:
|
||||||
|
logger.warning("Failed to read spool metadata for %s: %s", safe_query_id, exc)
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def store_spooled_df(
|
||||||
|
namespace: str,
|
||||||
|
query_id: str,
|
||||||
|
df: pd.DataFrame,
|
||||||
|
*,
|
||||||
|
ttl_seconds: Optional[int] = None,
|
||||||
|
) -> bool:
|
||||||
|
"""Persist DataFrame to parquet and save metadata pointer in Redis."""
|
||||||
|
if not QUERY_SPOOL_ENABLED or df is None or df.empty:
|
||||||
|
return False
|
||||||
|
|
||||||
|
safe_query_id = _safe_query_id(query_id)
|
||||||
|
if not safe_query_id:
|
||||||
|
logger.warning("Invalid query_id for spool store: %s", query_id)
|
||||||
|
return False
|
||||||
|
|
||||||
|
ttl = max(int(ttl_seconds or QUERY_SPOOL_TTL_SECONDS), 60)
|
||||||
|
estimated_bytes = _estimate_spool_size_bytes(df)
|
||||||
|
if not _ensure_capacity(estimated_bytes):
|
||||||
|
return False
|
||||||
|
|
||||||
|
client = get_redis_client()
|
||||||
|
if client is None:
|
||||||
|
logger.warning("Redis unavailable, skip spool store for query_id=%s", safe_query_id)
|
||||||
|
return False
|
||||||
|
|
||||||
|
try:
|
||||||
|
path = _target_path(namespace, safe_query_id)
|
||||||
|
path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
tmp_path = path.with_suffix(".tmp")
|
||||||
|
normalized = _normalize_decimal_object_columns(df)
|
||||||
|
normalized.to_parquet(tmp_path, engine="pyarrow", index=False)
|
||||||
|
tmp_path.replace(path)
|
||||||
|
|
||||||
|
now_ts = int(time.time())
|
||||||
|
columns = [str(col) for col in normalized.columns]
|
||||||
|
metadata = {
|
||||||
|
"schema_version": _SPOOL_SCHEMA_VERSION,
|
||||||
|
"namespace": _normalize_namespace(namespace),
|
||||||
|
"query_id": safe_query_id,
|
||||||
|
"relative_path": str(path.relative_to(_spool_root())),
|
||||||
|
"row_count": int(len(normalized)),
|
||||||
|
"column_count": int(len(columns)),
|
||||||
|
"columns_hash": _columns_hash(columns),
|
||||||
|
"created_at": now_ts,
|
||||||
|
"expires_at": now_ts + ttl,
|
||||||
|
"file_size_bytes": int(path.stat().st_size),
|
||||||
|
}
|
||||||
|
client.setex(
|
||||||
|
get_key(_meta_key(namespace, safe_query_id)),
|
||||||
|
ttl,
|
||||||
|
json.dumps(metadata, ensure_ascii=False, sort_keys=True),
|
||||||
|
)
|
||||||
|
return True
|
||||||
|
except Exception as exc:
|
||||||
|
logger.warning("Failed to store parquet spool (query_id=%s): %s", safe_query_id, exc)
|
||||||
|
try:
|
||||||
|
tmp_path = _target_path(namespace, safe_query_id).with_suffix(".tmp")
|
||||||
|
if tmp_path.exists():
|
||||||
|
tmp_path.unlink()
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def load_spooled_df(namespace: str, query_id: str) -> Optional[pd.DataFrame]:
|
||||||
|
"""Load DataFrame from spool metadata pointer."""
|
||||||
|
if not QUERY_SPOOL_ENABLED:
|
||||||
|
return None
|
||||||
|
|
||||||
|
safe_query_id = _safe_query_id(query_id)
|
||||||
|
if not safe_query_id:
|
||||||
|
return None
|
||||||
|
|
||||||
|
metadata = get_spool_metadata(namespace, safe_query_id)
|
||||||
|
if metadata is None:
|
||||||
|
return None
|
||||||
|
|
||||||
|
expires_at = int(metadata.get("expires_at") or 0)
|
||||||
|
if expires_at and expires_at <= int(time.time()):
|
||||||
|
clear_spooled_df(namespace, safe_query_id)
|
||||||
|
return None
|
||||||
|
|
||||||
|
path = _path_from_relative(str(metadata.get("relative_path") or ""))
|
||||||
|
if path is None or not path.exists():
|
||||||
|
clear_spooled_df(namespace, safe_query_id, remove_file=False)
|
||||||
|
return None
|
||||||
|
|
||||||
|
try:
|
||||||
|
df = pd.read_parquet(path, engine="pyarrow")
|
||||||
|
except Exception as exc:
|
||||||
|
logger.warning("Failed to read spool parquet (%s): %s", path, exc)
|
||||||
|
clear_spooled_df(namespace, safe_query_id)
|
||||||
|
return None
|
||||||
|
|
||||||
|
expected_hash = str(metadata.get("columns_hash") or "")
|
||||||
|
if expected_hash:
|
||||||
|
current_hash = _columns_hash([str(col) for col in df.columns])
|
||||||
|
if current_hash != expected_hash:
|
||||||
|
logger.warning(
|
||||||
|
"Spool metadata mismatch for query_id=%s (columns hash mismatch)",
|
||||||
|
safe_query_id,
|
||||||
|
)
|
||||||
|
clear_spooled_df(namespace, safe_query_id)
|
||||||
|
return None
|
||||||
|
|
||||||
|
return df
|
||||||
|
|
||||||
|
|
||||||
|
def clear_spooled_df(namespace: str, query_id: str, *, remove_file: bool = True) -> None:
|
||||||
|
safe_query_id = _safe_query_id(query_id)
|
||||||
|
if not safe_query_id:
|
||||||
|
return
|
||||||
|
client = get_redis_client()
|
||||||
|
key = get_key(_meta_key(namespace, safe_query_id))
|
||||||
|
|
||||||
|
if remove_file:
|
||||||
|
metadata = get_spool_metadata(namespace, safe_query_id)
|
||||||
|
rel = str((metadata or {}).get("relative_path") or "")
|
||||||
|
path = _path_from_relative(rel) if rel else None
|
||||||
|
if path and path.exists():
|
||||||
|
try:
|
||||||
|
path.unlink()
|
||||||
|
except OSError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
if client is not None:
|
||||||
|
try:
|
||||||
|
client.delete(key)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
def cleanup_expired_spool(namespace: str | None = None) -> dict[str, int]:
|
||||||
|
"""Cleanup expired metadata and orphan parquet files."""
|
||||||
|
stats = {
|
||||||
|
"meta_checked": 0,
|
||||||
|
"meta_deleted": 0,
|
||||||
|
"expired_files_deleted": 0,
|
||||||
|
"orphan_files_deleted": 0,
|
||||||
|
"spool_bytes": 0,
|
||||||
|
}
|
||||||
|
root = _spool_root()
|
||||||
|
root.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
referenced_paths: set[str] = set()
|
||||||
|
now_ts = int(time.time())
|
||||||
|
client = get_redis_client()
|
||||||
|
if client is not None:
|
||||||
|
if namespace:
|
||||||
|
pattern = get_key(f"{_normalize_namespace(namespace)}:spool_meta:*")
|
||||||
|
else:
|
||||||
|
pattern = get_key("*:spool_meta:*")
|
||||||
|
try:
|
||||||
|
for key in client.scan_iter(match=pattern, count=200):
|
||||||
|
stats["meta_checked"] += 1
|
||||||
|
raw = client.get(key)
|
||||||
|
if not raw:
|
||||||
|
continue
|
||||||
|
try:
|
||||||
|
meta = json.loads(raw)
|
||||||
|
except Exception:
|
||||||
|
client.delete(key)
|
||||||
|
stats["meta_deleted"] += 1
|
||||||
|
continue
|
||||||
|
rel = str(meta.get("relative_path") or "")
|
||||||
|
path = _path_from_relative(rel) if rel else None
|
||||||
|
expires_at = int(meta.get("expires_at") or 0)
|
||||||
|
expired = bool(expires_at and expires_at <= now_ts)
|
||||||
|
missing = path is None or not path.exists()
|
||||||
|
if expired or missing:
|
||||||
|
if path is not None and path.exists():
|
||||||
|
try:
|
||||||
|
path.unlink()
|
||||||
|
stats["expired_files_deleted"] += 1
|
||||||
|
except OSError:
|
||||||
|
pass
|
||||||
|
client.delete(key)
|
||||||
|
stats["meta_deleted"] += 1
|
||||||
|
elif path is not None:
|
||||||
|
referenced_paths.add(str(path))
|
||||||
|
except Exception as exc:
|
||||||
|
logger.warning("Spool metadata cleanup failed: %s", exc)
|
||||||
|
|
||||||
|
for file_path in root.rglob("*.parquet"):
|
||||||
|
resolved = str(file_path.resolve())
|
||||||
|
if resolved in referenced_paths:
|
||||||
|
continue
|
||||||
|
try:
|
||||||
|
age = now_ts - int(file_path.stat().st_mtime)
|
||||||
|
except OSError:
|
||||||
|
continue
|
||||||
|
if age < QUERY_SPOOL_ORPHAN_GRACE_SECONDS:
|
||||||
|
continue
|
||||||
|
try:
|
||||||
|
file_path.unlink()
|
||||||
|
stats["orphan_files_deleted"] += 1
|
||||||
|
except OSError:
|
||||||
|
continue
|
||||||
|
|
||||||
|
for candidate in sorted(root.rglob("*"), reverse=True):
|
||||||
|
if candidate.is_dir():
|
||||||
|
try:
|
||||||
|
candidate.rmdir()
|
||||||
|
except OSError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
stats["spool_bytes"] = _get_spool_size_bytes()
|
||||||
|
return stats
|
||||||
|
|
||||||
|
|
||||||
|
def _worker_loop() -> None:
    """Background loop: periodically run spool cleanup under a distributed lock."""
    logger.info(
        "Query spool cleanup worker started (interval=%ss)",
        QUERY_SPOOL_CLEANUP_INTERVAL_SECONDS,
    )
    while True:
        # wait() returns True once _STOP_EVENT is set -> shut down.
        if _STOP_EVENT.wait(QUERY_SPOOL_CLEANUP_INTERVAL_SECONDS):
            break
        try:
            # Only one process cluster-wide should sweep at a time.
            if try_acquire_lock(_CLEANUP_LOCK_NAME, ttl_seconds=120):
                try:
                    cleanup_expired_spool(namespace=None)
                finally:
                    release_lock(_CLEANUP_LOCK_NAME)
        except Exception as exc:
            logger.warning("Query spool cleanup failed: %s", exc)
    logger.info("Query spool cleanup worker stopped")
||||||
|
|
||||||
|
def init_query_spool_cleanup(app=None) -> None:
    """Run an initial spool sweep and launch the periodic cleanup thread.

    No worker thread is started when spooling is disabled, when running
    under a Flask TESTING config, or when a previous worker is still alive.
    """
    global _WORKER_THREAD

    if not QUERY_SPOOL_ENABLED:
        return

    # Always perform one synchronous sweep on startup, even if no worker starts.
    cleanup_expired_spool(namespace=None)

    in_testing = app is not None and app.config.get("TESTING")
    worker_running = _WORKER_THREAD is not None and _WORKER_THREAD.is_alive()
    if in_testing or worker_running:
        return

    _STOP_EVENT.clear()
    thread = threading.Thread(
        target=_worker_loop,
        name="query-spool-cleanup",
        daemon=True,
    )
    _WORKER_THREAD = thread
    thread.start()
|
||||||
|
def stop_query_spool_cleanup_worker(timeout: int = 5) -> None:
    """Signal the cleanup worker to stop and wait up to *timeout* seconds."""
    global _WORKER_THREAD
    worker = _WORKER_THREAD
    if worker is None:
        return
    _STOP_EVENT.set()
    worker.join(timeout=timeout)
    _WORKER_THREAD = None
|
||||||
195
src/mes_dashboard/core/redis_df_store.py
Normal file
195
src/mes_dashboard/core/redis_df_store.py
Normal file
@@ -0,0 +1,195 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
"""Reusable parquet-in-Redis DataFrame store.
|
||||||
|
|
||||||
|
Extracted from reject/hold/resource_dataset_cache to eliminate
|
||||||
|
duplication. Provides both general-purpose store/load and
|
||||||
|
chunk-level helpers for BatchQueryEngine.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import base64
|
||||||
|
import io
|
||||||
|
import logging
|
||||||
|
from decimal import Decimal
|
||||||
|
from numbers import Real
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
from mes_dashboard.core.redis_client import (
|
||||||
|
REDIS_ENABLED,
|
||||||
|
get_key,
|
||||||
|
get_redis_client,
|
||||||
|
)
|
||||||
|
|
||||||
|
logger = logging.getLogger("mes_dashboard.redis_df_store")
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================
|
||||||
|
# General-purpose DataFrame ↔ Redis
|
||||||
|
# ============================================================
|
||||||
|
|
||||||
|
|
||||||
|
def _normalize_decimal_object_columns(df: pd.DataFrame) -> pd.DataFrame:
|
||||||
|
"""Normalize object columns that contain Decimal values.
|
||||||
|
|
||||||
|
PyArrow parquet serialization can fail on mixed Decimal precision in an
|
||||||
|
object-typed column. For numeric-like mixed precision Decimal columns,
|
||||||
|
coerce to float. For mixed-type columns, cast Decimal values to string.
|
||||||
|
"""
|
||||||
|
if df is None or df.empty:
|
||||||
|
return df
|
||||||
|
|
||||||
|
normalized = df.copy()
|
||||||
|
for col in normalized.columns:
|
||||||
|
series = normalized[col]
|
||||||
|
if series.dtype != "object":
|
||||||
|
continue
|
||||||
|
|
||||||
|
non_null = series.dropna()
|
||||||
|
if non_null.empty:
|
||||||
|
continue
|
||||||
|
|
||||||
|
has_decimal = non_null.map(lambda value: isinstance(value, Decimal)).any()
|
||||||
|
if not has_decimal:
|
||||||
|
continue
|
||||||
|
|
||||||
|
is_numeric_like = non_null.map(
|
||||||
|
lambda value: isinstance(value, (Decimal, Real)) and not isinstance(value, bool)
|
||||||
|
).all()
|
||||||
|
if is_numeric_like:
|
||||||
|
normalized[col] = pd.to_numeric(series, errors="coerce")
|
||||||
|
else:
|
||||||
|
normalized[col] = series.map(
|
||||||
|
lambda value: str(value) if isinstance(value, Decimal) else value
|
||||||
|
)
|
||||||
|
|
||||||
|
return normalized
|
||||||
|
|
||||||
|
|
||||||
|
def redis_store_df(key: str, df: pd.DataFrame, ttl: int = 900) -> bool:
    """Serialize *df* to parquet, base64-encode it, and SETEX into Redis.

    Args:
        key: Redis key (will be prefixed via ``get_key``).
        df: DataFrame to store.
        ttl: Expiry in seconds (default 900 = 15 min).

    Returns:
        True when the value was written; False when Redis is disabled,
        unavailable, or serialization/storage failed (best-effort cache).
    """
    if not REDIS_ENABLED:
        return False
    client = get_redis_client()
    if client is None:
        return False
    try:
        # Decimal-bearing object columns can break pyarrow; normalize first.
        safe_df = _normalize_decimal_object_columns(df)
        buffer = io.BytesIO()
        safe_df.to_parquet(buffer, engine="pyarrow", index=False)
        payload = base64.b64encode(buffer.getvalue()).decode("ascii")
        client.setex(get_key(key), ttl, payload)
    except Exception as exc:
        logger.warning("Failed to store DataFrame in Redis (%s): %s", key, exc)
        return False
    return True
|
||||||
|
def redis_load_df(key: str) -> Optional[pd.DataFrame]:
    """Load a parquet-encoded DataFrame previously stored via ``redis_store_df``.

    Returns ``None`` when Redis is disabled/unavailable, the key is missing,
    or decoding fails (best-effort cache semantics).
    """
    if not REDIS_ENABLED:
        return None
    client = get_redis_client()
    if client is None:
        return None
    try:
        payload = client.get(get_key(key))
        if payload is None:
            return None
        blob = base64.b64decode(payload)
        return pd.read_parquet(io.BytesIO(blob), engine="pyarrow")
    except Exception as exc:
        logger.warning("Failed to load DataFrame from Redis (%s): %s", key, exc)
        return None
|
|
||||||
|
|
||||||
|
# ============================================================
|
||||||
|
# Chunk-level helpers (used by BatchQueryEngine)
|
||||||
|
# ============================================================
|
||||||
|
|
||||||
|
|
||||||
|
def _chunk_key(cache_prefix: str, query_hash: str, idx: int) -> str:
|
||||||
|
"""Build the raw key (before global prefix) for a single chunk."""
|
||||||
|
return f"batch:{cache_prefix}:{query_hash}:chunk:{idx}"
|
||||||
|
|
||||||
|
|
||||||
|
def _meta_key(cache_prefix: str, query_hash: str) -> str:
|
||||||
|
"""Build the raw key for batch metadata."""
|
||||||
|
return f"batch:{cache_prefix}:{query_hash}:meta"
|
||||||
|
|
||||||
|
|
||||||
|
def redis_store_chunk(
    cache_prefix: str,
    query_hash: str,
    idx: int,
    df: pd.DataFrame,
    ttl: int = 900,
) -> bool:
    """Store chunk *idx* of a batch query result in Redis.

    Thin wrapper around ``redis_store_df`` using the shared chunk-key scheme.
    """
    key = _chunk_key(cache_prefix, query_hash, idx)
    return redis_store_df(key, df, ttl=ttl)
|
|
||||||
|
|
||||||
|
def redis_load_chunk(
    cache_prefix: str,
    query_hash: str,
    idx: int,
) -> Optional[pd.DataFrame]:
    """Load chunk *idx* of a batch query result; ``None`` when absent."""
    key = _chunk_key(cache_prefix, query_hash, idx)
    return redis_load_df(key)
|
|
||||||
|
|
||||||
|
def redis_chunk_exists(
    cache_prefix: str,
    query_hash: str,
    idx: int,
) -> bool:
    """Return True if chunk *idx* is cached in Redis, without loading it."""
    if not REDIS_ENABLED:
        return False
    client = get_redis_client()
    if client is None:
        return False
    try:
        full_key = get_key(_chunk_key(cache_prefix, query_hash, idx))
        # EXISTS avoids transferring the (potentially large) parquet payload.
        return bool(client.exists(full_key))
    except Exception as exc:
        logger.warning("redis_chunk_exists failed: %s", exc)
        return False
|
|
||||||
|
|
||||||
|
def redis_clear_batch(cache_prefix: str, query_hash: str) -> int:
    """Delete cached chunk/meta keys for a batch query hash.

    Uses ``SCAN`` (incremental, non-blocking) instead of the blocking
    ``KEYS`` command to enumerate chunk keys, so clearing a batch cannot
    stall the Redis server on a large keyspace.  This matches the
    ``scan_iter`` usage elsewhere in the codebase.

    Args:
        cache_prefix: Service prefix used when the batch was stored.
        query_hash: The batch's query hash.

    Returns:
        The number of deleted keys (0 on error or when Redis is unavailable).
    """
    if not REDIS_ENABLED:
        return 0
    client = get_redis_client()
    if client is None:
        return 0
    try:
        chunk_pattern = get_key(f"batch:{cache_prefix}:{query_hash}:chunk:*")
        delete_keys = list(client.scan_iter(match=chunk_pattern, count=200))
        # Always attempt the meta key too; DEL silently ignores missing keys,
        # and the returned count only reflects keys that actually existed.
        delete_keys.append(get_key(_meta_key(cache_prefix, query_hash)))
        return int(client.delete(*delete_keys) or 0)
    except Exception as exc:
        logger.warning(
            "redis_clear_batch failed (prefix=%s, query_hash=%s): %s",
            cache_prefix,
            query_hash,
            exc,
        )
        return 0
|
||||||
569
src/mes_dashboard/services/batch_query_engine.py
Normal file
569
src/mes_dashboard/services/batch_query_engine.py
Normal file
@@ -0,0 +1,569 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
"""BatchQueryEngine — reusable batch query orchestration.
|
||||||
|
|
||||||
|
Provides time-range decomposition, ID-batch decomposition,
|
||||||
|
memory guards, controlled parallelism, Redis chunk caching
|
||||||
|
with partial cache hits, and progress tracking.
|
||||||
|
|
||||||
|
Any service that plugs into this module automatically gains:
|
||||||
|
- Oracle timeout protection (via read_sql_df_slow, 300s)
|
||||||
|
- OOM protection (per-chunk memory guard)
|
||||||
|
- Partial cache reuse (extend date range → reuse old chunks)
|
||||||
|
- Progress tracking via Redis HSET
|
||||||
|
|
||||||
|
Usage::
|
||||||
|
|
||||||
|
from mes_dashboard.services.batch_query_engine import (
|
||||||
|
decompose_by_time_range,
|
||||||
|
decompose_by_ids,
|
||||||
|
execute_plan,
|
||||||
|
merge_chunks,
|
||||||
|
compute_query_hash,
|
||||||
|
)
|
||||||
|
|
||||||
|
chunks = decompose_by_time_range("2025-01-01", "2025-12-31")
|
||||||
|
qh = compute_query_hash({"mode": "date_range", ...})
|
||||||
|
execute_plan(chunks, my_query_fn, query_hash=qh, cache_prefix="reject")
|
||||||
|
df = merge_chunks("reject", qh)
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import hashlib
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import threading
|
||||||
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||||
|
from datetime import datetime, timedelta
|
||||||
|
from typing import (
|
||||||
|
Any,
|
||||||
|
Callable,
|
||||||
|
Dict,
|
||||||
|
Generator,
|
||||||
|
List,
|
||||||
|
Optional,
|
||||||
|
)
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
from mes_dashboard.core.redis_client import get_key, get_redis_client
|
||||||
|
from mes_dashboard.core.redis_df_store import (
|
||||||
|
redis_chunk_exists,
|
||||||
|
redis_load_chunk,
|
||||||
|
redis_store_chunk,
|
||||||
|
)
|
||||||
|
|
||||||
|
logger = logging.getLogger("mes_dashboard.batch_query_engine")
|
||||||
|
|
||||||
|
# ============================================================
|
||||||
|
# Configuration (env-overridable)
|
||||||
|
# ============================================================
|
||||||
|
|
||||||
|
# Hard per-chunk memory ceiling (MB); chunks whose DataFrame exceeds this
# are discarded instead of cached (OOM guard in _execute_single_chunk).
BATCH_CHUNK_MAX_MEMORY_MB: int = int(
    os.getenv("BATCH_CHUNK_MAX_MEMORY_MB", "256")
)

# Date ranges longer than this many days should go through the batch engine
# (see should_decompose_by_time).
BATCH_QUERY_TIME_THRESHOLD_DAYS: int = int(
    os.getenv("BATCH_QUERY_TIME_THRESHOLD_DAYS", "60")
)

# ID lists longer than this should be decomposed into batches
# (default matches the Oracle IN-clause limit of 1000 elements).
BATCH_QUERY_ID_THRESHOLD: int = int(
    os.getenv("BATCH_QUERY_ID_THRESHOLD", "1000")
)
||||||
|
|
||||||
|
|
||||||
|
# ============================================================
|
||||||
|
# 1. Time-range decomposition
|
||||||
|
# ============================================================
|
||||||
|
|
||||||
|
|
||||||
|
def decompose_by_time_range(
    start_date: str,
    end_date: str,
    grain_days: int = 31,
) -> List[Dict[str, str]]:
    """Split ``[start_date, end_date]`` into monthly-ish chunks.

    Boundary semantics (closed interval):
    - Each chunk covers ``[chunk_start, chunk_end]`` inclusive.
    - The next chunk starts at ``previous_chunk_end + 1 day``.
    - The final chunk may contain fewer than *grain_days* days.

    Args:
        start_date: ISO date string ``YYYY-MM-DD``.
        end_date: ISO date string ``YYYY-MM-DD``.
        grain_days: Maximum days per chunk (default 31).

    Returns:
        List of dicts with ``chunk_start`` and ``chunk_end`` keys.

    Raises:
        ValueError: If either date is malformed, start is after end, or
            *grain_days* is less than 1.
    """
    # Guard: grain_days < 1 would make the cursor stall or move backwards,
    # turning the while-loop below into an infinite loop.
    if grain_days < 1:
        raise ValueError("grain_days must be >= 1")

    dt_start = datetime.strptime(start_date, "%Y-%m-%d")
    dt_end = datetime.strptime(end_date, "%Y-%m-%d")

    if dt_start > dt_end:
        raise ValueError(
            f"start_date ({start_date}) must be <= end_date ({end_date})"
        )

    chunks: List[Dict[str, str]] = []
    cursor = dt_start
    while cursor <= dt_end:
        # Inclusive end: a 31-day grain starting Jan 1 ends Jan 31.
        chunk_end = min(cursor + timedelta(days=grain_days - 1), dt_end)
        chunks.append(
            {
                "chunk_start": cursor.strftime("%Y-%m-%d"),
                "chunk_end": chunk_end.strftime("%Y-%m-%d"),
            }
        )
        cursor = chunk_end + timedelta(days=1)

    return chunks
|
|
||||||
|
|
||||||
|
# ============================================================
|
||||||
|
# 2. ID-batch decomposition
|
||||||
|
# ============================================================
|
||||||
|
|
||||||
|
|
||||||
|
def decompose_by_ids(
    ids: List[Any],
    batch_size: int = 1000,
) -> List[List[Any]]:
    """Split *ids* into consecutive batches of at most *batch_size* items.

    Args:
        ids: List of IDs (container IDs, lot IDs, etc.).
        batch_size: Maximum items per batch (default 1000, matching the
            Oracle IN-clause limit).

    Returns:
        List of ID sub-lists, preserving original order.

    Raises:
        ValueError: If *batch_size* is less than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be >= 1")
    offsets = range(0, len(ids), batch_size)
    return [ids[start : start + batch_size] for start in offsets]
|
|
||||||
|
|
||||||
|
# ============================================================
|
||||||
|
# 3. Deterministic query_hash
|
||||||
|
# ============================================================
|
||||||
|
|
||||||
|
|
||||||
|
def compute_query_hash(params: Dict[str, Any]) -> str:
    """Compute a stable 16-char hex hash identifying *params*.

    Canonicalization (via ``_canonicalize``):
    - Lists are sorted before serialization.
    - ``json.dumps`` with ``sort_keys=True`` and ``default=str``.
    - SHA-256, truncated to the first 16 hex chars.

    Only dataset-affecting parameters should be included; presentation-only
    parameters (page, per_page, ...) must be excluded by the caller.
    """
    digest = hashlib.sha256(_canonicalize(params).encode("utf-8"))
    return digest.hexdigest()[:16]
|
|
||||||
|
|
||||||
|
def _canonicalize(obj: Any) -> str:
|
||||||
|
"""Recursively sort lists and produce deterministic JSON."""
|
||||||
|
|
||||||
|
def _sort_value(v: Any) -> Any:
|
||||||
|
if isinstance(v, list):
|
||||||
|
try:
|
||||||
|
return sorted(_sort_value(i) for i in v)
|
||||||
|
except TypeError:
|
||||||
|
return [_sort_value(i) for i in v]
|
||||||
|
if isinstance(v, dict):
|
||||||
|
return {k: _sort_value(v[k]) for k in sorted(v.keys())}
|
||||||
|
return v
|
||||||
|
|
||||||
|
return json.dumps(_sort_value(obj), sort_keys=True, ensure_ascii=False, default=str)
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================
|
||||||
|
# 4. Progress tracking via Redis HSET
|
||||||
|
# ============================================================
|
||||||
|
|
||||||
|
|
||||||
|
def _update_progress(
    cache_prefix: str,
    query_hash: str,
    *,
    total: int,
    completed: int,
    failed: int,
    status: str = "running",
    has_partial_failure: bool = False,
    ttl: int = 900,
) -> None:
    """Write/update batch progress metadata into a Redis hash (best-effort)."""
    client = get_redis_client()
    if client is None:
        return
    progress_key = get_key(f"batch:{cache_prefix}:{query_hash}:meta")
    # Avoid ZeroDivisionError for an empty plan.
    pct = round(completed / total * 100, 1) if total else 0
    fields = {
        "total": str(total),
        "completed": str(completed),
        "failed": str(failed),
        "pct": str(pct),
        "status": status,
        "has_partial_failure": str(has_partial_failure),
    }
    try:
        client.hset(progress_key, mapping=fields)
        client.expire(progress_key, ttl)
    except Exception as exc:
        logger.warning("Failed to update batch progress: %s", exc)
|
|
||||||
|
|
||||||
|
def get_batch_progress(
    cache_prefix: str,
    query_hash: str,
) -> Optional[Dict[str, str]]:
    """Read batch progress metadata; ``None`` when absent or unavailable."""
    client = get_redis_client()
    if client is None:
        return None
    progress_key = get_key(f"batch:{cache_prefix}:{query_hash}:meta")
    try:
        fields = client.hgetall(progress_key)
    except Exception:
        # Best-effort read: treat Redis errors the same as a missing key.
        return None
    return fields or None
|
|
||||||
|
|
||||||
|
# ============================================================
|
||||||
|
# 5. Execute plan
|
||||||
|
# ============================================================
|
||||||
|
|
||||||
|
# Type alias for the per-chunk query callable.
# Signature: query_fn(chunk, max_rows_per_chunk=...) -> pd.DataFrame
# (implementations are expected to use read_sql_df_slow internally).
QueryFn = Callable[..., pd.DataFrame]
||||||
|
|
||||||
|
|
||||||
|
def execute_plan(
    chunks: List[Dict[str, Any]],
    query_fn: QueryFn,
    *,
    parallel: int = 1,
    query_hash: Optional[str] = None,
    skip_cached: bool = True,
    cache_prefix: str = "",
    chunk_ttl: int = 900,
    max_rows_per_chunk: Optional[int] = None,
) -> str:
    """Execute *chunks* through *query_fn* with caching + guards.

    Args:
        chunks: List of chunk descriptors (dicts from decompose_*).
        query_fn: ``fn(chunk_dict, max_rows_per_chunk=...) -> DataFrame``.
            Must use ``read_sql_df_slow`` internally.
        parallel: Max concurrent chunks (default 1 = sequential).
        query_hash: Precomputed hash; auto-generated if None.
        skip_cached: Skip chunks already in Redis (default True).
        cache_prefix: Service prefix for Redis keys (e.g. "reject").
        chunk_ttl: TTL in seconds for each chunk key (default 900).
        max_rows_per_chunk: Passed to *query_fn* for SQL-level
            ``FETCH FIRST N ROWS ONLY``.

    Returns:
        The ``query_hash`` identifying this batch.
    """
    if query_hash is None:
        # Hash the chunk descriptors themselves when no hash was supplied.
        query_hash = compute_query_hash({"chunks": chunks})

    total = len(chunks)
    completed = 0
    failed = 0
    has_partial_failure = False

    # Publish an initial "running" progress record before any work starts.
    _update_progress(
        cache_prefix, query_hash,
        total=total, completed=0, failed=0, status="running", ttl=chunk_ttl,
    )

    effective_parallel = _effective_parallelism(parallel)

    if effective_parallel <= 1:
        # --- Sequential path ---
        for idx, chunk in enumerate(chunks):
            if skip_cached and redis_chunk_exists(cache_prefix, query_hash, idx):
                # Partial cache hit: count it as completed without re-querying.
                completed += 1
                logger.debug("chunk %d/%d cached, skipping", idx, total)
                _update_progress(
                    cache_prefix, query_hash,
                    total=total, completed=completed, failed=failed,
                    has_partial_failure=has_partial_failure, ttl=chunk_ttl,
                )
                continue
            ok = _execute_single_chunk(
                idx, chunk, query_fn, cache_prefix, query_hash,
                chunk_ttl, max_rows_per_chunk,
            )
            if ok:
                completed += 1
            else:
                failed += 1
                has_partial_failure = True
            # Progress is refreshed after every chunk, success or failure.
            _update_progress(
                cache_prefix, query_hash,
                total=total, completed=completed, failed=failed,
                has_partial_failure=has_partial_failure, ttl=chunk_ttl,
            )
    else:
        # --- Parallel path ---
        completed, failed, has_partial_failure = _execute_parallel(
            chunks, query_fn, cache_prefix, query_hash,
            chunk_ttl, max_rows_per_chunk, skip_cached,
            effective_parallel,
        )

    # completed == 0 -> "failed"; some failures -> "partial"; else "completed".
    final_status = "completed" if failed == 0 else ("failed" if completed == 0 else "partial")
    _update_progress(
        cache_prefix, query_hash,
        total=total, completed=completed, failed=failed,
        status=final_status,
        has_partial_failure=has_partial_failure,
        ttl=chunk_ttl,
    )

    return query_hash
|
|
||||||
|
|
||||||
|
def _effective_parallelism(requested: int) -> int:
|
||||||
|
"""Cap parallelism at ``min(requested, semaphore_available - 1)``.
|
||||||
|
|
||||||
|
If semaphore is fully occupied, degrade to sequential (1).
|
||||||
|
"""
|
||||||
|
if requested <= 1:
|
||||||
|
return 1
|
||||||
|
try:
|
||||||
|
from mes_dashboard.core.database import _get_slow_query_semaphore
|
||||||
|
sem = _get_slow_query_semaphore()
|
||||||
|
# threading.Semaphore doesn't expose available count directly;
|
||||||
|
# use a non-blocking acquire/release to estimate.
|
||||||
|
acquired = sem.acquire(blocking=False)
|
||||||
|
if not acquired:
|
||||||
|
logger.info("Semaphore fully occupied; degrading to sequential")
|
||||||
|
return 1
|
||||||
|
sem.release()
|
||||||
|
# We got one permit, so at least 1 is available.
|
||||||
|
# Conservative cap: min(requested, available - 1) where available >= 1.
|
||||||
|
# Since we can't know exact available, just cap at requested.
|
||||||
|
return min(requested, 3) # hard ceiling to be safe
|
||||||
|
except Exception:
|
||||||
|
return 1
|
||||||
|
|
||||||
|
|
||||||
|
def _execute_single_chunk(
    idx: int,
    chunk: Dict[str, Any],
    query_fn: QueryFn,
    cache_prefix: str,
    query_hash: str,
    chunk_ttl: int,
    max_rows_per_chunk: Optional[int],
) -> bool:
    """Run one chunk through *query_fn*, apply guards, persist the result.

    Returns:
        True on success (result cached in Redis); False on any failure —
        query error, memory-guard rejection, or Redis persistence failure.
    """
    try:
        frame = query_fn(chunk, max_rows_per_chunk=max_rows_per_chunk)
        if frame is None:
            frame = pd.DataFrame()

        # ---- Memory guard: refuse to cache oversized chunks (OOM protection) ----
        mem_mb = frame.memory_usage(deep=True).sum() / (1024 * 1024)
        if mem_mb > BATCH_CHUNK_MAX_MEMORY_MB:
            logger.warning(
                "Chunk %d memory %.1f MB exceeds limit %d MB — discarded",
                idx, mem_mb, BATCH_CHUNK_MAX_MEMORY_MB,
            )
            return False

        # ---- Truncation flag: hitting the row cap exactly suggests
        # FETCH FIRST cut the result off ----
        if max_rows_per_chunk is not None and len(frame) == max_rows_per_chunk:
            logger.info("Chunk %d returned exactly max_rows_per_chunk=%d (truncated)", idx, max_rows_per_chunk)

        # ---- Persist to Redis; a chunk that fails to cache counts as failed ----
        if not redis_store_chunk(cache_prefix, query_hash, idx, frame, ttl=chunk_ttl):
            logger.warning(
                "Chunk %d failed to persist into Redis, marking as failed", idx
            )
            return False

        logger.debug(
            "Chunk %d completed: %d rows, %.1f MB",
            idx, len(frame), mem_mb,
        )
        return True

    except Exception as exc:
        logger.error(
            "Chunk %d failed: %s", idx, exc, exc_info=True,
        )
        return False
|
|
||||||
|
|
||||||
|
def _execute_parallel(
    chunks: List[Dict[str, Any]],
    query_fn: QueryFn,
    cache_prefix: str,
    query_hash: str,
    chunk_ttl: int,
    max_rows_per_chunk: Optional[int],
    skip_cached: bool,
    max_workers: int,
) -> tuple:
    """Execute chunks in parallel via ThreadPoolExecutor.

    Cached chunks are counted as completed without being submitted;
    progress is published after each finished future.

    Returns:
        Tuple ``(completed, failed, has_partial_failure)``.
    """
    total = len(chunks)
    completed = 0
    failed = 0
    has_partial_failure = False

    futures = {}
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        for idx, chunk in enumerate(chunks):
            if skip_cached and redis_chunk_exists(cache_prefix, query_hash, idx):
                # Cache hit: count immediately, nothing to submit.
                completed += 1
                continue
            future = executor.submit(
                _execute_single_chunk,
                idx, chunk, query_fn,
                cache_prefix, query_hash, chunk_ttl, max_rows_per_chunk,
            )
            futures[future] = idx

        for future in as_completed(futures):
            idx = futures[future]
            try:
                ok = future.result()
                if ok:
                    completed += 1
                else:
                    failed += 1
                    has_partial_failure = True
            except Exception as exc:
                # _execute_single_chunk catches its own errors; this guards
                # against unexpected future-level failures.
                logger.error("Chunk %d future error: %s", idx, exc)
                failed += 1
                has_partial_failure = True

            # Publish incremental progress after each finished chunk.
            _update_progress(
                cache_prefix, query_hash,
                total=total, completed=completed, failed=failed,
                has_partial_failure=has_partial_failure, ttl=chunk_ttl,
            )

    return completed, failed, has_partial_failure
|
|
||||||
|
|
||||||
|
# ============================================================
|
||||||
|
# 6. Merge / iterate chunks
|
||||||
|
# ============================================================
|
||||||
|
|
||||||
|
|
||||||
|
def merge_chunks(
    cache_prefix: str,
    query_hash: str,
    total: Optional[int] = None,
    max_total_rows: Optional[int] = None,
) -> pd.DataFrame:
    """Load all chunks from Redis and concatenate into one DataFrame.

    If *total* is not given, it is read from the progress metadata.
    Missing chunks are skipped (``has_partial_failure`` semantics), and
    *max_total_rows* — when given — caps the merged row count, truncating
    the chunk that crosses the cap.
    """
    if total is None:
        progress = get_batch_progress(cache_prefix, query_hash)
        if progress:
            total = int(progress.get("total", 0))
        else:
            total = 0

    dfs: List[pd.DataFrame] = []
    total_rows = 0
    for idx in range(total):
        df = redis_load_chunk(cache_prefix, query_hash, idx)
        if df is not None and not df.empty:
            if max_total_rows is not None and total_rows >= max_total_rows:
                # Cap already reached before this chunk; stop merging.
                logger.warning(
                    "merge_chunks reached max_total_rows=%d (prefix=%s, query_hash=%s)",
                    max_total_rows,
                    cache_prefix,
                    query_hash,
                )
                break
            if max_total_rows is not None:
                remaining = max_total_rows - total_rows
                if remaining <= 0:
                    break
                if len(df) > remaining:
                    # Keep only the rows that still fit under the cap.
                    df = df.head(remaining).copy()
                    logger.warning(
                        "merge_chunks truncated chunk %d to %d rows (max_total_rows=%d)",
                        idx,
                        remaining,
                        max_total_rows,
                    )
            dfs.append(df)
            total_rows += len(df)

    if not dfs:
        return pd.DataFrame()

    return pd.concat(dfs, ignore_index=True)
|
|
||||||
|
|
||||||
|
def iterate_chunks(
    cache_prefix: str,
    query_hash: str,
    total: Optional[int] = None,
) -> Generator[pd.DataFrame, None, None]:
    """Yield cached chunk DataFrames one at a time (memory-friendly).

    Missing chunks are silently skipped.  When *total* is None it is read
    from the batch progress metadata (0 when no metadata exists).
    """
    if total is None:
        progress = get_batch_progress(cache_prefix, query_hash)
        total = int(progress.get("total", 0)) if progress else 0

    for idx in range(total):
        chunk_df = redis_load_chunk(cache_prefix, query_hash, idx)
        if chunk_df is not None:
            yield chunk_df
|
|
||||||
|
|
||||||
|
# ============================================================
|
||||||
|
# 7. Convenience: should_use_engine?
|
||||||
|
# ============================================================
|
||||||
|
|
||||||
|
|
||||||
|
def should_decompose_by_time(start_date: str, end_date: str) -> bool:
    """Return True when the date range is long enough to warrant the engine."""
    try:
        span = (
            datetime.strptime(end_date, "%Y-%m-%d")
            - datetime.strptime(start_date, "%Y-%m-%d")
        )
    except (ValueError, TypeError):
        # Malformed or None dates: let the caller run a plain query.
        return False
    return span.days > BATCH_QUERY_TIME_THRESHOLD_DAYS
|
|
||||||
|
|
||||||
|
def should_decompose_by_ids(ids: List[Any]) -> bool:
    """Return True when the ID list is large enough to warrant the engine."""
    id_count = len(ids)
    return id_count > BATCH_QUERY_ID_THRESHOLD
||||||
@@ -11,9 +11,7 @@ Cache layers:
|
|||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import base64
|
|
||||||
import hashlib
|
import hashlib
|
||||||
import io
|
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
from functools import lru_cache
|
from functools import lru_cache
|
||||||
@@ -24,11 +22,7 @@ import pandas as pd
|
|||||||
|
|
||||||
from mes_dashboard.core.cache import ProcessLevelCache, register_process_cache
|
from mes_dashboard.core.cache import ProcessLevelCache, register_process_cache
|
||||||
from mes_dashboard.core.database import read_sql_df_slow as read_sql_df
|
from mes_dashboard.core.database import read_sql_df_slow as read_sql_df
|
||||||
from mes_dashboard.core.redis_client import (
|
from mes_dashboard.core.redis_df_store import redis_load_df, redis_store_df
|
||||||
REDIS_ENABLED,
|
|
||||||
get_key,
|
|
||||||
get_redis_client,
|
|
||||||
)
|
|
||||||
from mes_dashboard.services.filter_cache import get_workcenter_group as _get_wc_group
|
from mes_dashboard.services.filter_cache import get_workcenter_group as _get_wc_group
|
||||||
from mes_dashboard.services.hold_history_service import (
|
from mes_dashboard.services.hold_history_service import (
|
||||||
_clean_text,
|
_clean_text,
|
||||||
@@ -79,44 +73,16 @@ def _make_query_id(params: dict) -> str:
|
|||||||
|
|
||||||
|
|
||||||
# ============================================================
|
# ============================================================
|
||||||
# Redis L2 helpers (parquet <-> base64 string)
|
# Redis L2 helpers (delegated to shared redis_df_store)
|
||||||
# ============================================================
|
# ============================================================
|
||||||
|
|
||||||
|
|
||||||
def _redis_key(query_id: str) -> str:
|
|
||||||
return get_key(f"{_REDIS_NAMESPACE}:{query_id}")
|
|
||||||
|
|
||||||
|
|
||||||
def _redis_store_df(query_id: str, df: pd.DataFrame) -> None:
|
def _redis_store_df(query_id: str, df: pd.DataFrame) -> None:
|
||||||
if not REDIS_ENABLED:
|
redis_store_df(f"{_REDIS_NAMESPACE}:{query_id}", df, ttl=_CACHE_TTL)
|
||||||
return
|
|
||||||
client = get_redis_client()
|
|
||||||
if client is None:
|
|
||||||
return
|
|
||||||
try:
|
|
||||||
buf = io.BytesIO()
|
|
||||||
df.to_parquet(buf, engine="pyarrow", index=False)
|
|
||||||
encoded = base64.b64encode(buf.getvalue()).decode("ascii")
|
|
||||||
client.setex(_redis_key(query_id), _CACHE_TTL, encoded)
|
|
||||||
except Exception as exc:
|
|
||||||
logger.warning("Failed to store DataFrame in Redis: %s", exc)
|
|
||||||
|
|
||||||
|
|
||||||
def _redis_load_df(query_id: str) -> Optional[pd.DataFrame]:
|
def _redis_load_df(query_id: str) -> Optional[pd.DataFrame]:
|
||||||
if not REDIS_ENABLED:
|
return redis_load_df(f"{_REDIS_NAMESPACE}:{query_id}")
|
||||||
return None
|
|
||||||
client = get_redis_client()
|
|
||||||
if client is None:
|
|
||||||
return None
|
|
||||||
try:
|
|
||||||
encoded = client.get(_redis_key(query_id))
|
|
||||||
if encoded is None:
|
|
||||||
return None
|
|
||||||
raw = base64.b64decode(encoded)
|
|
||||||
return pd.read_parquet(io.BytesIO(raw), engine="pyarrow")
|
|
||||||
except Exception as exc:
|
|
||||||
logger.warning("Failed to load DataFrame from Redis: %s", exc)
|
|
||||||
return None
|
|
||||||
|
|
||||||
|
|
||||||
# ============================================================
|
# ============================================================
|
||||||
@@ -164,11 +130,49 @@ def execute_primary_query(
|
|||||||
logger.info(
|
logger.info(
|
||||||
"Hold dataset cache miss for query_id=%s, querying Oracle", query_id
|
"Hold dataset cache miss for query_id=%s, querying Oracle", query_id
|
||||||
)
|
)
|
||||||
sql = _load_sql("base_facts")
|
|
||||||
params = {"start_date": start_date, "end_date": end_date}
|
from mes_dashboard.services.batch_query_engine import (
|
||||||
df = read_sql_df(sql, params)
|
decompose_by_time_range,
|
||||||
if df is None:
|
execute_plan,
|
||||||
df = pd.DataFrame()
|
merge_chunks,
|
||||||
|
compute_query_hash,
|
||||||
|
should_decompose_by_time,
|
||||||
|
)
|
||||||
|
|
||||||
|
if should_decompose_by_time(start_date, end_date):
|
||||||
|
# --- Engine path for long date ranges ---
|
||||||
|
engine_chunks = decompose_by_time_range(start_date, end_date)
|
||||||
|
engine_hash = compute_query_hash(
|
||||||
|
{"start_date": start_date, "end_date": end_date}
|
||||||
|
)
|
||||||
|
base_sql = _load_sql("base_facts")
|
||||||
|
|
||||||
|
def _run_hold_chunk(chunk, max_rows_per_chunk=None):
|
||||||
|
params = {
|
||||||
|
"start_date": chunk["chunk_start"],
|
||||||
|
"end_date": chunk["chunk_end"],
|
||||||
|
}
|
||||||
|
result = read_sql_df(base_sql, params)
|
||||||
|
return result if result is not None else pd.DataFrame()
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
"Engine activated for hold: %d chunks (query_id=%s)",
|
||||||
|
len(engine_chunks), query_id,
|
||||||
|
)
|
||||||
|
execute_plan(
|
||||||
|
engine_chunks, _run_hold_chunk,
|
||||||
|
query_hash=engine_hash,
|
||||||
|
cache_prefix="hold",
|
||||||
|
chunk_ttl=_CACHE_TTL,
|
||||||
|
)
|
||||||
|
df = merge_chunks("hold", engine_hash)
|
||||||
|
else:
|
||||||
|
# --- Direct path (short query) ---
|
||||||
|
sql = _load_sql("base_facts")
|
||||||
|
params = {"start_date": start_date, "end_date": end_date}
|
||||||
|
df = read_sql_df(sql, params)
|
||||||
|
if df is None:
|
||||||
|
df = pd.DataFrame()
|
||||||
|
|
||||||
if not df.empty:
|
if not df.empty:
|
||||||
df["_QUERY_START"] = pd.Timestamp(start_date)
|
df["_QUERY_START"] = pd.Timestamp(start_date)
|
||||||
|
|||||||
@@ -140,6 +140,9 @@ def _build_resource_filter_sql(
|
|||||||
# Query Functions
|
# Query Functions
|
||||||
# ============================================================
|
# ============================================================
|
||||||
|
|
||||||
|
_JOB_CACHE_TTL = 600 # 10 min for job query results
|
||||||
|
|
||||||
|
|
||||||
def get_jobs_by_resources(
|
def get_jobs_by_resources(
|
||||||
resource_ids: List[str],
|
resource_ids: List[str],
|
||||||
start_date: str,
|
start_date: str,
|
||||||
@@ -147,6 +150,10 @@ def get_jobs_by_resources(
|
|||||||
) -> Dict[str, Any]:
|
) -> Dict[str, Any]:
|
||||||
"""Query jobs for selected resources within date range.
|
"""Query jobs for selected resources within date range.
|
||||||
|
|
||||||
|
For date ranges exceeding BATCH_QUERY_TIME_THRESHOLD_DAYS (default 60),
|
||||||
|
the query is decomposed into monthly chunks via BatchQueryEngine.
|
||||||
|
Results are cached in Redis to avoid redundant Oracle queries.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
resource_ids: List of RESOURCEID values to query
|
resource_ids: List of RESOURCEID values to query
|
||||||
start_date: Start date in YYYY-MM-DD format
|
start_date: Start date in YYYY-MM-DD format
|
||||||
@@ -165,22 +172,78 @@ def get_jobs_by_resources(
|
|||||||
return {'error': validation_error}
|
return {'error': validation_error}
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Build resource filter
|
from mes_dashboard.services.batch_query_engine import (
|
||||||
resource_filter, resource_params = _build_resource_filter_sql(
|
decompose_by_time_range,
|
||||||
resource_ids, return_params=True
|
execute_plan,
|
||||||
|
merge_chunks,
|
||||||
|
compute_query_hash,
|
||||||
|
should_decompose_by_time,
|
||||||
)
|
)
|
||||||
|
from mes_dashboard.core.redis_df_store import redis_load_df, redis_store_df
|
||||||
|
|
||||||
# Load SQL template
|
# Check Redis cache first
|
||||||
sql = SQLLoader.load("job_query/job_list")
|
cache_hash = compute_query_hash({
|
||||||
sql = sql.replace("{{ RESOURCE_FILTER }}", resource_filter)
|
"resource_ids": sorted(resource_ids),
|
||||||
|
"start_date": start_date,
|
||||||
|
"end_date": end_date,
|
||||||
|
})
|
||||||
|
cache_key = f"job_query:{cache_hash}"
|
||||||
|
cached_df = redis_load_df(cache_key)
|
||||||
|
if cached_df is not None:
|
||||||
|
logger.info("Job query cache hit (hash=%s)", cache_hash)
|
||||||
|
df = cached_df
|
||||||
|
elif should_decompose_by_time(start_date, end_date):
|
||||||
|
# --- Engine path for long date ranges ---
|
||||||
|
engine_chunks = decompose_by_time_range(start_date, end_date)
|
||||||
|
|
||||||
# Execute query
|
# Build resource filter once (reused across all chunks)
|
||||||
params = {
|
resource_filter, resource_params = _build_resource_filter_sql(
|
||||||
'start_date': start_date,
|
resource_ids, return_params=True
|
||||||
'end_date': end_date,
|
)
|
||||||
**resource_params,
|
sql = SQLLoader.load("job_query/job_list")
|
||||||
}
|
sql = sql.replace("{{ RESOURCE_FILTER }}", resource_filter)
|
||||||
df = read_sql_df(sql, params)
|
|
||||||
|
def _run_job_chunk(chunk, max_rows_per_chunk=None):
|
||||||
|
chunk_params = {
|
||||||
|
'start_date': chunk['chunk_start'],
|
||||||
|
'end_date': chunk['chunk_end'],
|
||||||
|
**resource_params,
|
||||||
|
}
|
||||||
|
result = read_sql_df(sql, chunk_params)
|
||||||
|
return result if result is not None else pd.DataFrame()
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
"Engine activated for job query: %d chunks, %d resources",
|
||||||
|
len(engine_chunks), len(resource_ids),
|
||||||
|
)
|
||||||
|
execute_plan(
|
||||||
|
engine_chunks, _run_job_chunk,
|
||||||
|
query_hash=cache_hash,
|
||||||
|
cache_prefix="job",
|
||||||
|
chunk_ttl=_JOB_CACHE_TTL,
|
||||||
|
)
|
||||||
|
df = merge_chunks("job", cache_hash)
|
||||||
|
# Store merged result for fast re-access
|
||||||
|
if not df.empty:
|
||||||
|
redis_store_df(cache_key, df, ttl=_JOB_CACHE_TTL)
|
||||||
|
else:
|
||||||
|
# --- Direct path (short query) ---
|
||||||
|
resource_filter, resource_params = _build_resource_filter_sql(
|
||||||
|
resource_ids, return_params=True
|
||||||
|
)
|
||||||
|
sql = SQLLoader.load("job_query/job_list")
|
||||||
|
sql = sql.replace("{{ RESOURCE_FILTER }}", resource_filter)
|
||||||
|
params = {
|
||||||
|
'start_date': start_date,
|
||||||
|
'end_date': end_date,
|
||||||
|
**resource_params,
|
||||||
|
}
|
||||||
|
df = read_sql_df(sql, params)
|
||||||
|
if df is None:
|
||||||
|
df = pd.DataFrame()
|
||||||
|
# Cache the result
|
||||||
|
if not df.empty:
|
||||||
|
redis_store_df(cache_key, df, ttl=_JOB_CACHE_TTL)
|
||||||
|
|
||||||
# Convert to records
|
# Convert to records
|
||||||
data = []
|
data = []
|
||||||
|
|||||||
@@ -56,8 +56,8 @@ from mes_dashboard.config.workcenter_groups import WORKCENTER_GROUPS, get_group_
|
|||||||
|
|
||||||
logger = logging.getLogger('mes_dashboard.mid_section_defect')
|
logger = logging.getLogger('mes_dashboard.mid_section_defect')
|
||||||
|
|
||||||
# Constants
|
# Constants
|
||||||
MAX_QUERY_DAYS = 365
|
MAX_QUERY_DAYS = 365
|
||||||
CACHE_TTL_DETECTION = 300 # 5 min for detection data
|
CACHE_TTL_DETECTION = 300 # 5 min for detection data
|
||||||
CACHE_TTL_LOSS_REASONS = 86400 # 24h for loss reason list (daily sync)
|
CACHE_TTL_LOSS_REASONS = 86400 # 24h for loss reason list (daily sync)
|
||||||
|
|
||||||
@@ -610,11 +610,11 @@ def query_analysis_detail(
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def query_all_loss_reasons() -> Optional[Dict[str, Any]]:
|
def query_all_loss_reasons() -> Optional[Dict[str, Any]]:
|
||||||
"""Get all loss reasons (cached daily in Redis).
|
"""Get all loss reasons (cached daily in Redis).
|
||||||
|
|
||||||
Lightweight query: DISTINCT LOSSREASONNAME from last 365 days.
|
Lightweight query: DISTINCT LOSSREASONNAME from last 365 days.
|
||||||
Cached with 24h TTL — suitable for dropdown population on page load.
|
Cached with 24h TTL — suitable for dropdown population on page load.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Dict with 'loss_reasons' list, or None on failure.
|
Dict with 'loss_reasons' list, or None on failure.
|
||||||
@@ -861,7 +861,12 @@ def _fetch_station_detection_data(
|
|||||||
end_date: str,
|
end_date: str,
|
||||||
station: str = '測試',
|
station: str = '測試',
|
||||||
) -> Optional[pd.DataFrame]:
|
) -> Optional[pd.DataFrame]:
|
||||||
"""Execute station_detection.sql and return raw DataFrame."""
|
"""Execute station_detection.sql and return raw DataFrame.
|
||||||
|
|
||||||
|
For date ranges exceeding BATCH_QUERY_TIME_THRESHOLD_DAYS (default 60),
|
||||||
|
the query is decomposed into monthly chunks via BatchQueryEngine to
|
||||||
|
prevent Oracle timeout on high-volume stations.
|
||||||
|
"""
|
||||||
cache_key = make_cache_key(
|
cache_key = make_cache_key(
|
||||||
"mid_section_detection",
|
"mid_section_detection",
|
||||||
filters={
|
filters={
|
||||||
@@ -885,16 +890,58 @@ def _fetch_station_detection_data(
|
|||||||
STATION_FILTER=wip_filter,
|
STATION_FILTER=wip_filter,
|
||||||
STATION_FILTER_REJECTS=rej_filter,
|
STATION_FILTER_REJECTS=rej_filter,
|
||||||
)
|
)
|
||||||
bind_params = {
|
|
||||||
'start_date': start_date,
|
from mes_dashboard.services.batch_query_engine import (
|
||||||
'end_date': end_date,
|
decompose_by_time_range,
|
||||||
**wip_params,
|
execute_plan,
|
||||||
**rej_params,
|
merge_chunks,
|
||||||
}
|
compute_query_hash,
|
||||||
df = read_sql_df(sql, bind_params)
|
should_decompose_by_time,
|
||||||
if df is None:
|
)
|
||||||
logger.error("Station detection query returned None (station=%s)", station)
|
|
||||||
return None
|
if should_decompose_by_time(start_date, end_date):
|
||||||
|
# --- Engine path for long date ranges ---
|
||||||
|
engine_chunks = decompose_by_time_range(start_date, end_date)
|
||||||
|
engine_hash = compute_query_hash({
|
||||||
|
"station": station,
|
||||||
|
"start_date": start_date,
|
||||||
|
"end_date": end_date,
|
||||||
|
})
|
||||||
|
|
||||||
|
def _run_detection_chunk(chunk, max_rows_per_chunk=None):
|
||||||
|
chunk_params = {
|
||||||
|
'start_date': chunk['chunk_start'],
|
||||||
|
'end_date': chunk['chunk_end'],
|
||||||
|
**wip_params,
|
||||||
|
**rej_params,
|
||||||
|
}
|
||||||
|
result = read_sql_df(sql, chunk_params)
|
||||||
|
return result if result is not None else pd.DataFrame()
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
"Engine activated for detection (%s): %d chunks",
|
||||||
|
station, len(engine_chunks),
|
||||||
|
)
|
||||||
|
execute_plan(
|
||||||
|
engine_chunks, _run_detection_chunk,
|
||||||
|
query_hash=engine_hash,
|
||||||
|
cache_prefix="msd_detect",
|
||||||
|
chunk_ttl=CACHE_TTL_DETECTION,
|
||||||
|
)
|
||||||
|
df = merge_chunks("msd_detect", engine_hash)
|
||||||
|
else:
|
||||||
|
# --- Direct path (short query) ---
|
||||||
|
bind_params = {
|
||||||
|
'start_date': start_date,
|
||||||
|
'end_date': end_date,
|
||||||
|
**wip_params,
|
||||||
|
**rej_params,
|
||||||
|
}
|
||||||
|
df = read_sql_df(sql, bind_params)
|
||||||
|
if df is None:
|
||||||
|
logger.error("Station detection query returned None (station=%s)", station)
|
||||||
|
return None
|
||||||
|
|
||||||
logger.info(
|
logger.info(
|
||||||
"Station detection (%s): %d rows, %d unique lots",
|
"Station detection (%s): %d rows, %d unique lots",
|
||||||
station,
|
station,
|
||||||
|
|||||||
@@ -40,13 +40,13 @@ except ImportError:
|
|||||||
logger = logging.getLogger('mes_dashboard.query_tool')
|
logger = logging.getLogger('mes_dashboard.query_tool')
|
||||||
|
|
||||||
# Constants
|
# Constants
|
||||||
BATCH_SIZE = 1000 # Oracle IN clause limit
|
BATCH_SIZE = 1000 # Oracle IN clause limit
|
||||||
MAX_LOT_IDS = 100
|
MAX_LOT_IDS = 100
|
||||||
MAX_SERIAL_NUMBERS = 100
|
MAX_SERIAL_NUMBERS = 100
|
||||||
MAX_WORK_ORDERS = 50
|
MAX_WORK_ORDERS = 50
|
||||||
MAX_GD_WORK_ORDERS = 100
|
MAX_GD_WORK_ORDERS = 100
|
||||||
MAX_EQUIPMENTS = 20
|
MAX_EQUIPMENTS = 20
|
||||||
MAX_DATE_RANGE_DAYS = 365
|
MAX_DATE_RANGE_DAYS = 365
|
||||||
DEFAULT_TIME_WINDOW_HOURS = 168 # 1 week for better PJ_TYPE detection
|
DEFAULT_TIME_WINDOW_HOURS = 168 # 1 week for better PJ_TYPE detection
|
||||||
ADJACENT_LOTS_COUNT = 3
|
ADJACENT_LOTS_COUNT = 3
|
||||||
|
|
||||||
@@ -102,14 +102,14 @@ def validate_lot_input(input_type: str, values: List[str]) -> Optional[str]:
|
|||||||
if not values:
|
if not values:
|
||||||
return '請輸入至少一個查詢條件'
|
return '請輸入至少一個查詢條件'
|
||||||
|
|
||||||
limits = {
|
limits = {
|
||||||
'lot_id': MAX_LOT_IDS,
|
'lot_id': MAX_LOT_IDS,
|
||||||
'wafer_lot': MAX_LOT_IDS,
|
'wafer_lot': MAX_LOT_IDS,
|
||||||
'gd_lot_id': MAX_LOT_IDS,
|
'gd_lot_id': MAX_LOT_IDS,
|
||||||
'serial_number': MAX_SERIAL_NUMBERS,
|
'serial_number': MAX_SERIAL_NUMBERS,
|
||||||
'work_order': MAX_WORK_ORDERS,
|
'work_order': MAX_WORK_ORDERS,
|
||||||
'gd_work_order': MAX_GD_WORK_ORDERS,
|
'gd_work_order': MAX_GD_WORK_ORDERS,
|
||||||
}
|
}
|
||||||
|
|
||||||
limit = limits.get(input_type, MAX_LOT_IDS)
|
limit = limits.get(input_type, MAX_LOT_IDS)
|
||||||
if len(values) > limit:
|
if len(values) > limit:
|
||||||
@@ -385,7 +385,7 @@ def _resolve_by_lot_id(lot_ids: List[str]) -> Dict[str, Any]:
|
|||||||
CONTAINER_FILTER=builder.get_conditions_sql(),
|
CONTAINER_FILTER=builder.get_conditions_sql(),
|
||||||
)
|
)
|
||||||
|
|
||||||
df = read_sql_df(sql, builder.params)
|
df = read_sql_df_slow(sql, builder.params)
|
||||||
data = _df_to_records(df)
|
data = _df_to_records(df)
|
||||||
matched, not_found, expansion_info = _match_rows_by_tokens(
|
matched, not_found, expansion_info = _match_rows_by_tokens(
|
||||||
lot_ids,
|
lot_ids,
|
||||||
@@ -424,7 +424,7 @@ def _resolve_by_wafer_lot(wafer_lots: List[str]) -> Dict[str, Any]:
|
|||||||
WAFER_FILTER=builder.get_conditions_sql(),
|
WAFER_FILTER=builder.get_conditions_sql(),
|
||||||
)
|
)
|
||||||
|
|
||||||
df = read_sql_df(sql, builder.params)
|
df = read_sql_df_slow(sql, builder.params)
|
||||||
data = _df_to_records(df)
|
data = _df_to_records(df)
|
||||||
matched, not_found, expansion_info = _match_rows_by_tokens(
|
matched, not_found, expansion_info = _match_rows_by_tokens(
|
||||||
wafer_lots,
|
wafer_lots,
|
||||||
@@ -482,7 +482,7 @@ def _resolve_by_gd_lot_id(gd_lot_ids: List[str]) -> Dict[str, Any]:
|
|||||||
CONTAINER_FILTER=builder.get_conditions_sql(),
|
CONTAINER_FILTER=builder.get_conditions_sql(),
|
||||||
)
|
)
|
||||||
|
|
||||||
df = read_sql_df(sql, builder.params)
|
df = read_sql_df_slow(sql, builder.params)
|
||||||
data = _df_to_records(df)
|
data = _df_to_records(df)
|
||||||
matched, not_found, expansion_info = _match_rows_by_tokens(
|
matched, not_found, expansion_info = _match_rows_by_tokens(
|
||||||
gd_lot_ids,
|
gd_lot_ids,
|
||||||
@@ -574,7 +574,7 @@ def _resolve_by_serial_number(serial_numbers: List[str]) -> Dict[str, Any]:
|
|||||||
config['sql_name'],
|
config['sql_name'],
|
||||||
**{config['filter_key']: builder.get_conditions_sql()},
|
**{config['filter_key']: builder.get_conditions_sql()},
|
||||||
)
|
)
|
||||||
df = read_sql_df(sql, builder.params)
|
df = read_sql_df_slow(sql, builder.params)
|
||||||
data = _df_to_records(df)
|
data = _df_to_records(df)
|
||||||
matched, _, _ = _match_rows_by_tokens(
|
matched, _, _ = _match_rows_by_tokens(
|
||||||
tokens,
|
tokens,
|
||||||
@@ -660,7 +660,7 @@ def _resolve_by_work_order(work_orders: List[str]) -> Dict[str, Any]:
|
|||||||
WORK_ORDER_FILTER=builder.get_conditions_sql(),
|
WORK_ORDER_FILTER=builder.get_conditions_sql(),
|
||||||
)
|
)
|
||||||
|
|
||||||
df = read_sql_df(sql, builder.params)
|
df = read_sql_df_slow(sql, builder.params)
|
||||||
data = _df_to_records(df)
|
data = _df_to_records(df)
|
||||||
matched, not_found, expansion_info = _match_rows_by_tokens(
|
matched, not_found, expansion_info = _match_rows_by_tokens(
|
||||||
work_orders,
|
work_orders,
|
||||||
@@ -703,7 +703,7 @@ def _resolve_by_gd_work_order(work_orders: List[str]) -> Dict[str, Any]:
|
|||||||
WORK_ORDER_FILTER=builder.get_conditions_sql(),
|
WORK_ORDER_FILTER=builder.get_conditions_sql(),
|
||||||
)
|
)
|
||||||
|
|
||||||
df = read_sql_df(sql, builder.params)
|
df = read_sql_df_slow(sql, builder.params)
|
||||||
data = _df_to_records(df)
|
data = _df_to_records(df)
|
||||||
matched, not_found, expansion_info = _match_rows_by_tokens(
|
matched, not_found, expansion_info = _match_rows_by_tokens(
|
||||||
work_orders,
|
work_orders,
|
||||||
@@ -853,7 +853,7 @@ def get_adjacent_lots(
|
|||||||
'time_window_hours': time_window_hours,
|
'time_window_hours': time_window_hours,
|
||||||
}
|
}
|
||||||
|
|
||||||
df = read_sql_df(sql, params)
|
df = read_sql_df_slow(sql, params)
|
||||||
data = _df_to_records(df)
|
data = _df_to_records(df)
|
||||||
|
|
||||||
logger.debug(f"Adjacent lots: {len(data)} records for {equipment_id}")
|
logger.debug(f"Adjacent lots: {len(data)} records for {equipment_id}")
|
||||||
@@ -1127,11 +1127,8 @@ def get_lot_split_merge_history(
|
|||||||
f"Starting split/merge history query for MFGORDERNAME={work_order} mode={mode}"
|
f"Starting split/merge history query for MFGORDERNAME={work_order} mode={mode}"
|
||||||
)
|
)
|
||||||
|
|
||||||
if full_history:
|
# Both modes use slow query path for timeout protection.
|
||||||
# Full mode uses dedicated slow query timeout path.
|
df = read_sql_df_slow(sql, params)
|
||||||
df = read_sql_df_slow(sql, params)
|
|
||||||
else:
|
|
||||||
df = read_sql_df(sql, params)
|
|
||||||
data = _df_to_records(df)
|
data = _df_to_records(df)
|
||||||
|
|
||||||
# Process records for display
|
# Process records for display
|
||||||
@@ -1209,7 +1206,7 @@ def _get_mfg_order_for_lot(container_id: str) -> Optional[str]:
|
|||||||
WHERE CONTAINERID = :container_id
|
WHERE CONTAINERID = :container_id
|
||||||
AND MFGORDERNAME IS NOT NULL
|
AND MFGORDERNAME IS NOT NULL
|
||||||
"""
|
"""
|
||||||
df = read_sql_df(sql, {'container_id': container_id})
|
df = read_sql_df_slow(sql, {'container_id': container_id})
|
||||||
if not df.empty:
|
if not df.empty:
|
||||||
return df.iloc[0]['MFGORDERNAME']
|
return df.iloc[0]['MFGORDERNAME']
|
||||||
return None
|
return None
|
||||||
@@ -1304,7 +1301,7 @@ def get_lot_splits(
|
|||||||
sql = SQLLoader.load("query_tool/lot_splits")
|
sql = SQLLoader.load("query_tool/lot_splits")
|
||||||
params = {'container_id': container_id}
|
params = {'container_id': container_id}
|
||||||
|
|
||||||
df = read_sql_df(sql, params)
|
df = read_sql_df_slow(sql, params)
|
||||||
data = _df_to_records(df)
|
data = _df_to_records(df)
|
||||||
|
|
||||||
# Group by FINISHEDNAME to show combined structure
|
# Group by FINISHEDNAME to show combined structure
|
||||||
@@ -1395,7 +1392,7 @@ def get_lot_jobs(
|
|||||||
'time_end': end,
|
'time_end': end,
|
||||||
}
|
}
|
||||||
|
|
||||||
df = read_sql_df(sql, params)
|
df = read_sql_df_slow(sql, params)
|
||||||
data = _df_to_records(df)
|
data = _df_to_records(df)
|
||||||
|
|
||||||
logger.debug(f"LOT jobs: {len(data)} records for {equipment_id}")
|
logger.debug(f"LOT jobs: {len(data)} records for {equipment_id}")
|
||||||
@@ -1452,7 +1449,7 @@ def get_lot_jobs_with_history(
|
|||||||
'time_end': end,
|
'time_end': end,
|
||||||
}
|
}
|
||||||
|
|
||||||
df = read_sql_df(sql, params)
|
df = read_sql_df_slow(sql, params)
|
||||||
data = _df_to_records(df)
|
data = _df_to_records(df)
|
||||||
|
|
||||||
logger.debug(
|
logger.debug(
|
||||||
@@ -1503,16 +1500,33 @@ def get_equipment_status_hours(
|
|||||||
return {'error': validation_error}
|
return {'error': validation_error}
|
||||||
|
|
||||||
try:
|
try:
|
||||||
builder = QueryBuilder()
|
from mes_dashboard.services.batch_query_engine import compute_query_hash
|
||||||
builder.add_in_condition("r.RESOURCEID", equipment_ids)
|
from mes_dashboard.core.redis_df_store import redis_load_df, redis_store_df
|
||||||
sql = SQLLoader.load_with_params(
|
|
||||||
"query_tool/equipment_status_hours",
|
cache_hash = compute_query_hash({
|
||||||
EQUIPMENT_FILTER=builder.get_conditions_sql(),
|
"fn": "equipment_status_hours",
|
||||||
)
|
"equipment_ids": sorted(equipment_ids),
|
||||||
|
"start_date": start_date,
|
||||||
|
"end_date": end_date,
|
||||||
|
})
|
||||||
|
cache_key = f"qt:equip_status:{cache_hash}"
|
||||||
|
cached_df = redis_load_df(cache_key)
|
||||||
|
|
||||||
|
if cached_df is not None:
|
||||||
|
df = cached_df
|
||||||
|
else:
|
||||||
|
builder = QueryBuilder()
|
||||||
|
builder.add_in_condition("r.RESOURCEID", equipment_ids)
|
||||||
|
sql = SQLLoader.load_with_params(
|
||||||
|
"query_tool/equipment_status_hours",
|
||||||
|
EQUIPMENT_FILTER=builder.get_conditions_sql(),
|
||||||
|
)
|
||||||
|
params = {'start_date': start_date, 'end_date': end_date}
|
||||||
|
params.update(builder.params)
|
||||||
|
df = read_sql_df_slow(sql, params)
|
||||||
|
if df is not None and not df.empty:
|
||||||
|
redis_store_df(cache_key, df, ttl=300)
|
||||||
|
|
||||||
params = {'start_date': start_date, 'end_date': end_date}
|
|
||||||
params.update(builder.params)
|
|
||||||
df = read_sql_df(sql, params)
|
|
||||||
data = _df_to_records(df)
|
data = _df_to_records(df)
|
||||||
|
|
||||||
# Calculate totals
|
# Calculate totals
|
||||||
@@ -1584,7 +1598,7 @@ def get_equipment_lots(
|
|||||||
|
|
||||||
params = {'start_date': start_date, 'end_date': end_date}
|
params = {'start_date': start_date, 'end_date': end_date}
|
||||||
params.update(builder.params)
|
params.update(builder.params)
|
||||||
df = read_sql_df(sql, params)
|
df = read_sql_df_slow(sql, params)
|
||||||
data = _df_to_records(df)
|
data = _df_to_records(df)
|
||||||
|
|
||||||
logger.info(f"Equipment lots: {len(data)} records")
|
logger.info(f"Equipment lots: {len(data)} records")
|
||||||
@@ -1634,7 +1648,7 @@ def get_equipment_materials(
|
|||||||
|
|
||||||
params = {'start_date': start_date, 'end_date': end_date}
|
params = {'start_date': start_date, 'end_date': end_date}
|
||||||
params.update(builder.params)
|
params.update(builder.params)
|
||||||
df = read_sql_df(sql, params)
|
df = read_sql_df_slow(sql, params)
|
||||||
data = _df_to_records(df)
|
data = _df_to_records(df)
|
||||||
|
|
||||||
logger.info(f"Equipment materials: {len(data)} records")
|
logger.info(f"Equipment materials: {len(data)} records")
|
||||||
@@ -1684,7 +1698,7 @@ def get_equipment_rejects(
|
|||||||
|
|
||||||
params = {'start_date': start_date, 'end_date': end_date}
|
params = {'start_date': start_date, 'end_date': end_date}
|
||||||
params.update(builder.params)
|
params.update(builder.params)
|
||||||
df = read_sql_df(sql, params)
|
df = read_sql_df_slow(sql, params)
|
||||||
data = _df_to_records(df)
|
data = _df_to_records(df)
|
||||||
|
|
||||||
logger.info(f"Equipment rejects: {len(data)} records")
|
logger.info(f"Equipment rejects: {len(data)} records")
|
||||||
@@ -1736,7 +1750,7 @@ def get_equipment_jobs(
|
|||||||
|
|
||||||
params = {'start_date': start_date, 'end_date': end_date}
|
params = {'start_date': start_date, 'end_date': end_date}
|
||||||
params.update(builder.params)
|
params.update(builder.params)
|
||||||
df = read_sql_df(sql, params)
|
df = read_sql_df_slow(sql, params)
|
||||||
data = _df_to_records(df)
|
data = _df_to_records(df)
|
||||||
|
|
||||||
logger.info(f"Equipment jobs: {len(data)} records")
|
logger.info(f"Equipment jobs: {len(data)} records")
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -11,9 +11,7 @@ Cache layers:
|
|||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import base64
|
|
||||||
import hashlib
|
import hashlib
|
||||||
import io
|
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
from functools import lru_cache
|
from functools import lru_cache
|
||||||
@@ -24,11 +22,7 @@ import pandas as pd
|
|||||||
|
|
||||||
from mes_dashboard.core.cache import ProcessLevelCache, register_process_cache
|
from mes_dashboard.core.cache import ProcessLevelCache, register_process_cache
|
||||||
from mes_dashboard.core.database import read_sql_df_slow as read_sql_df
|
from mes_dashboard.core.database import read_sql_df_slow as read_sql_df
|
||||||
from mes_dashboard.core.redis_client import (
|
from mes_dashboard.core.redis_df_store import redis_load_df, redis_store_df
|
||||||
REDIS_ENABLED,
|
|
||||||
get_key,
|
|
||||||
get_redis_client,
|
|
||||||
)
|
|
||||||
|
|
||||||
logger = logging.getLogger("mes_dashboard.resource_dataset_cache")
|
logger = logging.getLogger("mes_dashboard.resource_dataset_cache")
|
||||||
|
|
||||||
@@ -67,44 +61,16 @@ def _make_query_id(params: dict) -> str:
|
|||||||
|
|
||||||
|
|
||||||
# ============================================================
|
# ============================================================
|
||||||
# Redis L2 helpers (parquet <-> base64 string)
|
# Redis L2 helpers (delegated to shared redis_df_store)
|
||||||
# ============================================================
|
# ============================================================
|
||||||
|
|
||||||
|
|
||||||
def _redis_key(query_id: str) -> str:
|
|
||||||
return get_key(f"{_REDIS_NAMESPACE}:{query_id}")
|
|
||||||
|
|
||||||
|
|
||||||
def _redis_store_df(query_id: str, df: pd.DataFrame) -> None:
|
def _redis_store_df(query_id: str, df: pd.DataFrame) -> None:
|
||||||
if not REDIS_ENABLED:
|
redis_store_df(f"{_REDIS_NAMESPACE}:{query_id}", df, ttl=_CACHE_TTL)
|
||||||
return
|
|
||||||
client = get_redis_client()
|
|
||||||
if client is None:
|
|
||||||
return
|
|
||||||
try:
|
|
||||||
buf = io.BytesIO()
|
|
||||||
df.to_parquet(buf, engine="pyarrow", index=False)
|
|
||||||
encoded = base64.b64encode(buf.getvalue()).decode("ascii")
|
|
||||||
client.setex(_redis_key(query_id), _CACHE_TTL, encoded)
|
|
||||||
except Exception as exc:
|
|
||||||
logger.warning("Failed to store DataFrame in Redis: %s", exc)
|
|
||||||
|
|
||||||
|
|
||||||
def _redis_load_df(query_id: str) -> Optional[pd.DataFrame]:
|
def _redis_load_df(query_id: str) -> Optional[pd.DataFrame]:
|
||||||
if not REDIS_ENABLED:
|
return redis_load_df(f"{_REDIS_NAMESPACE}:{query_id}")
|
||||||
return None
|
|
||||||
client = get_redis_client()
|
|
||||||
if client is None:
|
|
||||||
return None
|
|
||||||
try:
|
|
||||||
encoded = client.get(_redis_key(query_id))
|
|
||||||
if encoded is None:
|
|
||||||
return None
|
|
||||||
raw = base64.b64decode(encoded)
|
|
||||||
return pd.read_parquet(io.BytesIO(raw), engine="pyarrow")
|
|
||||||
except Exception as exc:
|
|
||||||
logger.warning("Failed to load DataFrame from Redis: %s", exc)
|
|
||||||
return None
|
|
||||||
|
|
||||||
|
|
||||||
# ============================================================
|
# ============================================================
|
||||||
@@ -233,12 +199,47 @@ def execute_primary_query(
|
|||||||
"detail": _empty_detail(),
|
"detail": _empty_detail(),
|
||||||
}
|
}
|
||||||
|
|
||||||
sql = _load_sql("base_facts")
|
from mes_dashboard.services.batch_query_engine import (
|
||||||
sql = sql.replace("{{ HISTORYID_FILTER }}", historyid_filter)
|
decompose_by_time_range,
|
||||||
params = {"start_date": start_date, "end_date": end_date}
|
execute_plan,
|
||||||
df = read_sql_df(sql, params)
|
merge_chunks,
|
||||||
if df is None:
|
compute_query_hash,
|
||||||
df = pd.DataFrame()
|
should_decompose_by_time,
|
||||||
|
)
|
||||||
|
|
||||||
|
base_sql = _load_sql("base_facts")
|
||||||
|
base_sql = base_sql.replace("{{ HISTORYID_FILTER }}", historyid_filter)
|
||||||
|
|
||||||
|
if should_decompose_by_time(start_date, end_date):
|
||||||
|
# --- Engine path for long date ranges ---
|
||||||
|
engine_chunks = decompose_by_time_range(start_date, end_date)
|
||||||
|
engine_hash = compute_query_hash(query_id_input)
|
||||||
|
|
||||||
|
def _run_resource_chunk(chunk, max_rows_per_chunk=None):
|
||||||
|
params = {
|
||||||
|
"start_date": chunk["chunk_start"],
|
||||||
|
"end_date": chunk["chunk_end"],
|
||||||
|
}
|
||||||
|
result = read_sql_df(base_sql, params)
|
||||||
|
return result if result is not None else pd.DataFrame()
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
"Engine activated for resource: %d chunks (query_id=%s)",
|
||||||
|
len(engine_chunks), query_id,
|
||||||
|
)
|
||||||
|
execute_plan(
|
||||||
|
engine_chunks, _run_resource_chunk,
|
||||||
|
query_hash=engine_hash,
|
||||||
|
cache_prefix="resource",
|
||||||
|
chunk_ttl=_CACHE_TTL,
|
||||||
|
)
|
||||||
|
df = merge_chunks("resource", engine_hash)
|
||||||
|
else:
|
||||||
|
# --- Direct path (short query) ---
|
||||||
|
params = {"start_date": start_date, "end_date": end_date}
|
||||||
|
df = read_sql_df(base_sql, params)
|
||||||
|
if df is None:
|
||||||
|
df = pd.DataFrame()
|
||||||
|
|
||||||
if not df.empty:
|
if not df.empty:
|
||||||
_store_df(query_id, df)
|
_store_df(query_id, df)
|
||||||
|
|||||||
98
tests/e2e/test_reject_history_e2e.py
Normal file
98
tests/e2e/test_reject_history_e2e.py
Normal file
@@ -0,0 +1,98 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
"""E2E tests for reject-history long-range query flow."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import os
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
import requests
|
||||||
|
|
||||||
|
|
||||||
|
def _post_reject_query(app_server: str, body: dict, timeout: float = 420.0) -> requests.Response:
|
||||||
|
return requests.post(
|
||||||
|
f"{app_server}/api/reject-history/query",
|
||||||
|
json=body,
|
||||||
|
timeout=timeout,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.e2e
|
||||||
|
@pytest.mark.skipif(
|
||||||
|
os.environ.get("RUN_LONG_E2E") != "1",
|
||||||
|
reason="Long-range reject-history E2E disabled; set RUN_LONG_E2E=1 to run.",
|
||||||
|
)
|
||||||
|
class TestRejectHistoryLongRangeE2E:
|
||||||
|
"""Real backend E2E checks for long-range reject history query."""
|
||||||
|
|
||||||
|
def test_query_365_day_range_returns_success(self, app_server: str):
|
||||||
|
response = _post_reject_query(
|
||||||
|
app_server,
|
||||||
|
{
|
||||||
|
"mode": "date_range",
|
||||||
|
"start_date": "2025-01-01",
|
||||||
|
"end_date": "2025-12-31",
|
||||||
|
"include_excluded_scrap": False,
|
||||||
|
"exclude_material_scrap": True,
|
||||||
|
"exclude_pb_diode": True,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
assert response.status_code == 200, response.text[:500]
|
||||||
|
payload = response.json()
|
||||||
|
assert payload.get("success") is True, payload
|
||||||
|
assert payload.get("query_id")
|
||||||
|
|
||||||
|
def test_query_then_view_returns_cached_result(self, app_server: str):
|
||||||
|
query_resp = _post_reject_query(
|
||||||
|
app_server,
|
||||||
|
{
|
||||||
|
"mode": "date_range",
|
||||||
|
"start_date": "2025-01-01",
|
||||||
|
"end_date": "2025-12-31",
|
||||||
|
},
|
||||||
|
)
|
||||||
|
assert query_resp.status_code == 200, query_resp.text[:500]
|
||||||
|
query_payload = query_resp.json()
|
||||||
|
assert query_payload.get("success") is True, query_payload
|
||||||
|
query_id = query_payload.get("query_id")
|
||||||
|
assert query_id
|
||||||
|
|
||||||
|
view_resp = requests.get(
|
||||||
|
f"{app_server}/api/reject-history/view",
|
||||||
|
params={
|
||||||
|
"query_id": query_id,
|
||||||
|
"page": 1,
|
||||||
|
"per_page": 50,
|
||||||
|
"exclude_material_scrap": "true",
|
||||||
|
"exclude_pb_diode": "true",
|
||||||
|
},
|
||||||
|
timeout=120,
|
||||||
|
)
|
||||||
|
assert view_resp.status_code == 200, view_resp.text[:500]
|
||||||
|
view_payload = view_resp.json()
|
||||||
|
assert view_payload.get("success") is True, view_payload
|
||||||
|
|
||||||
|
def test_query_then_export_cached_returns_csv(self, app_server: str):
|
||||||
|
query_resp = _post_reject_query(
|
||||||
|
app_server,
|
||||||
|
{
|
||||||
|
"mode": "date_range",
|
||||||
|
"start_date": "2025-01-01",
|
||||||
|
"end_date": "2025-12-31",
|
||||||
|
},
|
||||||
|
)
|
||||||
|
assert query_resp.status_code == 200, query_resp.text[:500]
|
||||||
|
query_payload = query_resp.json()
|
||||||
|
assert query_payload.get("success") is True, query_payload
|
||||||
|
query_id = query_payload.get("query_id")
|
||||||
|
assert query_id
|
||||||
|
|
||||||
|
export_resp = requests.get(
|
||||||
|
f"{app_server}/api/reject-history/export-cached",
|
||||||
|
params={"query_id": query_id},
|
||||||
|
timeout=120,
|
||||||
|
)
|
||||||
|
assert export_resp.status_code == 200, export_resp.text[:300]
|
||||||
|
assert "text/csv" in export_resp.headers.get("Content-Type", "")
|
||||||
|
assert "LOT" in export_resp.text[:200]
|
||||||
102
tests/stress/test_reject_history_stress.py
Normal file
102
tests/stress/test_reject_history_stress.py
Normal file
@@ -0,0 +1,102 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
"""Stress tests for reject-history long-range query stability."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import concurrent.futures
|
||||||
|
import os
|
||||||
|
import time
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
import requests
|
||||||
|
|
||||||
|
try:
|
||||||
|
import redis
|
||||||
|
except Exception: # pragma: no cover - optional runtime dependency
|
||||||
|
redis = None
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.stress
|
||||||
|
@pytest.mark.load
|
||||||
|
@pytest.mark.skipif(
|
||||||
|
os.environ.get("RUN_LONG_STRESS") != "1",
|
||||||
|
reason="Long-range reject-history stress disabled; set RUN_LONG_STRESS=1 to run.",
|
||||||
|
)
|
||||||
|
class TestRejectHistoryLongRangeStress:
|
||||||
|
"""Concurrent long-range reject-history queries should stay recoverable."""
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _redis_used_memory_bytes() -> int | None:
|
||||||
|
if redis is None:
|
||||||
|
return None
|
||||||
|
redis_url = os.environ.get("STRESS_REDIS_URL", os.environ.get("REDIS_URL", "redis://localhost:6379/0"))
|
||||||
|
try:
|
||||||
|
client = redis.Redis.from_url(redis_url, decode_responses=True)
|
||||||
|
info = client.info("memory")
|
||||||
|
used = info.get("used_memory")
|
||||||
|
return int(used) if used is not None else None
|
||||||
|
except Exception:
|
||||||
|
return None
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _run_query(base_url: str, timeout: float, seed: int) -> tuple[bool, float, str]:
|
||||||
|
start = time.time()
|
||||||
|
try:
|
||||||
|
year = 2024 + (seed % 2)
|
||||||
|
response = requests.post(
|
||||||
|
f"{base_url}/api/reject-history/query",
|
||||||
|
json={
|
||||||
|
"mode": "date_range",
|
||||||
|
"start_date": f"{year}-01-01",
|
||||||
|
"end_date": f"{year}-12-31",
|
||||||
|
"exclude_material_scrap": True,
|
||||||
|
"exclude_pb_diode": True,
|
||||||
|
},
|
||||||
|
timeout=timeout,
|
||||||
|
)
|
||||||
|
duration = time.time() - start
|
||||||
|
if response.status_code != 200:
|
||||||
|
return False, duration, f"HTTP {response.status_code}"
|
||||||
|
payload = response.json()
|
||||||
|
if payload.get("success") is True and payload.get("query_id"):
|
||||||
|
return True, duration, ""
|
||||||
|
return False, duration, f"success={payload.get('success')} error={payload.get('error')}"
|
||||||
|
except Exception as exc: # pragma: no cover - runtime/network dependent
|
||||||
|
return False, time.time() - start, str(exc)[:180]
|
||||||
|
|
||||||
|
def test_concurrent_365_day_queries_no_crash(self, base_url: str, stress_result):
|
||||||
|
result = stress_result("Reject History Long-Range Concurrent")
|
||||||
|
timeout = float(os.environ.get("STRESS_REJECT_HISTORY_TIMEOUT", "420"))
|
||||||
|
concurrent_users = int(os.environ.get("STRESS_REJECT_HISTORY_CONCURRENCY", "3"))
|
||||||
|
rounds = int(os.environ.get("STRESS_REJECT_HISTORY_ROUNDS", "2"))
|
||||||
|
max_redis_delta_mb = int(os.environ.get("STRESS_REJECT_REDIS_MAX_DELTA_MB", "256"))
|
||||||
|
total_requests = concurrent_users * rounds
|
||||||
|
redis_before = self._redis_used_memory_bytes()
|
||||||
|
|
||||||
|
started = time.time()
|
||||||
|
with concurrent.futures.ThreadPoolExecutor(max_workers=concurrent_users) as executor:
|
||||||
|
futures = [
|
||||||
|
executor.submit(self._run_query, base_url, timeout, idx)
|
||||||
|
for idx in range(total_requests)
|
||||||
|
]
|
||||||
|
for future in concurrent.futures.as_completed(futures):
|
||||||
|
ok, duration, error = future.result()
|
||||||
|
if ok:
|
||||||
|
result.add_success(duration)
|
||||||
|
else:
|
||||||
|
result.add_failure(error, duration)
|
||||||
|
result.total_duration = time.time() - started
|
||||||
|
|
||||||
|
print(result.report())
|
||||||
|
assert result.total_requests == total_requests
|
||||||
|
assert result.success_rate >= 90.0, f"Success rate too low: {result.success_rate:.2f}%"
|
||||||
|
|
||||||
|
health_resp = requests.get(f"{base_url}/health", timeout=10)
|
||||||
|
assert health_resp.status_code in (200, 503)
|
||||||
|
|
||||||
|
redis_after = self._redis_used_memory_bytes()
|
||||||
|
if redis_before is not None and redis_after is not None:
|
||||||
|
delta_mb = (redis_after - redis_before) / (1024 * 1024)
|
||||||
|
assert delta_mb <= max_redis_delta_mb, (
|
||||||
|
f"Redis memory delta too high: {delta_mb:.1f}MB > {max_redis_delta_mb}MB"
|
||||||
|
)
|
||||||
576
tests/test_batch_query_engine.py
Normal file
576
tests/test_batch_query_engine.py
Normal file
@@ -0,0 +1,576 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
"""Unit tests for BatchQueryEngine module."""
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
from unittest.mock import patch, MagicMock, call
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
from mes_dashboard.services.batch_query_engine import (
|
||||||
|
compute_query_hash,
|
||||||
|
decompose_by_ids,
|
||||||
|
decompose_by_time_range,
|
||||||
|
execute_plan,
|
||||||
|
merge_chunks,
|
||||||
|
iterate_chunks,
|
||||||
|
should_decompose_by_time,
|
||||||
|
should_decompose_by_ids,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================
|
||||||
|
# 4.1 decompose_by_time_range
|
||||||
|
# ============================================================
|
||||||
|
|
||||||
|
|
||||||
|
class TestDecomposeByTimeRange:
|
||||||
|
def test_90_days_yields_3_chunks(self):
|
||||||
|
chunks = decompose_by_time_range("2025-01-01", "2025-03-31", grain_days=31)
|
||||||
|
assert len(chunks) == 3
|
||||||
|
# First chunk: Jan 1 – Jan 31
|
||||||
|
assert chunks[0] == {"chunk_start": "2025-01-01", "chunk_end": "2025-01-31"}
|
||||||
|
# Second chunk: Feb 1 – Mar 3
|
||||||
|
assert chunks[1]["chunk_start"] == "2025-02-01"
|
||||||
|
# Third chunk ends Mar 31
|
||||||
|
assert chunks[2]["chunk_end"] == "2025-03-31"
|
||||||
|
|
||||||
|
def test_31_days_yields_1_chunk(self):
|
||||||
|
chunks = decompose_by_time_range("2025-01-01", "2025-01-31", grain_days=31)
|
||||||
|
assert len(chunks) == 1
|
||||||
|
assert chunks[0] == {"chunk_start": "2025-01-01", "chunk_end": "2025-01-31"}
|
||||||
|
|
||||||
|
def test_single_day(self):
|
||||||
|
chunks = decompose_by_time_range("2025-06-15", "2025-06-15")
|
||||||
|
assert len(chunks) == 1
|
||||||
|
assert chunks[0] == {"chunk_start": "2025-06-15", "chunk_end": "2025-06-15"}
|
||||||
|
|
||||||
|
def test_contiguous_no_overlap_no_gap(self):
|
||||||
|
"""Verify closed-interval boundary semantics: no overlap, no gap."""
|
||||||
|
chunks = decompose_by_time_range("2025-01-01", "2025-06-30", grain_days=31)
|
||||||
|
for i in range(1, len(chunks)):
|
||||||
|
prev_end = chunks[i - 1]["chunk_end"]
|
||||||
|
cur_start = chunks[i]["chunk_start"]
|
||||||
|
from datetime import datetime, timedelta
|
||||||
|
prev_dt = datetime.strptime(prev_end, "%Y-%m-%d")
|
||||||
|
cur_dt = datetime.strptime(cur_start, "%Y-%m-%d")
|
||||||
|
assert cur_dt == prev_dt + timedelta(days=1), (
|
||||||
|
f"Gap/overlap between chunk {i-1} end={prev_end} and chunk {i} start={cur_start}"
|
||||||
|
)
|
||||||
|
# First starts at start_date, last ends at end_date
|
||||||
|
assert chunks[0]["chunk_start"] == "2025-01-01"
|
||||||
|
assert chunks[-1]["chunk_end"] == "2025-06-30"
|
||||||
|
|
||||||
|
def test_final_chunk_may_be_shorter(self):
|
||||||
|
chunks = decompose_by_time_range("2025-01-01", "2025-02-10", grain_days=31)
|
||||||
|
assert len(chunks) == 2
|
||||||
|
# Second chunk: Feb 1 – Feb 10 (10 days < 31)
|
||||||
|
assert chunks[1] == {"chunk_start": "2025-02-01", "chunk_end": "2025-02-10"}
|
||||||
|
|
||||||
|
def test_inverted_range_raises(self):
|
||||||
|
with pytest.raises(ValueError, match="must be <="):
|
||||||
|
decompose_by_time_range("2025-12-31", "2025-01-01")
|
||||||
|
|
||||||
|
def test_365_days(self):
|
||||||
|
chunks = decompose_by_time_range("2025-01-01", "2025-12-31", grain_days=31)
|
||||||
|
assert len(chunks) == 12 # roughly 365/31 ≈ 12
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================
|
||||||
|
# 4.2 decompose_by_ids
|
||||||
|
# ============================================================
|
||||||
|
|
||||||
|
|
||||||
|
class TestDecomposeByIds:
|
||||||
|
def test_2500_ids_yields_3_batches(self):
|
||||||
|
ids = list(range(2500))
|
||||||
|
batches = decompose_by_ids(ids, batch_size=1000)
|
||||||
|
assert len(batches) == 3
|
||||||
|
assert len(batches[0]) == 1000
|
||||||
|
assert len(batches[1]) == 1000
|
||||||
|
assert len(batches[2]) == 500
|
||||||
|
|
||||||
|
def test_500_ids_yields_1_batch(self):
|
||||||
|
ids = list(range(500))
|
||||||
|
batches = decompose_by_ids(ids, batch_size=1000)
|
||||||
|
assert len(batches) == 1
|
||||||
|
assert len(batches[0]) == 500
|
||||||
|
|
||||||
|
def test_empty_ids(self):
|
||||||
|
assert decompose_by_ids([]) == []
|
||||||
|
|
||||||
|
def test_exact_batch_size(self):
|
||||||
|
ids = list(range(1000))
|
||||||
|
batches = decompose_by_ids(ids, batch_size=1000)
|
||||||
|
assert len(batches) == 1
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================
|
||||||
|
# 4.3 execute_plan sequential
|
||||||
|
# ============================================================
|
||||||
|
|
||||||
|
|
||||||
|
class TestExecutePlanSequential:
|
||||||
|
def _mock_redis(self):
|
||||||
|
"""Set up mock redis for chunk store/load/exists."""
|
||||||
|
stored = {}
|
||||||
|
mock_client = MagicMock()
|
||||||
|
mock_client.setex.side_effect = lambda k, t, v: stored.update({k: v})
|
||||||
|
mock_client.get.side_effect = lambda k: stored.get(k)
|
||||||
|
mock_client.exists.side_effect = lambda k: 1 if k in stored else 0
|
||||||
|
mock_client.hset.return_value = None
|
||||||
|
mock_client.expire.return_value = None
|
||||||
|
return mock_client, stored
|
||||||
|
|
||||||
|
def test_sequential_execution_stores_chunks(self):
|
||||||
|
import mes_dashboard.core.redis_df_store as rds
|
||||||
|
import mes_dashboard.services.batch_query_engine as bqe
|
||||||
|
|
||||||
|
mock_client, stored = self._mock_redis()
|
||||||
|
|
||||||
|
call_log = []
|
||||||
|
|
||||||
|
def fake_query_fn(chunk, max_rows_per_chunk=None):
|
||||||
|
call_log.append(chunk)
|
||||||
|
return pd.DataFrame({"V": [1, 2]})
|
||||||
|
|
||||||
|
chunks = [
|
||||||
|
{"chunk_start": "2025-01-01", "chunk_end": "2025-01-31"},
|
||||||
|
{"chunk_start": "2025-02-01", "chunk_end": "2025-02-28"},
|
||||||
|
]
|
||||||
|
|
||||||
|
with patch.object(rds, "REDIS_ENABLED", True), \
|
||||||
|
patch.object(rds, "get_redis_client", return_value=mock_client), \
|
||||||
|
patch.object(bqe, "get_redis_client", return_value=mock_client):
|
||||||
|
qh = execute_plan(
|
||||||
|
chunks, fake_query_fn,
|
||||||
|
query_hash="testhash",
|
||||||
|
cache_prefix="test",
|
||||||
|
skip_cached=False,
|
||||||
|
)
|
||||||
|
|
||||||
|
assert qh == "testhash"
|
||||||
|
assert len(call_log) == 2
|
||||||
|
# Chunks should be stored in Redis
|
||||||
|
assert any("chunk:0" in k for k in stored)
|
||||||
|
assert any("chunk:1" in k for k in stored)
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================
|
||||||
|
# 4.4 execute_plan parallel
|
||||||
|
# ============================================================
|
||||||
|
|
||||||
|
|
||||||
|
class TestExecutePlanParallel:
|
||||||
|
def test_parallel_uses_threadpool(self):
|
||||||
|
import mes_dashboard.core.redis_df_store as rds
|
||||||
|
import mes_dashboard.services.batch_query_engine as bqe
|
||||||
|
|
||||||
|
mock_client = MagicMock()
|
||||||
|
stored = {}
|
||||||
|
mock_client.setex.side_effect = lambda k, t, v: stored.update({k: v})
|
||||||
|
mock_client.get.side_effect = lambda k: stored.get(k)
|
||||||
|
mock_client.exists.side_effect = lambda k: 1 if k in stored else 0
|
||||||
|
mock_client.hset.return_value = None
|
||||||
|
mock_client.expire.return_value = None
|
||||||
|
|
||||||
|
call_count = {"n": 0}
|
||||||
|
|
||||||
|
def fake_query_fn(chunk, max_rows_per_chunk=None):
|
||||||
|
call_count["n"] += 1
|
||||||
|
return pd.DataFrame({"V": [1]})
|
||||||
|
|
||||||
|
chunks = [{"i": i} for i in range(4)]
|
||||||
|
|
||||||
|
with patch.object(rds, "REDIS_ENABLED", True), \
|
||||||
|
patch.object(rds, "get_redis_client", return_value=mock_client), \
|
||||||
|
patch.object(bqe, "get_redis_client", return_value=mock_client), \
|
||||||
|
patch.object(bqe, "_effective_parallelism", return_value=2):
|
||||||
|
qh = execute_plan(
|
||||||
|
chunks, fake_query_fn,
|
||||||
|
parallel=2,
|
||||||
|
query_hash="ptest",
|
||||||
|
cache_prefix="p",
|
||||||
|
skip_cached=False,
|
||||||
|
)
|
||||||
|
|
||||||
|
assert call_count["n"] == 4
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================
|
||||||
|
# 4.5 partial cache hit
|
||||||
|
# ============================================================
|
||||||
|
|
||||||
|
|
||||||
|
class TestPartialCacheHit:
|
||||||
|
def test_skips_cached_chunks(self):
|
||||||
|
import mes_dashboard.core.redis_df_store as rds
|
||||||
|
import mes_dashboard.services.batch_query_engine as bqe
|
||||||
|
|
||||||
|
mock_client = MagicMock()
|
||||||
|
stored = {}
|
||||||
|
mock_client.setex.side_effect = lambda k, t, v: stored.update({k: v})
|
||||||
|
mock_client.get.side_effect = lambda k: stored.get(k)
|
||||||
|
mock_client.hset.return_value = None
|
||||||
|
mock_client.expire.return_value = None
|
||||||
|
|
||||||
|
# Pre-populate chunks 0 and 1 as "cached"
|
||||||
|
pre_cached_keys = set()
|
||||||
|
|
||||||
|
def fake_exists(k):
|
||||||
|
return 1 if k in pre_cached_keys else (1 if k in stored else 0)
|
||||||
|
|
||||||
|
mock_client.exists.side_effect = fake_exists
|
||||||
|
|
||||||
|
with patch.object(rds, "REDIS_ENABLED", True), \
|
||||||
|
patch.object(rds, "get_redis_client", return_value=mock_client), \
|
||||||
|
patch.object(bqe, "get_redis_client", return_value=mock_client):
|
||||||
|
# Pre-store 2 chunks
|
||||||
|
rds.redis_store_chunk("test", "hash5", 0, pd.DataFrame({"A": [1]}), ttl=60)
|
||||||
|
rds.redis_store_chunk("test", "hash5", 1, pd.DataFrame({"A": [2]}), ttl=60)
|
||||||
|
|
||||||
|
# Now mark those keys as existing
|
||||||
|
pre_cached_keys.update(stored.keys())
|
||||||
|
|
||||||
|
call_log = []
|
||||||
|
|
||||||
|
def fake_query_fn(chunk, max_rows_per_chunk=None):
|
||||||
|
call_log.append(chunk)
|
||||||
|
return pd.DataFrame({"A": [99]})
|
||||||
|
|
||||||
|
chunks = [{"i": i} for i in range(5)]
|
||||||
|
|
||||||
|
with patch.object(rds, "REDIS_ENABLED", True), \
|
||||||
|
patch.object(rds, "get_redis_client", return_value=mock_client), \
|
||||||
|
patch.object(bqe, "get_redis_client", return_value=mock_client):
|
||||||
|
execute_plan(
|
||||||
|
chunks, fake_query_fn,
|
||||||
|
query_hash="hash5",
|
||||||
|
cache_prefix="test",
|
||||||
|
skip_cached=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Only chunks 2, 3, 4 should have been executed
|
||||||
|
assert len(call_log) == 3
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================
|
||||||
|
# 4.6 memory guard
|
||||||
|
# ============================================================
|
||||||
|
|
||||||
|
|
||||||
|
class TestMemoryGuard:
|
||||||
|
def test_oversized_chunk_discarded(self):
|
||||||
|
import mes_dashboard.core.redis_df_store as rds
|
||||||
|
import mes_dashboard.services.batch_query_engine as bqe
|
||||||
|
|
||||||
|
mock_client = MagicMock()
|
||||||
|
stored = {}
|
||||||
|
mock_client.setex.side_effect = lambda k, t, v: stored.update({k: v})
|
||||||
|
mock_client.get.side_effect = lambda k: stored.get(k)
|
||||||
|
mock_client.exists.side_effect = lambda k: 1 if k in stored else 0
|
||||||
|
mock_client.hset.return_value = None
|
||||||
|
mock_client.expire.return_value = None
|
||||||
|
|
||||||
|
def oversized_query_fn(chunk, max_rows_per_chunk=None):
|
||||||
|
# Create DF that reports large memory
|
||||||
|
df = pd.DataFrame({"X": [1]})
|
||||||
|
return df
|
||||||
|
|
||||||
|
chunks = [{"i": 0}]
|
||||||
|
|
||||||
|
# Set memory limit to 0 MB so any DF exceeds it
|
||||||
|
with patch.object(rds, "REDIS_ENABLED", True), \
|
||||||
|
patch.object(rds, "get_redis_client", return_value=mock_client), \
|
||||||
|
patch.object(bqe, "get_redis_client", return_value=mock_client), \
|
||||||
|
patch.object(bqe, "BATCH_CHUNK_MAX_MEMORY_MB", 0):
|
||||||
|
qh = execute_plan(
|
||||||
|
chunks, oversized_query_fn,
|
||||||
|
query_hash="memtest",
|
||||||
|
cache_prefix="m",
|
||||||
|
skip_cached=False,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Chunk should NOT be stored (memory exceeded)
|
||||||
|
assert not any("chunk:0" in k for k in stored)
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================
|
||||||
|
# 4.7 result row count limit
|
||||||
|
# ============================================================
|
||||||
|
|
||||||
|
|
||||||
|
class TestMaxRowsPerChunk:
|
||||||
|
def test_max_rows_passed_to_query_fn(self):
|
||||||
|
import mes_dashboard.core.redis_df_store as rds
|
||||||
|
import mes_dashboard.services.batch_query_engine as bqe
|
||||||
|
|
||||||
|
mock_client = MagicMock()
|
||||||
|
mock_client.setex.return_value = None
|
||||||
|
mock_client.get.return_value = None
|
||||||
|
mock_client.exists.return_value = 0
|
||||||
|
mock_client.hset.return_value = None
|
||||||
|
mock_client.expire.return_value = None
|
||||||
|
|
||||||
|
received_max_rows = []
|
||||||
|
|
||||||
|
def capture_query_fn(chunk, max_rows_per_chunk=None):
|
||||||
|
received_max_rows.append(max_rows_per_chunk)
|
||||||
|
return pd.DataFrame({"V": [1]})
|
||||||
|
|
||||||
|
with patch.object(rds, "REDIS_ENABLED", True), \
|
||||||
|
patch.object(rds, "get_redis_client", return_value=mock_client), \
|
||||||
|
patch.object(bqe, "get_redis_client", return_value=mock_client):
|
||||||
|
execute_plan(
|
||||||
|
[{"i": 0}], capture_query_fn,
|
||||||
|
query_hash="rowtest",
|
||||||
|
cache_prefix="r",
|
||||||
|
max_rows_per_chunk=5000,
|
||||||
|
skip_cached=False,
|
||||||
|
)
|
||||||
|
|
||||||
|
assert received_max_rows == [5000]
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================
|
||||||
|
# 4.8 merge_chunks
|
||||||
|
# ============================================================
|
||||||
|
|
||||||
|
|
||||||
|
class TestMergeChunks:
|
||||||
|
def test_merge_produces_correct_df(self):
|
||||||
|
import mes_dashboard.core.redis_df_store as rds
|
||||||
|
|
||||||
|
mock_client = MagicMock()
|
||||||
|
stored = {}
|
||||||
|
mock_client.setex.side_effect = lambda k, t, v: stored.update({k: v})
|
||||||
|
mock_client.get.side_effect = lambda k: stored.get(k)
|
||||||
|
mock_client.hgetall.return_value = {"total": "3", "completed": "3", "failed": "0"}
|
||||||
|
mock_client.exists.side_effect = lambda k: 1 if k in stored else 0
|
||||||
|
|
||||||
|
with patch.object(rds, "REDIS_ENABLED", True), \
|
||||||
|
patch.object(rds, "get_redis_client", return_value=mock_client):
|
||||||
|
rds.redis_store_chunk("t", "h", 0, pd.DataFrame({"A": [1, 2]}))
|
||||||
|
rds.redis_store_chunk("t", "h", 1, pd.DataFrame({"A": [3, 4]}))
|
||||||
|
rds.redis_store_chunk("t", "h", 2, pd.DataFrame({"A": [5]}))
|
||||||
|
|
||||||
|
import mes_dashboard.services.batch_query_engine as bqe
|
||||||
|
|
||||||
|
with patch.object(rds, "REDIS_ENABLED", True), \
|
||||||
|
patch.object(rds, "get_redis_client", return_value=mock_client), \
|
||||||
|
patch.object(bqe, "get_redis_client", return_value=mock_client):
|
||||||
|
merged = merge_chunks("t", "h")
|
||||||
|
|
||||||
|
assert len(merged) == 5
|
||||||
|
assert list(merged["A"]) == [1, 2, 3, 4, 5]
|
||||||
|
|
||||||
|
def test_merge_respects_max_total_rows(self):
|
||||||
|
import mes_dashboard.core.redis_df_store as rds
|
||||||
|
|
||||||
|
mock_client = MagicMock()
|
||||||
|
stored = {}
|
||||||
|
mock_client.setex.side_effect = lambda k, t, v: stored.update({k: v})
|
||||||
|
mock_client.get.side_effect = lambda k: stored.get(k)
|
||||||
|
mock_client.hgetall.return_value = {"total": "3", "completed": "3", "failed": "0"}
|
||||||
|
mock_client.exists.side_effect = lambda k: 1 if k in stored else 0
|
||||||
|
|
||||||
|
with patch.object(rds, "REDIS_ENABLED", True), \
|
||||||
|
patch.object(rds, "get_redis_client", return_value=mock_client):
|
||||||
|
rds.redis_store_chunk("t", "cap", 0, pd.DataFrame({"A": [1, 2]}))
|
||||||
|
rds.redis_store_chunk("t", "cap", 1, pd.DataFrame({"A": [3, 4]}))
|
||||||
|
rds.redis_store_chunk("t", "cap", 2, pd.DataFrame({"A": [5, 6]}))
|
||||||
|
|
||||||
|
import mes_dashboard.services.batch_query_engine as bqe
|
||||||
|
|
||||||
|
with patch.object(rds, "REDIS_ENABLED", True), \
|
||||||
|
patch.object(rds, "get_redis_client", return_value=mock_client), \
|
||||||
|
patch.object(bqe, "get_redis_client", return_value=mock_client):
|
||||||
|
merged = merge_chunks("t", "cap", max_total_rows=4)
|
||||||
|
|
||||||
|
assert len(merged) == 4
|
||||||
|
assert list(merged["A"]) == [1, 2, 3, 4]
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================
|
||||||
|
# 4.9 progress tracking
|
||||||
|
# ============================================================
|
||||||
|
|
||||||
|
|
||||||
|
class TestProgressTracking:
|
||||||
|
def test_hset_updated_after_each_chunk(self):
|
||||||
|
import mes_dashboard.core.redis_df_store as rds
|
||||||
|
import mes_dashboard.services.batch_query_engine as bqe
|
||||||
|
|
||||||
|
mock_client = MagicMock()
|
||||||
|
mock_client.setex.return_value = None
|
||||||
|
mock_client.get.return_value = None
|
||||||
|
mock_client.exists.return_value = 0
|
||||||
|
mock_client.hset.return_value = None
|
||||||
|
mock_client.expire.return_value = None
|
||||||
|
|
||||||
|
hset_calls = []
|
||||||
|
original_hset = mock_client.hset
|
||||||
|
|
||||||
|
def track_hset(key, mapping=None):
|
||||||
|
hset_calls.append(mapping.copy() if mapping else {})
|
||||||
|
return original_hset(key, mapping=mapping)
|
||||||
|
|
||||||
|
mock_client.hset.side_effect = track_hset
|
||||||
|
|
||||||
|
def fake_query_fn(chunk, max_rows_per_chunk=None):
|
||||||
|
return pd.DataFrame({"V": [1]})
|
||||||
|
|
||||||
|
chunks = [{"i": 0}, {"i": 1}, {"i": 2}]
|
||||||
|
|
||||||
|
with patch.object(rds, "REDIS_ENABLED", True), \
|
||||||
|
patch.object(rds, "get_redis_client", return_value=mock_client), \
|
||||||
|
patch.object(bqe, "get_redis_client", return_value=mock_client):
|
||||||
|
execute_plan(
|
||||||
|
chunks, fake_query_fn,
|
||||||
|
query_hash="progtest",
|
||||||
|
cache_prefix="p",
|
||||||
|
skip_cached=False,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Should have initial + 3 per-chunk + final = 5 hset calls
|
||||||
|
assert len(hset_calls) >= 4
|
||||||
|
# Last call should show completed status
|
||||||
|
last = hset_calls[-1]
|
||||||
|
assert last["status"] == "completed"
|
||||||
|
assert last["completed"] == "3"
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================
|
||||||
|
# 4.10 chunk failure resilience
|
||||||
|
# ============================================================
|
||||||
|
|
||||||
|
|
||||||
|
class TestChunkFailureResilience:
|
||||||
|
def test_one_chunk_fails_others_complete(self):
|
||||||
|
import mes_dashboard.core.redis_df_store as rds
|
||||||
|
import mes_dashboard.services.batch_query_engine as bqe
|
||||||
|
|
||||||
|
mock_client = MagicMock()
|
||||||
|
stored = {}
|
||||||
|
mock_client.setex.side_effect = lambda k, t, v: stored.update({k: v})
|
||||||
|
mock_client.get.side_effect = lambda k: stored.get(k)
|
||||||
|
mock_client.exists.side_effect = lambda k: 1 if k in stored else 0
|
||||||
|
mock_client.hset.return_value = None
|
||||||
|
mock_client.expire.return_value = None
|
||||||
|
|
||||||
|
call_count = {"n": 0}
|
||||||
|
|
||||||
|
def failing_query_fn(chunk, max_rows_per_chunk=None):
|
||||||
|
call_count["n"] += 1
|
||||||
|
if chunk.get("i") == 1:
|
||||||
|
raise RuntimeError("Oracle timeout")
|
||||||
|
return pd.DataFrame({"V": [chunk["i"]]})
|
||||||
|
|
||||||
|
chunks = [{"i": 0}, {"i": 1}, {"i": 2}]
|
||||||
|
|
||||||
|
hset_calls = []
|
||||||
|
mock_client.hset.side_effect = lambda k, mapping=None: hset_calls.append(
|
||||||
|
mapping.copy() if mapping else {}
|
||||||
|
)
|
||||||
|
|
||||||
|
with patch.object(rds, "REDIS_ENABLED", True), \
|
||||||
|
patch.object(rds, "get_redis_client", return_value=mock_client), \
|
||||||
|
patch.object(bqe, "get_redis_client", return_value=mock_client):
|
||||||
|
qh = execute_plan(
|
||||||
|
chunks, failing_query_fn,
|
||||||
|
query_hash="failtest",
|
||||||
|
cache_prefix="f",
|
||||||
|
skip_cached=False,
|
||||||
|
)
|
||||||
|
|
||||||
|
# All 3 chunks attempted
|
||||||
|
assert call_count["n"] == 3
|
||||||
|
# Final metadata should reflect partial failure
|
||||||
|
last = hset_calls[-1]
|
||||||
|
assert last["status"] == "partial"
|
||||||
|
assert last["completed"] == "2"
|
||||||
|
assert last["failed"] == "1"
|
||||||
|
assert last["has_partial_failure"] == "True"
|
||||||
|
|
||||||
|
def test_chunk_store_failure_is_marked_partial(self):
|
||||||
|
import mes_dashboard.core.redis_df_store as rds
|
||||||
|
import mes_dashboard.services.batch_query_engine as bqe
|
||||||
|
|
||||||
|
mock_client = MagicMock()
|
||||||
|
stored = {}
|
||||||
|
mock_client.setex.side_effect = lambda k, t, v: stored.update({k: v})
|
||||||
|
mock_client.get.side_effect = lambda k: stored.get(k)
|
||||||
|
mock_client.exists.side_effect = lambda k: 1 if k in stored else 0
|
||||||
|
mock_client.hset.return_value = None
|
||||||
|
mock_client.expire.return_value = None
|
||||||
|
|
||||||
|
def query_fn(chunk, max_rows_per_chunk=None):
|
||||||
|
return pd.DataFrame({"V": [chunk["i"]]})
|
||||||
|
|
||||||
|
original_store_chunk = bqe.redis_store_chunk
|
||||||
|
|
||||||
|
def fail_one_store(prefix, query_hash, idx, df, ttl=900):
|
||||||
|
if idx == 1:
|
||||||
|
return False
|
||||||
|
return original_store_chunk(prefix, query_hash, idx, df, ttl=ttl)
|
||||||
|
|
||||||
|
hset_calls = []
|
||||||
|
mock_client.hset.side_effect = lambda k, mapping=None: hset_calls.append(
|
||||||
|
mapping.copy() if mapping else {}
|
||||||
|
)
|
||||||
|
|
||||||
|
with patch.object(rds, "REDIS_ENABLED", True), \
|
||||||
|
patch.object(rds, "get_redis_client", return_value=mock_client), \
|
||||||
|
patch.object(bqe, "get_redis_client", return_value=mock_client), \
|
||||||
|
patch.object(bqe, "redis_store_chunk", side_effect=fail_one_store):
|
||||||
|
execute_plan(
|
||||||
|
[{"i": 0}, {"i": 1}, {"i": 2}],
|
||||||
|
query_fn,
|
||||||
|
query_hash="storefail",
|
||||||
|
cache_prefix="sf",
|
||||||
|
skip_cached=False,
|
||||||
|
)
|
||||||
|
|
||||||
|
last = hset_calls[-1]
|
||||||
|
assert last["status"] == "partial"
|
||||||
|
assert last["completed"] == "2"
|
||||||
|
assert last["failed"] == "1"
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================
|
||||||
|
# query_hash stability
|
||||||
|
# ============================================================
|
||||||
|
|
||||||
|
|
||||||
|
class TestQueryHash:
|
||||||
|
def test_same_params_different_order(self):
|
||||||
|
h1 = compute_query_hash({"a": 1, "b": [3, 1, 2]})
|
||||||
|
h2 = compute_query_hash({"b": [2, 1, 3], "a": 1})
|
||||||
|
assert h1 == h2
|
||||||
|
|
||||||
|
def test_different_params_different_hash(self):
|
||||||
|
h1 = compute_query_hash({"mode": "date_range", "start": "2025-01-01"})
|
||||||
|
h2 = compute_query_hash({"mode": "date_range", "start": "2025-06-01"})
|
||||||
|
assert h1 != h2
|
||||||
|
|
||||||
|
def test_hash_is_16_chars(self):
|
||||||
|
h = compute_query_hash({"x": 1})
|
||||||
|
assert len(h) == 16
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================
|
||||||
|
# should_decompose helpers
|
||||||
|
# ============================================================
|
||||||
|
|
||||||
|
|
||||||
|
class TestShouldDecompose:
|
||||||
|
def test_long_range_true(self):
|
||||||
|
assert should_decompose_by_time("2025-01-01", "2025-12-31")
|
||||||
|
|
||||||
|
def test_short_range_false(self):
|
||||||
|
assert not should_decompose_by_time("2025-01-01", "2025-02-01")
|
||||||
|
|
||||||
|
def test_large_ids_true(self):
|
||||||
|
assert should_decompose_by_ids(list(range(2000)))
|
||||||
|
|
||||||
|
def test_small_ids_false(self):
|
||||||
|
assert not should_decompose_by_ids(list(range(500)))
|
||||||
@@ -117,3 +117,4 @@ def test_runtime_config_includes_fetchmany_size():
|
|||||||
assert "slow_fetchmany_size" in runtime
|
assert "slow_fetchmany_size" in runtime
|
||||||
assert isinstance(runtime["slow_fetchmany_size"], int)
|
assert isinstance(runtime["slow_fetchmany_size"], int)
|
||||||
assert runtime["slow_fetchmany_size"] > 0
|
assert runtime["slow_fetchmany_size"] > 0
|
||||||
|
assert "slow_pool_enabled" in runtime
|
||||||
|
|||||||
101
tests/test_database_slow_pool.py
Normal file
101
tests/test_database_slow_pool.py
Normal file
@@ -0,0 +1,101 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
"""Unit tests for isolated slow-query pool path."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from unittest.mock import MagicMock, patch
|
||||||
|
|
||||||
|
import mes_dashboard.core.database as db
|
||||||
|
|
||||||
|
|
||||||
|
@patch.object(db, "oracledb")
@patch.object(db, "get_slow_engine")
@patch.object(db, "_get_slow_query_semaphore")
@patch.object(db, "get_db_runtime_config")
def test_read_sql_df_slow_uses_slow_pool_when_enabled(
    mock_runtime,
    mock_sem_fn,
    mock_get_slow_engine,
    mock_oracledb,
):
    """Slow query should checkout connection from isolated slow pool."""
    # Runtime config with the slow pool switched on.
    mock_runtime.return_value = {
        "slow_pool_enabled": True,
        "slow_call_timeout_ms": 60000,
        "slow_fetchmany_size": 5000,
        "tcp_connect_timeout": 10,
        "retry_count": 1,
        "retry_delay": 1.0,
    }

    # Semaphore that always grants a slot.
    semaphore = MagicMock()
    semaphore.acquire.return_value = True
    mock_sem_fn.return_value = semaphore

    # Fake DB-API plumbing: engine -> raw connection -> cursor -> rows.
    fake_cursor = MagicMock()
    fake_cursor.description = [("COL_A",), ("COL_B",)]
    fake_cursor.fetchall.return_value = [("v1", "v2")]

    fake_conn = MagicMock()
    fake_conn.cursor.return_value = fake_cursor

    fake_engine = MagicMock()
    fake_engine.raw_connection.return_value = fake_conn
    mock_get_slow_engine.return_value = fake_engine

    df = db.read_sql_df_slow("SELECT 1", {"p0": "x"})

    # Result is built from the cursor description and rows.
    assert list(df.columns) == ["COL_A", "COL_B"]
    assert len(df) == 1
    # Connection must come from the slow pool, never a direct connect.
    mock_get_slow_engine.assert_called_once()
    mock_oracledb.connect.assert_not_called()
    # Resources are released exactly once.
    fake_conn.close.assert_called_once()
    semaphore.release.assert_called_once()
|
||||||
|
|
||||||
|
|
||||||
|
@patch.object(db, "oracledb")
@patch.object(db, "get_slow_engine")
@patch.object(db, "_get_slow_query_semaphore")
@patch.object(db, "get_db_runtime_config")
def test_read_sql_df_slow_iter_uses_slow_pool_when_enabled(
    mock_runtime,
    mock_sem_fn,
    mock_get_slow_engine,
    mock_oracledb,
):
    """Slow iterator query should checkout connection from isolated slow pool."""
    # Runtime config with the slow pool on and a tiny fetch batch.
    mock_runtime.return_value = {
        "slow_pool_enabled": True,
        "slow_call_timeout_ms": 60000,
        "slow_fetchmany_size": 2,
        "tcp_connect_timeout": 10,
        "retry_count": 1,
        "retry_delay": 1.0,
    }

    semaphore = MagicMock()
    semaphore.acquire.return_value = True
    mock_sem_fn.return_value = semaphore

    # fetchmany yields one batch, then an empty list to stop iteration.
    fake_cursor = MagicMock()
    fake_cursor.description = [("COL_A",), ("COL_B",)]
    fake_cursor.fetchmany.side_effect = [
        [("r1a", "r1b")],
        [],
    ]

    fake_conn = MagicMock()
    fake_conn.cursor.return_value = fake_cursor

    fake_engine = MagicMock()
    fake_engine.raw_connection.return_value = fake_conn
    mock_get_slow_engine.return_value = fake_engine

    batches = list(db.read_sql_df_slow_iter("SELECT 1", {"p0": "x"}, batch_size=2))

    # Exactly one (columns, rows) batch comes back.
    assert batches == [(["COL_A", "COL_B"], [("r1a", "r1b")])]
    # Connection must come from the slow pool, never a direct connect.
    mock_get_slow_engine.assert_called_once()
    mock_oracledb.connect.assert_not_called()
    # Resources are released exactly once.
    fake_conn.close.assert_called_once()
    semaphore.release.assert_called_once()
|
||||||
|
|
||||||
97
tests/test_hold_dataset_cache.py
Normal file
97
tests/test_hold_dataset_cache.py
Normal file
@@ -0,0 +1,97 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
"""Unit tests for hold_dataset_cache — engine integration (task 6.4)."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
from mes_dashboard.services import hold_dataset_cache as cache_svc
|
||||||
|
|
||||||
|
|
||||||
|
class TestHoldEngineDecomposition:
    """6.4 — hold-history with long date range triggers engine."""

    def test_long_range_triggers_engine(self, monkeypatch):
        """90-day range → engine decomposition activated."""
        import mes_dashboard.services.batch_query_engine as engine_mod

        engine_calls = {"execute": 0, "merge": 0}

        def fake_execute_plan(chunks, query_fn, **kwargs):
            engine_calls["execute"] += 1
            assert len(chunks) == 3  # 90 days / 31 = 3 chunks
            return kwargs.get("query_hash", "fake_hash")

        result_df = pd.DataFrame({
            "CONTAINERID": ["C1"],
            "HOLDTYPE": ["Quality"],
        })

        def fake_merge_chunks(prefix, qhash, **kwargs):
            engine_calls["merge"] += 1
            return result_df

        monkeypatch.setattr(engine_mod, "execute_plan", fake_execute_plan)
        monkeypatch.setattr(engine_mod, "merge_chunks", fake_merge_chunks)
        monkeypatch.setattr(
            "mes_dashboard.services.hold_dataset_cache._get_cached_df",
            lambda _: None,
        )
        monkeypatch.setattr(
            "mes_dashboard.services.hold_dataset_cache._store_df",
            lambda *a, **kw: None,
        )
        monkeypatch.setattr(
            "mes_dashboard.services.hold_dataset_cache._load_sql",
            lambda name: "SELECT 1 FROM dual",
        )
        monkeypatch.setattr(
            "mes_dashboard.services.hold_dataset_cache._derive_all_views",
            lambda df, **kw: {
                "summary": {"total": 1},
                "detail": {"items": [], "pagination": {"total": 1}},
            },
        )

        result = cache_svc.execute_primary_query(
            start_date="2025-01-01",
            end_date="2025-03-31",
        )

        assert engine_calls["execute"] == 1
        assert engine_calls["merge"] == 1

    def test_short_range_skips_engine(self, monkeypatch):
        """30-day range → direct path, no engine.

        Fix: the counter is now actually wired to the engine via monkeypatch.
        Previously ``execute_plan`` was never patched, so the final assertion
        ``engine_calls["execute"] == 0`` was vacuously true and the test could
        not detect an engine regression.
        """
        import mes_dashboard.services.batch_query_engine as engine_mod

        engine_calls = {"execute": 0}

        def counting_execute_plan(chunks, query_fn, **kwargs):
            engine_calls["execute"] += 1
            return kwargs.get("query_hash", "fake_hash")

        monkeypatch.setattr(engine_mod, "execute_plan", counting_execute_plan)
        monkeypatch.setattr(
            "mes_dashboard.services.hold_dataset_cache._get_cached_df",
            lambda _: None,
        )
        monkeypatch.setattr(
            "mes_dashboard.services.hold_dataset_cache._load_sql",
            lambda name: "SELECT 1 FROM dual",
        )
        monkeypatch.setattr(
            "mes_dashboard.services.hold_dataset_cache.read_sql_df",
            lambda sql, params: pd.DataFrame({"CONTAINERID": ["C1"]}),
        )
        monkeypatch.setattr(
            "mes_dashboard.services.hold_dataset_cache._store_df",
            lambda *a, **kw: None,
        )
        monkeypatch.setattr(
            "mes_dashboard.services.hold_dataset_cache._derive_all_views",
            lambda df, **kw: {
                "summary": {"total": 1},
                "detail": {"items": [], "pagination": {"total": 1}},
            },
        )

        result = cache_svc.execute_primary_query(
            start_date="2025-06-01",
            end_date="2025-06-30",
        )

        assert engine_calls["execute"] == 0  # Engine NOT used
|
||||||
116
tests/test_job_query_engine.py
Normal file
116
tests/test_job_query_engine.py
Normal file
@@ -0,0 +1,116 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
"""Unit tests for job_query_service — engine integration (tasks 9.1-9.4)."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
from mes_dashboard.services import job_query_service as job_svc
|
||||||
|
|
||||||
|
|
||||||
|
class TestJobQueryEngineDecomposition:
    """9.4 — full-year query with many resources → engine decomposition."""

    def test_long_range_triggers_engine(self, monkeypatch):
        """90-day range → engine decomposition for job query."""
        import mes_dashboard.services.batch_query_engine as engine_mod
        import mes_dashboard.core.redis_df_store as rds

        engine_calls = {"execute": 0, "merge": 0}

        def fake_execute_plan(chunks, query_fn, **kwargs):
            engine_calls["execute"] += 1
            assert len(chunks) == 3  # 90 days / 31 = 3 chunks
            assert kwargs.get("cache_prefix") == "job"
            return kwargs.get("query_hash", "fake_hash")

        result_df = pd.DataFrame({
            "JOBID": ["J1", "J2"],
            "RESOURCEID": ["R1", "R2"],
        })

        def fake_merge_chunks(prefix, qhash, **kwargs):
            engine_calls["merge"] += 1
            return result_df

        monkeypatch.setattr(engine_mod, "execute_plan", fake_execute_plan)
        monkeypatch.setattr(engine_mod, "merge_chunks", fake_merge_chunks)
        monkeypatch.setattr(rds, "redis_load_df", lambda key: None)
        monkeypatch.setattr(rds, "redis_store_df", lambda key, df, ttl=None: None)
        monkeypatch.setattr(
            "mes_dashboard.services.job_query_service.SQLLoader",
            type("FakeLoader", (), {
                "load": staticmethod(lambda name: "SELECT 1 FROM dual WHERE {{ RESOURCE_FILTER }}"),
            }),
        )

        result = job_svc.get_jobs_by_resources(
            resource_ids=["R1", "R2", "R3"],
            start_date="2025-01-01",
            end_date="2025-03-31",
        )

        assert engine_calls["execute"] == 1
        assert engine_calls["merge"] == 1
        assert result["total"] == 2
        assert "error" not in result

    def test_short_range_skips_engine(self, monkeypatch):
        """30-day range → direct path, no engine.

        Fix: ``execute_plan`` is now monkeypatched with a counting stub.
        Previously the counter was never connected to the engine, so the
        assertion ``engine_calls["execute"] == 0`` was vacuously true.
        """
        import mes_dashboard.services.batch_query_engine as engine_mod
        import mes_dashboard.core.redis_df_store as rds

        engine_calls = {"execute": 0}

        def counting_execute_plan(chunks, query_fn, **kwargs):
            engine_calls["execute"] += 1
            return kwargs.get("query_hash", "fake_hash")

        monkeypatch.setattr(engine_mod, "execute_plan", counting_execute_plan)
        monkeypatch.setattr(rds, "redis_load_df", lambda key: None)
        monkeypatch.setattr(rds, "redis_store_df", lambda key, df, ttl=None: None)
        monkeypatch.setattr(
            "mes_dashboard.services.job_query_service.SQLLoader",
            type("FakeLoader", (), {
                "load": staticmethod(lambda name: "SELECT 1 FROM dual WHERE {{ RESOURCE_FILTER }}"),
            }),
        )
        monkeypatch.setattr(
            "mes_dashboard.services.job_query_service.read_sql_df",
            lambda sql, params: pd.DataFrame({"JOBID": ["J1"]}),
        )

        result = job_svc.get_jobs_by_resources(
            resource_ids=["R1"],
            start_date="2025-06-01",
            end_date="2025-06-30",
        )

        assert engine_calls["execute"] == 0  # Engine NOT used
        assert result["total"] == 1

    def test_redis_cache_hit_skips_query(self, monkeypatch):
        """Redis cache hit → returns cached DataFrame without Oracle query."""
        import mes_dashboard.core.redis_df_store as rds

        query_calls = {"sql": 0}

        cached_df = pd.DataFrame({
            "JOBID": ["J-CACHED"],
            "RESOURCEID": ["R1"],
        })

        monkeypatch.setattr(rds, "redis_load_df", lambda key: cached_df)

        def fail_sql(*args, **kwargs):
            query_calls["sql"] += 1
            raise RuntimeError("Should not reach Oracle")

        monkeypatch.setattr(
            "mes_dashboard.services.job_query_service.read_sql_df",
            fail_sql,
        )

        result = job_svc.get_jobs_by_resources(
            resource_ids=["R1"],
            start_date="2025-06-01",
            end_date="2025-06-30",
        )

        assert query_calls["sql"] == 0  # Oracle NOT called
        assert result["total"] == 1
        assert result["data"][0]["JOBID"] == "J-CACHED"
|
||||||
94
tests/test_mid_section_defect_engine.py
Normal file
94
tests/test_mid_section_defect_engine.py
Normal file
@@ -0,0 +1,94 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
"""Unit tests for mid_section_defect_service — engine integration (task 8.4)."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
from mes_dashboard.services import mid_section_defect_service as msd_svc
|
||||||
|
|
||||||
|
|
||||||
|
class TestDetectionEngineDecomposition:
    """8.4 — large date range + high-volume station → engine decomposition."""

    def test_long_range_triggers_engine(self, monkeypatch):
        """90-day range → engine decomposition for detection query."""
        import mes_dashboard.services.batch_query_engine as engine_mod

        engine_calls = {"execute": 0, "merge": 0}

        def fake_execute_plan(chunks, query_fn, **kwargs):
            engine_calls["execute"] += 1
            assert len(chunks) == 3  # 90 days / 31 = 3 chunks
            assert kwargs.get("cache_prefix") == "msd_detect"
            return kwargs.get("query_hash", "fake_hash")

        result_df = pd.DataFrame({
            "CONTAINERID": ["C1", "C2"],
            "WORKCENTERNAME": ["TEST-WC-A", "TEST-WC-B"],
        })

        def fake_merge_chunks(prefix, qhash, **kwargs):
            engine_calls["merge"] += 1
            return result_df

        monkeypatch.setattr(engine_mod, "execute_plan", fake_execute_plan)
        monkeypatch.setattr(engine_mod, "merge_chunks", fake_merge_chunks)
        monkeypatch.setattr(
            "mes_dashboard.services.mid_section_defect_service.cache_get",
            lambda key: None,
        )
        monkeypatch.setattr(
            "mes_dashboard.services.mid_section_defect_service.cache_set",
            lambda key, val, ttl=None: None,
        )
        monkeypatch.setattr(
            "mes_dashboard.services.mid_section_defect_service.SQLLoader",
            type("FakeLoader", (), {
                "load_with_params": staticmethod(lambda name, **kw: "SELECT 1 FROM dual"),
            }),
        )

        df = msd_svc._fetch_station_detection_data(
            start_date="2025-01-01",
            end_date="2025-03-31",
            station="測試",
        )

        assert engine_calls["execute"] == 1
        assert engine_calls["merge"] == 1
        assert df is not None
        assert len(df) == 2

    def test_short_range_skips_engine(self, monkeypatch):
        """30-day range → direct path, no engine.

        Fix: ``execute_plan`` is now monkeypatched with a counting stub.
        Previously the counter was never connected to the engine, so the
        assertion ``engine_calls["execute"] == 0`` was vacuously true.
        """
        import mes_dashboard.services.batch_query_engine as engine_mod

        engine_calls = {"execute": 0}

        def counting_execute_plan(chunks, query_fn, **kwargs):
            engine_calls["execute"] += 1
            return kwargs.get("query_hash", "fake_hash")

        monkeypatch.setattr(engine_mod, "execute_plan", counting_execute_plan)
        monkeypatch.setattr(
            "mes_dashboard.services.mid_section_defect_service.cache_get",
            lambda key: None,
        )
        monkeypatch.setattr(
            "mes_dashboard.services.mid_section_defect_service.cache_set",
            lambda key, val, ttl=None: None,
        )
        monkeypatch.setattr(
            "mes_dashboard.services.mid_section_defect_service.SQLLoader",
            type("FakeLoader", (), {
                "load_with_params": staticmethod(lambda name, **kw: "SELECT 1 FROM dual"),
            }),
        )
        monkeypatch.setattr(
            "mes_dashboard.services.mid_section_defect_service.read_sql_df",
            lambda sql, params: pd.DataFrame({"CONTAINERID": ["C1"]}),
        )

        df = msd_svc._fetch_station_detection_data(
            start_date="2025-06-01",
            end_date="2025-06-30",
            station="測試",
        )

        assert engine_calls["execute"] == 0  # Engine NOT used
        assert df is not None
        assert len(df) == 1
|
||||||
155
tests/test_query_spool_store.py
Normal file
155
tests/test_query_spool_store.py
Normal file
@@ -0,0 +1,155 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
"""Unit tests for parquet query spool store."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import fnmatch
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import time
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
from mes_dashboard.core.redis_client import get_key
|
||||||
|
from mes_dashboard.core import query_spool_store as spool
|
||||||
|
|
||||||
|
|
||||||
|
class FakeRedis:
    """In-memory stand-in for the subset of the redis client API the spool
    store uses: ``setex``/``get``/``delete``/``scan_iter`` with TTL expiry."""

    def __init__(self) -> None:
        # key -> value, and key -> absolute expiry (unix seconds)
        self._data: dict[str, str] = {}
        self._expires: dict[str, int] = {}

    def _purge_if_expired(self, key: str) -> None:
        """Drop *key* lazily once its deadline has passed (redis-like)."""
        deadline = self._expires.get(key)
        if deadline is None:
            return
        if deadline <= int(time.time()):
            self._data.pop(key, None)
            self._expires.pop(key, None)

    def setex(self, key: str, ttl: int, value: str) -> bool:
        """Store *value* under *key* with a relative TTL in seconds."""
        self._data[key] = value
        self._expires[key] = int(time.time()) + int(ttl)
        return True

    def get(self, key: str):
        """Return the stored value, or None if absent/expired."""
        self._purge_if_expired(key)
        return self._data.get(key)

    def delete(self, *keys) -> int:
        """Remove the given keys; return how many actually existed."""
        removed = 0
        for candidate in keys:
            if candidate in self._data:
                removed += 1
            self._data.pop(candidate, None)
            self._expires.pop(candidate, None)
        return removed

    def scan_iter(self, match: str | None = None, count: int = 100):
        """Yield live keys, optionally filtered by a glob *match* pattern."""
        for candidate in list(self._data):
            self._purge_if_expired(candidate)
            if candidate not in self._data:
                continue
            if match is None or fnmatch.fnmatch(candidate, match):
                yield candidate
|
||||||
|
|
||||||
|
|
||||||
|
def _build_df() -> pd.DataFrame:
|
||||||
|
return pd.DataFrame(
|
||||||
|
{
|
||||||
|
"CONTAINERID": ["C1", "C2"],
|
||||||
|
"LOSSREASONNAME": ["001_A", "002_B"],
|
||||||
|
"REJECT_TOTAL_QTY": [10, 20],
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_spool_store_and_load_roundtrip(monkeypatch, tmp_path):
    """A stored DataFrame can be loaded back unchanged via the spool."""
    fake = FakeRedis()
    # Route the spool at a temp dir and the fake redis backend.
    monkeypatch.setattr(spool, "QUERY_SPOOL_ENABLED", True)
    monkeypatch.setattr(spool, "QUERY_SPOOL_DIR", tmp_path / "query_spool")
    monkeypatch.setattr(spool, "get_redis_client", lambda: fake)

    assert spool.store_spooled_df(
        "reject_dataset", "qid-roundtrip-1", _build_df(), ttl_seconds=1200
    ) is True

    metadata = spool.get_spool_metadata("reject_dataset", "qid-roundtrip-1")
    assert metadata is not None
    assert metadata.get("row_count") == 2

    loaded = spool.load_spooled_df("reject_dataset", "qid-roundtrip-1")
    assert loaded is not None
    # Compare order-insensitively on the container key.
    expected = _build_df().sort_values("CONTAINERID").reset_index(drop=True)
    actual = loaded.sort_values("CONTAINERID").reset_index(drop=True)
    pd.testing.assert_frame_equal(actual, expected)
|
||||||
|
|
||||||
|
|
||||||
|
def test_spool_load_returns_none_when_metadata_hash_mismatch(monkeypatch, tmp_path):
    """A tampered columns_hash invalidates the entry and purges its metadata."""
    fake = FakeRedis()
    monkeypatch.setattr(spool, "QUERY_SPOOL_ENABLED", True)
    monkeypatch.setattr(spool, "QUERY_SPOOL_DIR", tmp_path / "query_spool")
    monkeypatch.setattr(spool, "get_redis_client", lambda: fake)

    assert spool.store_spooled_df("reject_dataset", "qid-hash-1", _build_df(), ttl_seconds=1200)

    # Corrupt the stored metadata's columns hash in place.
    key = get_key(spool._meta_key("reject_dataset", "qid-hash-1"))
    metadata = json.loads(fake.get(key))
    metadata["columns_hash"] = "deadbeefdeadbeef"
    fake.setex(key, 1200, json.dumps(metadata, ensure_ascii=False))

    # Load must refuse the mismatched entry and delete its metadata key.
    assert spool.load_spooled_df("reject_dataset", "qid-hash-1") is None
    assert fake.get(key) is None
|
||||||
|
|
||||||
|
|
||||||
|
def test_spool_load_returns_none_when_file_missing(monkeypatch, tmp_path):
    """If the parquet file vanished, load returns None and drops the metadata."""
    fake = FakeRedis()
    monkeypatch.setattr(spool, "QUERY_SPOOL_ENABLED", True)
    monkeypatch.setattr(spool, "QUERY_SPOOL_DIR", tmp_path / "query_spool")
    monkeypatch.setattr(spool, "get_redis_client", lambda: fake)

    assert spool.store_spooled_df(
        "reject_dataset", "qid-missing-file-1", _build_df(), ttl_seconds=1200
    )
    metadata = spool.get_spool_metadata("reject_dataset", "qid-missing-file-1")
    assert metadata is not None

    # Delete the backing parquet file out from under the metadata.
    path = spool._path_from_relative(metadata["relative_path"])
    assert path is not None and path.exists()
    path.unlink()

    assert spool.load_spooled_df("reject_dataset", "qid-missing-file-1") is None
    # Stale metadata must be purged alongside the failed load.
    assert spool.get_spool_metadata("reject_dataset", "qid-missing-file-1") is None
|
||||||
|
|
||||||
|
|
||||||
|
def test_cleanup_expired_and_orphan_files(monkeypatch, tmp_path):
    """Cleanup removes expired entries and orphan parquet files, keeps valid ones."""
    fake = FakeRedis()
    root = tmp_path / "query_spool"
    monkeypatch.setattr(spool, "QUERY_SPOOL_ENABLED", True)
    monkeypatch.setattr(spool, "QUERY_SPOOL_DIR", root)
    monkeypatch.setattr(spool, "QUERY_SPOOL_ORPHAN_GRACE_SECONDS", 1)
    monkeypatch.setattr(spool, "get_redis_client", lambda: fake)

    now = int(time.time())

    # One valid and one soon-to-be-expired entry.
    assert spool.store_spooled_df("reject_dataset", "qid-valid-1", _build_df(), ttl_seconds=1200)
    assert spool.store_spooled_df("reject_dataset", "qid-expired-1", _build_df(), ttl_seconds=1200)

    # Force the second entry's metadata into the past.
    expired_key = get_key(spool._meta_key("reject_dataset", "qid-expired-1"))
    expired_meta = json.loads(fake.get(expired_key))
    expired_path = spool._path_from_relative(expired_meta["relative_path"])
    assert expired_path is not None and expired_path.exists()
    expired_meta["expires_at"] = now - 10
    fake.setex(expired_key, 1200, json.dumps(expired_meta, ensure_ascii=False))

    # Plant an orphan parquet file older than the grace period.
    orphan_dir = root / "reject_dataset"
    orphan_dir.mkdir(parents=True, exist_ok=True)
    orphan_path = orphan_dir / "orphan.parquet"
    _build_df().to_parquet(orphan_path, engine="pyarrow", index=False)
    stale_mtime = now - 120
    os.utime(orphan_path, (stale_mtime, stale_mtime))

    stats = spool.cleanup_expired_spool(namespace="reject_dataset")

    assert stats["meta_deleted"] >= 1
    assert stats["expired_files_deleted"] >= 1
    assert stats["orphan_files_deleted"] >= 1
    assert not orphan_path.exists()
    assert not expired_path.exists()
    # The still-valid entry survives cleanup.
    assert spool.get_spool_metadata("reject_dataset", "qid-valid-1") is not None
|
||||||
151
tests/test_query_tool_engine.py
Normal file
151
tests/test_query_tool_engine.py
Normal file
@@ -0,0 +1,151 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
"""Unit tests for query_tool_service — slow-query migration + caching (tasks 10.1-10.5)."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from unittest.mock import patch, MagicMock
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
from mes_dashboard.services import query_tool_service as qt_svc
|
||||||
|
|
||||||
|
|
||||||
|
class TestSlowQueryMigration:
    """10.2 — verify high-risk read_sql_df paths migrated to read_sql_df_slow."""

    @staticmethod
    def _fake_loader():
        """Loader stub returning a trivial statement for any SQL name."""
        return type("FakeLoader", (), {
            "load_with_params": staticmethod(lambda name, **kw: "SELECT 1 FROM dual"),
        })

    def test_resolve_by_lot_id_uses_slow(self, monkeypatch):
        """_resolve_by_lot_id should call read_sql_df_slow, not read_sql_df."""
        calls = {"slow": 0, "fast": 0}

        def fake_slow(sql, params=None, **kw):
            calls["slow"] += 1
            return pd.DataFrame({"CONTAINERID": ["C1"], "CONTAINERNAME": ["LOT-1"]})

        def fake_fast(sql, params=None):
            calls["fast"] += 1
            return pd.DataFrame()

        monkeypatch.setattr(qt_svc, "read_sql_df_slow", fake_slow)
        monkeypatch.setattr(qt_svc, "read_sql_df", fake_fast)
        monkeypatch.setattr(qt_svc, "SQLLoader", self._fake_loader())

        result = qt_svc._resolve_by_lot_id(["LOT-1"])

        assert calls["slow"] == 1
        assert calls["fast"] == 0

    def test_resolve_by_work_order_uses_slow(self, monkeypatch):
        """_resolve_by_work_order should call read_sql_df_slow."""
        calls = {"slow": 0, "fast": 0}

        def fake_slow(sql, params=None, **kw):
            calls["slow"] += 1
            return pd.DataFrame({
                "CONTAINERID": ["C1"],
                "CONTAINERNAME": ["LOT-1"],
                "MFGORDERNAME": ["GA25010101"],
            })

        def fake_fast(sql, params=None):
            calls["fast"] += 1
            return pd.DataFrame()

        monkeypatch.setattr(qt_svc, "read_sql_df_slow", fake_slow)
        monkeypatch.setattr(qt_svc, "read_sql_df", fake_fast)
        monkeypatch.setattr(qt_svc, "SQLLoader", self._fake_loader())

        result = qt_svc._resolve_by_work_order(["GA25010101"])

        assert calls["slow"] >= 1
        assert calls["fast"] == 0

    def test_equipment_status_hours_uses_slow(self, monkeypatch):
        """get_equipment_status_hours should call read_sql_df_slow."""
        import mes_dashboard.core.redis_df_store as rds

        calls = {"slow": 0, "fast": 0}

        def fake_slow(sql, params=None, **kw):
            calls["slow"] += 1
            return pd.DataFrame({
                "RESOURCEID": ["EQ1"],
                "PRD_HOURS": [100.0],
                "SBY_HOURS": [20.0],
                "UDT_HOURS": [10.0],
                "SDT_HOURS": [5.0],
                "EGT_HOURS": [3.0],
                "NST_HOURS": [2.0],
                "TOTAL_HOURS": [140.0],
            })

        def fake_fast(sql, params=None):
            calls["fast"] += 1
            return pd.DataFrame()

        monkeypatch.setattr(qt_svc, "read_sql_df_slow", fake_slow)
        monkeypatch.setattr(qt_svc, "read_sql_df", fake_fast)
        monkeypatch.setattr(rds, "redis_load_df", lambda key: None)
        monkeypatch.setattr(rds, "redis_store_df", lambda key, df, ttl=None: None)
        monkeypatch.setattr(qt_svc, "SQLLoader", self._fake_loader())

        result = qt_svc.get_equipment_status_hours(
            equipment_ids=["EQ1"],
            start_date="2025-01-01",
            end_date="2025-01-31",
        )

        assert calls["slow"] == 1
        assert calls["fast"] == 0
        assert "error" not in result
        assert result["totals"]["PRD_HOURS"] == 100.0
|
||||||
|
|
||||||
|
|
||||||
|
class TestEquipmentCaching:
    """10.4/10.5 — equipment query caching via Redis."""

    def test_equipment_status_cache_hit(self, monkeypatch):
        """Redis cache hit → returns cached result without Oracle query."""
        import mes_dashboard.core.redis_df_store as rds

        calls = {"sql": 0}

        cached_df = pd.DataFrame({
            "RESOURCEID": ["EQ-CACHED"],
            "PRD_HOURS": [50.0],
            "SBY_HOURS": [10.0],
            "UDT_HOURS": [5.0],
            "SDT_HOURS": [2.0],
            "EGT_HOURS": [1.0],
            "NST_HOURS": [0.0],
            "TOTAL_HOURS": [68.0],
        })

        # Cache always hits; any SQL path is a hard failure.
        monkeypatch.setattr(rds, "redis_load_df", lambda key: cached_df)

        def fail_sql(*args, **kwargs):
            calls["sql"] += 1
            raise RuntimeError("Should not reach Oracle")

        monkeypatch.setattr(qt_svc, "read_sql_df_slow", fail_sql)
        monkeypatch.setattr(qt_svc, "read_sql_df", fail_sql)

        result = qt_svc.get_equipment_status_hours(
            equipment_ids=["EQ1"],
            start_date="2025-01-01",
            end_date="2025-01-31",
        )

        assert calls["sql"] == 0  # Oracle NOT called
        assert result["data"][0]["RESOURCEID"] == "EQ-CACHED"
|
||||||
185
tests/test_redis_df_store.py
Normal file
185
tests/test_redis_df_store.py
Normal file
@@ -0,0 +1,185 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
"""Unit tests for redis_df_store module."""
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
from unittest.mock import patch, MagicMock
|
||||||
|
from decimal import Decimal
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
|
||||||
|
class TestRedisStoreDf:
    """3.1 — round-trip store/load."""

    @staticmethod
    def _memory_client():
        """Return a MagicMock redis client backed by an in-memory dict."""
        client = MagicMock()
        backing = {}
        client.setex.side_effect = lambda k, t, v: backing.update({k: v})
        client.get.side_effect = backing.get
        return client

    def test_round_trip(self):
        """Store a DF, load it back, verify equality."""
        import mes_dashboard.core.redis_df_store as rds

        client = self._memory_client()
        df = pd.DataFrame({"A": [1, 2, 3], "B": ["x", "y", "z"]})

        with patch.object(rds, "REDIS_ENABLED", True), \
                patch.object(rds, "get_redis_client", return_value=client):
            rds.redis_store_df("test:key", df, ttl=60)
            loaded = rds.redis_load_df("test:key")

        assert loaded is not None
        pd.testing.assert_frame_equal(loaded, df)

    def test_store_empty_df(self):
        """Round-trip with an empty DataFrame preserves schema."""
        import mes_dashboard.core.redis_df_store as rds

        client = self._memory_client()
        df = pd.DataFrame({"COL": pd.Series([], dtype="int64")})

        with patch.object(rds, "REDIS_ENABLED", True), \
                patch.object(rds, "get_redis_client", return_value=client):
            rds.redis_store_df("test:empty", df, ttl=60)
            loaded = rds.redis_load_df("test:empty")

        assert loaded is not None
        assert len(loaded) == 0
        assert list(loaded.columns) == ["COL"]

    def test_decimal_object_column_round_trip(self):
        """Mixed-precision Decimal object columns should store without serialization errors."""
        import mes_dashboard.core.redis_df_store as rds

        client = self._memory_client()
        df = pd.DataFrame(
            {
                "REJECT_SHARE_PCT": [Decimal("12.345"), Decimal("1.2"), None],
                "REJECT_RATE_PCT": [Decimal("0.123456"), Decimal("10.9"), Decimal("9.000001")],
                "LABEL": ["A", "B", "C"],
            }
        )

        with patch.object(rds, "REDIS_ENABLED", True), \
                patch.object(rds, "get_redis_client", return_value=client):
            assert rds.redis_store_df("test:decimal", df, ttl=60)
            loaded = rds.redis_load_df("test:decimal")

        assert loaded is not None
        # Decimals come back as a numeric dtype, not object.
        assert loaded["REJECT_SHARE_PCT"].dtype.kind in ("f", "i")
        assert loaded["REJECT_RATE_PCT"].dtype.kind in ("f", "i")
        assert loaded.loc[0, "REJECT_SHARE_PCT"] == pytest.approx(12.345)
        assert loaded.loc[2, "REJECT_RATE_PCT"] == pytest.approx(9.000001)
|
||||||
|
|
||||||
|
|
||||||
|
class TestChunkHelpers:
    """3.2 — chunk-level helpers round-trip."""

    def test_chunk_round_trip(self):
        """A stored chunk is visible to exists() and loads back frame-equal."""
        import mes_dashboard.core.redis_df_store as rds

        backing = {}
        fake_redis = MagicMock()
        fake_redis.setex.side_effect = lambda key, ttl, payload: backing.update({key: payload})
        fake_redis.get.side_effect = backing.get
        fake_redis.exists.side_effect = lambda key: int(key in backing)

        frame = pd.DataFrame({"X": [10, 20]})

        with patch.object(rds, "REDIS_ENABLED", True), \
                patch.object(rds, "get_redis_client", return_value=fake_redis):
            rds.redis_store_chunk("reject", "abc123", 0, frame, ttl=60)
            assert rds.redis_chunk_exists("reject", "abc123", 0)
            round_tripped = rds.redis_load_chunk("reject", "abc123", 0)

        assert round_tripped is not None
        pd.testing.assert_frame_equal(round_tripped, frame)

    def test_chunk_not_exists(self):
        """exists() is falsy for a chunk index that was never stored."""
        import mes_dashboard.core.redis_df_store as rds

        fake_redis = MagicMock()
        fake_redis.exists.return_value = 0

        with patch.object(rds, "REDIS_ENABLED", True), \
                patch.object(rds, "get_redis_client", return_value=fake_redis):
            assert not rds.redis_chunk_exists("reject", "abc123", 99)

    def test_clear_batch_removes_chunk_and_meta_keys(self):
        """clear_batch deletes every chunk key plus the meta key and reports the count."""
        import mes_dashboard.core.redis_df_store as rds

        removed = []
        fake_redis = MagicMock()
        fake_redis.keys.return_value = [
            "mes-dashboard:batch:reject:q123:chunk:0",
            "mes-dashboard:batch:reject:q123:chunk:1",
        ]

        def record_delete(*keys):
            removed.extend(keys)
            return len(keys)

        fake_redis.delete.side_effect = record_delete

        with patch.object(rds, "REDIS_ENABLED", True), \
                patch.object(rds, "get_redis_client", return_value=fake_redis):
            total = rds.redis_clear_batch("reject", "q123")

        # Two chunk keys + one meta key.
        assert total == 3
        assert any("chunk:0" in key for key in removed)
        assert any("chunk:1" in key for key in removed)
        assert any("meta" in key for key in removed)
||||||
|
|
||||||
|
class TestRedisUnavailable:
    """3.3 — graceful fallback when Redis is unavailable."""

    def test_store_no_redis(self):
        """store returns without error when Redis disabled."""
        import mes_dashboard.core.redis_df_store as rds

        frame = pd.DataFrame({"A": [1]})
        with patch.object(rds, "REDIS_ENABLED", False):
            rds.redis_store_df("key", frame)  # must not raise

    def test_load_no_redis(self):
        """load returns None when Redis disabled."""
        import mes_dashboard.core.redis_df_store as rds

        with patch.object(rds, "REDIS_ENABLED", False):
            assert rds.redis_load_df("key") is None

    def test_chunk_exists_no_redis(self):
        """chunk_exists is falsy when Redis disabled."""
        import mes_dashboard.core.redis_df_store as rds

        with patch.object(rds, "REDIS_ENABLED", False):
            assert not rds.redis_chunk_exists("p", "h", 0)

    def test_store_client_none(self):
        """store returns without error when client is None."""
        import mes_dashboard.core.redis_df_store as rds

        frame = pd.DataFrame({"A": [1]})
        with patch.object(rds, "REDIS_ENABLED", True), \
                patch.object(rds, "get_redis_client", return_value=None):
            rds.redis_store_df("key", frame)  # must not raise

    def test_load_client_none(self):
        """load returns None when client is None."""
        import mes_dashboard.core.redis_df_store as rds

        with patch.object(rds, "REDIS_ENABLED", True), \
                patch.object(rds, "get_redis_client", return_value=None):
            assert rds.redis_load_df("key") is None
|
||||||
@@ -3,6 +3,9 @@
|
|||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from decimal import Decimal
|
||||||
|
from unittest.mock import MagicMock
|
||||||
|
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
@@ -292,3 +295,359 @@ def test_apply_pareto_selection_filter_supports_multi_dimension_and_logic():
|
|||||||
|
|
||||||
assert len(filtered) == 1
|
assert len(filtered) == 1
|
||||||
assert set(filtered["CONTAINERNAME"].tolist()) == {"LOT-002"}
|
assert set(filtered["CONTAINERNAME"].tolist()) == {"LOT-002"}
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================
|
||||||
|
# 5.9 — 365-day date range → engine decomposition, no Oracle timeout
|
||||||
|
# ============================================================
|
||||||
|
|
||||||
|
|
||||||
|
class TestEngineDecompositionDateRange:
    """Verify engine routing for long date ranges.

    Long ranges (365 days) must be decomposed into time chunks and executed
    via the batch query engine; short ranges must bypass the engine entirely.
    """

    def test_365_day_range_triggers_engine(self, monkeypatch):
        """5.9: 365-day date range → chunks decomposed, engine path used."""
        import mes_dashboard.services.batch_query_engine as engine_mod

        # Track calls via engine module (local imports inside function pull from here)
        engine_calls = {
            "decompose": 0,
            "execute": 0,
            "merge": 0,
            "chunk_count": 0,
            "parallel": 0,
            "max_rows_per_chunk": 0,
        }

        # Keep a handle on the real decomposition so chunk boundaries stay authentic
        # (and so the expected chunk count can be recomputed for the assertions below).
        original_decompose = engine_mod.decompose_by_time_range

        def tracked_decompose(*args, **kwargs):
            # Count invocations but delegate to the real implementation.
            engine_calls["decompose"] += 1
            return original_decompose(*args, **kwargs)

        def fake_execute_plan(chunks, query_fn, **kwargs):
            # Capture the plan parameters the service handed to the engine.
            engine_calls["execute"] += 1
            engine_calls["chunk_count"] = len(chunks)
            engine_calls["parallel"] = int(kwargs.get("parallel", 1))
            engine_calls["max_rows_per_chunk"] = int(kwargs.get("max_rows_per_chunk", 0))
            return kwargs.get("query_hash", "fake_hash")

        result_df = pd.DataFrame({
            "CONTAINERID": ["C1"],
            "LOSSREASONNAME": ["R1"],
            "REJECT_TOTAL_QTY": [10],
        })

        def fake_merge_chunks(prefix, qhash, **kwargs):
            # Pretend merging the chunk results produced a single-row frame.
            engine_calls["merge"] += 1
            return result_df

        # Mock on engine module (local imports will pick these up)
        monkeypatch.setattr(engine_mod, "decompose_by_time_range", tracked_decompose)
        monkeypatch.setattr(engine_mod, "execute_plan", fake_execute_plan)
        monkeypatch.setattr(engine_mod, "merge_chunks", fake_merge_chunks)
        # Mock service-level helpers so the test never touches SQL or Redis.
        monkeypatch.setattr(
            "mes_dashboard.services.reject_dataset_cache._prepare_sql",
            lambda *a, **kw: "SELECT 1 FROM dual",
        )
        monkeypatch.setattr(
            "mes_dashboard.services.reject_dataset_cache._store_df",
            lambda *a, **kw: None,
        )
        monkeypatch.setattr(
            "mes_dashboard.services.reject_dataset_cache._get_cached_df",
            lambda _: None,
        )
        monkeypatch.setattr(
            "mes_dashboard.services.reject_dataset_cache._apply_policy_filters",
            lambda df, **kw: df,
        )
        monkeypatch.setattr(
            "mes_dashboard.services.reject_dataset_cache._build_primary_response",
            lambda qid, df, meta, ri: {"query_id": qid, "rows": len(df)},
        )
        monkeypatch.setattr(
            "mes_dashboard.services.reject_dataset_cache._build_where_clause",
            lambda **kw: ("", {}, {}),
        )
        monkeypatch.setattr(
            "mes_dashboard.services.reject_dataset_cache._validate_range",
            lambda *a: None,
        )
        monkeypatch.setattr(
            "mes_dashboard.services.reject_dataset_cache.redis_clear_batch",
            lambda *a, **kw: 0,
        )

        result = cache_svc.execute_primary_query(
            mode="date_range",
            start_date="2025-01-01",
            end_date="2025-12-31",
        )

        # The full engine pipeline (decompose → execute → merge) ran exactly once each.
        assert engine_calls["decompose"] == 1
        assert engine_calls["execute"] == 1
        assert engine_calls["merge"] == 1
        assert result["rows"] == 1

        # Recompute the expected chunking with the real decomposer and verify the
        # service forwarded its configured grain/parallelism/row-cap settings.
        expected_chunks = original_decompose(
            "2025-01-01",
            "2025-12-31",
            grain_days=cache_svc._REJECT_ENGINE_GRAIN_DAYS,
        )
        assert engine_calls["chunk_count"] == len(expected_chunks)
        assert engine_calls["parallel"] == cache_svc._REJECT_ENGINE_PARALLEL
        assert engine_calls["max_rows_per_chunk"] == cache_svc._REJECT_ENGINE_MAX_ROWS_PER_CHUNK

    def test_short_range_skips_engine(self, monkeypatch):
        """Short date range (<= threshold) uses direct path, no engine."""
        import mes_dashboard.services.batch_query_engine as engine_mod

        engine_calls = {"decompose": 0}

        original_decompose = engine_mod.decompose_by_time_range

        def tracked_decompose(*args, **kwargs):
            # Any call here means the engine path was (wrongly) taken.
            engine_calls["decompose"] += 1
            return original_decompose(*args, **kwargs)

        monkeypatch.setattr(engine_mod, "decompose_by_time_range", tracked_decompose)
        # Stub out SQL and cache plumbing; the direct path returns one row.
        monkeypatch.setattr(
            "mes_dashboard.services.reject_dataset_cache._get_cached_df",
            lambda _: None,
        )
        monkeypatch.setattr(
            "mes_dashboard.services.reject_dataset_cache._prepare_sql",
            lambda *a, **kw: "SELECT 1 FROM dual",
        )
        monkeypatch.setattr(
            "mes_dashboard.services.reject_dataset_cache.read_sql_df",
            lambda sql, params: pd.DataFrame({"CONTAINERID": ["C1"]}),
        )
        monkeypatch.setattr(
            "mes_dashboard.services.reject_dataset_cache._store_df",
            lambda *a, **kw: None,
        )
        monkeypatch.setattr(
            "mes_dashboard.services.reject_dataset_cache._apply_policy_filters",
            lambda df, **kw: df,
        )
        monkeypatch.setattr(
            "mes_dashboard.services.reject_dataset_cache._build_primary_response",
            lambda qid, df, meta, ri: {"query_id": qid, "rows": len(df)},
        )
        monkeypatch.setattr(
            "mes_dashboard.services.reject_dataset_cache._build_where_clause",
            lambda **kw: ("", {}, {}),
        )
        monkeypatch.setattr(
            "mes_dashboard.services.reject_dataset_cache.redis_clear_batch",
            lambda *a, **kw: 0,
        )
        monkeypatch.setattr(
            "mes_dashboard.services.reject_dataset_cache._validate_range",
            lambda *a: None,
        )

        result = cache_svc.execute_primary_query(
            mode="date_range",
            start_date="2025-06-01",
            end_date="2025-06-30",
        )

        assert engine_calls["decompose"] == 0  # Engine NOT used
        assert result["rows"] == 1
||||||
|
|
||||||
|
# ============================================================
|
||||||
|
# 5.10 — Large workorder (500+ containers) → ID batching
|
||||||
|
# ============================================================
|
||||||
|
|
||||||
|
|
||||||
|
class TestEngineDecompositionContainerIDs:
    """Verify engine routing for large container ID sets."""

    def test_large_container_set_triggers_engine(self, monkeypatch):
        """5.10: 1500 container IDs → engine ID batching activated."""
        import mes_dashboard.services.batch_query_engine as engine_mod

        engine_calls = {"execute": 0, "merge": 0}
        # 1500 synthetic container IDs — above the batching threshold
        # (presumably 1000 per batch, per the chunk assertion below).
        fake_ids = [f"CID-{i:04d}" for i in range(1500)]

        def fake_execute_plan(chunks, query_fn, **kwargs):
            engine_calls["execute"] += 1
            # Verify correct number of chunks
            assert len(chunks) == 2  # 1500 / 1000 = 2 batches
            return kwargs.get("query_hash", "fake_hash")

        result_df = pd.DataFrame({"CONTAINERID": fake_ids[:5]})

        def fake_merge_chunks(prefix, qhash, **kwargs):
            # Pretend the merged chunk results are a 5-row frame.
            engine_calls["merge"] += 1
            return result_df

        monkeypatch.setattr(engine_mod, "execute_plan", fake_execute_plan)
        monkeypatch.setattr(engine_mod, "merge_chunks", fake_merge_chunks)
        # Resolve the single workorder value into the full 1500-ID set.
        monkeypatch.setattr(
            "mes_dashboard.services.reject_dataset_cache.resolve_containers",
            lambda input_type, values: {
                "container_ids": fake_ids,
                "resolution_info": {"type": input_type, "count": len(fake_ids)},
            },
        )
        # Stub out SQL and cache plumbing so the test stays hermetic.
        monkeypatch.setattr(
            "mes_dashboard.services.reject_dataset_cache._get_cached_df",
            lambda _: None,
        )
        monkeypatch.setattr(
            "mes_dashboard.services.reject_dataset_cache._prepare_sql",
            lambda *a, **kw: "SELECT 1 FROM dual",
        )
        monkeypatch.setattr(
            "mes_dashboard.services.reject_dataset_cache._store_df",
            lambda *a, **kw: None,
        )
        monkeypatch.setattr(
            "mes_dashboard.services.reject_dataset_cache._apply_policy_filters",
            lambda df, **kw: df,
        )
        monkeypatch.setattr(
            "mes_dashboard.services.reject_dataset_cache._build_primary_response",
            lambda qid, df, meta, ri: {"query_id": qid, "rows": len(df)},
        )
        monkeypatch.setattr(
            "mes_dashboard.services.reject_dataset_cache._build_where_clause",
            lambda **kw: ("", {}, {}),
        )
        monkeypatch.setattr(
            "mes_dashboard.services.reject_dataset_cache.redis_clear_batch",
            lambda *a, **kw: 0,
        )

        result = cache_svc.execute_primary_query(
            mode="container",
            container_input_type="workorder",
            container_values=["WO-BIG"],
        )

        # Engine execute/merge each ran exactly once for the batched ID set.
        assert engine_calls["execute"] == 1
        assert engine_calls["merge"] == 1
||||||
|
|
||||||
|
# NOTE(review): defined without `self` — looks like a module-level test function.
# If this was meant to live inside the class above, pytest would bind the
# `monkeypatch` fixture to the `self` slot; confirm placement in the real file.
def test_engine_path_stores_mixed_precision_decimal_chunks_without_redis_serialization_error(
    monkeypatch, caplog
):
    """Long-range engine path should handle Decimal object columns in chunk cache."""
    import mes_dashboard.core.redis_df_store as rds
    import mes_dashboard.services.batch_query_engine as bqe

    # In-memory Redis double covering both the string-key API (setex/get/exists)
    # and the hash API (hset/hgetall) used by the engine's chunk bookkeeping.
    mock_client = MagicMock()
    stored = {}
    hashes = {}

    mock_client.setex.side_effect = lambda k, t, v: stored.update({k: v})
    mock_client.get.side_effect = lambda k: stored.get(k)
    mock_client.exists.side_effect = lambda k: 1 if k in stored else 0
    mock_client.hset.side_effect = lambda k, mapping=None: hashes.setdefault(k, {}).update(mapping or {})
    mock_client.hgetall.side_effect = lambda k: hashes.get(k, {})
    mock_client.expire.return_value = None

    # Every chunk query returns the same two rows with mixed-precision Decimals,
    # the shape that previously tripped serialization into Redis.
    engine_row = pd.DataFrame(
        {
            "CONTAINERID": ["C-1", "C-2"],
            "LOSSREASONNAME": ["001_A", "002_B"],
            "REJECT_TOTAL_QTY": [10, 20],
            "REJECT_SHARE_PCT": [Decimal("12.345"), Decimal("1.2")],
            "REJECT_RATE_PCT": [Decimal("0.123456"), Decimal("9.000001")],
        }
    )

    # Bypass cache lookups, SQL preparation, and response shaping; only the
    # engine + Redis chunk path is under test here.
    monkeypatch.setattr(cache_svc, "_get_cached_df", lambda _: None)
    monkeypatch.setattr(cache_svc, "_prepare_sql", lambda *a, **kw: "SELECT 1 FROM dual")
    monkeypatch.setattr(cache_svc, "_build_where_clause", lambda **kw: ("", {}, {}))
    monkeypatch.setattr(cache_svc, "_validate_range", lambda *a: None)
    monkeypatch.setattr(cache_svc, "_apply_policy_filters", lambda df, **kw: df)
    monkeypatch.setattr(cache_svc, "_build_primary_response", lambda qid, df, meta, ri: {"rows": len(df)})
    monkeypatch.setattr(cache_svc, "read_sql_df", lambda sql, params: engine_row.copy())
    monkeypatch.setattr(cache_svc, "redis_clear_batch", lambda *a, **kw: 0)

    # Route both the df-store module and the engine at the same fake client.
    monkeypatch.setattr(rds, "REDIS_ENABLED", True)
    monkeypatch.setattr(rds, "get_redis_client", lambda: mock_client)
    monkeypatch.setattr(bqe, "get_redis_client", lambda: mock_client)
    result = cache_svc.execute_primary_query(
        mode="date_range",
        start_date="2025-01-01",
        end_date="2025-12-31",
    )

    # Each chunk contributed 2 rows; recompute the chunking to get the total.
    expected_chunks = bqe.decompose_by_time_range(
        "2025-01-01",
        "2025-12-31",
        grain_days=cache_svc._REJECT_ENGINE_GRAIN_DAYS,
    )
    assert result["rows"] == len(expected_chunks) * 2
    # No serialization failure was logged and chunk keys actually reached Redis.
    assert "Failed to store DataFrame in Redis" not in caplog.text
    assert any("batch:reject" in key for key in stored)
||||||
|
|
||||||
|
def test_large_result_spills_to_parquet_and_view_export_use_spool_fallback(monkeypatch):
    """13.8: long-range oversized result should use spool and still serve view/export."""
    # (namespace, query_id) → DataFrame; stands in for the parquet spool on disk.
    spool_data = {}
    df = _build_detail_filter_df().copy()

    # Start from a cold L1 cache and a missing L2 (Redis) entry.
    cache_svc._dataset_cache.clear()
    monkeypatch.setattr(cache_svc, "_redis_load_df", lambda _qid: None)
    monkeypatch.setattr(cache_svc, "_validate_range", lambda *_: None)
    monkeypatch.setattr(cache_svc, "_build_where_clause", lambda **kw: ("", {}, {}))
    monkeypatch.setattr(cache_svc, "_prepare_sql", lambda *a, **kw: "SELECT 1 FROM dual")
    monkeypatch.setattr(cache_svc, "read_sql_df", lambda sql, params: df.copy())
    monkeypatch.setattr(cache_svc, "_apply_policy_filters", lambda data, **kw: data)
    monkeypatch.setattr(
        cache_svc,
        "_build_primary_response",
        lambda qid, result_df, meta, resolution_info: {"query_id": qid, "rows": len(result_df)},
    )

    # Force the spill path: thresholds of 1 row / 1 MB make any result "oversized".
    monkeypatch.setattr(cache_svc, "_REJECT_ENGINE_SPILL_ENABLED", True)
    monkeypatch.setattr(cache_svc, "_REJECT_ENGINE_MAX_TOTAL_ROWS", 1)
    monkeypatch.setattr(cache_svc, "_REJECT_ENGINE_MAX_RESULT_MB", 1)
    # The regular Redis store must NOT be used for spilled results — raise if it is.
    monkeypatch.setattr(cache_svc, "_store_df", lambda *_a, **_kw: (_ for _ in ()).throw(AssertionError("_store_df should not be called for spill path")))
    monkeypatch.setattr(cache_svc, "_redis_delete_df", lambda *_a, **_kw: None)

    def fake_store_spooled_df(namespace, query_id, data, ttl_seconds=None):
        # Record a copy so later mutation of `data` cannot affect the "spool".
        spool_data[(namespace, query_id)] = data.copy()
        return True

    def fake_load_spooled_df(namespace, query_id):
        stored = spool_data.get((namespace, query_id))
        return stored.copy() if stored is not None else None

    monkeypatch.setattr(cache_svc, "store_spooled_df", fake_store_spooled_df)
    monkeypatch.setattr(cache_svc, "load_spooled_df", fake_load_spooled_df)

    result = cache_svc.execute_primary_query(
        mode="date_range",
        start_date="2025-01-01",
        end_date="2025-01-31",
    )

    query_id = result["query_id"]
    assert result["rows"] == len(df)
    # The oversized result landed in the spool under the service's namespace.
    assert (cache_svc._REDIS_NAMESPACE, query_id) in spool_data

    # Force cache miss for L1/L2 and verify spool fallback serves view/export.
    cache_svc._dataset_cache.clear()
    monkeypatch.setattr(cache_svc, "_redis_load_df", lambda _qid: None)
    monkeypatch.setattr(
        "mes_dashboard.services.scrap_reason_exclusion_cache.get_excluded_reasons",
        lambda: [],
    )

    view_result = cache_svc.apply_view(query_id=query_id, page=1, per_page=200)
    assert view_result is not None
    assert view_result["detail"]["pagination"]["total"] == len(df)

    export_rows = cache_svc.export_csv_from_cache(query_id=query_id)
    assert export_rows is not None
    assert len(export_rows) == len(df)
|
||||||
|
|||||||
134
tests/test_resource_dataset_cache.py
Normal file
134
tests/test_resource_dataset_cache.py
Normal file
@@ -0,0 +1,134 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
"""Unit tests for resource_dataset_cache — engine integration (task 7.4)."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
from mes_dashboard.services import resource_dataset_cache as cache_svc
|
||||||
|
|
||||||
|
|
||||||
|
class TestResourceEngineDecomposition:
    """7.4 — resource-history with long date range triggers engine.

    Long ranges must be decomposed and executed via the batch query engine;
    short ranges must take the direct read_sql path with no engine involvement.
    """

    def test_long_range_triggers_engine(self, monkeypatch):
        """90-day range → engine decomposition activated."""
        import mes_dashboard.services.batch_query_engine as engine_mod

        engine_calls = {"execute": 0, "merge": 0}

        def fake_execute_plan(chunks, query_fn, **kwargs):
            engine_calls["execute"] += 1
            assert len(chunks) == 3  # 90 days / 31 = 3 chunks
            return kwargs.get("query_hash", "fake_hash")

        result_df = pd.DataFrame({
            "HISTORYID": [1, 2],
            "RESOURCEID": ["R1", "R2"],
        })

        def fake_merge_chunks(prefix, qhash, **kwargs):
            # Pretend merging the chunk results produced two history rows.
            engine_calls["merge"] += 1
            return result_df

        monkeypatch.setattr(engine_mod, "execute_plan", fake_execute_plan)
        monkeypatch.setattr(engine_mod, "merge_chunks", fake_merge_chunks)
        # Stub service-level helpers so the test never touches SQL or Redis.
        monkeypatch.setattr(
            "mes_dashboard.services.resource_dataset_cache._get_cached_df",
            lambda _: None,
        )
        monkeypatch.setattr(
            "mes_dashboard.services.resource_dataset_cache._store_df",
            lambda *a, **kw: None,
        )
        monkeypatch.setattr(
            "mes_dashboard.services.resource_dataset_cache._load_sql",
            lambda name: "SELECT 1 FROM dual",
        )
        monkeypatch.setattr(
            "mes_dashboard.services.resource_dataset_cache._get_filtered_resources_and_lookup",
            lambda **kw: (
                [{"RESOURCEID": "R1", "RESOURCENAME": "Machine-1"}],
                {"R1": {"RESOURCENAME": "Machine-1"}},
                "h.HISTORYID IN (SELECT HISTORYID FROM RESOURCEHISTORY)",
            ),
        )
        monkeypatch.setattr(
            "mes_dashboard.services.resource_dataset_cache._get_resource_lookup",
            lambda: {},
        )
        monkeypatch.setattr(
            "mes_dashboard.services.resource_dataset_cache._get_workcenter_mapping",
            lambda: {},
        )
        monkeypatch.setattr(
            "mes_dashboard.services.resource_dataset_cache._derive_summary",
            lambda df, rl, wc, gran: {"total_hours": 100},
        )
        monkeypatch.setattr(
            "mes_dashboard.services.resource_dataset_cache._derive_detail",
            lambda df, rl, wc: {"items": [], "pagination": {"total": 2}},
        )

        result = cache_svc.execute_primary_query(
            start_date="2025-01-01",
            end_date="2025-03-31",
            workcenter_groups=["WB"],
        )

        assert engine_calls["execute"] == 1
        assert engine_calls["merge"] == 1
        assert result["query_id"] is not None

    def test_short_range_skips_engine(self, monkeypatch):
        """30-day range → direct path, no engine."""
        import mes_dashboard.services.batch_query_engine as engine_mod

        engine_calls = {"execute": 0}

        # FIX: previously `engine_calls` was declared but execute_plan was never
        # patched, so nothing could ever increment the counter and the final
        # assertion was vacuous. Patch the engine entry point (as the reject-cache
        # tests do) so a regression that routes short ranges through the engine
        # actually fails this test.
        def tracked_execute_plan(chunks, query_fn, **kwargs):
            engine_calls["execute"] += 1
            return kwargs.get("query_hash", "fake_hash")

        monkeypatch.setattr(engine_mod, "execute_plan", tracked_execute_plan)
        # Stub out SQL and cache plumbing; the direct path returns one row.
        monkeypatch.setattr(
            "mes_dashboard.services.resource_dataset_cache._get_cached_df",
            lambda _: None,
        )
        monkeypatch.setattr(
            "mes_dashboard.services.resource_dataset_cache._load_sql",
            lambda name: "SELECT 1 FROM dual",
        )
        monkeypatch.setattr(
            "mes_dashboard.services.resource_dataset_cache.read_sql_df",
            lambda sql, params: pd.DataFrame({"HISTORYID": [1]}),
        )
        monkeypatch.setattr(
            "mes_dashboard.services.resource_dataset_cache._store_df",
            lambda *a, **kw: None,
        )
        monkeypatch.setattr(
            "mes_dashboard.services.resource_dataset_cache._get_filtered_resources_and_lookup",
            lambda **kw: (
                [{"RESOURCEID": "R1"}],
                {"R1": {"RESOURCENAME": "Machine-1"}},
                "h.HISTORYID IN (SELECT HISTORYID FROM RESOURCEHISTORY)",
            ),
        )
        monkeypatch.setattr(
            "mes_dashboard.services.resource_dataset_cache._get_resource_lookup",
            lambda: {},
        )
        monkeypatch.setattr(
            "mes_dashboard.services.resource_dataset_cache._get_workcenter_mapping",
            lambda: {},
        )
        monkeypatch.setattr(
            "mes_dashboard.services.resource_dataset_cache._derive_summary",
            lambda df, rl, wc, gran: {},
        )
        monkeypatch.setattr(
            "mes_dashboard.services.resource_dataset_cache._derive_detail",
            lambda df, rl, wc: {"items": [], "pagination": {"total": 1}},
        )

        result = cache_svc.execute_primary_query(
            start_date="2025-06-01",
            end_date="2025-06-30",
            workcenter_groups=["WB"],
        )

        assert engine_calls["execute"] == 0  # Engine NOT used
|
||||||
Reference in New Issue
Block a user