feat(reject-history): add materialized Pareto aggregate layer with feature-flagged rollout
Pre-compute 6-dimension metric cubes from cached LOT-level DataFrames so interactive Pareto requests read compact snapshots instead of re-scanning detail rows on every filter change. Includes single-flight build guard, TTL/size guardrails, cross-filter exclude-self evaluation, safe legacy fallback, response metadata exposure, telemetry counters, and a 3-stage rollout plan (telemetry-only → build-enabled → read-through). Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
51
.env.example
51
.env.example
@@ -348,13 +348,13 @@ REJECT_HISTORY_OPTIONS_CACHE_TTL_SECONDS=14400
|
||||
# ============================================================
|
||||
# Batch chunking controls (for long-range reject queries)
|
||||
REJECT_ENGINE_GRAIN_DAYS=10
|
||||
REJECT_ENGINE_PARALLEL=2
|
||||
REJECT_ENGINE_PARALLEL=1
|
||||
REJECT_ENGINE_MAX_ROWS_PER_CHUNK=50000
|
||||
REJECT_ENGINE_MAX_TOTAL_ROWS=300000
|
||||
REJECT_ENGINE_MAX_TOTAL_ROWS=200000
|
||||
|
||||
# Large result spill controls
|
||||
REJECT_ENGINE_SPILL_ENABLED=true
|
||||
REJECT_ENGINE_MAX_RESULT_MB=64
|
||||
REJECT_ENGINE_MAX_RESULT_MB=48
|
||||
QUERY_SPOOL_DIR=tmp/query_spool
|
||||
REJECT_ENGINE_SPOOL_TTL_SECONDS=21600
|
||||
REJECT_ENGINE_SPOOL_MAX_BYTES=2147483648
|
||||
@@ -362,10 +362,53 @@ REJECT_ENGINE_SPOOL_WARN_RATIO=0.85
|
||||
REJECT_ENGINE_SPOOL_CLEANUP_INTERVAL_SECONDS=300
|
||||
REJECT_ENGINE_SPOOL_ORPHAN_GRACE_SECONDS=600
|
||||
|
||||
# Cache-derived interactive compute guards (view / pareto / export)
|
||||
REJECT_DERIVE_MAX_INPUT_MB=96
|
||||
REJECT_DERIVE_MAX_PROJECTED_RSS_MB=1100
|
||||
REJECT_DERIVE_WORKING_SET_FACTOR=1.8
|
||||
REJECT_DERIVE_FORCE_GC=true
|
||||
|
||||
# ============================================================
|
||||
# Pareto Materialization (Rollout Plan)
|
||||
# ============================================================
|
||||
# Rollout stages:
|
||||
# Stage 1 – Telemetry-only (default):
|
||||
# Both flags false. Module loads, telemetry counters run, but
|
||||
# no snapshots are built or served. Use admin/performance-detail
|
||||
# to verify counters appear.
|
||||
# Stage 2 – Build enabled, read disabled:
|
||||
# PARETO_MATERIALIZATION_ENABLED=true
|
||||
# Snapshots are built on cache miss but NOT served to users.
|
||||
# Monitor build latency, payload size, and hit/miss in telemetry.
|
||||
# Stage 3 – Full read-through:
|
||||
# PARETO_MATERIALIZATION_READ_ENABLED=true
|
||||
# Snapshots are served when available; legacy fallback on miss.
|
||||
# Monitor fallback_reasons and hit_rate.
|
||||
# Rollback:
|
||||
# Set PARETO_MATERIALIZATION_READ_ENABLED=false to immediately
|
||||
# revert to legacy DataFrame compute. No API schema changes.
|
||||
PARETO_MATERIALIZATION_ENABLED=false
|
||||
PARETO_MATERIALIZATION_READ_ENABLED=false
|
||||
|
||||
# Snapshot L1 cache TTL (seconds, default: 600 = 10 min)
|
||||
PARETO_SNAPSHOT_TTL_SECONDS=600
|
||||
|
||||
# Max cube rows before build is rejected (default: 100000)
|
||||
PARETO_SNAPSHOT_MAX_CUBE_ROWS=100000
|
||||
|
||||
# Max serialized payload size in MB (default: 8)
|
||||
PARETO_SNAPSHOT_MAX_PAYLOAD_MB=8
|
||||
|
||||
# L1 cache capacity for snapshots (default: 16)
|
||||
PARETO_SNAPSHOT_L1_MAX_SIZE=16
|
||||
|
||||
# Single-flight build wait timeout (seconds, default: 10)
|
||||
PARETO_BUILD_WAIT_TIMEOUT=10
|
||||
|
||||
# Batch query engine thresholds
|
||||
BATCH_QUERY_TIME_THRESHOLD_DAYS=10
|
||||
BATCH_QUERY_ID_THRESHOLD=1000
|
||||
BATCH_CHUNK_MAX_MEMORY_MB=256
|
||||
BATCH_CHUNK_MAX_MEMORY_MB=192
|
||||
|
||||
# ============================================================
|
||||
# Runtime Resilience Diagnostics Thresholds
|
||||
|
||||
@@ -30,8 +30,8 @@
|
||||
"route": "/reject-history",
|
||||
"name": "報廢歷史查詢",
|
||||
"status": "released",
|
||||
"drawer_id": "drawer-2",
|
||||
"order": 4
|
||||
"drawer_id": "drawer",
|
||||
"order": 1
|
||||
},
|
||||
{
|
||||
"route": "/wip-detail",
|
||||
@@ -83,28 +83,28 @@
|
||||
"name": "設備維修查詢",
|
||||
"status": "released",
|
||||
"drawer_id": "drawer",
|
||||
"order": 1
|
||||
"order": 2
|
||||
},
|
||||
{
|
||||
"route": "/query-tool",
|
||||
"name": "批次追蹤工具",
|
||||
"status": "released",
|
||||
"drawer_id": "drawer",
|
||||
"drawer_id": "drawer-3",
|
||||
"order": 2
|
||||
},
|
||||
{
|
||||
"route": "/mid-section-defect",
|
||||
"name": "製程不良追溯分析",
|
||||
"status": "released",
|
||||
"drawer_id": "drawer",
|
||||
"drawer_id": "drawer-3",
|
||||
"order": 3
|
||||
},
|
||||
{
|
||||
"route": "/material-trace",
|
||||
"name": "原物料追溯查詢",
|
||||
"status": "released",
|
||||
"drawer_id": "drawer",
|
||||
"order": 4
|
||||
"drawer_id": "drawer-3",
|
||||
"order": 3
|
||||
},
|
||||
{
|
||||
"route": "/admin/pages",
|
||||
@@ -152,6 +152,12 @@
|
||||
"name": "歷史報表",
|
||||
"order": 2,
|
||||
"admin_only": false
|
||||
},
|
||||
{
|
||||
"id": "drawer-3",
|
||||
"name": "追溯工具",
|
||||
"order": 4,
|
||||
"admin_only": false
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -110,7 +110,7 @@ onMounted(async () => {
|
||||
<SectionCard>
|
||||
<template #header>
|
||||
<div class="job-query-title-row">
|
||||
<strong>工單結果</strong>
|
||||
<strong>維修紀錄</strong>
|
||||
<span class="job-query-muted">{{ jobs.length }} 筆</span>
|
||||
</div>
|
||||
</template>
|
||||
|
||||
@@ -13,8 +13,8 @@ keepalive = 5 # Keep-alive connections timeout
|
||||
|
||||
# Worker lifecycle management - prevent state accumulation.
|
||||
# Make these configurable so high-load test environments can raise the ceiling.
|
||||
max_requests = int(os.getenv("GUNICORN_MAX_REQUESTS", "5000"))
|
||||
max_requests_jitter = int(os.getenv("GUNICORN_MAX_REQUESTS_JITTER", "500"))
|
||||
max_requests = int(os.getenv("GUNICORN_MAX_REQUESTS", "1200"))
|
||||
max_requests_jitter = int(os.getenv("GUNICORN_MAX_REQUESTS_JITTER", "300"))
|
||||
|
||||
|
||||
# ============================================================
|
||||
|
||||
@@ -0,0 +1,2 @@
|
||||
schema: spec-driven
|
||||
created: 2026-03-03
|
||||
@@ -0,0 +1,89 @@
|
||||
## Context
|
||||
|
||||
Reject History 目前採兩階段模式:`POST /api/reject-history/query` 先建立 lot-level dataset cache,後續 `reason-pareto` / `batch-pareto` / `view` 反覆在 worker 內以 pandas 對全量明細重算。當使用者在大日期區間下進行多維 Pareto 互相篩選時,會在多 worker 環境放大記憶體壓力,造成高 RSS、回應抖動,甚至 worker 不穩定。
|
||||
|
||||
本變更目標是在不改前端 API contract 的前提下,新增「Pareto 預聚合/物化」層,讓互動式 Pareto 請求改讀聚合快照,而非每次掃描 lot-level 明細。
|
||||
|
||||
## Goals / Non-Goals
|
||||
|
||||
**Goals:**
|
||||
- 讓 `batch-pareto` 與 `reason-pareto`(cache 路徑)優先讀取 materialized Pareto snapshot。
|
||||
- 降低每次互動所需的 worker 計算記憶體與 CPU,避免反覆全量 groupby。
|
||||
- 對 snapshot 建立一致性規則(version/freshness/invalidation),確保不回傳過期或錯配資料。
|
||||
- 補齊可營運觀測:build latency、hit/miss、fallback 原因、snapshot 大小。
|
||||
- 保持回傳 schema 與前端互動語意(cross-filter、top80/top20)相容。
|
||||
|
||||
**Non-Goals:**
|
||||
- 不改 Oracle 明細 SQL 與 primary query 的資料來源。
|
||||
- 不新增「大日期範圍硬拒絕」規則。
|
||||
- 不改前端 Pareto 元件與 URL 參數契約。
|
||||
- 不在此變更導入全新儲存系統(沿用現有 process cache + Redis/spool 生態)。
|
||||
|
||||
## Decisions
|
||||
|
||||
### 1) 新增獨立 Pareto materialization service,與 lot-level cache 解耦
|
||||
- Decision: 新增 `reject_pareto_materialized.py`(名稱可調整)負責 build/read/invalidate snapshot,不把邏輯直接塞進 route。
|
||||
- Why: 可將聚合生命週期、key 策略、遙測統一管理,降低 `reject_dataset_cache.py` 複雜度。
|
||||
- Alternative considered:
|
||||
- 直接在 `compute_batch_pareto()` 內加 dict 快取:容易與 dataset cache 生命週期糾纏,且跨 worker 命中率不足。
|
||||
|
||||
### 2) Snapshot key 綁定 query dataset 與 filter context,並附 schema version
|
||||
- Decision: key 至少包含 `query_id`、policy toggles、supplementary filters、trend_dates hash、materialization schema version。
|
||||
- Why: 避免不同篩選上下文誤用同一 snapshot;schema 變更可強制失效舊資料。
|
||||
- Alternative considered:
|
||||
- 僅用 `query_id`:無法區分補充篩選,容易回傳錯誤 Pareto。
|
||||
|
||||
### 3) Materialized payload 儲存「可交叉運算所需最小聚合」,而非完整明細
|
||||
- Decision: snapshot 儲存 6 維度聚合結果與交叉篩選必要中介結構(以可重建 cross-filter 為原則),不複製 lot-level rows。
|
||||
- Why: 目標是減少記憶體放大,若儲存完整明細會失去價值。
|
||||
- Alternative considered:
|
||||
- 直接存每個 dimension 最終 items:空間小但無法支援任意 cross-filter 重算。
|
||||
|
||||
### 4) Read path 採「materialized 優先 + 安全 fallback」
|
||||
- Decision: `batch-pareto` / `reason-pareto(query_id)` 先讀 snapshot;miss/stale/build-fail 時 fallback 到既有 cache DataFrame 計算,並打 telemetry。
|
||||
- Why: 保留功能可用性與漸進上線,避免一次切換造成功能中斷。
|
||||
- Alternative considered:
|
||||
- snapshot miss 直接錯誤:風險高,對既有使用者不友善。
|
||||
|
||||
### 5) 建立 single-flight build 與容量上限
|
||||
- Decision: 同一 snapshot key 同時間僅允許一個 build,其餘請求等待或讀舊值;並限制單 snapshot size 與總 key TTL。
|
||||
- Why: 避免 thundering herd 與 Redis/spool 不受控成長。
|
||||
- Alternative considered:
|
||||
- 完全不鎖:高併發下會重複建構,放大 CPU/記憶體與 IO。
|
||||
|
||||
### 6) 將 observability 併入既有 cache telemetry 合約
|
||||
- Decision: 在現有 cache observability 結構新增 pareto-materialized 欄位(hit/miss/fallback/build/size/freshness)。
|
||||
- Why: operations 可以在同一入口判斷 cache 問題來源,不需分散查多個 API。
|
||||
- Alternative considered:
|
||||
- 只寫 log:難做趨勢與告警。
|
||||
|
||||
## Risks / Trade-offs
|
||||
|
||||
- [Risk] Snapshot key 組成不完整導致污染(cross-user/filter 汙染)。
|
||||
- Mitigation: key builder 單元測試覆蓋參數排序、空值正規化與 version 隔離。
|
||||
|
||||
- [Risk] 中介聚合結構設計錯誤,cross-filter 結果與舊路徑不一致。
|
||||
- Mitigation: 建立 parity tests,對同一 query_id 比對 materialized 與 legacy 計算結果。
|
||||
|
||||
- [Risk] Fallback 比率長期偏高,materialization 失去效益。
|
||||
- Mitigation: telemetry 強制輸出 fallback reason,設定告警門檻並納入 rollout gate。
|
||||
|
||||
- [Risk] 新增 build 步驟拉長首次 Pareto 回應時間。
|
||||
- Mitigation: 首次 build 可回 legacy 結果並背景填充,或採同步 build 但有 timeout 上限;由配置控制。
|
||||
|
||||
## Migration Plan
|
||||
|
||||
1. 實作 materialization service(key、payload、build/read/invalidate、telemetry)。
|
||||
2. 在 `compute_batch_pareto` 與 `compute_dimension_pareto(query_id path)` 接入 read-through/fallback。
|
||||
3. 加入 parity 測試(legacy vs materialized)與壓力測試(重複 cross-filter 切換)。
|
||||
4. 灰度啟用:先開 telemetry-only / build-disabled,再開 read-through,再提高命中比例。
|
||||
5. 監控 hit ratio、fallback ratio、worker RSS 趨勢;達標後再考慮收斂 legacy 路徑。
|
||||
|
||||
Rollback strategy:
|
||||
- 以 feature flag 關閉 materialized read path,立即回退至現行 DataFrame 計算,不影響 API schema。
|
||||
|
||||
## Open Questions
|
||||
|
||||
- materialized payload 要儲存在 Redis(快速)還是 spool(耐久)為主?是否需要雙層策略。
|
||||
- 首次 build 採同步阻塞還是背景建置(與 UX latency 取捨)。
|
||||
- 長期是否將 materialization 前移至 primary query 完成後立即建立,以換取互動穩定性。
|
||||
@@ -0,0 +1,37 @@
|
||||
## Why
|
||||
|
||||
The reject-history Pareto workflow currently recomputes aggregates from full lot-level cached datasets on every interactive filter change. Under wide date ranges and cross-filtering, this drives repeated high-memory pandas operations and can destabilize workers.
|
||||
|
||||
## What Changes
|
||||
|
||||
- Introduce a materialized Pareto aggregate layer for reject-history that precomputes dimension metrics from cached query datasets.
|
||||
- Serve `/api/reject-history/batch-pareto` and related Pareto reads from pre-aggregated artifacts instead of recomputing from full detail rows each request.
|
||||
- Add freshness/version metadata so aggregate snapshots stay aligned with source query datasets and policy toggles.
|
||||
- Add bounded invalidation and lifecycle rules for aggregate artifacts to avoid stale growth and memory pressure.
|
||||
- Add observability for aggregate build latency, hit ratio, memory footprint, and fallback reasons.
|
||||
- Keep API response schema compatible with existing frontend contracts to avoid UI rewrites.
|
||||
|
||||
## Capabilities
|
||||
|
||||
### New Capabilities
|
||||
- `reject-history-pareto-materialized-aggregate`: Build, store, and read pre-aggregated Pareto data for interactive cross-filter workflows.
|
||||
|
||||
### Modified Capabilities
|
||||
- `reject-history-api`: Route Pareto endpoints to materialized aggregates with cache-consistency and fallback behavior.
|
||||
- `cache-observability-hardening`: Extend telemetry to cover aggregate generation/hit/fallback and memory-guard events.
|
||||
|
||||
## Impact
|
||||
|
||||
- Affected backend code:
|
||||
- `src/mes_dashboard/services/reject_dataset_cache.py`
|
||||
- `src/mes_dashboard/services/reject_history_service.py`
|
||||
- `src/mes_dashboard/routes/reject_history_routes.py`
|
||||
- new aggregate builder/storage modules under `src/mes_dashboard/services/`
|
||||
- Affected APIs:
|
||||
- `/api/reject-history/batch-pareto`
|
||||
- `/api/reject-history/reason-pareto` (cache-backed path)
|
||||
- Affected runtime systems:
|
||||
- Process L1 cache, Redis/L2 cache, spool lifecycle and metrics history
|
||||
- Dependencies/ops:
|
||||
- Additional Redis key space and retention policy tuning for aggregate artifacts
|
||||
- New monitoring/alerts for aggregate freshness and fallback rates
|
||||
@@ -0,0 +1,30 @@
|
||||
## MODIFIED Requirements
|
||||
|
||||
### Requirement: Cache Telemetry MUST be Queryable for Operations
|
||||
The system MUST provide cache telemetry suitable for operations diagnostics, including materialized Pareto cache behavior for reject-history workloads.
|
||||
|
||||
#### Scenario: Telemetry inspection
|
||||
- **WHEN** operators request deep health status
|
||||
- **THEN** cache-related metrics/state SHALL be present and interpretable for troubleshooting
|
||||
|
||||
#### Scenario: Materialized Pareto telemetry visibility
|
||||
- **WHEN** materialized Pareto cache is enabled
|
||||
- **THEN** telemetry SHALL expose at least hit count/rate, miss count/rate, build count, build failure count, and fallback count
|
||||
- **THEN** telemetry SHALL expose latest snapshot freshness indicators and aggregate payload size indicators
|
||||
|
||||
## ADDED Requirements
|
||||
|
||||
### Requirement: Pareto materialization fallback reasons SHALL be operationally classifiable
|
||||
Telemetry MUST classify fallback outcomes with stable reason codes so repeated degradations can be monitored and alerted.
|
||||
|
||||
#### Scenario: Snapshot miss fallback reason
|
||||
- **WHEN** request falls back because no snapshot exists
|
||||
- **THEN** telemetry SHALL record a stable reason code for snapshot miss
|
||||
|
||||
#### Scenario: Snapshot stale fallback reason
|
||||
- **WHEN** request falls back because snapshot fails freshness/version checks
|
||||
- **THEN** telemetry SHALL record a stable reason code for stale/incompatible snapshot
|
||||
|
||||
#### Scenario: Build failure fallback reason
|
||||
- **WHEN** request falls back because materialization build failed
|
||||
- **THEN** telemetry SHALL record a stable reason code for build failure
|
||||
@@ -0,0 +1,57 @@
|
||||
## MODIFIED Requirements
|
||||
|
||||
### Requirement: Reject History API SHALL provide batch Pareto endpoint with cross-filter
|
||||
The API SHALL provide a batch Pareto endpoint that returns all 6 dimension Pareto results in a single response, supporting cross-dimension filtering with exclude-self logic, and SHALL prefer materialized Pareto snapshots over full detail regrouping.
|
||||
|
||||
#### Scenario: Batch Pareto response structure
|
||||
- **WHEN** `GET /api/reject-history/batch-pareto` is called with valid `query_id`
|
||||
- **THEN** response SHALL be `{ success: true, data: { dimensions: { reason: {...}, package: {...}, type: {...}, workflow: {...}, workcenter: {...}, equipment: {...} } } }`
|
||||
- **THEN** each dimension object SHALL include `items` array with schema (`reason`, `metric_value`, `pct`, `cumPct`, `MOVEIN_QTY`, `REJECT_TOTAL_QTY`, `DEFECT_QTY`, `count`)
|
||||
|
||||
#### Scenario: Cross-filter exclude-self logic
|
||||
- **WHEN** `sel_reason=A&sel_type=X` is provided
|
||||
- **THEN** reason Pareto SHALL be computed with type=X filter applied (but NOT reason=A filter)
|
||||
- **THEN** type Pareto SHALL be computed with reason=A filter applied (but NOT type=X filter)
|
||||
- **THEN** package/workflow/workcenter/equipment Paretos SHALL be computed with both reason=A AND type=X filters applied
|
||||
|
||||
#### Scenario: Empty selections return unfiltered Paretos
|
||||
- **WHEN** batch-pareto is called with no `sel_*` parameters
|
||||
- **THEN** all 6 dimensions SHALL return their full Pareto distribution (subject to `pareto_scope`)
|
||||
|
||||
#### Scenario: Cache-only computation
|
||||
- **WHEN** `query_id` does not exist in cache
|
||||
- **THEN** the endpoint SHALL return HTTP 400 with error message indicating cache miss
|
||||
- **THEN** the endpoint SHALL NOT fall back to Oracle query
|
||||
|
||||
#### Scenario: Materialized snapshot preferred
|
||||
- **WHEN** a valid and fresh materialized Pareto snapshot exists for the request context
|
||||
- **THEN** the endpoint SHALL return results from that snapshot
|
||||
- **THEN** the endpoint SHALL avoid full lot-level DataFrame regrouping for the same request
|
||||
|
||||
#### Scenario: Materialized miss fallback behavior
|
||||
- **WHEN** materialized snapshot is unavailable, stale, or build fails
|
||||
- **THEN** the endpoint SHALL fall back to legacy cache DataFrame computation
|
||||
- **THEN** the response schema and filter semantics SHALL remain unchanged
|
||||
|
||||
#### Scenario: Supplementary and policy filters apply
|
||||
- **WHEN** batch-pareto is called with supplementary filters (packages, workcenter_groups, reason) and policy toggles
|
||||
- **THEN** all 6 dimension Paretos SHALL be computed after applying policy and supplementary filters first (before cross-filter)
|
||||
|
||||
#### Scenario: Display scope (TOP20) support
|
||||
- **WHEN** `pareto_display_scope=top20` is provided
|
||||
- **THEN** applicable dimensions (type, workflow, equipment) SHALL truncate results to top 20 items after sorting
|
||||
- **WHEN** `pareto_display_scope` is omitted or `all`
|
||||
- **THEN** all items SHALL be returned (subject to `pareto_scope` filter)
|
||||
|
||||
## ADDED Requirements
|
||||
|
||||
### Requirement: Reject History API SHALL expose materialized Pareto freshness metadata
|
||||
The API SHALL surface stable metadata so operators and clients can identify whether Pareto responses came from materialized snapshots or fallback paths.
|
||||
|
||||
#### Scenario: Materialized hit metadata
|
||||
- **WHEN** batch pareto response is served from materialized snapshot
|
||||
- **THEN** response metadata SHALL indicate materialized source and snapshot freshness/version identifiers
|
||||
|
||||
#### Scenario: Fallback metadata
|
||||
- **WHEN** response uses legacy fallback due to snapshot miss/stale/build failure
|
||||
- **THEN** response metadata SHALL include a stable fallback reason code
|
||||
@@ -0,0 +1,49 @@
|
||||
## ADDED Requirements
|
||||
|
||||
### Requirement: Reject History Pareto materialization SHALL build reusable aggregate snapshots
|
||||
The system SHALL build reusable Pareto aggregate snapshots from cached reject-history query datasets so interactive Pareto requests do not require full lot-level regrouping on every call.
|
||||
|
||||
#### Scenario: Build snapshot from cached dataset
|
||||
- **WHEN** a valid `query_id` has cached reject-history dataset and Pareto data is requested
|
||||
- **THEN** the system SHALL build a materialized snapshot containing the six supported Pareto dimensions (`reason`, `package`, `type`, `workflow`, `workcenter`, `equipment`)
|
||||
- **THEN** the snapshot SHALL include quantities needed to compute `metric_value`, `pct`, `cumPct`, and affected count fields
|
||||
|
||||
#### Scenario: Build skipped for missing dataset cache
|
||||
- **WHEN** the referenced `query_id` dataset is missing or expired
|
||||
- **THEN** snapshot build SHALL NOT proceed
|
||||
- **THEN** the caller SHALL receive a deterministic cache-miss outcome
|
||||
|
||||
### Requirement: Materialized snapshot keys SHALL encode filter identity and schema version
|
||||
The system SHALL key materialized Pareto snapshots by canonical filter identity and schema version to prevent cross-context reuse.
|
||||
|
||||
#### Scenario: Distinct supplementary filters generate distinct snapshots
|
||||
- **WHEN** two requests share the same `query_id` but differ in supplementary filters or policy toggles
|
||||
- **THEN** they SHALL resolve to different materialized snapshot keys
|
||||
|
||||
#### Scenario: Schema version invalidates prior snapshots
|
||||
- **WHEN** materialization schema version is incremented
|
||||
- **THEN** snapshots produced by prior versions SHALL NOT be treated as valid hits
|
||||
|
||||
### Requirement: Materialized snapshots SHALL preserve cross-filter semantics
|
||||
Materialized read paths SHALL produce the same cross-filter behavior as legacy DataFrame-based Pareto computation.
|
||||
|
||||
#### Scenario: Exclude-self behavior parity
|
||||
- **WHEN** `sel_reason=A` and `sel_type=X` are active
|
||||
- **THEN** reason Pareto SHALL be computed with `type=X` applied but without `reason=A` self-filter
|
||||
- **THEN** type Pareto SHALL be computed with `reason=A` applied but without `type=X` self-filter
|
||||
|
||||
#### Scenario: Multi-dimension intersection parity
|
||||
- **WHEN** multiple `sel_*` filters are active across dimensions
|
||||
- **THEN** each non-excluded dimension result SHALL reflect the AND intersection of all other selected dimensions
|
||||
|
||||
### Requirement: Materialized snapshots SHALL enforce bounded lifecycle and capacity
|
||||
Materialized Pareto cache storage SHALL be bounded by TTL and size guardrails to avoid unbounded memory growth.
|
||||
|
||||
#### Scenario: Snapshot expiry follows configured retention
|
||||
- **WHEN** a materialized snapshot exceeds configured TTL
|
||||
- **THEN** it SHALL be treated as expired and SHALL NOT be returned as a cache hit
|
||||
|
||||
#### Scenario: Oversized snapshot handling
|
||||
- **WHEN** a snapshot build exceeds configured snapshot size guardrail
|
||||
- **THEN** the snapshot SHALL be rejected or degraded according to policy
|
||||
- **THEN** the system SHALL record the rejection/degradation reason for operations telemetry
|
||||
@@ -0,0 +1,35 @@
|
||||
## 1. Materialization Service Foundation
|
||||
|
||||
- [x] 1.1 Create a dedicated reject Pareto materialization service module with key builder, payload schema versioning, and read/write interfaces
|
||||
- [x] 1.2 Implement canonical filter-context hashing (policy toggles, supplementary filters, trend dates) for materialized snapshot key isolation
|
||||
- [x] 1.3 Implement single-flight guard for concurrent snapshot builds targeting the same key
|
||||
- [x] 1.4 Add TTL and payload-size guardrails for materialized snapshots with explicit rejection paths
|
||||
|
||||
## 2. Snapshot Build and Compute Path
|
||||
|
||||
- [x] 2.1 Implement snapshot build pipeline from cached reject dataset to six-dimension aggregate structures
|
||||
- [x] 2.2 Implement cross-filter evaluation on materialized structures with exclude-self parity to current batch Pareto behavior
|
||||
- [x] 2.3 Implement `pareto_scope` (`top80`/`all`) and `pareto_display_scope` compatibility on materialized outputs
|
||||
- [x] 2.4 Add deterministic invalidation rules for stale or schema-mismatched snapshots
|
||||
|
||||
## 3. API Integration and Compatibility
|
||||
|
||||
- [x] 3.1 Integrate materialized read-through path into `compute_batch_pareto` and cached `compute_dimension_pareto` flow
|
||||
- [x] 3.2 Implement safe fallback to legacy DataFrame-based compute when snapshot is missing, stale, or build fails
|
||||
- [x] 3.3 Add response metadata fields for materialized source/freshness/version and fallback reason codes without breaking existing payload schema
|
||||
- [x] 3.4 Ensure cache-miss behavior for missing `query_id` remains unchanged (no Oracle fallback)
|
||||
|
||||
## 4. Observability and Operations Signals
|
||||
|
||||
- [x] 4.1 Extend cache telemetry payload to include materialized hit/miss/build/fallback counters and rates
|
||||
- [x] 4.2 Add snapshot freshness and payload-size telemetry fields to deep health diagnostics
|
||||
- [x] 4.3 Emit and document stable fallback reason codes (`miss`, `stale`, `build_failed`, etc.) for alert correlation
|
||||
- [x] 4.4 Add logging hooks for build latency and build failure diagnostics with request/key correlation context
|
||||
|
||||
## 5. Validation, Rollout, and Regression Safety
|
||||
|
||||
- [x] 5.1 Add unit tests for key isolation, schema version invalidation, single-flight behavior, and guardrail enforcement
|
||||
- [x] 5.2 Add parity tests comparing materialized and legacy results across multi-dimension cross-filter scenarios
|
||||
- [x] 5.3 Add route/service tests validating metadata exposure and fallback behavior under snapshot miss/stale/build-failure paths
|
||||
- [x] 5.4 Execute reject-history regression suite and stress checks for repeated Pareto filter toggling to confirm lower worker memory pressure
|
||||
- [x] 5.5 Add feature-flagged rollout plan (telemetry-only -> build-enabled -> read-through) with rollback switch
|
||||
@@ -1,88 +1,30 @@
|
||||
## Purpose
|
||||
Define stable requirements for cache-observability-hardening.
|
||||
## Requirements
|
||||
### Requirement: Layered Cache SHALL Expose Operational State
|
||||
The route cache implementation SHALL expose layered cache operational state, including mode, freshness, and degradation status.
|
||||
|
||||
#### Scenario: Redis unavailable degradation state
|
||||
- **WHEN** Redis is unavailable
|
||||
- **THEN** health endpoints MUST indicate degraded cache mode while keeping L1 memory cache active
|
||||
## MODIFIED Requirements
|
||||
|
||||
### Requirement: Cache Telemetry MUST be Queryable for Operations
|
||||
The system MUST provide cache telemetry suitable for operations diagnostics.
|
||||
The system MUST provide cache telemetry suitable for operations diagnostics, including materialized Pareto cache behavior for reject-history workloads.
|
||||
|
||||
#### Scenario: Telemetry inspection
|
||||
- **WHEN** operators request deep health status
|
||||
- **THEN** cache-related metrics/state SHALL be present and interpretable for troubleshooting
|
||||
|
||||
### Requirement: Health Endpoints SHALL Expose Pool Saturation and Degradation Reason Codes
|
||||
Operational health endpoints MUST report connection pool saturation indicators and explicit degradation reason codes.
|
||||
#### Scenario: Materialized Pareto telemetry visibility
|
||||
- **WHEN** materialized Pareto cache is enabled
|
||||
- **THEN** telemetry SHALL expose at least hit count/rate, miss count/rate, build count, build failure count, and fallback count
|
||||
- **THEN** telemetry SHALL expose latest snapshot freshness indicators and aggregate payload size indicators
|
||||
|
||||
#### Scenario: Pool saturation observed
|
||||
- **WHEN** checked-out connections and overflow approach configured limits
|
||||
- **THEN** deep health output MUST expose saturation metrics and degraded reason classification
|
||||
## ADDED Requirements
|
||||
|
||||
### Requirement: Degraded Responses MUST Be Correlatable Across API and Health Telemetry
|
||||
Error responses for degraded states SHALL include stable codes that can be mapped to health telemetry and operational dashboards.
|
||||
### Requirement: Pareto materialization fallback reasons SHALL be operationally classifiable
|
||||
Telemetry MUST classify fallback outcomes with stable reason codes so repeated degradations can be monitored and alerted.
|
||||
|
||||
#### Scenario: Degraded API response correlation
|
||||
- **WHEN** an API request fails due to circuit-open or pool-exhausted conditions
|
||||
- **THEN** operators MUST be able to match the response code to current health telemetry state
|
||||
#### Scenario: Snapshot miss fallback reason
|
||||
- **WHEN** request falls back because no snapshot exists
|
||||
- **THEN** telemetry SHALL record a stable reason code for snapshot miss
|
||||
|
||||
### Requirement: Operational Alert Thresholds SHALL Be Explicitly Defined
|
||||
The system MUST define alert thresholds for sustained degraded state, repeated worker recovery, and abnormal retry pressure.
|
||||
|
||||
#### Scenario: Sustained degradation threshold exceeded
|
||||
- **WHEN** degraded status persists beyond configured duration
|
||||
- **THEN** the monitoring contract MUST classify the service as alert-worthy with actionable context
|
||||
|
||||
### Requirement: Cache Telemetry SHALL Include Memory Amplification Signals
|
||||
Operational telemetry MUST expose cache-domain memory usage indicators and representation amplification factors, and MUST differentiate between authoritative data payload and derived/index helper structures.
|
||||
|
||||
#### Scenario: Deep health telemetry request after representation normalization
|
||||
- **WHEN** operators inspect cache telemetry for resource or WIP domains
|
||||
- **THEN** telemetry MUST include per-domain memory footprint, amplification indicators, and enough structure detail to verify that full-record duplication is not reintroduced
|
||||
|
||||
### Requirement: Efficiency Benchmarks SHALL Gate Cache Refactor Rollout
|
||||
Cache/query efficiency changes MUST be validated against baseline latency and memory benchmarks before rollout.
|
||||
|
||||
#### Scenario: Pre-release validation
|
||||
- **WHEN** cache refactor changes are prepared for deployment
|
||||
- **THEN** benchmark results MUST demonstrate no regression beyond configured thresholds for P95 latency and memory usage
|
||||
|
||||
### Requirement: Process-Level Cache SHALL Use Bounded Capacity with Deterministic Eviction
|
||||
Process-level parsed-data caches MUST enforce a configurable maximum key capacity and use deterministic eviction behavior when capacity is exceeded.
|
||||
|
||||
#### Scenario: Cache capacity reached
|
||||
- **WHEN** a new cache entry is inserted and key capacity is at limit
|
||||
- **THEN** cache MUST evict entries according to defined policy before storing the new key
|
||||
|
||||
#### Scenario: Repeated access updates recency
|
||||
- **WHEN** an existing cache key is read or overwritten
|
||||
- **THEN** eviction order MUST reflect recency semantics so hot keys are retained preferentially
|
||||
|
||||
### Requirement: Cache Publish MUST Preserve Previous Readable Snapshot on Failure
|
||||
When refreshing full-table cache payloads, the system MUST avoid exposing partially published states to readers.
|
||||
|
||||
#### Scenario: Publish fails after payload serialization
|
||||
- **WHEN** a cache refresh has prepared new payload but publish operation fails
|
||||
- **THEN** previously published cache keys MUST remain readable and metadata MUST remain consistent with old snapshot
|
||||
|
||||
#### Scenario: Publish succeeds
|
||||
- **WHEN** publish operation completes successfully
|
||||
- **THEN** data payload and metadata keys MUST be visible as one coherent new snapshot
|
||||
|
||||
### Requirement: Process-Level Cache Slow Path SHALL Minimize Lock Hold Time
|
||||
Large payload parsing MUST NOT happen inside long-held process cache locks.
|
||||
|
||||
#### Scenario: Cache miss under concurrent requests
|
||||
- **WHEN** multiple requests hit process cache miss
|
||||
- **THEN** parsing work SHALL happen outside lock-protected mutation section, and lock scope SHALL be limited to consistency check + commit
|
||||
|
||||
### Requirement: Process-Level Cache Policies MUST Stay Consistent Across Services
|
||||
All service-local process caches MUST support bounded capacity with deterministic eviction.
|
||||
|
||||
#### Scenario: Realtime equipment cache growth
|
||||
- **WHEN** realtime equipment process cache reaches configured capacity
|
||||
- **THEN** entries MUST be evicted according to deterministic LRU behavior
|
||||
#### Scenario: Snapshot stale fallback reason
|
||||
- **WHEN** request falls back because snapshot fails freshness/version checks
|
||||
- **THEN** telemetry SHALL record a stable reason code for stale/incompatible snapshot
|
||||
|
||||
#### Scenario: Build failure fallback reason
|
||||
- **WHEN** request falls back because materialization build failed
|
||||
- **THEN** telemetry SHALL record a stable reason code for build failure
|
||||
|
||||
@@ -1,105 +1,12 @@
|
||||
# reject-history-api Specification
|
||||
|
||||
## Purpose
|
||||
Defines the HTTP API contract for the reject-history query feature: request validation, summary/trend/Pareto/list/export endpoints, cache-derived interactive computation with memory guards, and materialized Pareto snapshot read-through with legacy DataFrame fallback.
|
||||
## Requirements
|
||||
### Requirement: Reject History API SHALL validate required query parameters
|
||||
The API SHALL validate date parameters and basic paging bounds before executing database work.
|
||||
|
||||
#### Scenario: Missing required dates
|
||||
- **WHEN** a reject-history endpoint requiring date range is called without `start_date` or `end_date`
|
||||
- **THEN** the API SHALL return HTTP 400 with a descriptive validation error
|
||||
|
||||
#### Scenario: Invalid date order
|
||||
- **WHEN** `end_date` is earlier than `start_date`
|
||||
- **THEN** the API SHALL return HTTP 400 and SHALL NOT run SQL queries
|
||||
|
||||
#### Scenario: Date range exceeds maximum
|
||||
- **WHEN** the date range between `start_date` and `end_date` exceeds 730 days
|
||||
- **THEN** the API SHALL return HTTP 400 with error message "日期範圍不可超過 730 天"
|
||||
|
||||
### Requirement: Reject History API primary query response SHALL include partial failure metadata
|
||||
The primary query endpoint SHALL include batch execution completeness information in the response `meta` field when chunks fail during batch query execution.
|
||||
|
||||
#### Scenario: Partial failure metadata in response
|
||||
- **WHEN** `POST /api/reject-history/query` completes with some chunks failing
|
||||
- **THEN** the response SHALL include `meta.has_partial_failure: true`
|
||||
- **THEN** the response SHALL include `meta.failed_chunk_count` as a positive integer
|
||||
- **THEN** the response SHALL include `meta.failed_ranges` as an array of `{start, end}` date strings (if available)
|
||||
- **THEN** the HTTP status SHALL still be 200 (data is partially available)
|
||||
|
||||
#### Scenario: No partial failure metadata on full success
|
||||
- **WHEN** `POST /api/reject-history/query` completes with all chunks succeeding
|
||||
- **THEN** the response `meta` SHALL NOT include `has_partial_failure`, `failed_chunk_count`, or `failed_ranges`
|
||||
|
||||
#### Scenario: Partial failure metadata preserved on cache hit
|
||||
- **WHEN** `POST /api/reject-history/query` returns cached data that originally had partial failures
|
||||
- **THEN** the response SHALL include the same `meta.has_partial_failure`, `meta.failed_chunk_count`, and `meta.failed_ranges` as the original response
|
||||
|
||||
### Requirement: Reject History API SHALL provide summary metrics endpoint
|
||||
The API SHALL provide aggregated summary metrics for the selected filter context.
|
||||
|
||||
#### Scenario: Summary response payload
|
||||
- **WHEN** `GET /api/reject-history/summary` is called with valid filters
|
||||
- **THEN** response SHALL be `{ success: true, data: { ... } }`
|
||||
- **THEN** data SHALL include `MOVEIN_QTY`, `REJECT_TOTAL_QTY`, `DEFECT_QTY`, `REJECT_RATE_PCT`, `DEFECT_RATE_PCT`, `REJECT_SHARE_PCT`, `AFFECTED_LOT_COUNT`, and `AFFECTED_WORKORDER_COUNT`
|
||||
|
||||
### Requirement: Reject History API SHALL support yield-exclusion policy toggle
|
||||
The API SHALL support excluding or including policy-marked scrap reasons through a shared query parameter.
|
||||
|
||||
#### Scenario: Default policy mode
|
||||
- **WHEN** reject-history endpoints are called without `include_excluded_scrap`
|
||||
- **THEN** `include_excluded_scrap` SHALL default to `false`
|
||||
- **THEN** rows mapped to `ERP_PJ_WIP_SCRAP_REASONS_EXCLUDE.ENABLE_FLAG='Y'` SHALL be excluded from yield-related calculations
|
||||
|
||||
#### Scenario: Explicitly include policy-marked scrap
|
||||
- **WHEN** `include_excluded_scrap=true` is provided
|
||||
- **THEN** policy-marked rows SHALL be included in summary/trend/pareto/list/export calculations
|
||||
- **THEN** API response `meta` SHALL include the effective `include_excluded_scrap` value
|
||||
|
||||
#### Scenario: Invalid toggle value
|
||||
- **WHEN** `include_excluded_scrap` is not parseable as boolean
|
||||
- **THEN** the API SHALL return HTTP 400 with a descriptive validation error
|
||||
|
||||
### Requirement: Reject History API SHALL provide trend endpoint
|
||||
The API SHALL return time-series trend data for quantity and rate metrics.
|
||||
|
||||
#### Scenario: Trend response structure
|
||||
- **WHEN** `GET /api/reject-history/trend` is called
|
||||
- **THEN** response SHALL be `{ success: true, data: { items: [...] } }`
|
||||
- **THEN** each trend item SHALL contain bucket date, `REJECT_TOTAL_QTY`, `DEFECT_QTY`, `REJECT_RATE_PCT`, and `DEFECT_RATE_PCT`
|
||||
|
||||
#### Scenario: Trend granularity
|
||||
- **WHEN** `granularity` is provided as `day`, `week`, or `month`
|
||||
- **THEN** the API SHALL aggregate by the requested granularity
|
||||
- **THEN** invalid granularity SHALL return HTTP 400
|
||||
|
||||
### Requirement: Reject History API SHALL provide reason Pareto endpoint
|
||||
The API SHALL return sorted reason distribution data with cumulative percentages. The endpoint supports dimension selection via `dimension` parameter for single-dimension queries.
|
||||
|
||||
#### Scenario: Pareto response payload
|
||||
- **WHEN** `GET /api/reject-history/reason-pareto` is called
|
||||
- **THEN** each item SHALL include `reason`, `category`, selected metric value, `pct`, and `cumPct`
|
||||
- **THEN** items SHALL be sorted descending by selected metric
|
||||
|
||||
#### Scenario: Metric mode validation
|
||||
- **WHEN** `metric_mode` is provided
|
||||
- **THEN** accepted values SHALL be `reject_total` or `defect`
|
||||
- **THEN** invalid `metric_mode` SHALL return HTTP 400
|
||||
|
||||
#### Scenario: Dimension selection
|
||||
- **WHEN** `dimension` parameter is provided with a valid value (reason, package, type, workflow, workcenter, equipment)
|
||||
- **THEN** the endpoint SHALL return Pareto data for that dimension
|
||||
- **WHEN** `dimension` is not provided
|
||||
- **THEN** the endpoint SHALL default to `reason`
|
||||
## MODIFIED Requirements
|
||||
|
||||
### Requirement: Reject History API SHALL provide batch Pareto endpoint with cross-filter
|
||||
The API SHALL provide a batch Pareto endpoint that returns all 6 dimension Pareto results in a single response, supporting cross-dimension filtering with exclude-self logic.
|
||||
The API SHALL provide a batch Pareto endpoint that returns all 6 dimension Pareto results in a single response, supporting cross-dimension filtering with exclude-self logic, and SHALL prefer materialized Pareto snapshots over full detail regrouping.
|
||||
|
||||
#### Scenario: Batch Pareto response structure
|
||||
- **WHEN** `GET /api/reject-history/batch-pareto` is called with valid `query_id`
|
||||
- **THEN** response SHALL be `{ success: true, data: { dimensions: { reason: {...}, package: {...}, type: {...}, workflow: {...}, workcenter: {...}, equipment: {...} } } }`
|
||||
- **THEN** each dimension object SHALL include `items` array with same schema as reason-pareto items (`reason`, `metric_value`, `pct`, `cumPct`, `MOVEIN_QTY`, `REJECT_TOTAL_QTY`, `DEFECT_QTY`, `count`)
|
||||
- **THEN** each dimension object SHALL include `items` array with schema (`reason`, `metric_value`, `pct`, `cumPct`, `MOVEIN_QTY`, `REJECT_TOTAL_QTY`, `DEFECT_QTY`, `count`)
|
||||
|
||||
#### Scenario: Cross-filter exclude-self logic
|
||||
- **WHEN** `sel_reason=A&sel_type=X` is provided
|
||||
@@ -109,112 +16,42 @@ The API SHALL provide a batch Pareto endpoint that returns all 6 dimension Paret
|
||||
|
||||
#### Scenario: Empty selections return unfiltered Paretos
|
||||
- **WHEN** batch-pareto is called with no `sel_*` parameters
|
||||
- **THEN** all 6 dimensions SHALL return their full Pareto distribution (same as calling reason-pareto individually with no cross-filter)
|
||||
- **THEN** all 6 dimensions SHALL return their full Pareto distribution (subject to `pareto_scope`)
|
||||
|
||||
#### Scenario: Cache-only computation
|
||||
- **WHEN** `query_id` does not exist in cache
|
||||
- **THEN** the endpoint SHALL return HTTP 400 with error message indicating cache miss
|
||||
- **THEN** the endpoint SHALL NOT fall back to Oracle query
|
||||
|
||||
#### Scenario: Materialized snapshot preferred
|
||||
- **WHEN** a valid and fresh materialized Pareto snapshot exists for the request context
|
||||
- **THEN** the endpoint SHALL return results from that snapshot
|
||||
- **THEN** the endpoint SHALL avoid full lot-level DataFrame regrouping for the same request
|
||||
|
||||
#### Scenario: Materialized miss fallback behavior
|
||||
- **WHEN** materialized snapshot is unavailable, stale, or build fails
|
||||
- **THEN** the endpoint SHALL fall back to legacy cache DataFrame computation
|
||||
- **THEN** the response schema and filter semantics SHALL remain unchanged
|
||||
|
||||
#### Scenario: Supplementary and policy filters apply
|
||||
- **WHEN** batch-pareto is called with supplementary filters (packages, workcenter_groups, reason) and policy toggles
|
||||
- **THEN** all 6 dimension Paretos SHALL be computed after applying policy and supplementary filters first (before cross-filter)
|
||||
|
||||
#### Scenario: Data source is cached DataFrame only
|
||||
- **WHEN** batch-pareto computes dimension Paretos
|
||||
- **THEN** computation SHALL operate on the in-memory cached Pandas DataFrame (populated by the primary query)
|
||||
- **THEN** the endpoint SHALL NOT issue any additional Oracle database queries
|
||||
- **THEN** response time SHALL be sub-100ms since all computation is in-memory
|
||||
|
||||
#### Scenario: Display scope (TOP20) support
|
||||
- **WHEN** `pareto_display_scope=top20` is provided
|
||||
- **THEN** applicable dimensions (type, workflow, equipment) SHALL truncate results to top 20 items after sorting
|
||||
- **WHEN** `pareto_display_scope` is omitted or `all`
|
||||
- **THEN** all items SHALL be returned (subject to pareto_scope 80% filter if active)
|
||||
- **THEN** all items SHALL be returned (subject to `pareto_scope` filter)
|
||||
|
||||
### Requirement: Reject History API SHALL support multi-dimension Pareto selection in view and export
|
||||
The detail view and export endpoints SHALL accept multiple dimension selections simultaneously and apply them with AND logic.
|
||||
## ADDED Requirements
|
||||
|
||||
#### Scenario: Multi-dimension filter on view endpoint
|
||||
- **WHEN** `GET /api/reject-history/view` is called with `sel_reason=A&sel_type=X`
|
||||
- **THEN** returned rows SHALL match reason=A AND type=X (both filters applied simultaneously)
|
||||
### Requirement: Reject History API SHALL expose materialized Pareto freshness metadata
|
||||
The API SHALL surface stable metadata so operators and clients can identify whether Pareto responses came from materialized snapshots or fallback paths.
|
||||
|
||||
#### Scenario: Multi-dimension filter on export endpoint
|
||||
- **WHEN** `GET /api/reject-history/export-cached` is called with `sel_reason=A&sel_type=X`
|
||||
- **THEN** exported CSV SHALL contain only rows matching reason=A AND type=X
|
||||
|
||||
#### Scenario: Backward compatibility with single-dimension params
|
||||
- **WHEN** `pareto_dimension` and `pareto_values` are provided (legacy format)
|
||||
- **THEN** the API SHALL still accept and apply them as before
|
||||
- **WHEN** both `sel_*` params and legacy params are provided
|
||||
- **THEN** `sel_*` params SHALL take precedence
|
||||
|
||||
### Requirement: Reject History API SHALL provide paginated detail endpoint
|
||||
The API SHALL return paginated detailed rows for the selected filter context.
|
||||
|
||||
#### Scenario: List response payload
|
||||
- **WHEN** `GET /api/reject-history/list?page=1&per_page=50` is called
|
||||
- **THEN** response SHALL include `{ items: [...], pagination: { page, perPage, total, totalPages } }`
|
||||
- **THEN** each row SHALL include date, process dimensions, reason fields, `MOVEIN_QTY`, `REJECT_TOTAL_QTY`, `DEFECT_QTY`, and reject component columns
|
||||
|
||||
#### Scenario: Paging bounds
|
||||
- **WHEN** `page < 1`
|
||||
- **THEN** page SHALL be treated as 1
|
||||
- **WHEN** `per_page > 200`
|
||||
- **THEN** `per_page` SHALL be capped at 200
|
||||
|
||||
### Requirement: Reject History API SHALL provide CSV export endpoint
|
||||
The API SHALL provide CSV export using the same filter and metric semantics as list/query APIs.
|
||||
|
||||
#### Scenario: Export payload consistency
|
||||
- **WHEN** `GET /api/reject-history/export` is called with valid filters
|
||||
- **THEN** CSV headers SHALL include both `REJECT_TOTAL_QTY` and `DEFECT_QTY`
|
||||
- **THEN** export rows SHALL follow the same semantic definitions as summary/list endpoints
|
||||
|
||||
#### Scenario: Cached export supports full detail-filter parity
|
||||
- **WHEN** `GET /api/reject-history/export-cached` is called with an existing `query_id`
|
||||
- **THEN** the endpoint SHALL apply primary policy toggles, supplementary filters, trend-date filters, metric filter, and Pareto-selected item filters
|
||||
- **THEN** returned rows SHALL match the same filtered detail dataset semantics used by `GET /api/reject-history/view`
|
||||
|
||||
#### Scenario: CSV encoding and escaping are stable
|
||||
- **WHEN** either export endpoint returns CSV
|
||||
- **THEN** response charset SHALL be `utf-8-sig`
|
||||
- **THEN** values containing commas, quotes, or newlines SHALL be CSV-escaped correctly
|
||||
|
||||
### Requirement: Reject History API SHALL centralize SQL in reject_history SQL directory
|
||||
The service SHALL load SQL from dedicated files under `src/mes_dashboard/sql/reject_history/`.
|
||||
|
||||
#### Scenario: SQL file loading
|
||||
- **WHEN** reject-history service executes queries
|
||||
- **THEN** SQL SHALL be loaded from files in `sql/reject_history`
|
||||
- **THEN** user-supplied filters SHALL be passed through bind parameters
|
||||
- **THEN** user input SHALL NOT be interpolated into SQL strings directly
|
||||
|
||||
### Requirement: Reject History API SHALL use cached exclusion-policy source
|
||||
The API SHALL read exclusion-policy reasons from cached `ERP_PJ_WIP_SCRAP_REASONS_EXCLUDE` data instead of querying Oracle on every request.
|
||||
|
||||
#### Scenario: Enabled exclusions only
|
||||
- **WHEN** exclusion-policy data is loaded
|
||||
- **THEN** only rows with `ENABLE_FLAG='Y'` SHALL be treated as active exclusions
|
||||
|
||||
#### Scenario: Daily full-table cache refresh
|
||||
- **WHEN** exclusion cache is initialized
|
||||
- **THEN** the full table SHALL be loaded and refreshed at least once per 24 hours
|
||||
- **THEN** Redis SHOULD be used as shared cache when available, with in-memory fallback when unavailable
|
||||
|
||||
### Requirement: Reject History API SHALL apply rate limiting on expensive endpoints
|
||||
The API SHALL rate-limit high-cost endpoints to protect Oracle and application resources.
|
||||
|
||||
#### Scenario: List and export rate limiting
|
||||
- **WHEN** `/api/reject-history/list` or `/api/reject-history/export` receives excessive requests
|
||||
- **THEN** configured rate limiting SHALL reject requests beyond the threshold within the time window
|
||||
|
||||
### Requirement: Database query execution path
|
||||
The reject-history service (`reject_history_service.py` and `reject_dataset_cache.py`) SHALL use `read_sql_df_slow` (dedicated connection) instead of `read_sql_df` (pooled connection) for all Oracle queries.
|
||||
|
||||
#### Scenario: Primary query uses dedicated connection
|
||||
- **WHEN** the reject-history primary query is executed
|
||||
- **THEN** it uses `read_sql_df_slow` which creates a dedicated Oracle connection outside the pool
|
||||
- **AND** the connection has a 300-second call_timeout (configurable)
|
||||
- **AND** the connection is subject to the global slow query semaphore
|
||||
#### Scenario: Materialized hit metadata
|
||||
- **WHEN** batch pareto response is served from materialized snapshot
|
||||
- **THEN** response metadata SHALL indicate materialized source and snapshot freshness/version identifiers
|
||||
|
||||
#### Scenario: Fallback metadata
|
||||
- **WHEN** response uses legacy fallback due to snapshot miss/stale/build failure
|
||||
- **THEN** response metadata SHALL include a stable fallback reason code
|
||||
|
||||
@@ -0,0 +1,49 @@
|
||||
## ADDED Requirements
|
||||
|
||||
### Requirement: Reject History Pareto materialization SHALL build reusable aggregate snapshots
|
||||
The system SHALL build reusable Pareto aggregate snapshots from cached reject-history query datasets so interactive Pareto requests do not require full lot-level regrouping on every call.
|
||||
|
||||
#### Scenario: Build snapshot from cached dataset
|
||||
- **WHEN** a valid `query_id` has cached reject-history dataset and Pareto data is requested
|
||||
- **THEN** the system SHALL build a materialized snapshot containing the six supported Pareto dimensions (`reason`, `package`, `type`, `workflow`, `workcenter`, `equipment`)
|
||||
- **THEN** the snapshot SHALL include quantities needed to compute `metric_value`, `pct`, `cumPct`, and affected count fields
|
||||
|
||||
#### Scenario: Build skipped for missing dataset cache
|
||||
- **WHEN** the referenced `query_id` dataset is missing or expired
|
||||
- **THEN** snapshot build SHALL NOT proceed
|
||||
- **THEN** the caller SHALL receive a deterministic cache-miss outcome
|
||||
|
||||
### Requirement: Materialized snapshot keys SHALL encode filter identity and schema version
|
||||
The system SHALL key materialized Pareto snapshots by canonical filter identity and schema version to prevent cross-context reuse.
|
||||
|
||||
#### Scenario: Distinct supplementary filters generate distinct snapshots
|
||||
- **WHEN** two requests share the same `query_id` but differ in supplementary filters or policy toggles
|
||||
- **THEN** they SHALL resolve to different materialized snapshot keys
|
||||
|
||||
#### Scenario: Schema version invalidates prior snapshots
|
||||
- **WHEN** materialization schema version is incremented
|
||||
- **THEN** snapshots produced by prior versions SHALL NOT be treated as valid hits
|
||||
|
||||
### Requirement: Materialized snapshots SHALL preserve cross-filter semantics
|
||||
Materialized read paths SHALL produce the same cross-filter behavior as legacy DataFrame-based Pareto computation.
|
||||
|
||||
#### Scenario: Exclude-self behavior parity
|
||||
- **WHEN** `sel_reason=A` and `sel_type=X` are active
|
||||
- **THEN** reason Pareto SHALL be computed with `type=X` applied but without `reason=A` self-filter
|
||||
- **THEN** type Pareto SHALL be computed with `reason=A` applied but without `type=X` self-filter
|
||||
|
||||
#### Scenario: Multi-dimension intersection parity
|
||||
- **WHEN** multiple `sel_*` filters are active across dimensions
|
||||
- **THEN** each non-excluded dimension result SHALL reflect the AND intersection of all other selected dimensions
|
||||
|
||||
### Requirement: Materialized snapshots SHALL enforce bounded lifecycle and capacity
|
||||
Materialized Pareto cache storage SHALL be bounded by TTL and size guardrails to avoid unbounded memory growth.
|
||||
|
||||
#### Scenario: Snapshot expiry follows configured retention
|
||||
- **WHEN** a materialized snapshot exceeds configured TTL
|
||||
- **THEN** it SHALL be treated as expired and SHALL NOT be returned as a cache hit
|
||||
|
||||
#### Scenario: Oversized snapshot handling
|
||||
- **WHEN** a snapshot build exceeds configured snapshot size guardrail
|
||||
- **THEN** the snapshot SHALL be rejected or degraded according to policy
|
||||
- **THEN** the system SHALL record the rejection/degradation reason for operations telemetry
|
||||
@@ -364,6 +364,15 @@ def api_performance_detail():
|
||||
"worker_pid": os.getpid(),
|
||||
}
|
||||
|
||||
# ---- Pareto materialization telemetry ----
|
||||
pareto_materialization = None
|
||||
try:
|
||||
from mes_dashboard.services.reject_pareto_materialized import get_telemetry
|
||||
pareto_materialization = get_telemetry()
|
||||
except Exception as exc:
|
||||
logger.warning("Failed to collect pareto materialization telemetry: %s", exc)
|
||||
pareto_materialization = {"error": str(exc)}
|
||||
|
||||
return jsonify({
|
||||
"success": True,
|
||||
"data": {
|
||||
@@ -372,6 +381,7 @@ def api_performance_detail():
|
||||
"route_cache": route_cache,
|
||||
"db_pool": db_pool,
|
||||
"direct_connections": direct_connections,
|
||||
"pareto_materialization": pareto_materialization,
|
||||
},
|
||||
})
|
||||
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
from datetime import date, timedelta
|
||||
from typing import Optional
|
||||
@@ -33,6 +34,7 @@ from mes_dashboard.services.reject_history_service import (
|
||||
)
|
||||
|
||||
reject_history_bp = Blueprint("reject_history", __name__)
|
||||
logger = logging.getLogger("mes_dashboard.reject_history_routes")
|
||||
_REJECT_HISTORY_OPTIONS_CACHE_TTL_SECONDS = int(
|
||||
os.getenv("REJECT_HISTORY_OPTIONS_CACHE_TTL_SECONDS", "14400")
|
||||
)
|
||||
@@ -352,7 +354,8 @@ def api_reject_history_reason_pareto():
|
||||
exclude_pb_diode=exclude_pb_diode,
|
||||
)
|
||||
if result is not None:
|
||||
return jsonify({"success": True, "data": result, "meta": {}})
|
||||
pareto_meta = result.pop("_pareto_meta", None) or {}
|
||||
return jsonify({"success": True, "data": result, "meta": pareto_meta})
|
||||
# Cache expired, fall through to Oracle query
|
||||
|
||||
result = query_dimension_pareto(
|
||||
@@ -378,6 +381,9 @@ def api_reject_history_reason_pareto():
|
||||
return jsonify({"success": True, "data": data, "meta": meta})
|
||||
except ValueError as exc:
|
||||
return jsonify({"success": False, "error": str(exc)}), 400
|
||||
except MemoryError as exc:
|
||||
logger.warning("Reject history reason-pareto memory guard: %s", exc)
|
||||
return jsonify({"success": False, "error": str(exc)}), 400
|
||||
except Exception:
|
||||
return jsonify({"success": False, "error": "查詢柏拉圖資料失敗"}), 500
|
||||
|
||||
@@ -414,7 +420,14 @@ def api_reject_history_batch_pareto():
|
||||
)
|
||||
if result is None:
|
||||
return jsonify({"success": False, "error": "cache_miss"}), 400
|
||||
return jsonify({"success": True, "data": result})
|
||||
pareto_meta = result.pop("_pareto_meta", None)
|
||||
resp: dict = {"success": True, "data": result}
|
||||
if pareto_meta:
|
||||
resp["meta"] = pareto_meta
|
||||
return jsonify(resp)
|
||||
except MemoryError as exc:
|
||||
logger.warning("Reject history batch-pareto memory guard: %s", exc)
|
||||
return jsonify({"success": False, "error": str(exc)}), 400
|
||||
except ValueError as exc:
|
||||
return jsonify({"success": False, "error": str(exc)}), 400
|
||||
except Exception:
|
||||
@@ -640,6 +653,9 @@ def api_reject_history_view():
|
||||
|
||||
return jsonify({"success": True, "data": result})
|
||||
|
||||
except MemoryError as exc:
|
||||
logger.warning("Reject history view memory guard: %s", exc)
|
||||
return jsonify({"success": False, "error": str(exc)}), 400
|
||||
except ValueError as exc:
|
||||
return jsonify({"success": False, "error": str(exc)}), 400
|
||||
except Exception:
|
||||
@@ -706,6 +722,9 @@ def api_reject_history_export_cached():
|
||||
},
|
||||
)
|
||||
|
||||
except MemoryError as exc:
|
||||
logger.warning("Reject history export-cached memory guard: %s", exc)
|
||||
return jsonify({"success": False, "error": str(exc)}), 400
|
||||
except ValueError as exc:
|
||||
return jsonify({"success": False, "error": str(exc)}), 400
|
||||
except Exception:
|
||||
|
||||
@@ -73,7 +73,7 @@ _RETRYABLE_PATTERNS = (
|
||||
# ============================================================
|
||||
|
||||
BATCH_CHUNK_MAX_MEMORY_MB: int = int(
|
||||
os.getenv("BATCH_CHUNK_MAX_MEMORY_MB", "256")
|
||||
os.getenv("BATCH_CHUNK_MAX_MEMORY_MB", "192")
|
||||
)
|
||||
|
||||
BATCH_QUERY_TIME_THRESHOLD_DAYS: int = int(
|
||||
|
||||
@@ -11,6 +11,7 @@ Cache layers:
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import gc
|
||||
import hashlib
|
||||
import json
|
||||
import logging
|
||||
@@ -67,12 +68,12 @@ _CACHE_MAX_SIZE = 8
|
||||
_REDIS_NAMESPACE = "reject_dataset"
|
||||
_CACHE_SCHEMA_VERSION = 4
|
||||
_REJECT_ENGINE_GRAIN_DAYS = max(1, int(os.getenv("REJECT_ENGINE_GRAIN_DAYS", "10")))
|
||||
_REJECT_ENGINE_PARALLEL = max(1, int(os.getenv("REJECT_ENGINE_PARALLEL", "2")))
|
||||
_REJECT_ENGINE_PARALLEL = max(1, int(os.getenv("REJECT_ENGINE_PARALLEL", "1")))
|
||||
_REJECT_ENGINE_MAX_ROWS_PER_CHUNK = max(
|
||||
1, int(os.getenv("REJECT_ENGINE_MAX_ROWS_PER_CHUNK", "50000"))
|
||||
)
|
||||
_REJECT_ENGINE_MAX_TOTAL_ROWS = max(
|
||||
1, int(os.getenv("REJECT_ENGINE_MAX_TOTAL_ROWS", "300000"))
|
||||
1, int(os.getenv("REJECT_ENGINE_MAX_TOTAL_ROWS", "200000"))
|
||||
)
|
||||
_REJECT_ENGINE_SPILL_ENABLED = os.getenv("REJECT_ENGINE_SPILL_ENABLED", "true").strip().lower() in {
|
||||
"1",
|
||||
@@ -81,11 +82,27 @@ _REJECT_ENGINE_SPILL_ENABLED = os.getenv("REJECT_ENGINE_SPILL_ENABLED", "true").
|
||||
"on",
|
||||
}
|
||||
_REJECT_ENGINE_MAX_RESULT_MB = max(
|
||||
1, int(os.getenv("REJECT_ENGINE_MAX_RESULT_MB", "64"))
|
||||
1, int(os.getenv("REJECT_ENGINE_MAX_RESULT_MB", "48"))
|
||||
)
|
||||
_REJECT_ENGINE_SPOOL_TTL_SECONDS = max(
|
||||
300, int(os.getenv("REJECT_ENGINE_SPOOL_TTL_SECONDS", "21600"))
|
||||
)
|
||||
_REJECT_DERIVE_MAX_INPUT_MB = max(
|
||||
16, int(os.getenv("REJECT_DERIVE_MAX_INPUT_MB", "96"))
|
||||
)
|
||||
_REJECT_DERIVE_MAX_PROJECTED_RSS_MB = max(
|
||||
_REJECT_DERIVE_MAX_INPUT_MB + 64,
|
||||
int(os.getenv("REJECT_DERIVE_MAX_PROJECTED_RSS_MB", "1100")),
|
||||
)
|
||||
_REJECT_DERIVE_WORKING_SET_FACTOR = max(
|
||||
1.0, float(os.getenv("REJECT_DERIVE_WORKING_SET_FACTOR", "1.8"))
|
||||
)
|
||||
_REJECT_DERIVE_FORCE_GC = os.getenv("REJECT_DERIVE_FORCE_GC", "true").strip().lower() in {
|
||||
"1",
|
||||
"true",
|
||||
"yes",
|
||||
"on",
|
||||
}
|
||||
|
||||
_dataset_cache = ProcessLevelCache(ttl_seconds=_CACHE_TTL, max_size=_CACHE_MAX_SIZE)
|
||||
register_process_cache("reject_dataset", _dataset_cache, "Reject Dataset (L1, 15min)")
|
||||
@@ -283,6 +300,78 @@ def _store_query_result(query_id: str, df: pd.DataFrame) -> bool:
|
||||
return False
|
||||
|
||||
|
||||
def _df_memory_mb(df: pd.DataFrame) -> float:
|
||||
if df is None or df.empty:
|
||||
return 0.0
|
||||
try:
|
||||
return float(df.memory_usage(deep=True).sum()) / (1024 * 1024)
|
||||
except Exception:
|
||||
return 0.0
|
||||
|
||||
|
||||
def _process_rss_mb() -> Optional[float]:
|
||||
try:
|
||||
import psutil # local import: optional runtime dependency
|
||||
except Exception:
|
||||
return None
|
||||
try:
|
||||
process = psutil.Process(os.getpid())
|
||||
return float(process.memory_info().rss) / (1024 * 1024)
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
def _enforce_interactive_memory_guard(df: pd.DataFrame, *, operation: str, query_id: str) -> None:
    """Abort a cache-derived recomputation before it can exceed the worker's memory budget.

    Two guards run in order:
      1. absolute dataset-size cap (``_REJECT_DERIVE_MAX_INPUT_MB``);
      2. projected-RSS cap: current RSS plus the dataset's estimated working
         set (``df_mb * _REJECT_DERIVE_WORKING_SET_FACTOR``) must stay under
         ``_REJECT_DERIVE_MAX_PROJECTED_RSS_MB``.

    Raises MemoryError with a user-facing message when a guard trips; route
    handlers catch it and surface an HTTP 400. Silently returns when the
    frame is empty or RSS cannot be measured.
    """
    if df is None or df.empty:
        return

    input_mb = _df_memory_mb(df)
    if input_mb > float(_REJECT_DERIVE_MAX_INPUT_MB):
        logger.warning(
            "Reject %s due to dataset size guard (query_id=%s, df_mb=%.1f, limit_mb=%d)",
            operation,
            query_id,
            input_mb,
            _REJECT_DERIVE_MAX_INPUT_MB,
        )
        raise MemoryError(
            f"{operation}資料量約 {input_mb:.1f} MB,超過 {_REJECT_DERIVE_MAX_INPUT_MB} MB 上限,請縮小篩選條件後重試"
        )

    current_rss = _process_rss_mb()
    if current_rss is None:
        # psutil unavailable — cannot project RSS, so skip the second guard.
        return

    projected = current_rss + (input_mb * float(_REJECT_DERIVE_WORKING_SET_FACTOR))
    if projected <= float(_REJECT_DERIVE_MAX_PROJECTED_RSS_MB):
        return

    logger.warning(
        "Reject %s due to projected RSS guard (query_id=%s, rss_mb=%.1f, df_mb=%.1f, factor=%.2f, projected_mb=%.1f, limit_mb=%d)",
        operation,
        query_id,
        current_rss,
        input_mb,
        _REJECT_DERIVE_WORKING_SET_FACTOR,
        projected,
        _REJECT_DERIVE_MAX_PROJECTED_RSS_MB,
    )
    raise MemoryError(
        (
            f"目前服務記憶體負載較高(RSS {current_rss:.1f} MB),暫停{operation}計算以保護系統,"
            "請稍後再試或縮小篩選條件"
        )
    )
|
||||
|
||||
|
||||
def _maybe_collect_after_interactive_compute() -> None:
    """Optionally force a GC pass after a heavy cache-derived computation.

    Gated by the ``_REJECT_DERIVE_FORCE_GC`` flag; collection errors are
    swallowed so this can never fail an interactive request.
    """
    if _REJECT_DERIVE_FORCE_GC:
        try:
            gc.collect()
        except Exception:
            pass
|
||||
|
||||
|
||||
# ============================================================
|
||||
# Container resolution (reuse query_tool_service resolvers)
|
||||
# ============================================================
|
||||
@@ -768,53 +857,57 @@ def apply_view(
|
||||
if df is None:
|
||||
return None
|
||||
|
||||
# Apply policy filters first (cache stores unfiltered data)
|
||||
df = _apply_policy_filters(
|
||||
df,
|
||||
include_excluded_scrap=include_excluded_scrap,
|
||||
exclude_material_scrap=exclude_material_scrap,
|
||||
exclude_pb_diode=exclude_pb_diode,
|
||||
)
|
||||
_enforce_interactive_memory_guard(df, operation="視圖查詢", query_id=query_id)
|
||||
try:
|
||||
# Apply policy filters first (cache stores unfiltered data)
|
||||
df = _apply_policy_filters(
|
||||
df,
|
||||
include_excluded_scrap=include_excluded_scrap,
|
||||
exclude_material_scrap=exclude_material_scrap,
|
||||
exclude_pb_diode=exclude_pb_diode,
|
||||
)
|
||||
|
||||
filtered = _apply_supplementary_filters(
|
||||
df,
|
||||
packages=packages,
|
||||
workcenter_groups=workcenter_groups,
|
||||
reasons=reasons,
|
||||
metric_filter=metric_filter,
|
||||
)
|
||||
filtered = _apply_supplementary_filters(
|
||||
df,
|
||||
packages=packages,
|
||||
workcenter_groups=workcenter_groups,
|
||||
reasons=reasons,
|
||||
metric_filter=metric_filter,
|
||||
)
|
||||
|
||||
# Analytics always uses full date range (supplementary-filtered only).
|
||||
# The frontend derives trend from analytics_raw and filters Pareto by
|
||||
# selectedTrendDates client-side.
|
||||
analytics_raw = _derive_analytics_raw(filtered)
|
||||
summary = _derive_summary_from_analytics(analytics_raw)
|
||||
# Analytics always uses full date range (supplementary-filtered only).
|
||||
# The frontend derives trend from analytics_raw and filters Pareto by
|
||||
# selectedTrendDates client-side.
|
||||
analytics_raw = _derive_analytics_raw(filtered)
|
||||
summary = _derive_summary_from_analytics(analytics_raw)
|
||||
|
||||
# Detail list: additionally filter by detail_reason and trend_dates
|
||||
detail_df = filtered
|
||||
if trend_dates:
|
||||
date_set = set(trend_dates)
|
||||
detail_df = detail_df[
|
||||
detail_df["TXN_DAY"].apply(lambda d: _to_date_str(d) in date_set)
|
||||
]
|
||||
if detail_reason:
|
||||
detail_df = detail_df[
|
||||
detail_df["LOSSREASONNAME"].str.strip() == detail_reason.strip()
|
||||
]
|
||||
detail_df = _apply_pareto_selection_filter(
|
||||
detail_df,
|
||||
pareto_dimension=pareto_dimension,
|
||||
pareto_values=pareto_values,
|
||||
pareto_selections=pareto_selections,
|
||||
)
|
||||
# Detail list: additionally filter by detail_reason and trend_dates
|
||||
detail_df = filtered
|
||||
if trend_dates:
|
||||
date_set = set(trend_dates)
|
||||
detail_df = detail_df[
|
||||
detail_df["TXN_DAY"].apply(lambda d: _to_date_str(d) in date_set)
|
||||
]
|
||||
if detail_reason:
|
||||
detail_df = detail_df[
|
||||
detail_df["LOSSREASONNAME"].str.strip() == detail_reason.strip()
|
||||
]
|
||||
detail_df = _apply_pareto_selection_filter(
|
||||
detail_df,
|
||||
pareto_dimension=pareto_dimension,
|
||||
pareto_values=pareto_values,
|
||||
pareto_selections=pareto_selections,
|
||||
)
|
||||
|
||||
detail_page = _paginate_detail(detail_df, page=page, per_page=per_page)
|
||||
detail_page = _paginate_detail(detail_df, page=page, per_page=per_page)
|
||||
|
||||
return {
|
||||
"analytics_raw": analytics_raw,
|
||||
"summary": summary,
|
||||
"detail": detail_page,
|
||||
}
|
||||
return {
|
||||
"analytics_raw": analytics_raw,
|
||||
"summary": summary,
|
||||
"detail": detail_page,
|
||||
}
|
||||
finally:
|
||||
_maybe_collect_after_interactive_compute()
|
||||
|
||||
|
||||
def _apply_supplementary_filters(
|
||||
@@ -1306,7 +1399,11 @@ def compute_dimension_pareto(
|
||||
exclude_material_scrap: bool = True,
|
||||
exclude_pb_diode: bool = True,
|
||||
) -> Optional[Dict[str, Any]]:
|
||||
"""Compute dimension pareto from cached DataFrame (no Oracle query)."""
|
||||
"""Compute dimension pareto from cached DataFrame (no Oracle query).
|
||||
|
||||
Prefers materialized Pareto snapshots when available, falling back to
|
||||
legacy DataFrame-based computation on miss/stale/build-failure.
|
||||
"""
|
||||
metric_mode = _normalize_metric_mode(metric_mode)
|
||||
pareto_scope = _normalize_pareto_scope(pareto_scope)
|
||||
dimension = _normalize_text(dimension).lower() or "reason"
|
||||
@@ -1315,55 +1412,86 @@ def compute_dimension_pareto(
|
||||
f"Invalid dimension, supported: {', '.join(sorted(_DIM_TO_DF_COLUMN.keys()))}"
|
||||
)
|
||||
|
||||
# ---- Materialized read-through path ------------------------------------
|
||||
from mes_dashboard.services.reject_pareto_materialized import (
|
||||
try_materialized_dimension_pareto,
|
||||
)
|
||||
|
||||
mat_result, mat_meta = try_materialized_dimension_pareto(
|
||||
query_id,
|
||||
lambda: _get_cached_df(query_id),
|
||||
dimension=dimension,
|
||||
metric_mode=metric_mode,
|
||||
pareto_scope=pareto_scope,
|
||||
include_excluded_scrap=include_excluded_scrap,
|
||||
exclude_material_scrap=exclude_material_scrap,
|
||||
exclude_pb_diode=exclude_pb_diode,
|
||||
packages=packages,
|
||||
workcenter_groups=workcenter_groups,
|
||||
reasons=reasons,
|
||||
trend_dates=trend_dates,
|
||||
)
|
||||
if mat_result is not None:
|
||||
mat_result["_pareto_meta"] = mat_meta
|
||||
return mat_result
|
||||
|
||||
# ---- Legacy DataFrame-based compute (fallback) -------------------------
|
||||
df = _get_cached_df(query_id)
|
||||
if df is None:
|
||||
return None
|
||||
|
||||
# Keep cache-based pareto behavior aligned with primary/view policy filters.
|
||||
df = _apply_policy_filters(
|
||||
df,
|
||||
include_excluded_scrap=include_excluded_scrap,
|
||||
exclude_material_scrap=exclude_material_scrap,
|
||||
exclude_pb_diode=exclude_pb_diode,
|
||||
)
|
||||
if df is None or df.empty:
|
||||
return {"items": [], "dimension": dimension, "metric_mode": metric_mode}
|
||||
|
||||
dim_col = _DIM_TO_DF_COLUMN.get(dimension)
|
||||
if dim_col not in df.columns:
|
||||
return {"items": [], "dimension": dimension, "metric_mode": metric_mode}
|
||||
|
||||
# Apply supplementary filters
|
||||
filtered = _apply_supplementary_filters(
|
||||
df,
|
||||
packages=packages,
|
||||
workcenter_groups=workcenter_groups,
|
||||
reasons=reasons,
|
||||
)
|
||||
if filtered is None or filtered.empty:
|
||||
return {"items": [], "dimension": dimension, "metric_mode": metric_mode}
|
||||
|
||||
# Apply trend date filter
|
||||
if trend_dates and "TXN_DAY" in filtered.columns:
|
||||
date_set = set(trend_dates)
|
||||
filtered = filtered[
|
||||
filtered["TXN_DAY"].apply(lambda d: _to_date_str(d) in date_set)
|
||||
]
|
||||
if filtered.empty:
|
||||
_enforce_interactive_memory_guard(df, operation="柏拉圖查詢", query_id=query_id)
|
||||
try:
|
||||
# Keep cache-based pareto behavior aligned with primary/view policy filters.
|
||||
df = _apply_policy_filters(
|
||||
df,
|
||||
include_excluded_scrap=include_excluded_scrap,
|
||||
exclude_material_scrap=exclude_material_scrap,
|
||||
exclude_pb_diode=exclude_pb_diode,
|
||||
)
|
||||
if df is None or df.empty:
|
||||
return {"items": [], "dimension": dimension, "metric_mode": metric_mode}
|
||||
|
||||
items = _build_dimension_pareto_items(
|
||||
filtered,
|
||||
dim_col=dim_col,
|
||||
metric_mode=metric_mode,
|
||||
pareto_scope=pareto_scope,
|
||||
)
|
||||
dim_col = _DIM_TO_DF_COLUMN.get(dimension)
|
||||
if dim_col not in df.columns:
|
||||
return {"items": [], "dimension": dimension, "metric_mode": metric_mode}
|
||||
|
||||
return {
|
||||
"items": items,
|
||||
"dimension": dimension,
|
||||
"metric_mode": metric_mode,
|
||||
}
|
||||
# Apply supplementary filters
|
||||
filtered = _apply_supplementary_filters(
|
||||
df,
|
||||
packages=packages,
|
||||
workcenter_groups=workcenter_groups,
|
||||
reasons=reasons,
|
||||
)
|
||||
if filtered is None or filtered.empty:
|
||||
return {"items": [], "dimension": dimension, "metric_mode": metric_mode}
|
||||
|
||||
# Apply trend date filter
|
||||
if trend_dates and "TXN_DAY" in filtered.columns:
|
||||
date_set = set(trend_dates)
|
||||
filtered = filtered[
|
||||
filtered["TXN_DAY"].apply(lambda d: _to_date_str(d) in date_set)
|
||||
]
|
||||
if filtered.empty:
|
||||
return {"items": [], "dimension": dimension, "metric_mode": metric_mode}
|
||||
|
||||
items = _build_dimension_pareto_items(
|
||||
filtered,
|
||||
dim_col=dim_col,
|
||||
metric_mode=metric_mode,
|
||||
pareto_scope=pareto_scope,
|
||||
)
|
||||
|
||||
result = {
|
||||
"items": items,
|
||||
"dimension": dimension,
|
||||
"metric_mode": metric_mode,
|
||||
}
|
||||
if mat_meta:
|
||||
result["_pareto_meta"] = mat_meta
|
||||
return result
|
||||
finally:
|
||||
_maybe_collect_after_interactive_compute()
|
||||
|
||||
|
||||
def compute_batch_pareto(
|
||||
@@ -1381,74 +1509,110 @@ def compute_batch_pareto(
|
||||
exclude_material_scrap: bool = True,
|
||||
exclude_pb_diode: bool = True,
|
||||
) -> Optional[Dict[str, Any]]:
|
||||
"""Compute all six Pareto dimensions from cached DataFrame (no Oracle query)."""
|
||||
"""Compute all six Pareto dimensions from cached DataFrame (no Oracle query).
|
||||
|
||||
Prefers materialized Pareto snapshots when available, falling back to
|
||||
legacy DataFrame-based computation on miss/stale/build-failure.
|
||||
"""
|
||||
metric_mode = _normalize_metric_mode(metric_mode)
|
||||
pareto_scope = _normalize_pareto_scope(pareto_scope)
|
||||
pareto_display_scope = _normalize_pareto_display_scope(pareto_display_scope)
|
||||
normalized_selections = _normalize_pareto_selections(pareto_selections)
|
||||
|
||||
# ---- Materialized read-through path ------------------------------------
|
||||
from mes_dashboard.services.reject_pareto_materialized import (
|
||||
try_materialized_batch_pareto,
|
||||
)
|
||||
|
||||
mat_result, mat_meta = try_materialized_batch_pareto(
|
||||
query_id,
|
||||
lambda: _get_cached_df(query_id),
|
||||
metric_mode=metric_mode,
|
||||
pareto_scope=pareto_scope,
|
||||
pareto_display_scope=pareto_display_scope,
|
||||
pareto_selections=normalized_selections,
|
||||
include_excluded_scrap=include_excluded_scrap,
|
||||
exclude_material_scrap=exclude_material_scrap,
|
||||
exclude_pb_diode=exclude_pb_diode,
|
||||
packages=packages,
|
||||
workcenter_groups=workcenter_groups,
|
||||
reasons=reasons,
|
||||
trend_dates=trend_dates,
|
||||
)
|
||||
if mat_result is not None:
|
||||
mat_result["_pareto_meta"] = mat_meta
|
||||
return mat_result
|
||||
|
||||
# ---- Legacy DataFrame-based compute (fallback) -------------------------
|
||||
df = _get_cached_df(query_id)
|
||||
if df is None:
|
||||
return None
|
||||
|
||||
df = _apply_policy_filters(
|
||||
df,
|
||||
include_excluded_scrap=include_excluded_scrap,
|
||||
exclude_material_scrap=exclude_material_scrap,
|
||||
exclude_pb_diode=exclude_pb_diode,
|
||||
)
|
||||
if df is None or df.empty:
|
||||
return {
|
||||
"dimensions": {
|
||||
dim: {"items": [], "dimension": dim, "metric_mode": metric_mode}
|
||||
for dim in _PARETO_DIMENSIONS
|
||||
}
|
||||
}
|
||||
|
||||
filtered = _apply_supplementary_filters(
|
||||
df,
|
||||
packages=packages,
|
||||
workcenter_groups=workcenter_groups,
|
||||
reasons=reasons,
|
||||
)
|
||||
if filtered is None or filtered.empty:
|
||||
return {
|
||||
"dimensions": {
|
||||
dim: {"items": [], "dimension": dim, "metric_mode": metric_mode}
|
||||
for dim in _PARETO_DIMENSIONS
|
||||
}
|
||||
}
|
||||
|
||||
if trend_dates and "TXN_DAY" in filtered.columns:
|
||||
date_set = set(trend_dates)
|
||||
filtered = filtered[
|
||||
filtered["TXN_DAY"].apply(lambda d: _to_date_str(d) in date_set)
|
||||
]
|
||||
|
||||
dimensions: Dict[str, Dict[str, Any]] = {}
|
||||
for dim in _PARETO_DIMENSIONS:
|
||||
dim_col = _DIM_TO_DF_COLUMN.get(dim)
|
||||
dim_df = _apply_cross_filter(filtered, normalized_selections, exclude_dim=dim)
|
||||
items = _build_dimension_pareto_items(
|
||||
dim_df,
|
||||
dim_col=dim_col,
|
||||
metric_mode=metric_mode,
|
||||
pareto_scope=pareto_scope,
|
||||
_enforce_interactive_memory_guard(df, operation="柏拉圖批次查詢", query_id=query_id)
|
||||
try:
|
||||
df = _apply_policy_filters(
|
||||
df,
|
||||
include_excluded_scrap=include_excluded_scrap,
|
||||
exclude_material_scrap=exclude_material_scrap,
|
||||
exclude_pb_diode=exclude_pb_diode,
|
||||
)
|
||||
if pareto_display_scope == "top20" and dim in _PARETO_TOP20_DIMENSIONS:
|
||||
items = items[:20]
|
||||
dimensions[dim] = {
|
||||
"items": items,
|
||||
"dimension": dim,
|
||||
"metric_mode": metric_mode,
|
||||
}
|
||||
if df is None or df.empty:
|
||||
return {
|
||||
"dimensions": {
|
||||
dim: {"items": [], "dimension": dim, "metric_mode": metric_mode}
|
||||
for dim in _PARETO_DIMENSIONS
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
"dimensions": dimensions,
|
||||
"metric_mode": metric_mode,
|
||||
"pareto_scope": pareto_scope,
|
||||
"pareto_display_scope": pareto_display_scope,
|
||||
}
|
||||
filtered = _apply_supplementary_filters(
|
||||
df,
|
||||
packages=packages,
|
||||
workcenter_groups=workcenter_groups,
|
||||
reasons=reasons,
|
||||
)
|
||||
if filtered is None or filtered.empty:
|
||||
return {
|
||||
"dimensions": {
|
||||
dim: {"items": [], "dimension": dim, "metric_mode": metric_mode}
|
||||
for dim in _PARETO_DIMENSIONS
|
||||
}
|
||||
}
|
||||
|
||||
if trend_dates and "TXN_DAY" in filtered.columns:
|
||||
date_set = set(trend_dates)
|
||||
filtered = filtered[
|
||||
filtered["TXN_DAY"].apply(lambda d: _to_date_str(d) in date_set)
|
||||
]
|
||||
|
||||
dimensions: Dict[str, Dict[str, Any]] = {}
|
||||
for dim in _PARETO_DIMENSIONS:
|
||||
dim_col = _DIM_TO_DF_COLUMN.get(dim)
|
||||
dim_df = _apply_cross_filter(filtered, normalized_selections, exclude_dim=dim)
|
||||
items = _build_dimension_pareto_items(
|
||||
dim_df,
|
||||
dim_col=dim_col,
|
||||
metric_mode=metric_mode,
|
||||
pareto_scope=pareto_scope,
|
||||
)
|
||||
if pareto_display_scope == "top20" and dim in _PARETO_TOP20_DIMENSIONS:
|
||||
items = items[:20]
|
||||
dimensions[dim] = {
|
||||
"items": items,
|
||||
"dimension": dim,
|
||||
"metric_mode": metric_mode,
|
||||
}
|
||||
|
||||
result = {
|
||||
"dimensions": dimensions,
|
||||
"metric_mode": metric_mode,
|
||||
"pareto_scope": pareto_scope,
|
||||
"pareto_display_scope": pareto_display_scope,
|
||||
}
|
||||
if mat_meta:
|
||||
result["_pareto_meta"] = mat_meta
|
||||
return result
|
||||
finally:
|
||||
_maybe_collect_after_interactive_compute()
|
||||
|
||||
|
||||
# ============================================================
|
||||
@@ -1477,63 +1641,67 @@ def export_csv_from_cache(
|
||||
if df is None:
|
||||
return None
|
||||
|
||||
df = _apply_policy_filters(
|
||||
df,
|
||||
include_excluded_scrap=include_excluded_scrap,
|
||||
exclude_material_scrap=exclude_material_scrap,
|
||||
exclude_pb_diode=exclude_pb_diode,
|
||||
)
|
||||
|
||||
filtered = _apply_supplementary_filters(
|
||||
df,
|
||||
packages=packages,
|
||||
workcenter_groups=workcenter_groups,
|
||||
reasons=reasons,
|
||||
metric_filter=metric_filter,
|
||||
)
|
||||
|
||||
if trend_dates:
|
||||
date_set = set(trend_dates)
|
||||
filtered = filtered[
|
||||
filtered["TXN_DAY"].apply(lambda d: _to_date_str(d) in date_set)
|
||||
]
|
||||
if detail_reason and "LOSSREASONNAME" in filtered.columns:
|
||||
filtered = filtered[
|
||||
filtered["LOSSREASONNAME"].str.strip() == detail_reason.strip()
|
||||
]
|
||||
filtered = _apply_pareto_selection_filter(
|
||||
filtered,
|
||||
pareto_dimension=pareto_dimension,
|
||||
pareto_values=pareto_values,
|
||||
pareto_selections=pareto_selections,
|
||||
)
|
||||
|
||||
rows = []
|
||||
for _, row in filtered.iterrows():
|
||||
rows.append(
|
||||
{
|
||||
"LOT": _normalize_text(row.get("CONTAINERNAME")),
|
||||
"WORKCENTER": _normalize_text(row.get("WORKCENTERNAME")),
|
||||
"WORKCENTER_GROUP": _normalize_text(row.get("WORKCENTER_GROUP")),
|
||||
"Package": _normalize_text(row.get("PRODUCTLINENAME")),
|
||||
"FUNCTION": _normalize_text(row.get("PJ_FUNCTION")),
|
||||
"TYPE": _normalize_text(row.get("PJ_TYPE")),
|
||||
"WORKFLOW": _normalize_text(row.get("WORKFLOWNAME")),
|
||||
"PRODUCT": _normalize_text(row.get("PRODUCTNAME")),
|
||||
"原因": _normalize_text(row.get("LOSSREASONNAME")),
|
||||
"EQUIPMENT": _normalize_text(row.get("EQUIPMENTNAME")),
|
||||
"COMMENT": _normalize_text(row.get("REJECTCOMMENT")),
|
||||
"SPEC": _normalize_text(row.get("SPECNAME")),
|
||||
"REJECT_QTY": _as_int(row.get("REJECT_QTY")),
|
||||
"STANDBY_QTY": _as_int(row.get("STANDBY_QTY")),
|
||||
"QTYTOPROCESS_QTY": _as_int(row.get("QTYTOPROCESS_QTY")),
|
||||
"INPROCESS_QTY": _as_int(row.get("INPROCESS_QTY")),
|
||||
"PROCESSED_QTY": _as_int(row.get("PROCESSED_QTY")),
|
||||
"扣帳報廢量": _as_int(row.get("REJECT_TOTAL_QTY")),
|
||||
"不扣帳報廢量": _as_int(row.get("DEFECT_QTY")),
|
||||
"MOVEIN_QTY": _as_int(row.get("MOVEIN_QTY")),
|
||||
"報廢時間": _to_datetime_str(row.get("TXN_TIME")),
|
||||
"日期": _to_date_str(row.get("TXN_DAY")),
|
||||
}
|
||||
_enforce_interactive_memory_guard(df, operation="CSV匯出", query_id=query_id)
|
||||
try:
|
||||
df = _apply_policy_filters(
|
||||
df,
|
||||
include_excluded_scrap=include_excluded_scrap,
|
||||
exclude_material_scrap=exclude_material_scrap,
|
||||
exclude_pb_diode=exclude_pb_diode,
|
||||
)
|
||||
return rows
|
||||
|
||||
filtered = _apply_supplementary_filters(
|
||||
df,
|
||||
packages=packages,
|
||||
workcenter_groups=workcenter_groups,
|
||||
reasons=reasons,
|
||||
metric_filter=metric_filter,
|
||||
)
|
||||
|
||||
if trend_dates:
|
||||
date_set = set(trend_dates)
|
||||
filtered = filtered[
|
||||
filtered["TXN_DAY"].apply(lambda d: _to_date_str(d) in date_set)
|
||||
]
|
||||
if detail_reason and "LOSSREASONNAME" in filtered.columns:
|
||||
filtered = filtered[
|
||||
filtered["LOSSREASONNAME"].str.strip() == detail_reason.strip()
|
||||
]
|
||||
filtered = _apply_pareto_selection_filter(
|
||||
filtered,
|
||||
pareto_dimension=pareto_dimension,
|
||||
pareto_values=pareto_values,
|
||||
pareto_selections=pareto_selections,
|
||||
)
|
||||
|
||||
rows = []
|
||||
for _, row in filtered.iterrows():
|
||||
rows.append(
|
||||
{
|
||||
"LOT": _normalize_text(row.get("CONTAINERNAME")),
|
||||
"WORKCENTER": _normalize_text(row.get("WORKCENTERNAME")),
|
||||
"WORKCENTER_GROUP": _normalize_text(row.get("WORKCENTER_GROUP")),
|
||||
"Package": _normalize_text(row.get("PRODUCTLINENAME")),
|
||||
"FUNCTION": _normalize_text(row.get("PJ_FUNCTION")),
|
||||
"TYPE": _normalize_text(row.get("PJ_TYPE")),
|
||||
"WORKFLOW": _normalize_text(row.get("WORKFLOWNAME")),
|
||||
"PRODUCT": _normalize_text(row.get("PRODUCTNAME")),
|
||||
"原因": _normalize_text(row.get("LOSSREASONNAME")),
|
||||
"EQUIPMENT": _normalize_text(row.get("EQUIPMENTNAME")),
|
||||
"COMMENT": _normalize_text(row.get("REJECTCOMMENT")),
|
||||
"SPEC": _normalize_text(row.get("SPECNAME")),
|
||||
"REJECT_QTY": _as_int(row.get("REJECT_QTY")),
|
||||
"STANDBY_QTY": _as_int(row.get("STANDBY_QTY")),
|
||||
"QTYTOPROCESS_QTY": _as_int(row.get("QTYTOPROCESS_QTY")),
|
||||
"INPROCESS_QTY": _as_int(row.get("INPROCESS_QTY")),
|
||||
"PROCESSED_QTY": _as_int(row.get("PROCESSED_QTY")),
|
||||
"扣帳報廢量": _as_int(row.get("REJECT_TOTAL_QTY")),
|
||||
"不扣帳報廢量": _as_int(row.get("DEFECT_QTY")),
|
||||
"MOVEIN_QTY": _as_int(row.get("MOVEIN_QTY")),
|
||||
"報廢時間": _to_datetime_str(row.get("TXN_TIME")),
|
||||
"日期": _to_date_str(row.get("TXN_DAY")),
|
||||
}
|
||||
)
|
||||
return rows
|
||||
finally:
|
||||
_maybe_collect_after_interactive_compute()
|
||||
|
||||
809
src/mes_dashboard/services/reject_pareto_materialized.py
Normal file
809
src/mes_dashboard/services/reject_pareto_materialized.py
Normal file
@@ -0,0 +1,809 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""Reject-history Pareto materialized aggregate layer.
|
||||
|
||||
Pre-computes dimension aggregates from cached LOT-level DataFrames so that
|
||||
interactive Pareto requests read from compact snapshots instead of re-scanning
|
||||
detail rows on every filter change.
|
||||
|
||||
Snapshot lifecycle:
|
||||
build_snapshot() → creates 6-dim metric cube from filtered DataFrame
|
||||
store_snapshot() → writes to L1 process cache (keyed by filter context)
|
||||
read_snapshot() → reads from L1 with freshness/version validation
|
||||
evaluate() → runs cross-filter + scope on the in-memory cube
|
||||
|
||||
Feature flags (env):
|
||||
PARETO_MATERIALIZATION_ENABLED – allow building snapshots (default: false)
|
||||
PARETO_MATERIALIZATION_READ_ENABLED – serve from snapshots (default: false)
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
import threading
|
||||
import time
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
from mes_dashboard.core.feature_flags import resolve_bool_flag
|
||||
|
||||
logger = logging.getLogger("mes_dashboard.reject_pareto_materialized")
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Schema & configuration
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
SCHEMA_VERSION = 1
|
||||
|
||||
_SNAPSHOT_TTL_SECONDS = max(60, int(os.getenv("PARETO_SNAPSHOT_TTL_SECONDS", "600")))
|
||||
_SNAPSHOT_MAX_CUBE_ROWS = max(100, int(os.getenv("PARETO_SNAPSHOT_MAX_CUBE_ROWS", "100000")))
|
||||
_SNAPSHOT_MAX_PAYLOAD_BYTES = max(
|
||||
1024 * 1024,
|
||||
int(float(os.getenv("PARETO_SNAPSHOT_MAX_PAYLOAD_MB", "8")) * 1024 * 1024),
|
||||
)
|
||||
_SNAPSHOT_L1_MAX_SIZE = max(1, int(os.getenv("PARETO_SNAPSHOT_L1_MAX_SIZE", "16")))
|
||||
|
||||
# Single-flight build timeout: how long a waiter will block for a concurrent build
|
||||
_BUILD_WAIT_TIMEOUT_SECONDS = float(os.getenv("PARETO_BUILD_WAIT_TIMEOUT", "10"))
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Feature flags (evaluated once at import; restart to change)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
MATERIALIZATION_ENABLED = resolve_bool_flag(
|
||||
"PARETO_MATERIALIZATION_ENABLED", default=False,
|
||||
)
|
||||
MATERIALIZATION_READ_ENABLED = resolve_bool_flag(
|
||||
"PARETO_MATERIALIZATION_READ_ENABLED", default=False,
|
||||
)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# L1 snapshot cache
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
from mes_dashboard.core.cache import ProcessLevelCache, register_process_cache # noqa: E402
|
||||
|
||||
_snapshot_cache = ProcessLevelCache(
|
||||
ttl_seconds=_SNAPSHOT_TTL_SECONDS,
|
||||
max_size=_SNAPSHOT_L1_MAX_SIZE,
|
||||
)
|
||||
register_process_cache("pareto_snapshot", _snapshot_cache, "Pareto Snapshot (L1)")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Telemetry counters (thread-safe)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class _Telemetry:
|
||||
"""In-process counters for materialized Pareto cache behaviour."""
|
||||
|
||||
def __init__(self) -> None:
|
||||
self._lock = threading.Lock()
|
||||
self.hit = 0
|
||||
self.miss = 0
|
||||
self.build = 0
|
||||
self.build_ok = 0
|
||||
self.build_fail = 0
|
||||
self.fallback = 0
|
||||
self.fallback_reasons: Dict[str, int] = {}
|
||||
self.rejected_oversize = 0
|
||||
self.last_build_latency: Optional[float] = None
|
||||
self.last_snapshot_payload_bytes: Optional[int] = None
|
||||
self.last_snapshot_built_at: Optional[float] = None
|
||||
|
||||
# -- recording helpers ---------------------------------------------------
|
||||
|
||||
def record_hit(self) -> None:
|
||||
with self._lock:
|
||||
self.hit += 1
|
||||
|
||||
def record_miss(self) -> None:
|
||||
with self._lock:
|
||||
self.miss += 1
|
||||
|
||||
def record_build_start(self) -> None:
|
||||
with self._lock:
|
||||
self.build += 1
|
||||
|
||||
def record_build_ok(self, latency: float, payload_bytes: int) -> None:
|
||||
with self._lock:
|
||||
self.build_ok += 1
|
||||
self.last_build_latency = latency
|
||||
self.last_snapshot_payload_bytes = payload_bytes
|
||||
self.last_snapshot_built_at = time.time()
|
||||
|
||||
def record_build_fail(self) -> None:
|
||||
with self._lock:
|
||||
self.build_fail += 1
|
||||
|
||||
def record_fallback(self, reason: str) -> None:
|
||||
with self._lock:
|
||||
self.fallback += 1
|
||||
self.fallback_reasons[reason] = self.fallback_reasons.get(reason, 0) + 1
|
||||
|
||||
def record_rejected_oversize(self) -> None:
|
||||
with self._lock:
|
||||
self.rejected_oversize += 1
|
||||
|
||||
# -- snapshot for telemetry API ------------------------------------------
|
||||
|
||||
def snapshot(self) -> Dict[str, Any]:
|
||||
with self._lock:
|
||||
total_reads = self.hit + self.miss
|
||||
return {
|
||||
"hit": self.hit,
|
||||
"miss": self.miss,
|
||||
"hit_rate": round(self.hit / total_reads, 4) if total_reads else 0,
|
||||
"miss_rate": round(self.miss / total_reads, 4) if total_reads else 0,
|
||||
"build": self.build,
|
||||
"build_ok": self.build_ok,
|
||||
"build_fail": self.build_fail,
|
||||
"fallback": self.fallback,
|
||||
"fallback_reasons": dict(self.fallback_reasons),
|
||||
"rejected_oversize": self.rejected_oversize,
|
||||
"last_build_latency_s": self.last_build_latency,
|
||||
"last_snapshot_payload_bytes": self.last_snapshot_payload_bytes,
|
||||
"last_snapshot_built_at": self.last_snapshot_built_at,
|
||||
}
|
||||
|
||||
|
||||
_telemetry = _Telemetry()
|
||||
|
||||
|
||||
def get_telemetry() -> Dict[str, Any]:
|
||||
"""Return current materialization telemetry for operations diagnostics."""
|
||||
return _telemetry.snapshot()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fallback reason codes (stable, for alert correlation)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
FALLBACK_MISS = "miss"
|
||||
FALLBACK_STALE = "stale"
|
||||
FALLBACK_VERSION_MISMATCH = "version_mismatch"
|
||||
FALLBACK_BUILD_FAILED = "build_failed"
|
||||
FALLBACK_BUILD_TIMEOUT = "build_timeout"
|
||||
FALLBACK_DISABLED = "disabled"
|
||||
FALLBACK_OVERSIZE = "oversize"
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Single-flight guard
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_building_events: Dict[str, threading.Event] = {}
|
||||
_building_lock = threading.Lock()
|
||||
|
||||
|
||||
def _acquire_build(key: str) -> Tuple[bool, Optional[threading.Event]]:
|
||||
"""Try to become the builder for *key*.
|
||||
|
||||
Returns (is_builder, event).
|
||||
- (True, event): caller should build, then call event.set().
|
||||
- (False, event): another thread is building; caller should event.wait().
|
||||
"""
|
||||
with _building_lock:
|
||||
existing = _building_events.get(key)
|
||||
if existing is not None:
|
||||
return False, existing
|
||||
event = threading.Event()
|
||||
_building_events[key] = event
|
||||
return True, event
|
||||
|
||||
|
||||
def _release_build(key: str) -> None:
|
||||
"""Signal that the build for *key* is done (success or failure)."""
|
||||
with _building_lock:
|
||||
event = _building_events.pop(key, None)
|
||||
if event is not None:
|
||||
event.set()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Key builder
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def build_snapshot_key(
|
||||
query_id: str,
|
||||
*,
|
||||
include_excluded_scrap: bool = False,
|
||||
exclude_material_scrap: bool = True,
|
||||
exclude_pb_diode: bool = True,
|
||||
packages: Optional[List[str]] = None,
|
||||
workcenter_groups: Optional[List[str]] = None,
|
||||
reasons: Optional[List[str]] = None,
|
||||
trend_dates: Optional[List[str]] = None,
|
||||
) -> str:
|
||||
"""Build a canonical snapshot key encoding query identity and filter context.
|
||||
|
||||
Key structure: ``pareto_mat:<query_id>:<context_hash>:v<schema_version>``
|
||||
|
||||
The *context_hash* is a truncated SHA-256 of a JSON object that captures
|
||||
policy toggles, supplementary filters, trend dates, and schema version so
|
||||
that different filter contexts never collide.
|
||||
"""
|
||||
context = {
|
||||
"qid": query_id,
|
||||
"ies": bool(include_excluded_scrap),
|
||||
"ems": bool(exclude_material_scrap),
|
||||
"epd": bool(exclude_pb_diode),
|
||||
"pkg": sorted(packages) if packages else None,
|
||||
"wcg": sorted(workcenter_groups) if workcenter_groups else None,
|
||||
"rsn": sorted(reasons) if reasons else None,
|
||||
"td": sorted(trend_dates) if trend_dates else None,
|
||||
"sv": SCHEMA_VERSION,
|
||||
}
|
||||
raw = json.dumps(context, sort_keys=True, separators=(",", ":"))
|
||||
digest = hashlib.sha256(raw.encode()).hexdigest()[:16]
|
||||
return f"pareto_mat:{query_id}:{digest}:v{SCHEMA_VERSION}"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Snapshot read / write / validate
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def read_snapshot(key: str) -> Tuple[Optional[Dict[str, Any]], Optional[str]]:
|
||||
"""Read a snapshot from L1 cache, validating freshness and version.
|
||||
|
||||
Returns ``(snapshot, None)`` on hit, or ``(None, fallback_reason)`` on miss.
|
||||
"""
|
||||
payload = _snapshot_cache.get(key)
|
||||
if payload is None:
|
||||
return None, FALLBACK_MISS
|
||||
|
||||
# Version check
|
||||
if payload.get("schema_version") != SCHEMA_VERSION:
|
||||
_snapshot_cache.invalidate(key)
|
||||
return None, FALLBACK_VERSION_MISMATCH
|
||||
|
||||
# Freshness check (belt-and-suspenders; ProcessLevelCache also has TTL)
|
||||
built_at = payload.get("built_at", 0)
|
||||
if time.time() - built_at > _SNAPSHOT_TTL_SECONDS:
|
||||
_snapshot_cache.invalidate(key)
|
||||
return None, FALLBACK_STALE
|
||||
|
||||
return payload, None
|
||||
|
||||
|
||||
def store_snapshot(key: str, snapshot: Dict[str, Any]) -> bool:
|
||||
"""Write a validated snapshot to L1 cache.
|
||||
|
||||
Returns False and logs a warning if the payload exceeds size guardrails.
|
||||
"""
|
||||
# Payload-size guardrail
|
||||
try:
|
||||
payload_bytes = len(json.dumps(snapshot, separators=(",", ":")).encode())
|
||||
except (TypeError, ValueError):
|
||||
logger.warning("Snapshot serialization failed for key=%s", key)
|
||||
return False
|
||||
|
||||
if payload_bytes > _SNAPSHOT_MAX_PAYLOAD_BYTES:
|
||||
logger.warning(
|
||||
"Snapshot payload exceeds guardrail (key=%s, bytes=%d, limit=%d) – rejected",
|
||||
key, payload_bytes, _SNAPSHOT_MAX_PAYLOAD_BYTES,
|
||||
)
|
||||
_telemetry.record_rejected_oversize()
|
||||
return False
|
||||
|
||||
snapshot["_payload_bytes"] = payload_bytes
|
||||
_snapshot_cache.set(key, snapshot)
|
||||
return True
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Snapshot build
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _empty_snapshot() -> Dict[str, Any]:
|
||||
return {
|
||||
"schema_version": SCHEMA_VERSION,
|
||||
"built_at": time.time(),
|
||||
"cube": [],
|
||||
"dim_columns": {},
|
||||
}
|
||||
|
||||
|
||||
def build_snapshot(
|
||||
df, # pd.DataFrame – imported lazily to keep module importable w/o pandas
|
||||
*,
|
||||
include_excluded_scrap: bool = False,
|
||||
exclude_material_scrap: bool = True,
|
||||
exclude_pb_diode: bool = True,
|
||||
packages: Optional[List[str]] = None,
|
||||
workcenter_groups: Optional[List[str]] = None,
|
||||
reasons: Optional[List[str]] = None,
|
||||
trend_dates: Optional[List[str]] = None,
|
||||
) -> Optional[Dict[str, Any]]:
|
||||
"""Build a materialized Pareto snapshot from a cached reject DataFrame.
|
||||
|
||||
Returns the snapshot dict on success, ``None`` if the DataFrame is too
|
||||
large for materialization (size guardrail), or an empty snapshot when
|
||||
filtering yields zero rows.
|
||||
"""
|
||||
import pandas as pd # local import: keep module bootstrap light
|
||||
|
||||
from mes_dashboard.services.reject_dataset_cache import (
|
||||
_DIM_TO_DF_COLUMN,
|
||||
_apply_policy_filters,
|
||||
_apply_supplementary_filters,
|
||||
_normalize_text,
|
||||
_to_date_str,
|
||||
)
|
||||
|
||||
if df is None or (hasattr(df, "empty") and df.empty):
|
||||
return None
|
||||
|
||||
# ---- 1. Apply policy filters ------------------------------------------
|
||||
filtered = _apply_policy_filters(
|
||||
df,
|
||||
include_excluded_scrap=include_excluded_scrap,
|
||||
exclude_material_scrap=exclude_material_scrap,
|
||||
exclude_pb_diode=exclude_pb_diode,
|
||||
)
|
||||
if filtered is None or filtered.empty:
|
||||
return _empty_snapshot()
|
||||
|
||||
# ---- 2. Apply supplementary filters -----------------------------------
|
||||
filtered = _apply_supplementary_filters(
|
||||
filtered,
|
||||
packages=packages,
|
||||
workcenter_groups=workcenter_groups,
|
||||
reasons=reasons,
|
||||
)
|
||||
if filtered is None or filtered.empty:
|
||||
return _empty_snapshot()
|
||||
|
||||
# ---- 3. Apply trend date filter ---------------------------------------
|
||||
if trend_dates and "TXN_DAY" in filtered.columns:
|
||||
date_set = set(trend_dates)
|
||||
filtered = filtered[
|
||||
filtered["TXN_DAY"].apply(lambda d: _to_date_str(d) in date_set)
|
||||
]
|
||||
if filtered.empty:
|
||||
return _empty_snapshot()
|
||||
|
||||
# ---- 4. Build the 6-dimension metric cube -----------------------------
|
||||
dim_cols = [col for dim, col in _DIM_TO_DF_COLUMN.items() if col in filtered.columns]
|
||||
metric_cols = [c for c in ("REJECT_TOTAL_QTY", "DEFECT_QTY", "MOVEIN_QTY") if c in filtered.columns]
|
||||
|
||||
if not dim_cols or not metric_cols:
|
||||
return _empty_snapshot()
|
||||
|
||||
# Normalize dimension values (align with _build_dimension_pareto_items)
|
||||
work = filtered.copy()
|
||||
for col in dim_cols:
|
||||
work[col] = work[col].apply(lambda v: _normalize_text(v) or "(未知)")
|
||||
|
||||
# Aggregate: groupby all 6 dims → sum metrics + count unique lots
|
||||
agg_spec: Dict[str, Any] = {col: "sum" for col in metric_cols}
|
||||
has_container = "CONTAINERID" in work.columns
|
||||
if has_container:
|
||||
agg_spec["CONTAINERID"] = "nunique"
|
||||
|
||||
cube_df = work.groupby(dim_cols, sort=False).agg(agg_spec).reset_index()
|
||||
|
||||
if has_container:
|
||||
cube_df = cube_df.rename(columns={"CONTAINERID": "lot_count"})
|
||||
else:
|
||||
cube_df["lot_count"] = 0
|
||||
|
||||
# ---- 5. Size guardrail ------------------------------------------------
|
||||
if len(cube_df) > _SNAPSHOT_MAX_CUBE_ROWS:
|
||||
logger.warning(
|
||||
"Snapshot cube exceeds row guardrail (rows=%d, limit=%d) – rejected",
|
||||
len(cube_df), _SNAPSHOT_MAX_CUBE_ROWS,
|
||||
)
|
||||
_telemetry.record_rejected_oversize()
|
||||
return None
|
||||
|
||||
# Build dim name → column mapping (used at evaluation time)
|
||||
dim_columns = {dim: col for dim, col in _DIM_TO_DF_COLUMN.items() if col in dim_cols}
|
||||
|
||||
# Convert to list-of-dicts for JSON-safe storage
|
||||
cube_rows = cube_df.to_dict("records")
|
||||
|
||||
return {
|
||||
"schema_version": SCHEMA_VERSION,
|
||||
"built_at": time.time(),
|
||||
"cube": cube_rows,
|
||||
"dim_columns": dim_columns,
|
||||
}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Cross-filter evaluation on the materialized cube
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Dimensions that support top-20 display truncation
|
||||
_PARETO_TOP20_DIMENSIONS = {"type", "workflow", "equipment"}
|
||||
|
||||
|
||||
def evaluate(
    snapshot: Dict[str, Any],
    *,
    metric_mode: str = "reject_total",
    pareto_scope: str = "top80",
    pareto_display_scope: str = "all",
    pareto_selections: Optional[Dict[str, List[str]]] = None,
) -> Dict[str, Any]:
    """Evaluate batch Pareto from a materialized snapshot.

    Runs cross-filter (exclude-self) on the in-memory cube, computes
    metrics, pct, cumPct, and applies scope truncation.

    Args:
        snapshot: Materialized snapshot holding ``cube`` (list of aggregated
            row dicts) and ``dim_columns`` (dimension name -> cube column).
        metric_mode: ``"defect"`` ranks by DEFECT_QTY; any other value ranks
            by REJECT_TOTAL_QTY.
        pareto_scope: ``"top80"`` keeps items whose cumulative share stays
            within 80% (always at least one item); other values keep all.
        pareto_display_scope: ``"top20"`` limits display to 20 items, but
            only for dimensions in ``_PARETO_TOP20_DIMENSIONS``.
        pareto_selections: Per-dimension selected values driving the
            exclude-self cross filter.

    Returns the same ``{"dimensions": {...}, ...}`` structure as the legacy
    ``compute_batch_pareto`` function.
    """
    # Imported at call time (module-level import would presumably create a
    # circular dependency with the service module — confirm before hoisting).
    from mes_dashboard.services.reject_history_service import _as_float, _as_int

    cube = snapshot.get("cube", [])
    dim_columns = snapshot.get("dim_columns", {})
    selections = pareto_selections or {}

    metric_col = "DEFECT_QTY" if metric_mode == "defect" else "REJECT_TOTAL_QTY"

    dimensions: Dict[str, Dict[str, Any]] = {}

    for dim, col in dim_columns.items():
        # Apply cross-filter: keep rows matching all OTHER dimensions' selections
        filtered_rows = cube
        for other_dim, other_col in dim_columns.items():
            if other_dim == dim:
                continue  # exclude-self
            sel_values = selections.get(other_dim)
            if not sel_values:
                continue
            value_set = set(sel_values)
            # other_col is already the resolved cube column for other_dim;
            # re-resolving via col_for_dim() per row was redundant work.
            filtered_rows = [r for r in filtered_rows if r.get(other_col) in value_set]

        # Group by target dimension → sum metrics
        groups: Dict[str, Dict[str, Any]] = {}
        for row in filtered_rows:
            dim_value = row.get(col) or "(未知)"
            if dim_value not in groups:
                groups[dim_value] = {
                    "REJECT_TOTAL_QTY": 0,
                    "DEFECT_QTY": 0,
                    "MOVEIN_QTY": 0,
                    "lot_count": 0,
                }
            g = groups[dim_value]
            for mc in ("REJECT_TOTAL_QTY", "DEFECT_QTY", "MOVEIN_QTY"):
                g[mc] += _as_int(row.get(mc, 0))
            g["lot_count"] += _as_int(row.get("lot_count", 0))

        # Build sorted items (descending by metric). Entries with zero or
        # negative metric value carry no Pareto weight and are dropped.
        items_raw = []
        for dim_value, agg in groups.items():
            mv = _as_float(agg.get(metric_col, 0))
            if mv <= 0:
                continue
            items_raw.append({
                "dim_value": dim_value,
                "metric_value": mv,
                "REJECT_TOTAL_QTY": agg["REJECT_TOTAL_QTY"],
                "DEFECT_QTY": agg["DEFECT_QTY"],
                "MOVEIN_QTY": agg["MOVEIN_QTY"],
                "lot_count": agg["lot_count"],
            })

        items_raw.sort(key=lambda x: x["metric_value"], reverse=True)

        # Compute pct, cumPct ("or 1.0" avoids division by zero on empty sets)
        total_metric = sum(x["metric_value"] for x in items_raw) or 1.0
        cum_pct = 0.0
        items: List[Dict[str, Any]] = []
        for it in items_raw:
            pct = round(it["metric_value"] / total_metric * 100, 4)
            cum_pct = round(cum_pct + pct, 4)
            items.append({
                "reason": it["dim_value"],
                "metric_value": it["metric_value"],
                "MOVEIN_QTY": it["MOVEIN_QTY"],
                "REJECT_TOTAL_QTY": it["REJECT_TOTAL_QTY"],
                "DEFECT_QTY": it["DEFECT_QTY"],
                "count": it["lot_count"],
                "pct": pct,
                "cumPct": cum_pct,
            })

        # Apply pareto_scope: keep the head of the curve up to 80% share,
        # but never return an empty list when items exist.
        if pareto_scope == "top80" and items:
            top_items = [i for i in items if i["cumPct"] <= 80.0]
            if not top_items:
                top_items = [items[0]]
            items = top_items

        # Apply pareto_display_scope (only for dimensions that support it)
        if pareto_display_scope == "top20" and dim in _PARETO_TOP20_DIMENSIONS:
            items = items[:20]

        dimensions[dim] = {
            "items": items,
            "dimension": dim,
            "metric_mode": metric_mode,
        }

    return {
        "dimensions": dimensions,
        "metric_mode": metric_mode,
        "pareto_scope": pareto_scope,
        "pareto_display_scope": pareto_display_scope,
    }
|
||||
|
||||
|
||||
def col_for_dim(dim: str, dim_columns: Dict[str, str]) -> str:
    """Map a Pareto dimension name to its DataFrame column name.

    Falls back to the dimension name itself when no mapping is registered.
    """
    try:
        return dim_columns[dim]
    except KeyError:
        return dim
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Evaluate single-dimension pareto from snapshot
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def evaluate_single_dimension(
    snapshot: Dict[str, Any],
    *,
    dimension: str = "reason",
    metric_mode: str = "reject_total",
    pareto_scope: str = "top80",
) -> Optional[Dict[str, Any]]:
    """Evaluate a single-dimension Pareto from a materialized snapshot.

    Delegates to :func:`evaluate` with no display truncation and no
    cross-filter selections, then extracts the requested dimension.

    Returns the same ``{"items": [...], "dimension": ..., "metric_mode": ...}``
    structure as the legacy ``compute_dimension_pareto`` function.
    """
    batch_result = evaluate(
        snapshot,
        metric_mode=metric_mode,
        pareto_scope=pareto_scope,
        pareto_display_scope="all",
        pareto_selections=None,
    )
    dim_payload = batch_result.get("dimensions", {}).get(dimension)
    if dim_payload is None:
        # Dimension absent from the snapshot – return an empty shell so
        # callers always receive the expected structure.
        return {"items": [], "dimension": dimension, "metric_mode": metric_mode}
    return dim_payload
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Orchestration: read-through with build-on-miss
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _materialized_response(
    snapshot: Dict[str, Any],
    *,
    metric_mode: str,
    pareto_scope: str,
    pareto_display_scope: str,
    pareto_selections: Optional[Dict[str, List[str]]],
    build_latency: Optional[float] = None,
) -> Tuple[Dict[str, Any], Dict[str, Any]]:
    """Evaluate *snapshot* and pair the payload with materialized metadata.

    Shared by every snapshot-hit path in ``try_materialized_batch_pareto``;
    ``build_latency`` is only supplied on the build-then-serve path.
    """
    result = evaluate(
        snapshot,
        metric_mode=metric_mode,
        pareto_scope=pareto_scope,
        pareto_display_scope=pareto_display_scope,
        pareto_selections=pareto_selections,
    )
    meta = _meta(
        source="materialized",
        snapshot_built_at=snapshot.get("built_at"),
        schema_version=snapshot.get("schema_version"),
        payload_bytes=snapshot.get("_payload_bytes"),
        build_latency=build_latency,
    )
    return result, meta


def try_materialized_batch_pareto(
    query_id: str,
    df_loader,  # callable() -> Optional[pd.DataFrame]
    *,
    metric_mode: str = "reject_total",
    pareto_scope: str = "top80",
    pareto_display_scope: str = "all",
    pareto_selections: Optional[Dict[str, List[str]]] = None,
    include_excluded_scrap: bool = False,
    exclude_material_scrap: bool = True,
    exclude_pb_diode: bool = True,
    packages: Optional[List[str]] = None,
    workcenter_groups: Optional[List[str]] = None,
    reasons: Optional[List[str]] = None,
    trend_dates: Optional[List[str]] = None,
) -> Tuple[Optional[Dict[str, Any]], Optional[Dict[str, Any]]]:
    """Try to serve batch-pareto from a materialized snapshot.

    Returns ``(result, meta)`` where *result* is the Pareto payload on hit,
    or ``None`` when the caller should fall back to legacy compute.
    *meta* always contains source/freshness/fallback information.
    """
    if not MATERIALIZATION_READ_ENABLED:
        _telemetry.record_fallback(FALLBACK_DISABLED)
        return None, _meta(source="legacy", fallback_reason=FALLBACK_DISABLED)

    key = build_snapshot_key(
        query_id,
        include_excluded_scrap=include_excluded_scrap,
        exclude_material_scrap=exclude_material_scrap,
        exclude_pb_diode=exclude_pb_diode,
        packages=packages,
        workcenter_groups=workcenter_groups,
        reasons=reasons,
        trend_dates=trend_dates,
    )

    # ---- 1. Try read -------------------------------------------------------
    snapshot, miss_reason = read_snapshot(key)
    if snapshot is not None:
        _telemetry.record_hit()
        return _materialized_response(
            snapshot,
            metric_mode=metric_mode,
            pareto_scope=pareto_scope,
            pareto_display_scope=pareto_display_scope,
            pareto_selections=pareto_selections,
        )

    # ---- 2. Snapshot miss – attempt build ----------------------------------
    _telemetry.record_miss()

    if not MATERIALIZATION_ENABLED:
        _telemetry.record_fallback(FALLBACK_DISABLED)
        return None, _meta(source="legacy", fallback_reason=FALLBACK_DISABLED)

    is_builder, event = _acquire_build(key)

    if not is_builder:
        # Another thread is building – wait for it, then re-check the cache.
        # Both a wait timeout and a post-wait miss fall back to legacy.
        if event is not None and event.wait(timeout=_BUILD_WAIT_TIMEOUT_SECONDS):
            snapshot, _ = read_snapshot(key)
            if snapshot is not None:
                _telemetry.record_hit()
                return _materialized_response(
                    snapshot,
                    metric_mode=metric_mode,
                    pareto_scope=pareto_scope,
                    pareto_display_scope=pareto_display_scope,
                    pareto_selections=pareto_selections,
                )
        _telemetry.record_fallback(FALLBACK_BUILD_TIMEOUT)
        return None, _meta(source="legacy", fallback_reason=FALLBACK_BUILD_TIMEOUT)

    # We are the builder
    _telemetry.record_build_start()
    t0 = time.time()
    try:
        df = df_loader()
        if df is None:
            # Detail data unavailable (e.g. evicted) – cannot build.
            _telemetry.record_build_fail()
            _telemetry.record_fallback(miss_reason or FALLBACK_MISS)
            return None, _meta(source="legacy", fallback_reason=miss_reason or FALLBACK_MISS)

        snapshot = build_snapshot(
            df,
            include_excluded_scrap=include_excluded_scrap,
            exclude_material_scrap=exclude_material_scrap,
            exclude_pb_diode=exclude_pb_diode,
            packages=packages,
            workcenter_groups=workcenter_groups,
            reasons=reasons,
            trend_dates=trend_dates,
        )

        if snapshot is None:
            # build_snapshot rejected the cube (size guardrail).
            _telemetry.record_build_fail()
            _telemetry.record_fallback(FALLBACK_OVERSIZE)
            return None, _meta(source="legacy", fallback_reason=FALLBACK_OVERSIZE)

        latency = time.time() - t0
        stored = store_snapshot(key, snapshot)
        if not stored:
            _telemetry.record_build_fail()
            _telemetry.record_fallback(FALLBACK_OVERSIZE)
            logger.warning(
                "Snapshot build completed but storage rejected (key=%s, latency=%.3fs)",
                key, latency,
            )
            return None, _meta(source="legacy", fallback_reason=FALLBACK_OVERSIZE)

        _telemetry.record_build_ok(latency, snapshot.get("_payload_bytes", 0))
        logger.info(
            "Snapshot built (key=%s, cube_rows=%d, latency=%.3fs, bytes=%d)",
            key, len(snapshot.get("cube", [])), latency,
            snapshot.get("_payload_bytes", 0),
        )

        return _materialized_response(
            snapshot,
            metric_mode=metric_mode,
            pareto_scope=pareto_scope,
            pareto_display_scope=pareto_display_scope,
            pareto_selections=pareto_selections,
            build_latency=latency,
        )
    except Exception:
        _telemetry.record_build_fail()
        _telemetry.record_fallback(FALLBACK_BUILD_FAILED)
        logger.exception("Snapshot build failed (key=%s)", key)
        return None, _meta(source="legacy", fallback_reason=FALLBACK_BUILD_FAILED)
    finally:
        # Always release the single-flight slot, success or failure, so
        # waiters are unblocked and later requests can rebuild.
        _release_build(key)
|
||||
|
||||
|
||||
def try_materialized_dimension_pareto(
    query_id: str,
    df_loader,
    *,
    dimension: str = "reason",
    metric_mode: str = "reject_total",
    pareto_scope: str = "top80",
    include_excluded_scrap: bool = False,
    exclude_material_scrap: bool = True,
    exclude_pb_diode: bool = True,
    packages: Optional[List[str]] = None,
    workcenter_groups: Optional[List[str]] = None,
    reasons: Optional[List[str]] = None,
    trend_dates: Optional[List[str]] = None,
) -> Tuple[Optional[Dict[str, Any]], Optional[Dict[str, Any]]]:
    """Try to serve single-dimension pareto from a materialized snapshot.

    Returns ``(result, meta)`` – same contract as ``try_materialized_batch_pareto``.
    Unlike the batch variant there is no build-on-miss: any miss immediately
    falls back to legacy compute (``df_loader`` is accepted for signature
    symmetry only and is not called here).
    """
    if not MATERIALIZATION_READ_ENABLED:
        # Mirror the batch variant: disabled reads are counted as fallbacks
        # so rollout telemetry stays comparable across both endpoints.
        _telemetry.record_fallback(FALLBACK_DISABLED)
        return None, _meta(source="legacy", fallback_reason=FALLBACK_DISABLED)

    key = build_snapshot_key(
        query_id,
        include_excluded_scrap=include_excluded_scrap,
        exclude_material_scrap=exclude_material_scrap,
        exclude_pb_diode=exclude_pb_diode,
        packages=packages,
        workcenter_groups=workcenter_groups,
        reasons=reasons,
        trend_dates=trend_dates,
    )

    snapshot, miss_reason = read_snapshot(key)
    if snapshot is not None:
        _telemetry.record_hit()
        result = evaluate_single_dimension(
            snapshot,
            dimension=dimension,
            metric_mode=metric_mode,
            pareto_scope=pareto_scope,
        )
        return result, _meta(
            source="materialized",
            snapshot_built_at=snapshot.get("built_at"),
            schema_version=snapshot.get("schema_version"),
            # Expose payload size on hits, consistent with the batch variant.
            payload_bytes=snapshot.get("_payload_bytes"),
        )

    # No build-on-miss for single dimension – fall back to legacy
    _telemetry.record_miss()
    _telemetry.record_fallback(miss_reason or FALLBACK_MISS)
    return None, _meta(source="legacy", fallback_reason=miss_reason or FALLBACK_MISS)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Response metadata helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _meta(
|
||||
*,
|
||||
source: str,
|
||||
fallback_reason: Optional[str] = None,
|
||||
snapshot_built_at: Optional[float] = None,
|
||||
schema_version: Optional[int] = None,
|
||||
payload_bytes: Optional[int] = None,
|
||||
build_latency: Optional[float] = None,
|
||||
) -> Dict[str, Any]:
|
||||
"""Build response metadata dict for materialization context."""
|
||||
m: Dict[str, Any] = {"pareto_source": source}
|
||||
if fallback_reason:
|
||||
m["pareto_fallback_reason"] = fallback_reason
|
||||
if snapshot_built_at is not None:
|
||||
m["pareto_snapshot_built_at"] = snapshot_built_at
|
||||
m["pareto_snapshot_age_s"] = round(time.time() - snapshot_built_at, 1)
|
||||
if schema_version is not None:
|
||||
m["pareto_schema_version"] = schema_version
|
||||
if payload_bytes is not None:
|
||||
m["pareto_snapshot_bytes"] = payload_bytes
|
||||
if build_latency is not None:
|
||||
m["pareto_build_latency_s"] = round(build_latency, 3)
|
||||
return m
|
||||
@@ -283,6 +283,21 @@ def test_compute_batch_pareto_applies_cross_filter_exclude_self(monkeypatch):
|
||||
assert [item["reason"] for item in package_items] == ["PKG-2"]
|
||||
|
||||
|
||||
def test_compute_batch_pareto_memory_guard_rejects_large_cached_dataset(monkeypatch):
    # The cached DataFrame is made to report 128 MB while the derive guard is
    # capped at 64 MB, so the interactive batch-pareto compute must refuse to run.
    df = _build_detail_filter_df()

    monkeypatch.setattr(cache_svc, "_get_cached_df", lambda _query_id: df)
    # Force the reported memory footprint above the configured input ceiling.
    monkeypatch.setattr(cache_svc, "_df_memory_mb", lambda _df: 128.0)
    monkeypatch.setattr(cache_svc, "_REJECT_DERIVE_MAX_INPUT_MB", 64)

    # The guard raises MemoryError whose message names the 64 MB limit.
    with pytest.raises(MemoryError, match="超過 64 MB 上限"):
        cache_svc.compute_batch_pareto(
            query_id="qid-batch-mem-guard",
            metric_mode="reject_total",
            pareto_scope="all",
        )
|
||||
|
||||
|
||||
def test_apply_pareto_selection_filter_supports_multi_dimension_and_logic():
|
||||
df = _build_detail_filter_df()
|
||||
|
||||
|
||||
@@ -304,6 +304,17 @@ class TestRejectHistoryApiRoutes(TestRejectHistoryRoutesBase):
|
||||
self.assertFalse(payload['success'])
|
||||
self.assertEqual(payload['error'], 'cache_miss')
|
||||
|
||||
    @patch('mes_dashboard.routes.reject_history_routes.compute_batch_pareto')
    def test_batch_pareto_memory_guard_returns_400(self, mock_batch_pareto):
        # A MemoryError raised by the compute layer must surface as an HTTP 400
        # with the guard message propagated in the error payload.
        mock_batch_pareto.side_effect = MemoryError('目前服務記憶體負載較高')

        response = self.client.get('/api/reject-history/batch-pareto?query_id=qid-oom')
        payload = json.loads(response.data)

        self.assertEqual(response.status_code, 400)
        self.assertFalse(payload['success'])
        self.assertIn('記憶體負載較高', payload['error'])
|
||||
|
||||
@patch('mes_dashboard.routes.reject_history_routes.apply_view')
|
||||
def test_view_passes_pareto_multi_select_filters(self, mock_apply_view):
|
||||
mock_apply_view.return_value = {
|
||||
@@ -483,5 +494,134 @@ class TestRejectHistoryApiRoutes(TestRejectHistoryRoutesBase):
|
||||
self.assertIn('text/csv', response.headers.get('Content-Type', ''))
|
||||
|
||||
|
||||
# ================================================================
|
||||
# 5.3 – Pareto materialization metadata & fallback route tests
|
||||
# ================================================================
|
||||
|
||||
    @patch('mes_dashboard.routes.reject_history_routes.compute_batch_pareto')
    def test_batch_pareto_exposes_materialized_metadata_on_hit(self, mock_batch_pareto):
        """When materialized snapshot serves the request, meta must appear."""
        # Simulate a snapshot hit: the service returns dimension data plus an
        # internal '_pareto_meta' envelope describing the materialized source.
        mock_batch_pareto.return_value = {
            'dimensions': {
                'reason': {'items': [], 'dimension': 'reason', 'metric_mode': 'reject_total'},
            },
            '_pareto_meta': {
                'pareto_source': 'materialized',
                'pareto_schema_version': 1,
                'pareto_snapshot_built_at': 1700000000.0,
                'pareto_snapshot_age_s': 5.0,
            },
        }

        response = self.client.get('/api/reject-history/batch-pareto?query_id=qid-mat')
        payload = json.loads(response.data)

        self.assertEqual(response.status_code, 200)
        self.assertTrue(payload['success'])
        # _pareto_meta should be extracted to top-level 'meta'
        self.assertIn('meta', payload)
        self.assertEqual(payload['meta']['pareto_source'], 'materialized')
        self.assertEqual(payload['meta']['pareto_schema_version'], 1)
        # _pareto_meta should NOT be in data
        self.assertNotIn('_pareto_meta', payload['data'])
|
||||
|
||||
    @patch('mes_dashboard.routes.reject_history_routes.compute_batch_pareto')
    def test_batch_pareto_exposes_fallback_metadata(self, mock_batch_pareto):
        """When falling back to legacy, meta must include fallback reason."""
        # Simulate the legacy-compute path: the service still attaches a
        # '_pareto_meta' envelope, but with source 'legacy' and a reason code.
        mock_batch_pareto.return_value = {
            'dimensions': {
                'reason': {'items': [], 'dimension': 'reason', 'metric_mode': 'reject_total'},
            },
            '_pareto_meta': {
                'pareto_source': 'legacy',
                'pareto_fallback_reason': 'miss',
            },
        }

        response = self.client.get('/api/reject-history/batch-pareto?query_id=qid-fb')
        payload = json.loads(response.data)

        self.assertEqual(response.status_code, 200)
        self.assertTrue(payload['success'])
        # The fallback reason must surface unchanged in the response meta.
        self.assertIn('meta', payload)
        self.assertEqual(payload['meta']['pareto_source'], 'legacy')
        self.assertEqual(payload['meta']['pareto_fallback_reason'], 'miss')
|
||||
|
||||
    @patch('mes_dashboard.routes.reject_history_routes.compute_batch_pareto')
    def test_batch_pareto_no_meta_when_absent(self, mock_batch_pareto):
        """When no _pareto_meta is in the result, response has no meta key."""
        # Legacy results that never touched the materialization layer carry
        # no '_pareto_meta'; the route must not fabricate an empty meta key.
        mock_batch_pareto.return_value = {
            'dimensions': {
                'reason': {'items': []},
            },
        }

        response = self.client.get('/api/reject-history/batch-pareto?query_id=qid-nometa')
        payload = json.loads(response.data)

        self.assertEqual(response.status_code, 200)
        self.assertTrue(payload['success'])
        self.assertNotIn('meta', payload)
|
||||
|
||||
    @patch('mes_dashboard.routes.reject_history_routes.compute_dimension_pareto')
    def test_reason_pareto_exposes_materialized_metadata(self, mock_dim_pareto):
        """reason-pareto with query_id should expose pareto metadata."""
        # The single-dimension endpoint uses the same '_pareto_meta' envelope
        # contract as batch-pareto when served from a snapshot.
        mock_dim_pareto.return_value = {
            'items': [{'reason': 'A', 'metric_value': 10, 'pct': 100, 'cumPct': 100}],
            'dimension': 'reason',
            'metric_mode': 'reject_total',
            '_pareto_meta': {
                'pareto_source': 'materialized',
                'pareto_schema_version': 1,
            },
        }

        response = self.client.get(
            '/api/reject-history/reason-pareto'
            '?start_date=2026-01-01&end_date=2026-01-31'
            '&query_id=qid-rp'
        )
        payload = json.loads(response.data)

        self.assertEqual(response.status_code, 200)
        self.assertTrue(payload['success'])
        self.assertIn('meta', payload)
        self.assertEqual(payload['meta']['pareto_source'], 'materialized')
|
||||
|
||||
    @patch('mes_dashboard.routes.reject_history_routes.compute_batch_pareto')
    def test_batch_pareto_stale_fallback_reason(self, mock_batch_pareto):
        """Stale snapshot fallback must carry 'stale' reason code."""
        # TTL-expired snapshots fall back to legacy compute; the 'stale'
        # reason code must pass through to the response meta unchanged.
        mock_batch_pareto.return_value = {
            'dimensions': {'reason': {'items': []}},
            '_pareto_meta': {
                'pareto_source': 'legacy',
                'pareto_fallback_reason': 'stale',
            },
        }

        response = self.client.get('/api/reject-history/batch-pareto?query_id=qid-stale')
        payload = json.loads(response.data)

        self.assertEqual(response.status_code, 200)
        self.assertEqual(payload['meta']['pareto_fallback_reason'], 'stale')
|
||||
|
||||
    @patch('mes_dashboard.routes.reject_history_routes.compute_batch_pareto')
    def test_batch_pareto_build_failed_fallback_reason(self, mock_batch_pareto):
        """Build failure fallback must carry 'build_failed' reason code."""
        # A failed snapshot build also falls back to legacy compute; its
        # 'build_failed' reason code must surface in the response meta.
        mock_batch_pareto.return_value = {
            'dimensions': {'reason': {'items': []}},
            '_pareto_meta': {
                'pareto_source': 'legacy',
                'pareto_fallback_reason': 'build_failed',
            },
        }

        response = self.client.get('/api/reject-history/batch-pareto?query_id=qid-bf')
        payload = json.loads(response.data)

        self.assertEqual(response.status_code, 200)
        self.assertEqual(payload['meta']['pareto_fallback_reason'], 'build_failed')
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Allow running this test module directly without a separate test runner.
    unittest.main()
|
||||
|
||||
679
tests/test_reject_pareto_materialized.py
Normal file
679
tests/test_reject_pareto_materialized.py
Normal file
@@ -0,0 +1,679 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""Tests for reject_pareto_materialized module.
|
||||
|
||||
Covers:
|
||||
5.1 – key isolation, schema version invalidation, single-flight, guardrails
|
||||
5.2 – parity tests: materialized vs legacy cross-filter results
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import threading
|
||||
import time
|
||||
from typing import Any, Dict, List, Optional
|
||||
from unittest import mock
|
||||
|
||||
import pandas as pd
|
||||
import pytest
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _build_sample_df(n_lots: int = 10) -> pd.DataFrame:
|
||||
"""Build a realistic reject-history lot-level DataFrame for testing."""
|
||||
rows = []
|
||||
reasons = ["001_CRACK", "002_CONTAMINATION", "003_SCRATCH"]
|
||||
packages = ["PKG_A", "PKG_B"]
|
||||
types = ["NORMAL", "REWORK"]
|
||||
workflows = ["WF_MAIN", "WF_ALT"]
|
||||
workcenters = ["WC_GRP_1", "WC_GRP_2"]
|
||||
equipments = ["EQ_001", "EQ_002", "EQ_003"]
|
||||
for i in range(n_lots):
|
||||
lot = f"LOT{i:04d}"
|
||||
# Each lot has 1-3 reject events
|
||||
for j in range(1 + i % 3):
|
||||
rows.append({
|
||||
"CONTAINERID": lot,
|
||||
"LOSSREASONNAME": reasons[(i + j) % len(reasons)],
|
||||
"PRODUCTLINENAME": packages[i % len(packages)],
|
||||
"PJ_TYPE": types[j % len(types)],
|
||||
"WORKFLOWNAME": workflows[i % len(workflows)],
|
||||
"WORKCENTER_GROUP": workcenters[i % len(workcenters)],
|
||||
"PRIMARY_EQUIPMENTNAME": equipments[(i + j) % len(equipments)],
|
||||
"REJECT_TOTAL_QTY": 10 + i,
|
||||
"DEFECT_QTY": 5 + j,
|
||||
"MOVEIN_QTY": 100 + i * 10,
|
||||
"SCRAP_OBJECTTYPE": "LOT",
|
||||
"LOSSREASON_CODE": f"00{j + 1}_CODE",
|
||||
"TXN_DAY": f"2026-01-{(i % 28) + 1:02d}",
|
||||
})
|
||||
return pd.DataFrame(rows)
|
||||
|
||||
|
||||
# ============================================================
|
||||
# 5.1 – Key isolation
|
||||
# ============================================================
|
||||
|
||||
class TestKeyIsolation:
    """Snapshot keys must uniquely reflect every input of the filter context."""

    def test_same_params_produce_same_key(self):
        from mes_dashboard.services.reject_pareto_materialized import build_snapshot_key

        key_a = build_snapshot_key("Q1", packages=["A", "B"])
        key_b = build_snapshot_key("Q1", packages=["A", "B"])
        assert key_a == key_b

    def test_different_query_id_produces_different_key(self):
        from mes_dashboard.services.reject_pareto_materialized import build_snapshot_key

        assert build_snapshot_key("Q1") != build_snapshot_key("Q2")

    def test_different_policy_toggle_produces_different_key(self):
        from mes_dashboard.services.reject_pareto_materialized import build_snapshot_key

        key_off = build_snapshot_key("Q1", include_excluded_scrap=False)
        key_on = build_snapshot_key("Q1", include_excluded_scrap=True)
        assert key_off != key_on

    def test_different_supplementary_filter_produces_different_key(self):
        from mes_dashboard.services.reject_pareto_materialized import build_snapshot_key

        narrow = build_snapshot_key("Q1", packages=["A"])
        wide = build_snapshot_key("Q1", packages=["A", "B"])
        assert narrow != wide

    def test_different_trend_dates_produces_different_key(self):
        from mes_dashboard.services.reject_pareto_materialized import build_snapshot_key

        day_one = build_snapshot_key("Q1", trend_dates=["2026-01-01"])
        day_two = build_snapshot_key("Q1", trend_dates=["2026-01-02"])
        assert day_one != day_two

    def test_filter_order_does_not_affect_key(self):
        from mes_dashboard.services.reject_pareto_materialized import build_snapshot_key

        unsorted_key = build_snapshot_key("Q1", packages=["B", "A"])
        sorted_key = build_snapshot_key("Q1", packages=["A", "B"])
        assert unsorted_key == sorted_key

    def test_none_vs_empty_treated_equally(self):
        from mes_dashboard.services.reject_pareto_materialized import build_snapshot_key

        assert build_snapshot_key("Q1", packages=None) == build_snapshot_key("Q1", packages=None)

    def test_key_includes_schema_version(self):
        from mes_dashboard.services.reject_pareto_materialized import (
            SCHEMA_VERSION,
            build_snapshot_key,
        )

        assert f":v{SCHEMA_VERSION}" in build_snapshot_key("Q1")
|
||||
|
||||
# ============================================================
|
||||
# 5.1 – Schema version invalidation
|
||||
# ============================================================
|
||||
|
||||
class TestSchemaVersionInvalidation:
    """Snapshots from prior schema versions must not be treated as valid hits."""

    def test_version_mismatch_returns_miss(self):
        from mes_dashboard.services.reject_pareto_materialized import (
            FALLBACK_VERSION_MISMATCH,
            SCHEMA_VERSION,
            _snapshot_cache,
            read_snapshot,
        )

        key = "test_version_key"
        outdated_entry = {
            "schema_version": SCHEMA_VERSION - 1,
            "built_at": time.time(),
            "cube": [],
            "dim_columns": {},
        }
        _snapshot_cache.set(key, outdated_entry)

        snapshot, reason = read_snapshot(key)
        assert snapshot is None
        assert reason == FALLBACK_VERSION_MISMATCH

    def test_current_version_returns_hit(self):
        from mes_dashboard.services.reject_pareto_materialized import (
            SCHEMA_VERSION,
            _snapshot_cache,
            read_snapshot,
        )

        key = "test_current_version_key"
        current_entry = {
            "schema_version": SCHEMA_VERSION,
            "built_at": time.time(),
            "cube": [],
            "dim_columns": {},
        }
        _snapshot_cache.set(key, current_entry)

        snapshot, reason = read_snapshot(key)
        assert snapshot is not None
        assert reason is None
|
||||
|
||||
|
||||
# ============================================================
|
||||
# 5.1 – Single-flight guard
|
||||
# ============================================================
|
||||
|
||||
class TestSingleFlightGuard:
    """Concurrent builds for the same key must serialize via single-flight."""

    def test_first_caller_is_builder(self):
        from mes_dashboard.services.reject_pareto_materialized import (
            _acquire_build,
            _release_build,
        )

        # The first acquirer of a key becomes the builder and still receives
        # an event object (used later to signal waiters on release).
        is_builder, event = _acquire_build("sf_test_1")
        assert is_builder is True
        assert event is not None
        _release_build("sf_test_1")

    def test_second_caller_waits(self):
        from mes_dashboard.services.reject_pareto_materialized import (
            _acquire_build,
            _release_build,
        )

        is_builder, _ = _acquire_build("sf_test_2")
        assert is_builder is True

        # While the first build is in flight, a second acquire must be told
        # to wait on the shared event rather than start another build.
        is_second, event = _acquire_build("sf_test_2")
        assert is_second is False
        assert event is not None

        _release_build("sf_test_2")
        # Event should be set after release
        assert event.is_set()

    def test_concurrent_builds_only_one_executes(self):
        from mes_dashboard.services.reject_pareto_materialized import (
            _acquire_build,
            _release_build,
        )

        key = "sf_concurrent_test"
        # (thread_id, is_builder) tuples appended from both worker threads.
        results = []

        def attempt_build(thread_id):
            is_builder, event = _acquire_build(key)
            results.append((thread_id, is_builder))
            if is_builder:
                time.sleep(0.05)  # simulate build
                _release_build(key)
            elif event is not None:
                event.wait(timeout=2)

        t1 = threading.Thread(target=attempt_build, args=(1,))
        t2 = threading.Thread(target=attempt_build, args=(2,))
        t1.start()
        time.sleep(0.01)  # ensure t1 starts first
        t2.start()
        t1.join(timeout=3)
        t2.join(timeout=3)

        # Exactly one thread may win the builder role; the other must wait.
        builders = [r for r in results if r[1]]
        waiters = [r for r in results if not r[1]]
        assert len(builders) == 1
        assert len(waiters) == 1
|
||||
|
||||
|
||||
# ============================================================
|
||||
# 5.1 – Guardrail enforcement
|
||||
# ============================================================
|
||||
|
||||
class TestGuardrailEnforcement:
    """TTL and size guardrails must reject oversized snapshots."""

    def test_oversized_cube_returns_none(self):
        from mes_dashboard.services.reject_pareto_materialized import build_snapshot

        # Every row carries unique values in every dimension, so the cube
        # keeps one row per input row and easily exceeds a tiny row limit.
        df = pd.DataFrame([
            {
                "CONTAINERID": f"LOT{i}",
                "LOSSREASONNAME": f"{i:03d}_REASON",
                "PRODUCTLINENAME": f"PKG_{i}",
                "PJ_TYPE": f"TYPE_{i}",
                "WORKFLOWNAME": f"WF_{i}",
                "WORKCENTER_GROUP": f"WC_{i}",
                "PRIMARY_EQUIPMENTNAME": f"EQ_{i}",
                "REJECT_TOTAL_QTY": 10,
                "DEFECT_QTY": 5,
                "MOVEIN_QTY": 100,
                "SCRAP_OBJECTTYPE": "LOT",
            }
            for i in range(500)
        ])

        with mock.patch(
            "mes_dashboard.services.reject_pareto_materialized._SNAPSHOT_MAX_CUBE_ROWS",
            10,
        ):
            assert build_snapshot(df) is None

    def test_oversized_payload_rejected_on_store(self):
        from mes_dashboard.services.reject_pareto_materialized import store_snapshot

        bloated_snapshot = {
            "schema_version": 1,
            "built_at": time.time(),
            "cube": [{"x": "y" * 10000}] * 1000,
            "dim_columns": {},
        }

        with mock.patch(
            "mes_dashboard.services.reject_pareto_materialized._SNAPSHOT_MAX_PAYLOAD_BYTES",
            1024,
        ):
            assert store_snapshot("test_oversize_key", bloated_snapshot) is False

    def test_stale_snapshot_returns_miss(self):
        from mes_dashboard.services.reject_pareto_materialized import (
            FALLBACK_STALE,
            SCHEMA_VERSION,
            _snapshot_cache,
            read_snapshot,
        )

        key = "test_stale_key"
        _snapshot_cache.set(key, {
            "schema_version": SCHEMA_VERSION,
            "built_at": time.time() - 999999,  # far beyond any sane TTL
            "cube": [],
            "dim_columns": {},
        })

        snapshot, reason = read_snapshot(key)
        assert snapshot is None
        assert reason == FALLBACK_STALE
|
||||
|
||||
|
||||
# ============================================================
# 5.1 – Build & evaluate smoke test
# ============================================================


class TestBuildAndEvaluate:
    """Snapshot build should produce a valid cube and evaluate correctly."""

    def test_build_produces_valid_snapshot(self):
        from mes_dashboard.services.reject_pareto_materialized import (
            SCHEMA_VERSION,
            build_snapshot,
        )

        snapshot = build_snapshot(_build_sample_df(20))

        assert snapshot is not None
        assert snapshot["schema_version"] == SCHEMA_VERSION
        cube = snapshot["cube"]
        assert isinstance(cube, list)
        assert len(cube) > 0
        assert isinstance(snapshot["dim_columns"], dict)

    def test_build_empty_df_returns_empty_snapshot(self):
        from mes_dashboard.services.reject_pareto_materialized import build_snapshot

        # An empty frame cannot yield a usable cube.
        assert build_snapshot(pd.DataFrame()) is None

    def test_evaluate_returns_all_six_dimensions(self):
        from mes_dashboard.services.reject_pareto_materialized import (
            build_snapshot,
            evaluate,
        )

        snapshot = build_snapshot(_build_sample_df(20))
        outcome = evaluate(snapshot, metric_mode="reject_total", pareto_scope="all")

        dimensions = outcome["dimensions"]
        assert set(dimensions) == {
            "reason", "package", "type", "workflow", "workcenter", "equipment",
        }
        for name, payload in dimensions.items():
            assert "items" in payload
            assert payload["dimension"] == name
            assert payload["metric_mode"] == "reject_total"

    def test_evaluate_top80_filters_items(self):
        from mes_dashboard.services.reject_pareto_materialized import (
            build_snapshot,
            evaluate,
        )

        snapshot = build_snapshot(_build_sample_df(50))

        full_dims = evaluate(snapshot, pareto_scope="all")["dimensions"]
        top80_dims = evaluate(snapshot, pareto_scope="top80")["dimensions"]

        # The top80 scope may only drop items, never add them.
        for name, payload in full_dims.items():
            assert len(top80_dims[name]["items"]) <= len(payload["items"])

    def test_evaluate_top20_display_scope(self):
        from mes_dashboard.services.reject_pareto_materialized import (
            build_snapshot,
            evaluate,
        )

        # 100 distinct PJ_TYPE values so the "type" dimension exceeds 20 items.
        records = [
            {
                "CONTAINERID": f"LOT{idx}",
                "LOSSREASONNAME": f"{idx:03d}_REASON",
                "PRODUCTLINENAME": "PKG_A",
                "PJ_TYPE": f"TYPE_{idx:03d}",
                "WORKFLOWNAME": "WF_MAIN",
                "WORKCENTER_GROUP": "WC_1",
                "PRIMARY_EQUIPMENTNAME": "EQ_1",
                "REJECT_TOTAL_QTY": 10 + idx,
                "DEFECT_QTY": 5,
                "MOVEIN_QTY": 100,
                "SCRAP_OBJECTTYPE": "LOT",
            }
            for idx in range(100)
        ]
        snapshot = build_snapshot(pd.DataFrame(records))

        outcome = evaluate(snapshot, pareto_scope="all", pareto_display_scope="top20")
        # "type" is in _PARETO_TOP20_DIMENSIONS, so it must be truncated.
        assert len(outcome["dimensions"]["type"]["items"]) <= 20


# ============================================================
# 5.2 – Parity tests: materialized vs legacy
# ============================================================


class TestMaterializedVsLegacyParity:
    """Materialized evaluation must produce the same results as legacy
    DataFrame-based computation for metrics and ranking."""

    @staticmethod
    def _normalize_dims(df: pd.DataFrame) -> None:
        """Normalize dimension columns in-place so the legacy path sees the
        same values that build_snapshot produces (shared by all parity tests)."""
        from mes_dashboard.services.reject_dataset_cache import (
            _DIM_TO_DF_COLUMN,
            _normalize_text,
        )

        for col in _DIM_TO_DF_COLUMN.values():
            if col in df.columns:
                df[col] = df[col].apply(lambda v: _normalize_text(v) or "(未知)")

    def _compute_legacy_batch_pareto(
        self,
        df: pd.DataFrame,
        *,
        metric_mode: str = "reject_total",
        pareto_scope: str = "all",
        pareto_display_scope: str = "all",
        pareto_selections: Optional[Dict[str, List[str]]] = None,
    ) -> Dict[str, Any]:
        """Compute batch pareto using the legacy DataFrame path."""
        from mes_dashboard.services.reject_dataset_cache import (
            _DIM_TO_DF_COLUMN,
            _PARETO_DIMENSIONS,
            _PARETO_TOP20_DIMENSIONS,
            _apply_cross_filter,
            _build_dimension_pareto_items,
            _normalize_pareto_selections,
        )

        normalized_selections = _normalize_pareto_selections(pareto_selections)

        dimensions: Dict[str, Dict[str, Any]] = {}
        for dim in _PARETO_DIMENSIONS:
            dim_col = _DIM_TO_DF_COLUMN.get(dim)
            # exclude_dim implements "exclude self": a dimension's own
            # selections must not filter its own pareto items.
            dim_df = _apply_cross_filter(df, normalized_selections, exclude_dim=dim)
            items = _build_dimension_pareto_items(
                dim_df,
                dim_col=dim_col,
                metric_mode=metric_mode,
                pareto_scope=pareto_scope,
            )
            if pareto_display_scope == "top20" and dim in _PARETO_TOP20_DIMENSIONS:
                items = items[:20]
            dimensions[dim] = {
                "items": items,
                "dimension": dim,
                "metric_mode": metric_mode,
            }
        return {"dimensions": dimensions}

    def test_no_cross_filter_parity(self):
        """Without cross-filter, materialized and legacy must match on metrics."""
        from mes_dashboard.services.reject_pareto_materialized import (
            build_snapshot,
            evaluate,
        )

        df = _build_sample_df(30)
        # Normalize dimension values in the df to match build behavior
        self._normalize_dims(df)

        snapshot = build_snapshot(df)
        mat_result = evaluate(snapshot, metric_mode="reject_total", pareto_scope="all")
        legacy_result = self._compute_legacy_batch_pareto(
            df, metric_mode="reject_total", pareto_scope="all",
        )

        for dim in mat_result["dimensions"]:
            mat_items = mat_result["dimensions"][dim]["items"]
            leg_items = legacy_result["dimensions"][dim]["items"]

            # Same number of items
            assert len(mat_items) == len(leg_items), (
                f"Dimension {dim}: materialized has {len(mat_items)} items, legacy has {len(leg_items)}"
            )

            # Same ordering (by metric_value descending)
            for i, (mat, leg) in enumerate(zip(mat_items, leg_items)):
                assert mat["reason"] == leg["reason"], (
                    f"Dim {dim}, item {i}: names differ: {mat['reason']} vs {leg['reason']}"
                )
                assert mat["metric_value"] == leg["metric_value"], (
                    f"Dim {dim}, item {i}: metric_value differ: {mat['metric_value']} vs {leg['metric_value']}"
                )
                assert mat["REJECT_TOTAL_QTY"] == leg["REJECT_TOTAL_QTY"], (
                    f"Dim {dim}, item {i}: REJECT_TOTAL_QTY differ"
                )
                assert mat["DEFECT_QTY"] == leg["DEFECT_QTY"], (
                    f"Dim {dim}, item {i}: DEFECT_QTY differ"
                )
                # Percentages are floats; allow a hundredth-of-a-point tolerance.
                assert abs(mat["pct"] - leg["pct"]) < 0.01, (
                    f"Dim {dim}, item {i}: pct differ: {mat['pct']} vs {leg['pct']}"
                )
                assert abs(mat["cumPct"] - leg["cumPct"]) < 0.01, (
                    f"Dim {dim}, item {i}: cumPct differ: {mat['cumPct']} vs {leg['cumPct']}"
                )

    def test_cross_filter_metric_parity(self):
        """With cross-filter, materialized metrics must match legacy metrics."""
        from mes_dashboard.services.reject_pareto_materialized import (
            build_snapshot,
            evaluate,
        )

        df = _build_sample_df(30)
        # Normalize dimension values
        self._normalize_dims(df)

        selections = {"reason": ["001_CRACK"], "type": ["NORMAL"]}

        snapshot = build_snapshot(df)
        mat_result = evaluate(
            snapshot,
            metric_mode="reject_total",
            pareto_scope="all",
            pareto_selections=selections,
        )
        legacy_result = self._compute_legacy_batch_pareto(
            df, metric_mode="reject_total", pareto_scope="all", pareto_selections=selections,
        )

        for dim in mat_result["dimensions"]:
            mat_items = mat_result["dimensions"][dim]["items"]
            leg_items = legacy_result["dimensions"][dim]["items"]

            # Build lookup by name; cross-filtering may reorder, so compare by key.
            mat_by_name = {it["reason"]: it for it in mat_items}
            leg_by_name = {it["reason"]: it for it in leg_items}

            # Verify same set of dimension values
            assert set(mat_by_name.keys()) == set(leg_by_name.keys()), (
                f"Dim {dim}: names differ. Mat: {set(mat_by_name.keys())}, "
                f"Leg: {set(leg_by_name.keys())}"
            )

            # Verify metric values match
            for name in mat_by_name:
                mat = mat_by_name[name]
                leg = leg_by_name[name]
                assert mat["metric_value"] == leg["metric_value"], (
                    f"Dim {dim}, {name}: metric_value differ: {mat['metric_value']} vs {leg['metric_value']}"
                )
                assert mat["REJECT_TOTAL_QTY"] == leg["REJECT_TOTAL_QTY"], (
                    f"Dim {dim}, {name}: REJECT_TOTAL_QTY differ"
                )

    def test_defect_metric_mode_parity(self):
        """Defect metric mode must produce same results in both paths."""
        from mes_dashboard.services.reject_pareto_materialized import (
            build_snapshot,
            evaluate,
        )

        df = _build_sample_df(20)
        self._normalize_dims(df)

        snapshot = build_snapshot(df)
        mat_result = evaluate(snapshot, metric_mode="defect", pareto_scope="all")
        legacy_result = self._compute_legacy_batch_pareto(df, metric_mode="defect", pareto_scope="all")

        for dim in mat_result["dimensions"]:
            mat_by_name = {
                it["reason"]: it for it in mat_result["dimensions"][dim]["items"]
            }
            leg_by_name = {
                it["reason"]: it for it in legacy_result["dimensions"][dim]["items"]
            }

            assert set(mat_by_name.keys()) == set(leg_by_name.keys()), (
                f"Dim {dim}: names differ in defect mode"
            )

            for name in mat_by_name:
                assert mat_by_name[name]["DEFECT_QTY"] == leg_by_name[name]["DEFECT_QTY"], (
                    f"Dim {dim}, {name}: DEFECT_QTY differ in defect mode"
                )


# ============================================================
# 5.2 – Telemetry
# ============================================================


class TestTelemetry:
    """Telemetry counters must be operational."""

    def test_telemetry_snapshot_returns_dict(self):
        from mes_dashboard.services.reject_pareto_materialized import get_telemetry

        telemetry = get_telemetry()
        assert isinstance(telemetry, dict)
        # Every expected counter key must be present in the snapshot.
        expected_keys = ("hit", "miss", "build", "fallback", "fallback_reasons", "hit_rate")
        for key in expected_keys:
            assert key in telemetry

    def test_fallback_reason_codes_are_stable_strings(self):
        from mes_dashboard.services.reject_pareto_materialized import (
            FALLBACK_BUILD_FAILED,
            FALLBACK_BUILD_TIMEOUT,
            FALLBACK_DISABLED,
            FALLBACK_MISS,
            FALLBACK_OVERSIZE,
            FALLBACK_STALE,
            FALLBACK_VERSION_MISMATCH,
        )

        reason_codes = (
            FALLBACK_MISS,
            FALLBACK_STALE,
            FALLBACK_VERSION_MISMATCH,
            FALLBACK_BUILD_FAILED,
            FALLBACK_BUILD_TIMEOUT,
            FALLBACK_DISABLED,
            FALLBACK_OVERSIZE,
        )
        # Each code must be a non-empty string (stable identifiers for logs).
        for code in reason_codes:
            assert isinstance(code, str)
            assert len(code) > 0


# ============================================================
# Feature flag behavior
# ============================================================


class TestFeatureFlagBehavior:
    """Materialization must respect feature flags."""

    def test_disabled_returns_none_with_fallback_reason(self):
        from mes_dashboard.services.reject_pareto_materialized import (
            FALLBACK_DISABLED,
            try_materialized_batch_pareto,
        )

        read_flag_target = (
            "mes_dashboard.services.reject_pareto_materialized.MATERIALIZATION_READ_ENABLED"
        )
        with mock.patch(read_flag_target, False):
            result, meta = try_materialized_batch_pareto(
                "Q1",
                lambda: None,
            )

        assert result is None
        assert meta["pareto_fallback_reason"] == FALLBACK_DISABLED

    def test_read_enabled_but_build_disabled_falls_back_on_miss(self):
        from mes_dashboard.services.reject_pareto_materialized import (
            FALLBACK_DISABLED,
            _snapshot_cache,
            try_materialized_batch_pareto,
        )

        # Guarantee a snapshot miss so the build flag decides the outcome.
        _snapshot_cache.clear()

        read_flag_target = (
            "mes_dashboard.services.reject_pareto_materialized.MATERIALIZATION_READ_ENABLED"
        )
        build_flag_target = (
            "mes_dashboard.services.reject_pareto_materialized.MATERIALIZATION_ENABLED"
        )
        with mock.patch(read_flag_target, True), mock.patch(build_flag_target, False):
            result, meta = try_materialized_batch_pareto(
                "Q_nonexistent",
                lambda: None,
            )

        assert result is None
        assert meta["pareto_fallback_reason"] == FALLBACK_DISABLED