feat(reject-history): fix silent data loss by propagating partial failure metadata to frontend

Chunk failures in BatchQueryEngine were silently discarded — `has_partial_failure` was tracked
in Redis but never surfaced to the API response or frontend. Users could see incomplete data
without any warning. This commit closes the gap end-to-end:

Backend:
- Track failed chunk time ranges (`failed_ranges`) in batch engine progress metadata
- Add single retry for transient Oracle errors (timeout, connection) in `_execute_single_chunk`
- Read `get_batch_progress()` after merge but before `redis_clear_batch()` cleanup
- Inject `has_partial_failure`, `failed_chunk_count`, `failed_ranges` into API response meta
- Persist partial failure flag to independent Redis key with TTL aligned to data storage layer
- Add shared container-resolution policy module with wildcard/expansion guardrails
- Refactor reason filter from single-value to multi-select (`reason` → `reasons`)

Frontend:
- Add client-side date range validation (730-day limit) before API submission
- Display amber warning banner on partial failure with specific failed date ranges
- Support generic fallback message for container-mode queries without date ranges
- Update FilterPanel to support multi-select reason chips

Specs & tests:
- Create batch-query-resilience spec; update reject-history-api and reject-history-page specs
- Add 7 new tests for retry, memory guard, failed ranges, partial failure propagation, TTL
- Cross-service regression verified (hold, resource, job, msd — 411 tests pass)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
egg
2026-03-03 14:00:07 +08:00
parent f1506787fb
commit a275c30c0e
35 changed files with 3028 additions and 1460 deletions

View File

@@ -59,6 +59,16 @@ QUERY_TOOL_MAX_CONTAINER_IDS=200
RESOURCE_DETAIL_DEFAULT_LIMIT=500 RESOURCE_DETAIL_DEFAULT_LIMIT=500
RESOURCE_DETAIL_MAX_LIMIT=500 RESOURCE_DETAIL_MAX_LIMIT=500
# Shared container-resolution guardrails
# 0 = disable raw input count cap (recommended: rely on expansion limits instead)
CONTAINER_RESOLVE_INPUT_MAX_VALUES=0
# Wildcard pattern must include this many literal-prefix chars before %/_ (e.g., GA25%)
CONTAINER_RESOLVE_PATTERN_MIN_PREFIX_LEN=4
# Per-token expansion guard (avoid one wildcard exploding into too many container IDs)
CONTAINER_RESOLVE_MAX_EXPANSION_PER_TOKEN=2000
# Total resolved container-ID guard for a single resolve request
CONTAINER_RESOLVE_MAX_CONTAINER_IDS=30000
# Trust boundary for forwarded headers (safe default: false) # Trust boundary for forwarded headers (safe default: false)
# Direct-exposure deployment (no reverse proxy): keep this false # Direct-exposure deployment (no reverse proxy): keep this false
TRUST_PROXY_HEADERS=false TRUST_PROXY_HEADERS=false
@@ -101,14 +111,14 @@ GUNICORN_WORKERS=2
GUNICORN_THREADS=4 GUNICORN_THREADS=4
# Worker timeout (seconds): should stay above DB/query-tool slow paths # Worker timeout (seconds): should stay above DB/query-tool slow paths
GUNICORN_TIMEOUT=130 GUNICORN_TIMEOUT=360
# Graceful shutdown timeout for worker reloads (seconds) # Graceful shutdown timeout for worker reloads (seconds)
GUNICORN_GRACEFUL_TIMEOUT=60 GUNICORN_GRACEFUL_TIMEOUT=300
# Worker recycle policy (set 0 to disable) # Worker recycle policy (set 0 to disable)
GUNICORN_MAX_REQUESTS=5000 GUNICORN_MAX_REQUESTS=1200
GUNICORN_MAX_REQUESTS_JITTER=500 GUNICORN_MAX_REQUESTS_JITTER=300
# ============================================================ # ============================================================
# Redis Configuration (for WIP cache) # Redis Configuration (for WIP cache)
@@ -201,6 +211,8 @@ TRACE_EVENTS_MAX_WORKERS=2
# Max parallel workers for EventFetcher batch queries (per domain) # Max parallel workers for EventFetcher batch queries (per domain)
# Recommend: 2 (peak concurrent slow queries = TRACE_EVENTS_MAX_WORKERS × this) # Recommend: 2 (peak concurrent slow queries = TRACE_EVENTS_MAX_WORKERS × this)
EVENT_FETCHER_MAX_WORKERS=2 EVENT_FETCHER_MAX_WORKERS=2
# false = any failed batch raises error (avoid silent partial data)
EVENT_FETCHER_ALLOW_PARTIAL_RESULTS=false
# Max parallel workers for forward pipeline WIP+rejects fetching # Max parallel workers for forward pipeline WIP+rejects fetching
FORWARD_PIPELINE_MAX_WORKERS=2 FORWARD_PIPELINE_MAX_WORKERS=2
@@ -351,7 +363,7 @@ REJECT_ENGINE_SPOOL_CLEANUP_INTERVAL_SECONDS=300
REJECT_ENGINE_SPOOL_ORPHAN_GRACE_SECONDS=600 REJECT_ENGINE_SPOOL_ORPHAN_GRACE_SECONDS=600
# Batch query engine thresholds # Batch query engine thresholds
BATCH_QUERY_TIME_THRESHOLD_DAYS=60 BATCH_QUERY_TIME_THRESHOLD_DAYS=10
BATCH_QUERY_ID_THRESHOLD=1000 BATCH_QUERY_ID_THRESHOLD=1000
BATCH_CHUNK_MAX_MEMORY_MB=256 BATCH_CHUNK_MAX_MEMORY_MB=256

View File

@@ -284,6 +284,15 @@ QUERY_TOOL_MAX_CONTAINER_IDS=200
RESOURCE_DETAIL_DEFAULT_LIMIT=500 RESOURCE_DETAIL_DEFAULT_LIMIT=500
RESOURCE_DETAIL_MAX_LIMIT=500 RESOURCE_DETAIL_MAX_LIMIT=500
# 共用解析防護(LOT/WAFER/工單)
CONTAINER_RESOLVE_INPUT_MAX_VALUES=0 # 0=不限制輸入筆數
CONTAINER_RESOLVE_PATTERN_MIN_PREFIX_LEN=4 # 萬用字元前最少字首長度(例如 GA25%)
CONTAINER_RESOLVE_MAX_EXPANSION_PER_TOKEN=2000
CONTAINER_RESOLVE_MAX_CONTAINER_IDS=30000
# EventFetcher 批次容錯策略
EVENT_FETCHER_ALLOW_PARTIAL_RESULTS=false # false=任一批次失敗即整體失敗,避免靜默缺資料
# 反向代理信任邊界(無反向代理時務必維持 false # 反向代理信任邊界(無反向代理時務必維持 false
TRUST_PROXY_HEADERS=false TRUST_PROXY_HEADERS=false
TRUSTED_PROXY_IPS=127.0.0.1 TRUSTED_PROXY_IPS=127.0.0.1

View File

@@ -35,7 +35,7 @@ export function toRejectFilterSnapshot(input = {}) {
endDate: normalizeText(input.endDate), endDate: normalizeText(input.endDate),
workcenterGroups: normalizeArray(input.workcenterGroups), workcenterGroups: normalizeArray(input.workcenterGroups),
packages: normalizeArray(input.packages), packages: normalizeArray(input.packages),
reason: normalizeText(input.reason), reasons: normalizeArray(input.reasons),
includeExcludedScrap: normalizeBoolean(input.includeExcludedScrap, false), includeExcludedScrap: normalizeBoolean(input.includeExcludedScrap, false),
excludeMaterialScrap: normalizeBoolean(input.excludeMaterialScrap, true), excludeMaterialScrap: normalizeBoolean(input.excludeMaterialScrap, true),
excludePbDiode: normalizeBoolean(input.excludePbDiode, true), excludePbDiode: normalizeBoolean(input.excludePbDiode, true),
@@ -77,7 +77,7 @@ export function pruneRejectFilterSelections(filters = {}, options = {}) {
const removed = { const removed = {
workcenterGroups: [], workcenterGroups: [],
packages: [], packages: [],
reason: '', reasons: [],
}; };
if (hasWorkcenterOptions) { if (hasWorkcenterOptions) {
@@ -100,9 +100,14 @@ export function pruneRejectFilterSelections(filters = {}, options = {}) {
}); });
} }
if (next.reason && hasReasonOptions && !validReasons.has(next.reason)) { if (hasReasonOptions) {
removed.reason = next.reason; next.reasons = next.reasons.filter((value) => {
next.reason = ''; if (validReasons.has(value)) {
return true;
}
removed.reasons.push(value);
return false;
});
} }
return { return {
@@ -111,7 +116,7 @@ export function pruneRejectFilterSelections(filters = {}, options = {}) {
removedCount: removedCount:
removed.workcenterGroups.length + removed.workcenterGroups.length +
removed.packages.length + removed.packages.length +
(removed.reason ? 1 : 0), removed.reasons.length,
}; };
} }
@@ -126,13 +131,13 @@ export function buildRejectOptionsRequestParams(filters = {}) {
exclude_material_scrap: next.excludeMaterialScrap, exclude_material_scrap: next.excludeMaterialScrap,
exclude_pb_diode: next.excludePbDiode, exclude_pb_diode: next.excludePbDiode,
}; };
if (next.reason) { if (next.reasons.length > 0) {
params.reason = next.reason; params.reasons = next.reasons;
} }
return params; return params;
} }
export function buildRejectCommonQueryParams(filters = {}, { reason = '' } = {}) { export function buildRejectCommonQueryParams(filters = {}, { reasons: extraReasons = [] } = {}) {
const next = toRejectFilterSnapshot(filters); const next = toRejectFilterSnapshot(filters);
const params = { const params = {
start_date: next.startDate, start_date: next.startDate,
@@ -143,9 +148,9 @@ export function buildRejectCommonQueryParams(filters = {}, { reason = '' } = {})
exclude_material_scrap: next.excludeMaterialScrap, exclude_material_scrap: next.excludeMaterialScrap,
exclude_pb_diode: next.excludePbDiode, exclude_pb_diode: next.excludePbDiode,
}; };
const effectiveReason = normalizeText(reason) || next.reason; const merged = normalizeArray([...next.reasons, ...normalizeArray(extraReasons)]);
if (effectiveReason) { if (merged.length > 0) {
params.reasons = [effectiveReason]; params.reasons = merged;
} }
return params; return params;
} }
@@ -168,6 +173,30 @@ export function parseMultiLineInput(text) {
return result; return result;
} }
export function validateDateRange(startDate, endDate) {
  // Client-side guard mirroring the backend 730-day query limit.
  // Returns a user-facing (zh-TW) error message, or '' when the range is valid.
  const MAX_QUERY_DAYS = 730;
  const start = normalizeText(startDate);
  const end = normalizeText(endDate);
  if (!start || !end) {
    return '請先設定開始與結束日期';
  }
  // Parse as local midnight so the comparison matches what the user picked.
  const startDt = new Date(`${start}T00:00:00`);
  const endDt = new Date(`${end}T00:00:00`);
  if (Number.isNaN(startDt.getTime()) || Number.isNaN(endDt.getTime())) {
    return '日期格式不正確';
  }
  if (endDt < startDt) {
    return '結束日期必須大於起始日期';
  }
  const dayMs = 24 * 60 * 60 * 1000;
  // Math.round (not Math.floor): two local-midnight dates straddling a DST
  // transition differ by n*24h ± 1h, and floor would undercount the inclusive
  // span by one day on the spring-forward side.
  const days = Math.round((endDt - startDt) / dayMs) + 1;
  if (days > MAX_QUERY_DAYS) {
    return '查詢範圍不可超過 730 天(約兩年)';
  }
  return '';
}
export function buildViewParams(queryId, { export function buildViewParams(queryId, {
supplementaryFilters = {}, supplementaryFilters = {},
metricFilter = 'all', metricFilter = 'all',
@@ -185,8 +214,8 @@ export function buildViewParams(queryId, {
if (supplementaryFilters.workcenterGroups?.length > 0) { if (supplementaryFilters.workcenterGroups?.length > 0) {
params.workcenter_groups = supplementaryFilters.workcenterGroups; params.workcenter_groups = supplementaryFilters.workcenterGroups;
} }
if (supplementaryFilters.reason) { if (supplementaryFilters.reasons?.length > 0) {
params.reason = supplementaryFilters.reason; params.reasons = supplementaryFilters.reasons;
} }
if (metricFilter && metricFilter !== 'all') { if (metricFilter && metricFilter !== 'all') {
params.metric_filter = metricFilter; params.metric_filter = metricFilter;

View File

@@ -5,6 +5,7 @@ import { apiGet, apiPost } from '../core/api.js';
import { import {
buildViewParams, buildViewParams,
parseMultiLineInput, parseMultiLineInput,
validateDateRange,
} from '../core/reject-history-filters.js'; } from '../core/reject-history-filters.js';
import { replaceRuntimeHistory } from '../core/shell-navigation.js'; import { replaceRuntimeHistory } from '../core/shell-navigation.js';
@@ -104,14 +105,14 @@ const availableFilters = ref({ workcenterGroups: [], packages: [], reasons: [] }
const supplementaryFilters = reactive({ const supplementaryFilters = reactive({
packages: [], packages: [],
workcenterGroups: [], workcenterGroups: [],
reason: '', reasons: [],
}); });
// ---- Interactive state ---- // ---- Interactive state ----
const page = ref(1); const page = ref(1);
const selectedTrendDates = ref([]); const selectedTrendDates = ref([]);
const trendLegendSelected = ref({ '扣帳報廢量': true, '不扣帳報廢量': true }); const trendLegendSelected = ref({ '扣帳報廢量': true, '不扣帳報廢量': true });
const paretoDisplayScope = ref('all'); const paretoDisplayScope = ref('top20');
const paretoSelections = reactive(createEmptyParetoSelections()); const paretoSelections = reactive(createEmptyParetoSelections());
const paretoData = reactive(createEmptyParetoData()); const paretoData = reactive(createEmptyParetoData());
@@ -146,6 +147,7 @@ const loading = reactive({
exporting: false, exporting: false,
}); });
const errorMessage = ref(''); const errorMessage = ref('');
const partialFailureWarning = ref('');
const lastQueryAt = ref(''); const lastQueryAt = ref('');
// ---- Request staleness tracking ---- // ---- Request staleness tracking ----
@@ -241,8 +243,8 @@ function buildBatchParetoParams() {
if (supplementaryFilters.workcenterGroups.length > 0) { if (supplementaryFilters.workcenterGroups.length > 0) {
params.workcenter_groups = supplementaryFilters.workcenterGroups; params.workcenter_groups = supplementaryFilters.workcenterGroups;
} }
if (supplementaryFilters.reason) { if (supplementaryFilters.reasons.length > 0) {
params.reason = supplementaryFilters.reason; params.reasons = supplementaryFilters.reasons;
} }
if (selectedTrendDates.value.length > 0) { if (selectedTrendDates.value.length > 0) {
params.trend_dates = selectedTrendDates.value; params.trend_dates = selectedTrendDates.value;
@@ -301,11 +303,20 @@ async function executePrimaryQuery() {
loading.querying = true; loading.querying = true;
loading.list = true; loading.list = true;
errorMessage.value = ''; errorMessage.value = '';
partialFailureWarning.value = '';
try { try {
const body = { mode: queryMode.value }; const body = { mode: queryMode.value };
if (queryMode.value === 'date_range') { if (queryMode.value === 'date_range') {
const dateValidationError = validateDateRange(
draftFilters.startDate,
draftFilters.endDate,
);
if (dateValidationError) {
errorMessage.value = dateValidationError;
return;
}
body.start_date = draftFilters.startDate; body.start_date = draftFilters.startDate;
body.end_date = draftFilters.endDate; body.end_date = draftFilters.endDate;
} else { } else {
@@ -321,6 +332,19 @@ async function executePrimaryQuery() {
if (isStaleRequest(requestId)) return; if (isStaleRequest(requestId)) return;
const result = unwrapApiResult(resp, '主查詢執行失敗'); const result = unwrapApiResult(resp, '主查詢執行失敗');
const meta = result.meta || {};
if (meta.has_partial_failure) {
const failedChunkCount = Number(meta.failed_chunk_count || 0);
const failedRanges = Array.isArray(meta.failed_ranges) ? meta.failed_ranges : [];
if (failedRanges.length > 0) {
const rangesText = failedRanges
.map((item) => `${item.start} ~ ${item.end}`)
.join('、');
partialFailureWarning.value = `警告:以下日期區間的資料擷取失敗(${failedChunkCount} 個批次):${rangesText}。目前顯示結果可能不完整。`;
} else {
partialFailureWarning.value = `警告:${failedChunkCount} 個查詢批次的資料擷取失敗。目前顯示結果可能不完整。`;
}
}
committedPrimary.mode = queryMode.value; committedPrimary.mode = queryMode.value;
committedPrimary.startDate = draftFilters.startDate; committedPrimary.startDate = draftFilters.startDate;
@@ -344,7 +368,7 @@ async function executePrimaryQuery() {
supplementaryFilters.packages = []; supplementaryFilters.packages = [];
supplementaryFilters.workcenterGroups = []; supplementaryFilters.workcenterGroups = [];
supplementaryFilters.reason = ''; supplementaryFilters.reasons = [];
page.value = 1; page.value = 1;
selectedTrendDates.value = []; selectedTrendDates.value = [];
resetParetoSelections(); resetParetoSelections();
@@ -445,7 +469,7 @@ function clearFilters() {
draftFilters.excludeMaterialScrap = true; draftFilters.excludeMaterialScrap = true;
draftFilters.excludePbDiode = true; draftFilters.excludePbDiode = true;
draftFilters.paretoTop80 = true; draftFilters.paretoTop80 = true;
paretoDisplayScope.value = 'all'; paretoDisplayScope.value = 'top20';
resetParetoSelections(); resetParetoSelections();
void executePrimaryQuery(); void executePrimaryQuery();
} }
@@ -520,7 +544,7 @@ function clearParetoSelection() {
function onSupplementaryChange(filters) { function onSupplementaryChange(filters) {
supplementaryFilters.packages = filters.packages || []; supplementaryFilters.packages = filters.packages || [];
supplementaryFilters.workcenterGroups = filters.workcenterGroups || []; supplementaryFilters.workcenterGroups = filters.workcenterGroups || [];
supplementaryFilters.reason = filters.reason || ''; supplementaryFilters.reasons = filters.reasons || [];
page.value = 1; page.value = 1;
selectedTrendDates.value = []; selectedTrendDates.value = [];
resetParetoSelections(); resetParetoSelections();
@@ -545,7 +569,7 @@ function removeFilterChip(chip) {
} }
if (chip.type === 'reason') { if (chip.type === 'reason') {
supplementaryFilters.reason = ''; supplementaryFilters.reasons = supplementaryFilters.reasons.filter((r) => r !== chip.value);
page.value = 1; page.value = 1;
updateUrlState(); updateUrlState();
void Promise.all([refreshView(), fetchBatchPareto()]); void Promise.all([refreshView(), fetchBatchPareto()]);
@@ -584,7 +608,7 @@ async function exportCsv() {
params.set('query_id', queryId.value); params.set('query_id', queryId.value);
for (const pkg of supplementaryFilters.packages) params.append('packages', pkg); for (const pkg of supplementaryFilters.packages) params.append('packages', pkg);
for (const wc of supplementaryFilters.workcenterGroups) params.append('workcenter_groups', wc); for (const wc of supplementaryFilters.workcenterGroups) params.append('workcenter_groups', wc);
if (supplementaryFilters.reason) params.set('reason', supplementaryFilters.reason); for (const r of supplementaryFilters.reasons) params.append('reasons', r);
params.set('metric_filter', metricFilterParam()); params.set('metric_filter', metricFilterParam());
for (const date of selectedTrendDates.value) params.append('trend_dates', date); for (const date of selectedTrendDates.value) params.append('trend_dates', date);
for (const [dimension, key] of Object.entries(PARETO_SELECTION_PARAM_MAP)) { for (const [dimension, key] of Object.entries(PARETO_SELECTION_PARAM_MAP)) {
@@ -760,13 +784,13 @@ const activeFilterChips = computed(() => {
value: '', value: '',
}); });
if (supplementaryFilters.reason) { for (const reason of supplementaryFilters.reasons) {
chips.push({ chips.push({
key: `reason:${supplementaryFilters.reason}`, key: `reason:${reason}`,
label: `原因: ${supplementaryFilters.reason}`, label: `原因: ${reason}`,
removable: true, removable: true,
type: 'reason', type: 'reason',
value: supplementaryFilters.reason, value: reason,
}); });
} }
@@ -866,16 +890,14 @@ function updateUrlState() {
appendArrayParams(params, 'packages', supplementaryFilters.packages); appendArrayParams(params, 'packages', supplementaryFilters.packages);
appendArrayParams(params, 'workcenter_groups', supplementaryFilters.workcenterGroups); appendArrayParams(params, 'workcenter_groups', supplementaryFilters.workcenterGroups);
if (supplementaryFilters.reason) { appendArrayParams(params, 'reasons', supplementaryFilters.reasons);
params.set('reason', supplementaryFilters.reason);
}
appendArrayParams(params, 'trend_dates', selectedTrendDates.value); appendArrayParams(params, 'trend_dates', selectedTrendDates.value);
for (const [dimension, key] of Object.entries(PARETO_SELECTION_PARAM_MAP)) { for (const [dimension, key] of Object.entries(PARETO_SELECTION_PARAM_MAP)) {
appendArrayParams(params, key, paretoSelections[dimension] || []); appendArrayParams(params, key, paretoSelections[dimension] || []);
} }
if (paretoDisplayScope.value !== 'all') { if (paretoDisplayScope.value !== 'top20') {
params.set('pareto_display_scope', paretoDisplayScope.value); params.set('pareto_display_scope', paretoDisplayScope.value);
} }
if (!committedPrimary.paretoTop80) { if (!committedPrimary.paretoTop80) {
@@ -945,7 +967,7 @@ function restoreFromUrl() {
supplementaryFilters.packages = readArrayParam(params, 'packages'); supplementaryFilters.packages = readArrayParam(params, 'packages');
supplementaryFilters.workcenterGroups = readArrayParam(params, 'workcenter_groups'); supplementaryFilters.workcenterGroups = readArrayParam(params, 'workcenter_groups');
supplementaryFilters.reason = String(params.get('reason') || '').trim(); supplementaryFilters.reasons = readArrayParam(params, 'reasons');
selectedTrendDates.value = readArrayParam(params, 'trend_dates'); selectedTrendDates.value = readArrayParam(params, 'trend_dates');
@@ -969,7 +991,7 @@ function restoreFromUrl() {
} }
const urlParetoDisplayScope = String(params.get('pareto_display_scope') || '').trim().toLowerCase(); const urlParetoDisplayScope = String(params.get('pareto_display_scope') || '').trim().toLowerCase();
paretoDisplayScope.value = urlParetoDisplayScope === 'top20' ? 'top20' : 'all'; paretoDisplayScope.value = urlParetoDisplayScope === 'all' ? 'all' : 'top20';
const parsedPage = Number(params.get('page') || '1'); const parsedPage = Number(params.get('page') || '1');
page.value = Number.isFinite(parsedPage) && parsedPage > 0 ? parsedPage : 1; page.value = Number.isFinite(parsedPage) && parsedPage > 0 ? parsedPage : 1;
@@ -1001,6 +1023,9 @@ onMounted(() => {
</header> </header>
<div v-if="errorMessage" class="error-banner">{{ errorMessage }}</div> <div v-if="errorMessage" class="error-banner">{{ errorMessage }}</div>
<div v-if="partialFailureWarning" class="warning-banner">
{{ partialFailureWarning }}
</div>
<FilterPanel <FilterPanel
:filters="draftFilters" :filters="draftFilters"

View File

@@ -8,23 +8,23 @@ const props = defineProps({
containerInput: { type: String, default: '' }, containerInput: { type: String, default: '' },
availableFilters: { type: Object, default: () => ({}) }, availableFilters: { type: Object, default: () => ({}) },
supplementaryFilters: { type: Object, default: () => ({}) }, supplementaryFilters: { type: Object, default: () => ({}) },
queryId: { type: String, default: '' }, queryId: { type: String, default: '' },
resolutionInfo: { type: Object, default: null }, resolutionInfo: { type: Object, default: null },
loading: { type: Object, required: true }, loading: { type: Object, required: true },
activeFilterChips: { type: Array, default: () => [] }, activeFilterChips: { type: Array, default: () => [] },
paretoDisplayScope: { type: String, default: 'all' }, paretoDisplayScope: { type: String, default: 'all' },
}); });
const emit = defineEmits([ const emit = defineEmits([
'apply', 'apply',
'clear', 'clear',
'export-csv', 'export-csv',
'remove-chip', 'remove-chip',
'pareto-scope-toggle', 'pareto-scope-toggle',
'pareto-display-scope-change', 'pareto-display-scope-change',
'update:queryMode', 'update:queryMode',
'update:containerInputType', 'update:containerInputType',
'update:containerInput', 'update:containerInput',
'supplementary-change', 'supplementary-change',
]); ]);
@@ -32,7 +32,7 @@ function emitSupplementary(patch) {
emit('supplementary-change', { emit('supplementary-change', {
packages: props.supplementaryFilters.packages || [], packages: props.supplementaryFilters.packages || [],
workcenterGroups: props.supplementaryFilters.workcenterGroups || [], workcenterGroups: props.supplementaryFilters.workcenterGroups || [],
reason: props.supplementaryFilters.reason || '', reasons: props.supplementaryFilters.reasons || [],
...patch, ...patch,
}); });
} }
@@ -86,23 +86,23 @@ function emitSupplementary(patch) {
<!-- Container mode --> <!-- Container mode -->
<template v-else> <template v-else>
<div class="filter-group"> <div class="filter-group filter-group-full container-input-group">
<label class="filter-label" for="container-type">輸入類型</label> <div class="container-label-row">
<select <label class="filter-label" for="container-type">輸入類型</label>
id="container-type" <select
class="filter-input" id="container-type"
:value="containerInputType" class="filter-input container-type-select"
@change="$emit('update:containerInputType', $event.target.value)" :value="containerInputType"
> @change="$emit('update:containerInputType', $event.target.value)"
<option value="lot">LOT</option> >
<option value="work_order">工單</option> <option value="lot">LOT</option>
<option value="wafer_lot">WAFER LOT</option> <option value="work_order">工單</option>
</select> <option value="wafer_lot">WAFER LOT</option>
</div> </select>
<div class="filter-group filter-group-wide"> <label class="filter-label" for="container-input"
<label class="filter-label" for="container-input" >輸入值 (每行一個支援 * % wildcard)</label
>輸入值 (每行一個支援 * % wildcard)</label >
> </div>
<textarea <textarea
id="container-input" id="container-input"
class="filter-input filter-textarea" class="filter-input filter-textarea"
@@ -124,12 +124,12 @@ function emitSupplementary(patch) {
<input v-model="filters.excludeMaterialScrap" type="checkbox" /> <input v-model="filters.excludeMaterialScrap" type="checkbox" />
排除原物料報廢 排除原物料報廢
</label> </label>
<label class="checkbox-pill"> <label class="checkbox-pill">
<input v-model="filters.excludePbDiode" type="checkbox" /> <input v-model="filters.excludePbDiode" type="checkbox" />
排除 PB_* 系列 排除 PB_* 系列
</label> </label>
</div> </div>
<div class="filter-actions"> <div class="filter-actions">
<button <button
class="btn btn-primary" class="btn btn-primary"
:disabled="loading.querying" :disabled="loading.querying"
@@ -181,30 +181,30 @@ function emitSupplementary(patch) {
</template> </template>
</div> </div>
<!-- Supplementary filters (only after primary query) --> <!-- Supplementary filters (only after primary query) -->
<div v-if="queryId" class="supplementary-panel"> <div v-if="queryId" class="supplementary-panel">
<div class="supplementary-header">補充篩選 (快取內篩選)</div> <div class="supplementary-header">補充篩選 (快取內篩選)</div>
<div class="supplementary-toolbar"> <div class="supplementary-toolbar">
<label class="checkbox-pill"> <label class="checkbox-pill">
<input <input
:checked="filters.paretoTop80" :checked="filters.paretoTop80"
type="checkbox" type="checkbox"
@change="$emit('pareto-scope-toggle', $event.target.checked)" @change="$emit('pareto-scope-toggle', $event.target.checked)"
/> />
Pareto 僅顯示累計前 80% Pareto 僅顯示累計前 80%
</label> </label>
<label class="filter-label">顯示範圍</label> <label class="filter-label">顯示範圍</label>
<select <select
class="dimension-select pareto-scope-select" class="dimension-select pareto-scope-select"
:value="paretoDisplayScope" :value="paretoDisplayScope"
@change="$emit('pareto-display-scope-change', $event.target.value)" @change="$emit('pareto-display-scope-change', $event.target.value)"
> >
<option value="all">全部顯示</option> <option value="all">全部顯示</option>
<option value="top20">只顯示 TOP 20</option> <option value="top20">只顯示 TOP 20</option>
</select> </select>
</div> </div>
<div class="supplementary-row"> <div class="supplementary-row">
<div class="filter-group"> <div class="filter-group">
<label class="filter-label">WORKCENTER GROUP</label> <label class="filter-label">WORKCENTER GROUP</label>
<MultiSelect <MultiSelect
:model-value="supplementaryFilters.workcenterGroups" :model-value="supplementaryFilters.workcenterGroups"
@@ -227,22 +227,14 @@ function emitSupplementary(patch) {
</div> </div>
<div class="filter-group"> <div class="filter-group">
<label class="filter-label" for="supp-reason">報廢原因</label> <label class="filter-label">報廢原因</label>
<select <MultiSelect
id="supp-reason" :model-value="supplementaryFilters.reasons"
class="filter-input" :options="availableFilters.reasons || []"
:value="supplementaryFilters.reason" placeholder="全部原因"
@change="emitSupplementary({ reason: $event.target.value })" searchable
> @update:model-value="emitSupplementary({ reasons: $event })"
<option value="">全部原因</option> />
<option
v-for="r in availableFilters.reasons || []"
:key="r"
:value="r"
>
{{ r }}
</option>
</select>
</div> </div>
</div> </div>
</div> </div>

View File

@@ -41,6 +41,19 @@
line-height: 1.5; line-height: 1.5;
} }
/* Inline header row for container mode: the type <select> and the input label
   share one line, wrapping onto a second line on narrow viewports. */
.container-label-row {
display: flex;
align-items: center;
gap: 8px;
flex-wrap: wrap;
}
/* Keep the input-type <select> compact instead of stretching to the full
   grid-cell width of other .filter-input controls. */
.container-type-select {
width: auto;
min-width: 120px;
max-width: 180px;
}
.supplementary-panel { .supplementary-panel {
border-top: 1px solid var(--border); border-top: 1px solid var(--border);
padding: 16px 18px; padding: 16px 18px;
@@ -119,6 +132,15 @@
font-size: 13px; font-size: 13px;
} }
/* Amber banner for partial-failure warnings (incomplete query results) —
   intentionally distinct from the red error banner used for hard failures. */
.warning-banner {
margin-bottom: 14px;
padding: 10px 12px;
border-radius: 6px;
background: #fffbeb;
color: #b45309;
font-size: 13px;
}
.filter-panel { .filter-panel {
display: grid; display: grid;
grid-template-columns: repeat(4, minmax(0, 1fr)); grid-template-columns: repeat(4, minmax(0, 1fr));

View File

@@ -0,0 +1,2 @@
schema: spec-driven
created: 2026-03-03

View File

@@ -0,0 +1,80 @@
## Context
報廢歷史查詢使用 `BatchQueryEngine` 將長日期範圍拆成 10 天 chunks 平行查詢 Oracle。每個 chunk 有記憶體上限(256 MB)和 timeout(300s)防護。當 chunk 失敗時,`has_partial_failure` 旗標寫入 Redis HSET(key: `batch:reject:{hash}:meta`),但此資訊**在三個斷點被丟失**:
1. `reject_dataset_cache.py``execute_primary_query()` 未讀取 batch progress metadata
2. API route 直接 `jsonify({"success": True, **result})`,在 partial chunk failure 路徑下仍回 HTTP 200 + `success: true`,不區分完整與不完整結果
3. 前端 `App.vue` 沒有任何 partial failure 處理邏輯
另一個問題:`redis_clear_batch()``execute_primary_query()` 的清理階段會刪除 metadata key所以讀取必須在清理之前。
前端的 730 天日期上限驗證只在後端 `_validate_range()` 做,前端缺乏即時回饋。
## Goals / Non-Goals
**Goals:**
-`has_partial_failure` 從 Redis metadata 傳遞到 API response `meta` 欄位
- 追蹤失敗 chunk 的時間範圍,讓前端可顯示具體的缺漏區間
- 前端顯示 amber warning banner告知使用者資料可能不完整
- 前端加入日期範圍即時驗證,避免無效 API 請求
- 對 transient error(Oracle timeout、連線失敗)加入單次重試,減少不必要的 partial failure
- 持久化 partial failure 旗標到獨立 Redis key讓 cache-hit 路徑也能還原警告狀態
**Non-Goals:**
- 不改變現有 chunk 分片策略或記憶體上限數值
- 不實作前端的自動重查/重試機制
- 不修改 `EVENT_FETCHER_ALLOW_PARTIAL_RESULTS` 的行為(預設已是安全的 false)
- 不加入 progress bar / 即時進度追蹤 UI
## Decisions
### D1: 在 `redis_clear_batch` 之前讀取 metadata
**決定**: 在 `execute_primary_query()` 中,`merge_chunks()` 之後、`redis_clear_batch()` 之前,呼叫 `get_batch_progress("reject", engine_hash)` 讀取 partial failure 狀態。
**理由**: `redis_clear_batch` 會刪除包含 metadata 的 key之後就讀不到了。此時 chunk 資料已合併完成,是最後可讀取 metadata 的時機點。
### D2: 用獨立 Redis key 持久化 partial failure flagTTL 對齊實際資料層
**決定**: 在 `_store_query_result()` 之後,將 partial failure 資訊存到 `reject_dataset:{query_id}:partial_failure` Redis HSET。**TTL 必須與資料實際存活的層一致**:若資料 spill 到 parquet spool`_REJECT_ENGINE_SPOOL_TTL_SECONDS = 21600s`partial failure flag 的 TTL 也要用 21600s若資料存在 L1/L2`_CACHE_TTL = 900s`flag TTL 用 900s。實作方式`_store_partial_failure_flag()` 接受 `ttl` 參數,由呼叫端根據 `should_spill` 判斷傳入 `_REJECT_ENGINE_SPOOL_TTL_SECONDS``_CACHE_TTL`。Cache-hit 路徑透過 `_load_partial_failure_flag(query_id)` 還原。
**替代方案 A**: 將 flag 嵌入 DataFrame 的 attrs 或另外 pickle。
**為何不採用**: DataFrame attrs 在 parquet 序列化時會丟失pickle 增加反序列化風險。
**替代方案 B**: 固定 TTL=900s。
**為何不採用**: 大查詢 spill 到 parquet spool21600s TTL資料還能讀 6 小時,但 partial failure flag 15 分鐘就過期,造成「資料讀得到但警告消失」。
### D3: 在 `_update_progress` 中追蹤 failed_ranges僅 time-range chunk
**決定**: 擴充 `_update_progress()` 接受 `failed_ranges: Optional[List[Dict]]` 參數,以 JSON 字串存入 Redis HSET。Sequential 和 parallel path 均從失敗的 chunk descriptor 提取 `chunk_start` / `chunk_end`。**僅當 chunk descriptor 包含 `chunk_start`/`chunk_end` 時才記錄**(即 `decompose_by_time_range` 產生的 time-range chunk
**container-id 分塊的情境**: reject 的 container 模式使用 `decompose_by_ids()`chunk 結構為 `{"ids": [...]}` 不含日期範圍。此時 `failed_ranges` 為空 list前端透過 `failed_chunk_count > 0` 顯示 generic 警告訊息「N 個查詢批次的資料擷取失敗」),不含日期區間。
**理由**: chunk descriptor 的結構由 decompose 函式決定engine 層不應假設所有 chunk 都有時間範圍。
### D4: Memory guard 失敗不重試
**決定**: `_execute_single_chunk()` 加入 `max_retries=1`,但只對 `_is_retryable_error()` 回傳 true 的 exception 重試。Memory guard記憶體超限和 Redis store 失敗直接 return False不重試。
**理由**: Memory guard 代表該時段資料量確實過大重試結果相同Oracle timeout 和連線錯誤則可能是暫態問題。
### D5: 前端 warning banner 使用既有 amber 色系
**決定**: 新增 `.warning-banner` CSS class使用 `background: #fffbeb; color: #b45309`,與既有 `.resolution-warn` 的 amber 色系一致。放在 `.error-banner` 之後。
**替代方案**: 使用 toast/notification 元件。
**為何不採用**: 此專案無 toast 系統amber banner 與 red error-banner 模式統一。
### D6: 前端日期驗證函式放在共用 filters module
**決定**: 在 `frontend/src/core/reject-history-filters.js` 新增 `validateDateRange()`,複用 `resource-history/App.vue:231-248` 的驗證模式。
**理由**: reject-history-filters.js 已是此頁面的 filter 工具模組validateDateRange 屬於 filter 驗證邏輯。
## Risks / Trade-offs
- **[中] 重試邏輯影響所有 execute_plan 呼叫端** — `_execute_single_chunk()` 是 shared function被 reject / hold / resource / job / msd 五個服務共用。重試邏輯為加法行為(新增 retry loop 包在既有 try/except 外),成功路徑不變。→ 需要對其他 4 個服務執行 smoke test既有測試通過即可。若需更保守可加入 `max_retries` 參數讓呼叫端控制(預設 1但目前判斷統一重試對所有服務都是正面效果。
- **[低] 重試增加 Oracle 負擔** — 單次重試最多增加 1 倍的失敗查詢量。→ 透過 `_is_retryable_error()` 嚴格過濾,只重試 transient error且 parallel path 最多 3 worker影響可控。
- **[低] failed_ranges JSON 大小** — 理論上 73 chunks730/10全部失敗會產生 73 筆 rangeJSON < 5 KB。→ 遠低於 Redis HSET 欄位限制

View File

@@ -0,0 +1,34 @@
## Why
報廢歷史查詢的防爆機制(時間分片 + 記憶體上限 256 MB + Oracle timeout 300s在 chunk 失敗時會丟棄該 chunk 的資料,`has_partial_failure` 旗標僅寫入 Redis metadata**從未傳遞到 API response 或前端**。使用者查到不完整資料卻毫不知情影響決策正確性。此外730 天日期上限僅在後端驗證,前端無即時提示,導致不必要的等待。
## What Changes
- 後端 `reject_dataset_cache`:於 `execute_plan()` 後讀取 batch progress metadata`has_partial_failure`、失敗 chunk 數量及失敗時間範圍注入 API response `meta` 欄位
- 後端 `batch_query_engine` 追蹤失敗 chunk 的時間區間描述,寫入 Redis metadata 的 `failed_ranges` 欄位
- 後端 `_execute_single_chunk()` 對 transient errorOracle timeout / 連線錯誤加入單次重試memory guard 失敗不重試
- 前端新增 amber warning banner`meta.has_partial_failure` 為 true 時顯示不完整資料警告及失敗的日期區間
- 前端新增日期範圍即時驗證730 天上限),在 API 發送前攔截無效範圍
## Capabilities
### New Capabilities
- `batch-query-resilience`: 批次查詢引擎的失敗範圍追蹤、partial failure metadata 傳遞、及 transient error 單次重試機制
### Modified Capabilities
- `reject-history-api`: API response `meta` 新增 `has_partial_failure``failed_chunk_count``failed_ranges` 欄位,讓前端得知查詢結果完整性
- `reject-history-page`: 新增 amber warning banner 顯示 partial failure 警告新增前端日期範圍即時驗證730 天上限)
## Impact
- **後端服務 — batch_query_engine.py共用模組影響所有使用 execute_plan 的服務)**:
- 追蹤 failed_ranges + 重試邏輯修改的是 `_execute_single_chunk()`,此函式被 **reject / hold / resource / job / msd** 五個 dataset cache 服務共用
- 重試邏輯為加法行為(新增 retry loop不改變既有成功路徑對其他服務向後相容
- `failed_ranges` 追蹤僅在 chunk descriptor 含 `chunk_start`/`chunk_end` 時才記錄container-id 分塊(僅 reject container 模式使用)不受影響
- 需對 hold / resource / job / msd 執行回歸 smoke test
- **後端服務 — reject_dataset_cache.py**: 讀取 metadata + 注入 response + 持久化 partial failure flag
- **前端**: `App.vue`warning banner + 日期驗證)、`reject-history-filters.js`validateDateRange 函式)、`style.css`.warning-banner 樣式)
- **API 契約**: response `meta` 新增可選欄位(向後相容,現有前端不受影響)
- **測試**: `test_batch_query_engine.py``test_reject_dataset_cache.py` 需新增對應測試案例hold / resource / job / msd 需回歸驗證

View File

@@ -0,0 +1,82 @@
## ADDED Requirements
### Requirement: BatchQueryEngine SHALL track failed chunk time ranges in progress metadata
The engine SHALL record the time ranges of failed chunks in Redis progress metadata so consumers can report which date intervals have missing data.
#### Scenario: Failed chunk range recorded in sequential path
- **WHEN** a chunk with `chunk_start` and `chunk_end` keys fails during sequential execution
- **THEN** `_update_progress()` SHALL store a `failed_ranges` field in the Redis HSET metadata
- **THEN** `failed_ranges` SHALL be a JSON array of objects, each with `start` and `end` string keys
- **THEN** the array SHALL contain one entry per failed chunk
#### Scenario: Failed chunk range recorded in parallel path
- **WHEN** a chunk with `chunk_start` and `chunk_end` keys fails during parallel execution
- **THEN** the failed chunk's time range SHALL be appended to `failed_ranges` in the same format as the sequential path
#### Scenario: No failed ranges when all chunks succeed
- **WHEN** all chunks complete successfully
- **THEN** the `failed_ranges` field SHALL NOT be present in Redis metadata
#### Scenario: ID-batch chunks produce no failed_ranges entries
- **WHEN** a chunk created by `decompose_by_ids()` (containing only an `ids` key, no `chunk_start`/`chunk_end`) fails
- **THEN** no entry SHALL be appended to `failed_ranges` for that chunk
- **THEN** `has_partial_failure` SHALL still be set to `True`
- **THEN** `failed` count SHALL still be incremented
#### Scenario: get_batch_progress returns failed_ranges
- **WHEN** `get_batch_progress()` is called after execution with failed chunks
- **THEN** the returned dict SHALL include `failed_ranges` as a JSON string parseable to a list of `{start, end}` objects
### Requirement: BatchQueryEngine SHALL retry transient chunk failures once
The engine SHALL retry chunk execution once for transient errors (Oracle timeout, connection errors) but SHALL NOT retry deterministic failures (memory guard, Redis store).
#### Scenario: Oracle timeout retried once
- **WHEN** `_execute_single_chunk()` raises an exception matching Oracle timeout patterns (`DPY-4024`, `ORA-01013`)
- **THEN** the chunk SHALL be retried exactly once
- **WHEN** the retry succeeds
- **THEN** the chunk SHALL be marked as successful
#### Scenario: Connection error retried once
- **WHEN** `_execute_single_chunk()` raises `TimeoutError`, `ConnectionError`, or `OSError`
- **THEN** the chunk SHALL be retried exactly once
#### Scenario: Retry exhausted marks chunk as failed
- **WHEN** a chunk fails on both the initial attempt and the retry
- **THEN** the chunk SHALL be marked as failed
- **THEN** `has_partial_failure` SHALL be set to `True`
#### Scenario: Memory guard failure NOT retried
- **WHEN** a chunk's DataFrame exceeds `BATCH_CHUNK_MAX_MEMORY_MB`
- **THEN** the chunk SHALL return `False` immediately without retry
- **THEN** the query function SHALL have been called exactly once for that chunk
#### Scenario: Redis store failure NOT retried
- **WHEN** `redis_store_chunk()` returns `False`
- **THEN** the chunk SHALL return `False` immediately without retry
### Requirement: reject_dataset_cache SHALL propagate partial failure metadata to API response
The cache service SHALL read batch execution metadata and include partial failure information in the API response `meta` field.
#### Scenario: Partial failure metadata included in response
- **WHEN** `execute_primary_query()` uses the batch engine path and `get_batch_progress()` returns `has_partial_failure=True`
- **THEN** the response `meta` dict SHALL include `has_partial_failure: true`
- **THEN** the response `meta` dict SHALL include `failed_chunk_count` as an integer
- **THEN** if `failed_ranges` is present, the response `meta` dict SHALL include `failed_ranges` as a list of `{start, end}` objects
#### Scenario: Metadata read before redis_clear_batch
- **WHEN** `execute_primary_query()` calls `get_batch_progress()`
- **THEN** the call SHALL occur after `merge_chunks()` and before `redis_clear_batch()`
#### Scenario: No partial failure on successful query
- **WHEN** all chunks complete successfully
- **THEN** the response `meta` dict SHALL NOT include `has_partial_failure`
#### Scenario: Cache-hit path restores partial failure flag
- **WHEN** a cached DataFrame is returned (cache hit) and a partial failure flag was stored during the original query
- **THEN** the response `meta` dict SHALL include the same `has_partial_failure`, `failed_chunk_count`, and `failed_ranges` as the original response
#### Scenario: Partial failure flag TTL matches data storage layer
- **WHEN** partial failure is detected and the query result is spilled to parquet spool
- **THEN** the partial failure flag SHALL be stored with TTL equal to `_REJECT_ENGINE_SPOOL_TTL_SECONDS` (default 21600 seconds)
- **WHEN** partial failure is detected and the query result is stored in L1/L2 Redis cache
- **THEN** the partial failure flag SHALL be stored with TTL equal to `_CACHE_TTL` (default 900 seconds)

View File

@@ -0,0 +1,36 @@
## MODIFIED Requirements
### Requirement: Reject History API SHALL validate required query parameters
The API SHALL validate date parameters and basic paging bounds before executing database work.
#### Scenario: Missing required dates
- **WHEN** a reject-history endpoint requiring date range is called without `start_date` or `end_date`
- **THEN** the API SHALL return HTTP 400 with a descriptive validation error
#### Scenario: Invalid date order
- **WHEN** `end_date` is earlier than `start_date`
- **THEN** the API SHALL return HTTP 400 and SHALL NOT run SQL queries
#### Scenario: Date range exceeds maximum
- **WHEN** the date range between `start_date` and `end_date` exceeds 730 days
- **THEN** the API SHALL return HTTP 400 with error message "日期範圍不可超過 730 天"
## ADDED Requirements
### Requirement: Reject History API primary query response SHALL include partial failure metadata
The primary query endpoint SHALL include batch execution completeness information in the response `meta` field when chunks fail during batch query execution.
#### Scenario: Partial failure metadata in response
- **WHEN** `POST /api/reject-history/query` completes with some chunks failing
- **THEN** the response SHALL include `meta.has_partial_failure: true`
- **THEN** the response SHALL include `meta.failed_chunk_count` as a positive integer
- **THEN** the response SHALL include `meta.failed_ranges` as an array of `{start, end}` date strings (if available)
- **THEN** the HTTP status SHALL still be 200 (data is partially available)
#### Scenario: No partial failure metadata on full success
- **WHEN** `POST /api/reject-history/query` completes with all chunks succeeding
- **THEN** the response `meta` SHALL NOT include `has_partial_failure`, `failed_chunk_count`, or `failed_ranges`
#### Scenario: Partial failure metadata preserved on cache hit
- **WHEN** `POST /api/reject-history/query` returns cached data that originally had partial failures
- **THEN** the response SHALL include the same `meta.has_partial_failure`, `meta.failed_chunk_count`, and `meta.failed_ranges` as the original response

View File

@@ -0,0 +1,58 @@
## ADDED Requirements
### Requirement: Reject History page SHALL display partial failure warning banner
The page SHALL display an amber warning banner when the query result contains partial failures, informing users that displayed data may be incomplete.
#### Scenario: Warning banner displayed on partial failure
- **WHEN** the primary query response includes `meta.has_partial_failure: true`
- **THEN** an amber warning banner SHALL be displayed below the error banner position
- **THEN** the warning message SHALL be in Traditional Chinese
#### Scenario: Warning banner shows failed date ranges
- **WHEN** `meta.failed_ranges` contains date range objects
- **THEN** the warning banner SHALL display the specific failed date ranges (e.g., "以下日期區間的資料擷取失敗2025-01-01 ~ 2025-01-10")
#### Scenario: Warning banner shows generic message without ranges (container mode or missing range data)
- **WHEN** `meta.has_partial_failure` is true but `meta.failed_ranges` is empty or absent (e.g., container-id batch query)
- **THEN** the warning banner SHALL display a generic message with the failed chunk count (e.g., "3 個查詢批次的資料擷取失敗")
#### Scenario: Warning banner cleared on new query
- **WHEN** user initiates a new primary query
- **THEN** the warning banner SHALL be cleared before the new query executes
- **THEN** if the new query also has partial failures, the warning SHALL update with new failure information
#### Scenario: Warning banner coexists with error banner
- **WHEN** both an error message and a partial failure warning exist
- **THEN** the error banner SHALL appear first, followed by the warning banner
#### Scenario: Warning banner visual style
- **WHEN** the warning banner is rendered
- **THEN** it SHALL use amber/orange color scheme (background `#fffbeb`, text `#b45309`)
- **THEN** the style SHALL be consistent with the existing `.resolution-warn` color pattern
### Requirement: Reject History page SHALL validate date range before query submission
The page SHALL validate the date range on the client side before sending the API request, providing immediate feedback for invalid ranges.
#### Scenario: Date range exceeds 730-day limit
- **WHEN** user selects a date range exceeding 730 days and clicks "查詢"
- **THEN** the page SHALL display an error message "查詢範圍不可超過 730 天(約兩年)"
- **THEN** the API request SHALL NOT be sent
#### Scenario: Missing start or end date
- **WHEN** user clicks "查詢" without setting both start_date and end_date (in date_range mode)
- **THEN** the page SHALL display an error message "請先設定開始與結束日期"
- **THEN** the API request SHALL NOT be sent
#### Scenario: End date before start date
- **WHEN** user selects an end_date earlier than start_date
- **THEN** the page SHALL display an error message "結束日期必須大於起始日期"
- **THEN** the API request SHALL NOT be sent
#### Scenario: Valid date range proceeds normally
- **WHEN** user selects a valid date range within 730 days and clicks "查詢"
- **THEN** no validation error SHALL be shown
- **THEN** the API request SHALL proceed normally
#### Scenario: Container mode skips date validation
- **WHEN** query mode is "container" (not "date_range")
- **THEN** date range validation SHALL be skipped

View File

@@ -0,0 +1,46 @@
## 1. 前端日期範圍即時驗證
- [x] 1.1 在 `frontend/src/core/reject-history-filters.js` 末尾新增 `validateDateRange(startDate, endDate)` 函式MAX_QUERY_DAYS=730回傳空字串表示通過、非空字串為錯誤訊息
- [x] 1.2 在 `frontend/src/reject-history/App.vue` import `validateDateRange`,在 `executePrimaryQuery()` 的 API 呼叫前(`errorMessage.value = ''` 重置之後)加入 date_range 模式的驗證邏輯,驗證失敗時設定 `errorMessage` 並 return
## 2. 後端追蹤失敗 chunk 時間範圍
- [x] 2.1 在 `batch_query_engine.py``_update_progress()` 簽名加入 `failed_ranges: Optional[List] = None` 參數,在 mapping dict 中條件性加入 `json.dumps(failed_ranges)` 欄位
- [x] 2.2 在 `execute_plan()` 的 sequential path`for idx, chunk in enumerate(chunks)` 迴圈區段)新增 `failed_range_list = []`chunk 失敗時從 chunk descriptor 條件性提取 `chunk_start`/`chunk_end` append 到 list僅 time-range chunk 才有),傳入每次 `_update_progress()` 呼叫
- [x] 2.3 在 `_execute_parallel()` 修改 `futures` dict 為 `futures[future] = (idx, chunk)` 以保留 chunk descriptor新增 `failed_range_list`,失敗時條件性 append range返回值改為 4-tuple `(completed, failed, has_partial_failure, failed_range_list)`;同步更新 `execute_plan()` 中呼叫 `_execute_parallel()` 的解構為 4-tuple
## 3. 後端 chunk 失敗單次重試
- [x] 3.1 在 `batch_query_engine.py` 新增 `_RETRYABLE_PATTERNS` 常數和 `_is_retryable_error(exc)` 函式,辨識 Oracle timeout / 連線錯誤
- [x] 3.2 修改 `_execute_single_chunk()` 加入 `max_retries: int = 1` 參數,將 try/except 包在 retry loop 中memory guard 和 Redis store 失敗直接 return False 不重試exception 中若 `_is_retryable_error()` 為 True 則 log warning 並 continue
## 4. 後端傳遞 partial failure 到 API response
- [x] 4.1 在 `reject_dataset_cache.py``execute_primary_query()` 內 batch_query_engine local import 區塊加入 `get_batch_progress`
- [x] 4.2 在 `execute_primary_query()``merge_chunks()` 呼叫之後、`redis_clear_batch()` 呼叫之前,呼叫 `get_batch_progress("reject", engine_hash)` 讀取 `has_partial_failure``failed``failed_ranges`
- [x] 4.3 在 `redis_clear_batch()` 之後、`_apply_policy_filters()` 之前,將 partial failure 資訊條件性注入 `meta` dict`has_partial_failure``failed_chunk_count``failed_ranges`
- [x] 4.4 新增 `_store_partial_failure_flag(query_id, failed_count, failed_ranges, ttl)``_load_partial_failure_flag(query_id)` 兩個 helper使用 Redis HSET 存取 `reject_dataset:{query_id}:partial_failure``ttl` 由呼叫端傳入
- [x] 4.5 在 `_store_query_result()` 呼叫之後呼叫 `_store_partial_failure_flag()`TTL 根據 `_store_query_result()` 內的 `should_spill` 判斷spill 到 spool 時用 `_REJECT_ENGINE_SPOOL_TTL_SECONDS`21600s否則用 `_CACHE_TTL`900s`_get_cached_df()` cache-hit 路徑呼叫 `_load_partial_failure_flag()``meta.update()`
## 5. 前端 partial failure 警告 banner
- [x] 5.1 在 `frontend/src/reject-history/App.vue` 新增 `partialFailureWarning` ref`executePrimaryQuery()` 開頭重置,在讀取 result 後根據 `result.meta.has_partial_failure` 設定警告訊息(含 failed_ranges 的日期區間文字;無 ranges 時用 failed_chunk_count 的 generic 訊息)
- [x] 5.2 在 App.vue template 的 error-banner `<div>` 之後加入 `<div v-if="partialFailureWarning" class="warning-banner">{{ partialFailureWarning }}</div>`
- [x] 5.3 在 `frontend/src/reject-history/style.css``.error-banner` 規則之後加入 `.warning-banner` 樣式background: #fffbeb, color: #b45309
## 6. 測試
- [x] 6.1 在 `tests/test_batch_query_engine.py` 新增 `test_transient_failure_retried_once`mock query_fn 第一次 raise TimeoutError、第二次成功assert chunk 最終成功且 query_fn 被呼叫 2 次
- [x] 6.2 在 `tests/test_batch_query_engine.py` 新增 `test_memory_guard_not_retried`mock query_fn 回傳超大 DataFrameassert query_fn 僅被呼叫 1 次
- [x] 6.3 在 `tests/test_batch_query_engine.py` 新增 `test_failed_ranges_tracked`3 chunks 其中 1 個失敗assert Redis metadata 含 `failed_ranges` JSON
- [x] 6.4 在 `tests/test_reject_dataset_cache.py` 新增 `test_partial_failure_in_response_meta`mock `get_batch_progress` 回傳 `has_partial_failure=True`assert response `meta` 包含旗標和 `failed_ranges`
- [x] 6.5 在 `tests/test_reject_dataset_cache.py` 新增 `test_cache_hit_restores_partial_failure`:先寫入 partial failure flagcache hit 時 assert meta 有旗標
- [x] 6.6 在 `tests/test_reject_dataset_cache.py` 新增 `test_partial_failure_ttl_matches_spool`:當 should_spill=True 時 assert flag TTL 為 `_REJECT_ENGINE_SPOOL_TTL_SECONDS`,否則為 `_CACHE_TTL`
- [x] 6.7 在 `tests/test_batch_query_engine.py` 新增 `test_id_batch_chunk_no_failed_ranges`container-id 分塊 chunk 失敗時 assert `failed_ranges` 為空 list 但 `has_partial_failure=True`
## 7. 跨服務回歸驗證
- [x] 7.1 執行 `pytest tests/test_batch_query_engine.py tests/test_reject_dataset_cache.py -v` 確認本次修改的測試全部通過
- [x] 7.2 執行 hold_dataset_cache 相關測試確認重試邏輯不影響 hold`pytest tests/ -k "hold" -v`
- [x] 7.3 執行 resource / job / msd 相關測試確認回歸:`pytest tests/ -k "resource or job or mid_section" -v`
- [x] 7.4 若任何跨服務測試失敗,檢查是否為 `_execute_single_chunk` 簽名變更(`max_retries` 參數)導致,確認 keyword-only 預設值不影響既有呼叫

View File

@@ -0,0 +1,86 @@
# batch-query-resilience Specification
## Purpose
Batch query engine resilience features: failed chunk range tracking, transient error retry, and partial failure metadata propagation to API consumers.
## Requirements
### Requirement: BatchQueryEngine SHALL track failed chunk time ranges in progress metadata
The engine SHALL record the time ranges of failed chunks in Redis progress metadata so consumers can report which date intervals have missing data.
#### Scenario: Failed chunk range recorded in sequential path
- **WHEN** a chunk with `chunk_start` and `chunk_end` keys fails during sequential execution
- **THEN** `_update_progress()` SHALL store a `failed_ranges` field in the Redis HSET metadata
- **THEN** `failed_ranges` SHALL be a JSON array of objects, each with `start` and `end` string keys
- **THEN** the array SHALL contain one entry per failed chunk
#### Scenario: Failed chunk range recorded in parallel path
- **WHEN** a chunk with `chunk_start` and `chunk_end` keys fails during parallel execution
- **THEN** the failed chunk's time range SHALL be appended to `failed_ranges` in the same format as the sequential path
#### Scenario: No failed ranges when all chunks succeed
- **WHEN** all chunks complete successfully
- **THEN** the `failed_ranges` field SHALL NOT be present in Redis metadata
#### Scenario: ID-batch chunks produce no failed_ranges entries
- **WHEN** a chunk created by `decompose_by_ids()` (containing only an `ids` key, no `chunk_start`/`chunk_end`) fails
- **THEN** no entry SHALL be appended to `failed_ranges` for that chunk
- **THEN** `has_partial_failure` SHALL still be set to `True`
- **THEN** `failed` count SHALL still be incremented
#### Scenario: get_batch_progress returns failed_ranges
- **WHEN** `get_batch_progress()` is called after execution with failed chunks
- **THEN** the returned dict SHALL include `failed_ranges` as a JSON string parseable to a list of `{start, end}` objects
### Requirement: BatchQueryEngine SHALL retry transient chunk failures once
The engine SHALL retry chunk execution once for transient errors (Oracle timeout, connection errors) but SHALL NOT retry deterministic failures (memory guard, Redis store).
#### Scenario: Oracle timeout retried once
- **WHEN** `_execute_single_chunk()` raises an exception matching Oracle timeout patterns (`DPY-4024`, `ORA-01013`)
- **THEN** the chunk SHALL be retried exactly once
- **WHEN** the retry succeeds
- **THEN** the chunk SHALL be marked as successful
#### Scenario: Connection error retried once
- **WHEN** `_execute_single_chunk()` raises `TimeoutError`, `ConnectionError`, or `OSError`
- **THEN** the chunk SHALL be retried exactly once
#### Scenario: Retry exhausted marks chunk as failed
- **WHEN** a chunk fails on both the initial attempt and the retry
- **THEN** the chunk SHALL be marked as failed
- **THEN** `has_partial_failure` SHALL be set to `True`
#### Scenario: Memory guard failure NOT retried
- **WHEN** a chunk's DataFrame exceeds `BATCH_CHUNK_MAX_MEMORY_MB`
- **THEN** the chunk SHALL return `False` immediately without retry
- **THEN** the query function SHALL have been called exactly once for that chunk
#### Scenario: Redis store failure NOT retried
- **WHEN** `redis_store_chunk()` returns `False`
- **THEN** the chunk SHALL return `False` immediately without retry
### Requirement: reject_dataset_cache SHALL propagate partial failure metadata to API response
The cache service SHALL read batch execution metadata and include partial failure information in the API response `meta` field.
#### Scenario: Partial failure metadata included in response
- **WHEN** `execute_primary_query()` uses the batch engine path and `get_batch_progress()` returns `has_partial_failure=True`
- **THEN** the response `meta` dict SHALL include `has_partial_failure: true`
- **THEN** the response `meta` dict SHALL include `failed_chunk_count` as an integer
- **THEN** if `failed_ranges` is present, the response `meta` dict SHALL include `failed_ranges` as a list of `{start, end}` objects
#### Scenario: Metadata read before redis_clear_batch
- **WHEN** `execute_primary_query()` calls `get_batch_progress()`
- **THEN** the call SHALL occur after `merge_chunks()` and before `redis_clear_batch()`
#### Scenario: No partial failure on successful query
- **WHEN** all chunks complete successfully
- **THEN** the response `meta` dict SHALL NOT include `has_partial_failure`
#### Scenario: Cache-hit path restores partial failure flag
- **WHEN** a cached DataFrame is returned (cache hit) and a partial failure flag was stored during the original query
- **THEN** the response `meta` dict SHALL include the same `has_partial_failure`, `failed_chunk_count`, and `failed_ranges` as the original response
#### Scenario: Partial failure flag TTL matches data storage layer
- **WHEN** partial failure is detected and the query result is spilled to parquet spool
- **THEN** the partial failure flag SHALL be stored with TTL equal to `_REJECT_ENGINE_SPOOL_TTL_SECONDS` (default 21600 seconds)
- **WHEN** partial failure is detected and the query result is stored in L1/L2 Redis cache
- **THEN** the partial failure flag SHALL be stored with TTL equal to `_CACHE_TTL` (default 900 seconds)

View File

@@ -14,6 +14,28 @@ The API SHALL validate date parameters and basic paging bounds before executing
- **WHEN** `end_date` is earlier than `start_date` - **WHEN** `end_date` is earlier than `start_date`
- **THEN** the API SHALL return HTTP 400 and SHALL NOT run SQL queries - **THEN** the API SHALL return HTTP 400 and SHALL NOT run SQL queries
#### Scenario: Date range exceeds maximum
- **WHEN** the date range between `start_date` and `end_date` exceeds 730 days
- **THEN** the API SHALL return HTTP 400 with error message "日期範圍不可超過 730 天"
### Requirement: Reject History API primary query response SHALL include partial failure metadata
The primary query endpoint SHALL include batch execution completeness information in the response `meta` field when chunks fail during batch query execution.
#### Scenario: Partial failure metadata in response
- **WHEN** `POST /api/reject-history/query` completes with some chunks failing
- **THEN** the response SHALL include `meta.has_partial_failure: true`
- **THEN** the response SHALL include `meta.failed_chunk_count` as a positive integer
- **THEN** the response SHALL include `meta.failed_ranges` as an array of `{start, end}` date strings (if available)
- **THEN** the HTTP status SHALL still be 200 (data is partially available)
#### Scenario: No partial failure metadata on full success
- **WHEN** `POST /api/reject-history/query` completes with all chunks succeeding
- **THEN** the response `meta` SHALL NOT include `has_partial_failure`, `failed_chunk_count`, or `failed_ranges`
#### Scenario: Partial failure metadata preserved on cache hit
- **WHEN** `POST /api/reject-history/query` returns cached data that originally had partial failures
- **THEN** the response SHALL include the same `meta.has_partial_failure`, `meta.failed_chunk_count`, and `meta.failed_ranges` as the original response
### Requirement: Reject History API SHALL provide summary metrics endpoint ### Requirement: Reject History API SHALL provide summary metrics endpoint
The API SHALL provide aggregated summary metrics for the selected filter context. The API SHALL provide aggregated summary metrics for the selected filter context.

View File

@@ -236,6 +236,63 @@ The page template SHALL delegate sections to focused sub-components, following t
- **THEN** `App.vue` SHALL hold all reactive state and API logic - **THEN** `App.vue` SHALL hold all reactive state and API logic
- **THEN** sub-components SHALL receive data via props and communicate via events - **THEN** sub-components SHALL receive data via props and communicate via events
### Requirement: Reject History page SHALL display partial failure warning banner
The page SHALL display an amber warning banner when the query result contains partial failures, informing users that displayed data may be incomplete.
#### Scenario: Warning banner displayed on partial failure
- **WHEN** the primary query response includes `meta.has_partial_failure: true`
- **THEN** an amber warning banner SHALL be displayed below the error banner position
- **THEN** the warning message SHALL be in Traditional Chinese
#### Scenario: Warning banner shows failed date ranges
- **WHEN** `meta.failed_ranges` contains date range objects
- **THEN** the warning banner SHALL display the specific failed date ranges (e.g., "以下日期區間的資料擷取失敗2025-01-01 ~ 2025-01-10")
#### Scenario: Warning banner shows generic message without ranges (container mode or missing range data)
- **WHEN** `meta.has_partial_failure` is true but `meta.failed_ranges` is empty or absent (e.g., container-id batch query)
- **THEN** the warning banner SHALL display a generic message with the failed chunk count (e.g., "3 個查詢批次的資料擷取失敗")
#### Scenario: Warning banner cleared on new query
- **WHEN** user initiates a new primary query
- **THEN** the warning banner SHALL be cleared before the new query executes
- **THEN** if the new query also has partial failures, the warning SHALL update with new failure information
#### Scenario: Warning banner coexists with error banner
- **WHEN** both an error message and a partial failure warning exist
- **THEN** the error banner SHALL appear first, followed by the warning banner
#### Scenario: Warning banner visual style
- **WHEN** the warning banner is rendered
- **THEN** it SHALL use amber/orange color scheme (background `#fffbeb`, text `#b45309`)
- **THEN** the style SHALL be consistent with the existing `.resolution-warn` color pattern
### Requirement: Reject History page SHALL validate date range before query submission
The page SHALL validate the date range on the client side before sending the API request, providing immediate feedback for invalid ranges.
#### Scenario: Date range exceeds 730-day limit
- **WHEN** user selects a date range exceeding 730 days and clicks "查詢"
- **THEN** the page SHALL display an error message "查詢範圍不可超過 730 天(約兩年)"
- **THEN** the API request SHALL NOT be sent
#### Scenario: Missing start or end date
- **WHEN** user clicks "查詢" without setting both start_date and end_date (in date_range mode)
- **THEN** the page SHALL display an error message "請先設定開始與結束日期"
- **THEN** the API request SHALL NOT be sent
#### Scenario: End date before start date
- **WHEN** user selects an end_date earlier than start_date
- **THEN** the page SHALL display an error message "結束日期必須大於起始日期"
- **THEN** the API request SHALL NOT be sent
#### Scenario: Valid date range proceeds normally
- **WHEN** user selects a valid date range within 730 days and clicks "查詢"
- **THEN** no validation error SHALL be shown
- **THEN** the API request SHALL proceed normally
#### Scenario: Container mode skips date validation
- **WHEN** query mode is "container" (not "date_range")
- **THEN** date range validation SHALL be skipped
### Requirement: Frontend API timeout ### Requirement: Frontend API timeout
The reject-history page SHALL use a 360-second API timeout (up from 60 seconds) for all Oracle-backed API calls. The reject-history page SHALL use a 360-second API timeout (up from 60 seconds) for all Oracle-backed API calls.

View File

@@ -11,6 +11,7 @@ from flask import Blueprint, Response, jsonify, request
from mes_dashboard.core.cache import cache_get, cache_set, make_cache_key from mes_dashboard.core.cache import cache_get, cache_set, make_cache_key
from mes_dashboard.core.rate_limit import configured_rate_limit from mes_dashboard.core.rate_limit import configured_rate_limit
from mes_dashboard.core.request_validation import parse_json_payload
from mes_dashboard.core.utils import parse_bool_query from mes_dashboard.core.utils import parse_bool_query
from mes_dashboard.services.reject_dataset_cache import ( from mes_dashboard.services.reject_dataset_cache import (
apply_view, apply_view,
@@ -344,7 +345,7 @@ def api_reject_history_reason_pareto():
pareto_scope=pareto_scope, pareto_scope=pareto_scope,
packages=_parse_multi_param("packages") or None, packages=_parse_multi_param("packages") or None,
workcenter_groups=_parse_multi_param("workcenter_groups") or None, workcenter_groups=_parse_multi_param("workcenter_groups") or None,
reason=request.args.get("reason", "").strip() or None, reasons=_parse_multi_param("reasons") or None,
trend_dates=_parse_multi_param("trend_dates") or None, trend_dates=_parse_multi_param("trend_dates") or None,
include_excluded_scrap=include_excluded_scrap, include_excluded_scrap=include_excluded_scrap,
exclude_material_scrap=exclude_material_scrap, exclude_material_scrap=exclude_material_scrap,
@@ -404,7 +405,7 @@ def api_reject_history_batch_pareto():
pareto_display_scope=pareto_display_scope, pareto_display_scope=pareto_display_scope,
packages=_parse_multi_param("packages") or None, packages=_parse_multi_param("packages") or None,
workcenter_groups=_parse_multi_param("workcenter_groups") or None, workcenter_groups=_parse_multi_param("workcenter_groups") or None,
reason=request.args.get("reason", "").strip() or None, reasons=_parse_multi_param("reasons") or None,
trend_dates=_parse_multi_param("trend_dates") or None, trend_dates=_parse_multi_param("trend_dates") or None,
pareto_selections=_parse_multi_pareto_selections(), pareto_selections=_parse_multi_pareto_selections(),
include_excluded_scrap=include_excluded_scrap, include_excluded_scrap=include_excluded_scrap,
@@ -548,7 +549,9 @@ def api_reject_history_analytics():
@reject_history_bp.route("/api/reject-history/query", methods=["POST"]) @reject_history_bp.route("/api/reject-history/query", methods=["POST"])
def api_reject_history_query(): def api_reject_history_query():
"""Primary query: execute Oracle → cache DataFrame → return results.""" """Primary query: execute Oracle → cache DataFrame → return results."""
body = request.get_json(silent=True) or {} body, payload_error = parse_json_payload(require_non_empty_object=True)
if payload_error is not None:
return jsonify({"success": False, "error": payload_error.message}), payload_error.status_code
mode = str(body.get("mode", "")).strip() mode = str(body.get("mode", "")).strip()
if mode not in ("date_range", "container"): if mode not in ("date_range", "container"):
@@ -599,7 +602,7 @@ def api_reject_history_view():
page = request.args.get("page", 1, type=int) or 1 page = request.args.get("page", 1, type=int) or 1
per_page = request.args.get("per_page", 50, type=int) or 50 per_page = request.args.get("per_page", 50, type=int) or 50
metric_filter = request.args.get("metric_filter", "all").strip().lower() or "all" metric_filter = request.args.get("metric_filter", "all").strip().lower() or "all"
reason = request.args.get("reason", "").strip() or None reasons = _parse_multi_param("reasons") or None
detail_reason = request.args.get("detail_reason", "").strip() or None detail_reason = request.args.get("detail_reason", "").strip() or None
pareto_selections = _parse_multi_pareto_selections() pareto_selections = _parse_multi_pareto_selections()
pareto_dimension = None pareto_dimension = None
@@ -618,7 +621,7 @@ def api_reject_history_view():
query_id=query_id, query_id=query_id,
packages=_parse_multi_param("packages") or None, packages=_parse_multi_param("packages") or None,
workcenter_groups=_parse_multi_param("workcenter_groups") or None, workcenter_groups=_parse_multi_param("workcenter_groups") or None,
reason=reason, reasons=reasons,
metric_filter=metric_filter, metric_filter=metric_filter,
trend_dates=_parse_multi_param("trend_dates") or None, trend_dates=_parse_multi_param("trend_dates") or None,
detail_reason=detail_reason, detail_reason=detail_reason,
@@ -653,7 +656,7 @@ def api_reject_history_export_cached():
return jsonify({"success": False, "error": "缺少必要參數: query_id"}), 400 return jsonify({"success": False, "error": "缺少必要參數: query_id"}), 400
metric_filter = request.args.get("metric_filter", "all").strip().lower() or "all" metric_filter = request.args.get("metric_filter", "all").strip().lower() or "all"
reason = request.args.get("reason", "").strip() or None reasons = _parse_multi_param("reasons") or None
detail_reason = request.args.get("detail_reason", "").strip() or None detail_reason = request.args.get("detail_reason", "").strip() or None
pareto_selections = _parse_multi_pareto_selections() pareto_selections = _parse_multi_pareto_selections()
pareto_dimension = None pareto_dimension = None
@@ -672,7 +675,7 @@ def api_reject_history_export_cached():
query_id=query_id, query_id=query_id,
packages=_parse_multi_param("packages") or None, packages=_parse_multi_param("packages") or None,
workcenter_groups=_parse_multi_param("workcenter_groups") or None, workcenter_groups=_parse_multi_param("workcenter_groups") or None,
reason=reason, reasons=reasons,
metric_filter=metric_filter, metric_filter=metric_filter,
trend_dates=_parse_multi_param("trend_dates") or None, trend_dates=_parse_multi_param("trend_dates") or None,
detail_reason=detail_reason, detail_reason=detail_reason,

View File

@@ -56,6 +56,18 @@ from mes_dashboard.core.redis_df_store import (
logger = logging.getLogger("mes_dashboard.batch_query_engine") logger = logging.getLogger("mes_dashboard.batch_query_engine")
_RETRYABLE_PATTERNS = (
"dpy-4024",
"ora-01013",
"ora-03113",
"ora-03135",
"ora-12514",
"ora-12541",
"timeout",
"timed out",
)
# ============================================================ # ============================================================
# Configuration (env-overridable) # Configuration (env-overridable)
# ============================================================ # ============================================================
@@ -65,7 +77,7 @@ BATCH_CHUNK_MAX_MEMORY_MB: int = int(
) )
BATCH_QUERY_TIME_THRESHOLD_DAYS: int = int( BATCH_QUERY_TIME_THRESHOLD_DAYS: int = int(
os.getenv("BATCH_QUERY_TIME_THRESHOLD_DAYS", "60") os.getenv("BATCH_QUERY_TIME_THRESHOLD_DAYS", "10")
) )
BATCH_QUERY_ID_THRESHOLD: int = int( BATCH_QUERY_ID_THRESHOLD: int = int(
@@ -196,6 +208,7 @@ def _update_progress(
failed: int, failed: int,
status: str = "running", status: str = "running",
has_partial_failure: bool = False, has_partial_failure: bool = False,
failed_ranges: Optional[List[Dict[str, str]]] = None,
ttl: int = 900, ttl: int = 900,
) -> None: ) -> None:
"""Write/update batch progress metadata to Redis.""" """Write/update batch progress metadata to Redis."""
@@ -212,6 +225,10 @@ def _update_progress(
"status": status, "status": status,
"has_partial_failure": str(has_partial_failure), "has_partial_failure": str(has_partial_failure),
} }
if failed_ranges is not None:
mapping["failed_ranges"] = json.dumps(
failed_ranges, ensure_ascii=False, default=str
)
try: try:
client.hset(key, mapping=mapping) client.hset(key, mapping=mapping)
client.expire(key, ttl) client.expire(key, ttl)
@@ -279,6 +296,7 @@ def execute_plan(
completed = 0 completed = 0
failed = 0 failed = 0
has_partial_failure = False has_partial_failure = False
failed_range_list: Optional[List[Dict[str, str]]] = None
_update_progress( _update_progress(
cache_prefix, query_hash, cache_prefix, query_hash,
@@ -296,7 +314,9 @@ def execute_plan(
_update_progress( _update_progress(
cache_prefix, query_hash, cache_prefix, query_hash,
total=total, completed=completed, failed=failed, total=total, completed=completed, failed=failed,
has_partial_failure=has_partial_failure, ttl=chunk_ttl, has_partial_failure=has_partial_failure,
failed_ranges=failed_range_list,
ttl=chunk_ttl,
) )
continue continue
ok = _execute_single_chunk( ok = _execute_single_chunk(
@@ -308,14 +328,24 @@ def execute_plan(
else: else:
failed += 1 failed += 1
has_partial_failure = True has_partial_failure = True
if failed_range_list is None:
failed_range_list = []
chunk_start = chunk.get("chunk_start")
chunk_end = chunk.get("chunk_end")
if chunk_start and chunk_end:
failed_range_list.append(
{"start": str(chunk_start), "end": str(chunk_end)}
)
_update_progress( _update_progress(
cache_prefix, query_hash, cache_prefix, query_hash,
total=total, completed=completed, failed=failed, total=total, completed=completed, failed=failed,
has_partial_failure=has_partial_failure, ttl=chunk_ttl, has_partial_failure=has_partial_failure,
failed_ranges=failed_range_list,
ttl=chunk_ttl,
) )
else: else:
# --- Parallel path --- # --- Parallel path ---
completed, failed, has_partial_failure = _execute_parallel( completed, failed, has_partial_failure, failed_range_list = _execute_parallel(
chunks, query_fn, cache_prefix, query_hash, chunks, query_fn, cache_prefix, query_hash,
chunk_ttl, max_rows_per_chunk, skip_cached, chunk_ttl, max_rows_per_chunk, skip_cached,
effective_parallel, effective_parallel,
@@ -327,6 +357,7 @@ def execute_plan(
total=total, completed=completed, failed=failed, total=total, completed=completed, failed=failed,
status=final_status, status=final_status,
has_partial_failure=has_partial_failure, has_partial_failure=has_partial_failure,
failed_ranges=failed_range_list,
ttl=chunk_ttl, ttl=chunk_ttl,
) )
@@ -366,53 +397,59 @@ def _execute_single_chunk(
query_hash: str, query_hash: str,
chunk_ttl: int, chunk_ttl: int,
max_rows_per_chunk: Optional[int], max_rows_per_chunk: Optional[int],
max_retries: int = 1,
) -> bool: ) -> bool:
"""Run one chunk through *query_fn*, apply guards, store result. """Run one chunk through *query_fn*, apply guards, store result.
Returns True on success, False on failure. Returns True on success, False on failure.
""" """
try: attempts = max(0, int(max_retries)) + 1
df = query_fn(chunk, max_rows_per_chunk=max_rows_per_chunk) for attempt in range(attempts):
if df is None: try:
df = pd.DataFrame() df = query_fn(chunk, max_rows_per_chunk=max_rows_per_chunk)
if df is None:
df = pd.DataFrame()
# ---- Memory guard ---- # ---- Memory guard ----
mem_bytes = df.memory_usage(deep=True).sum() mem_bytes = df.memory_usage(deep=True).sum()
mem_mb = mem_bytes / (1024 * 1024) mem_mb = mem_bytes / (1024 * 1024)
if mem_mb > BATCH_CHUNK_MAX_MEMORY_MB: if mem_mb > BATCH_CHUNK_MAX_MEMORY_MB:
logger.warning( logger.warning(
"Chunk %d memory %.1f MB exceeds limit %d MB — discarded", "Chunk %d memory %.1f MB exceeds limit %d MB — discarded",
idx, mem_mb, BATCH_CHUNK_MAX_MEMORY_MB, idx, mem_mb, BATCH_CHUNK_MAX_MEMORY_MB,
)
return False
# ---- Store to Redis ----
stored = redis_store_chunk(cache_prefix, query_hash, idx, df, ttl=chunk_ttl)
if not stored:
logger.warning(
"Chunk %d failed to persist into Redis, marking as failed", idx
)
return False
logger.debug(
"Chunk %d completed: %d rows, %.1f MB",
idx, len(df), mem_mb,
)
return True
except Exception as exc:
should_retry = attempt < attempts - 1 and _is_retryable_error(exc)
if should_retry:
logger.warning(
"Chunk %d transient failure on attempt %d/%d: %s; retrying",
idx,
attempt + 1,
attempts,
exc,
)
continue
logger.error(
"Chunk %d failed: %s", idx, exc, exc_info=True,
) )
return False return False
return False
# ---- Truncation flag ----
truncated = (
max_rows_per_chunk is not None
and len(df) == max_rows_per_chunk
)
if truncated:
logger.info("Chunk %d returned exactly max_rows_per_chunk=%d (truncated)", idx, max_rows_per_chunk)
# ---- Store to Redis ----
stored = redis_store_chunk(cache_prefix, query_hash, idx, df, ttl=chunk_ttl)
if not stored:
logger.warning(
"Chunk %d failed to persist into Redis, marking as failed", idx
)
return False
logger.debug(
"Chunk %d completed: %d rows, %.1f MB",
idx, len(df), mem_mb,
)
return True
except Exception as exc:
logger.error(
"Chunk %d failed: %s", idx, exc, exc_info=True,
)
return False
def _execute_parallel( def _execute_parallel(
@@ -427,12 +464,13 @@ def _execute_parallel(
) -> tuple: ) -> tuple:
"""Execute chunks in parallel via ThreadPoolExecutor. """Execute chunks in parallel via ThreadPoolExecutor.
Returns (completed, failed, has_partial_failure). Returns (completed, failed, has_partial_failure, failed_ranges).
""" """
total = len(chunks) total = len(chunks)
completed = 0 completed = 0
failed = 0 failed = 0
has_partial_failure = False has_partial_failure = False
failed_range_list: Optional[List[Dict[str, str]]] = None
futures = {} futures = {}
with ThreadPoolExecutor(max_workers=max_workers) as executor: with ThreadPoolExecutor(max_workers=max_workers) as executor:
@@ -445,10 +483,10 @@ def _execute_parallel(
idx, chunk, query_fn, idx, chunk, query_fn,
cache_prefix, query_hash, chunk_ttl, max_rows_per_chunk, cache_prefix, query_hash, chunk_ttl, max_rows_per_chunk,
) )
futures[future] = idx futures[future] = (idx, chunk)
for future in as_completed(futures): for future in as_completed(futures):
idx = futures[future] idx, chunk = futures[future]
try: try:
ok = future.result() ok = future.result()
if ok: if ok:
@@ -456,18 +494,46 @@ def _execute_parallel(
else: else:
failed += 1 failed += 1
has_partial_failure = True has_partial_failure = True
if failed_range_list is None:
failed_range_list = []
chunk_start = chunk.get("chunk_start")
chunk_end = chunk.get("chunk_end")
if chunk_start and chunk_end:
failed_range_list.append(
{"start": str(chunk_start), "end": str(chunk_end)}
)
except Exception as exc: except Exception as exc:
logger.error("Chunk %d future error: %s", idx, exc) logger.error("Chunk %d future error: %s", idx, exc)
failed += 1 failed += 1
has_partial_failure = True has_partial_failure = True
if failed_range_list is None:
failed_range_list = []
chunk_start = chunk.get("chunk_start")
chunk_end = chunk.get("chunk_end")
if chunk_start and chunk_end:
failed_range_list.append(
{"start": str(chunk_start), "end": str(chunk_end)}
)
_update_progress( _update_progress(
cache_prefix, query_hash, cache_prefix, query_hash,
total=total, completed=completed, failed=failed, total=total, completed=completed, failed=failed,
has_partial_failure=has_partial_failure, ttl=chunk_ttl, has_partial_failure=has_partial_failure,
failed_ranges=failed_range_list,
ttl=chunk_ttl,
) )
return completed, failed, has_partial_failure return completed, failed, has_partial_failure, failed_range_list
def _is_retryable_error(exc: Exception) -> bool:
"""Return True for transient Oracle/network timeout errors."""
if isinstance(exc, (TimeoutError, ConnectionError, OSError)):
return True
text = str(exc).strip().lower()
if not text:
return False
return any(pattern in text for pattern in _RETRYABLE_PATTERNS)
# ============================================================ # ============================================================

View File

@@ -0,0 +1,152 @@
# -*- coding: utf-8 -*-
"""Shared guardrails for LOT/WAFER/工單 container resolution."""
from __future__ import annotations
import os
from typing import Any, Dict, Iterable, List, Optional
def _env_int(name: str, default: int) -> int:
raw = os.getenv(name)
if raw is None:
return int(default)
try:
return int(raw)
except (TypeError, ValueError):
return int(default)
def _normalize_wildcard_token(value: str) -> str:
return str(value or "").replace("*", "%")
def _is_pattern_token(value: str) -> bool:
token = _normalize_wildcard_token(value)
return "%" in token or "_" in token
def _literal_prefix_before_wildcard(value: str) -> str:
token = _normalize_wildcard_token(value)
for idx, ch in enumerate(token):
if ch in ("%", "_"):
return token[:idx]
return token
def normalize_input_values(values: Iterable[Any]) -> List[str]:
normalized: List[str] = []
seen = set()
for raw in values or []:
token = str(raw or "").strip()
if not token or token in seen:
continue
seen.add(token)
normalized.append(token)
return normalized
def validate_resolution_request(input_type: str, values: Iterable[Any]) -> Optional[str]:
"""Validate resolver request without hard-capping raw input count."""
tokens = normalize_input_values(values)
if not tokens:
return "請輸入至少一個查詢條件"
# Compatibility switch. Default 0 means "no count cap".
max_values = max(_env_int("CONTAINER_RESOLVE_INPUT_MAX_VALUES", 0), 0)
if max_values and len(tokens) > max_values:
return f"輸入數量超過上限 ({max_values} 筆)"
# Wildcard safety: avoid full-table scans like "%" or "_".
min_prefix_len = max(_env_int("CONTAINER_RESOLVE_PATTERN_MIN_PREFIX_LEN", 2), 0)
if min_prefix_len > 0:
invalid_patterns: List[str] = []
for token in tokens:
if not _is_pattern_token(token):
continue
if len(_literal_prefix_before_wildcard(token).strip()) < min_prefix_len:
invalid_patterns.append(token)
if invalid_patterns:
sample = ", ".join(invalid_patterns[:3])
suffix = "..." if len(invalid_patterns) > 3 else ""
return (
f"{input_type} 萬用字元條件過於寬鬆(需至少 {min_prefix_len} 碼前綴): "
f"{sample}{suffix}"
)
return None
def extract_container_ids(rows: Iterable[Dict[str, Any]]) -> List[str]:
ids: List[str] = []
seen = set()
for row in rows or []:
cid = str(
row.get("container_id")
or row.get("CONTAINERID")
or ""
).strip()
if not cid or cid in seen:
continue
seen.add(cid)
ids.append(cid)
return ids
def assess_resolution_result(result: Dict[str, Any]) -> Dict[str, Any]:
"""Assess expansion result against guardrails."""
expansion_info = result.get("expansion_info") or {}
max_expand_per_token = max(
_env_int("CONTAINER_RESOLVE_MAX_EXPANSION_PER_TOKEN", 2000),
1,
)
offenders: List[Dict[str, Any]] = []
for token, count in expansion_info.items():
try:
c = int(count)
except (TypeError, ValueError):
continue
if c > max_expand_per_token:
offenders.append({"token": str(token), "count": c})
unique_ids = extract_container_ids(result.get("data") or [])
max_container_ids = max(
_env_int("CONTAINER_RESOLVE_MAX_CONTAINER_IDS", 30000),
1,
)
return {
"max_expansion_per_token": max_expand_per_token,
"expansion_offenders": offenders,
"max_container_ids": max_container_ids,
"resolved_container_ids": len(unique_ids),
"over_container_limit": len(unique_ids) > max_container_ids,
}
def validate_resolution_result(
result: Dict[str, Any],
*,
strict: bool = True,
) -> Optional[str]:
"""Validate expansion result guardrails.
strict=True: exceed guardrail -> return error message.
strict=False: exceed guardrail -> allow caller to continue (split/decompose path).
"""
assessment = assess_resolution_result(result)
offenders = assessment.get("expansion_offenders") or []
if offenders and strict:
first = offenders[0]
token = str(first.get("token") or "")
count = int(first.get("count") or 0)
return (
f"單一條件展開過大 ({count} 筆,限制 {assessment['max_expansion_per_token']})"
f"請縮小範圍: {token}"
)
if bool(assessment.get("over_container_limit")) and strict:
return (
f"解析結果過大({assessment['resolved_container_ids']} 筆 CONTAINERID限制 {assessment['max_container_ids']}"
",請縮小查詢條件"
)
return None

View File

@@ -21,6 +21,10 @@ logger = logging.getLogger("mes_dashboard.event_fetcher")
ORACLE_IN_BATCH_SIZE = 1000 ORACLE_IN_BATCH_SIZE = 1000
EVENT_FETCHER_MAX_WORKERS = int(os.getenv('EVENT_FETCHER_MAX_WORKERS', '2')) EVENT_FETCHER_MAX_WORKERS = int(os.getenv('EVENT_FETCHER_MAX_WORKERS', '2'))
CACHE_SKIP_CID_THRESHOLD = int(os.getenv('EVENT_FETCHER_CACHE_SKIP_CID_THRESHOLD', '10000')) CACHE_SKIP_CID_THRESHOLD = int(os.getenv('EVENT_FETCHER_CACHE_SKIP_CID_THRESHOLD', '10000'))
EVENT_FETCHER_ALLOW_PARTIAL_RESULTS = (
os.getenv('EVENT_FETCHER_ALLOW_PARTIAL_RESULTS', 'false').strip().lower()
in {'1', 'true', 'yes', 'on'}
)
_DOMAIN_SPECS: Dict[str, Dict[str, Any]] = { _DOMAIN_SPECS: Dict[str, Dict[str, Any]] = {
"history": { "history": {
@@ -280,16 +284,23 @@ class EventFetcher:
for batch in batches: for batch in batches:
_fetch_and_group_batch(batch) _fetch_and_group_batch(batch)
else: else:
failures = []
with ThreadPoolExecutor(max_workers=min(len(batches), EVENT_FETCHER_MAX_WORKERS)) as executor: with ThreadPoolExecutor(max_workers=min(len(batches), EVENT_FETCHER_MAX_WORKERS)) as executor:
futures = {executor.submit(_fetch_and_group_batch, b): b for b in batches} futures = {executor.submit(_fetch_and_group_batch, b): b for b in batches}
for future in as_completed(futures): for future in as_completed(futures):
try: try:
future.result() future.result()
except Exception: except Exception as exc:
failures.append((futures[future], exc))
logger.error( logger.error(
"EventFetcher batch query failed domain=%s batch_size=%s", "EventFetcher batch query failed domain=%s batch_size=%s",
domain, len(futures[future]), exc_info=True, domain, len(futures[future]), exc_info=True,
) )
if failures and not EVENT_FETCHER_ALLOW_PARTIAL_RESULTS:
failed_cids = sum(len(batch) for batch, _ in failures)
raise RuntimeError(
f"EventFetcher chunk failed (domain={domain}, failed_chunks={len(failures)}, failed_cids={failed_cids})"
)
result = dict(grouped) result = dict(grouped)
del grouped del grouped

View File

@@ -150,7 +150,7 @@ def get_jobs_by_resources(
) -> Dict[str, Any]: ) -> Dict[str, Any]:
"""Query jobs for selected resources within date range. """Query jobs for selected resources within date range.
For date ranges exceeding BATCH_QUERY_TIME_THRESHOLD_DAYS (default 60), For date ranges exceeding BATCH_QUERY_TIME_THRESHOLD_DAYS (default 10),
the query is decomposed into monthly chunks via BatchQueryEngine. the query is decomposed into monthly chunks via BatchQueryEngine.
Results are cached in Redis to avoid redundant Oracle queries. Results are cached in Redis to avoid redundant Oracle queries.

View File

@@ -863,7 +863,7 @@ def _fetch_station_detection_data(
) -> Optional[pd.DataFrame]: ) -> Optional[pd.DataFrame]:
"""Execute station_detection.sql and return raw DataFrame. """Execute station_detection.sql and return raw DataFrame.
For date ranges exceeding BATCH_QUERY_TIME_THRESHOLD_DAYS (default 60), For date ranges exceeding BATCH_QUERY_TIME_THRESHOLD_DAYS (default 10),
the query is decomposed into monthly chunks via BatchQueryEngine to the query is decomposed into monthly chunks via BatchQueryEngine to
prevent Oracle timeout on high-volume stations. prevent Oracle timeout on high-volume stations.
""" """

View File

@@ -26,9 +26,15 @@ from typing import Any, Dict, List, Optional, Generator, Iterable, Tuple
import pandas as pd import pandas as pd
from mes_dashboard.core.database import read_sql_df from mes_dashboard.core.database import read_sql_df
from mes_dashboard.sql import QueryBuilder, SQLLoader from mes_dashboard.sql import QueryBuilder, SQLLoader
from mes_dashboard.services.event_fetcher import EventFetcher from mes_dashboard.services.container_resolution_policy import (
assess_resolution_result,
normalize_input_values,
validate_resolution_request,
validate_resolution_result,
)
from mes_dashboard.services.event_fetcher import EventFetcher
try: try:
from mes_dashboard.core.database import read_sql_df_slow from mes_dashboard.core.database import read_sql_df_slow
@@ -89,7 +95,7 @@ def validate_date_range(start_date: str, end_date: str, max_days: int = MAX_DATE
return f'日期格式錯誤: {e}' return f'日期格式錯誤: {e}'
def validate_lot_input(input_type: str, values: List[str]) -> Optional[str]: def validate_lot_input(input_type: str, values: List[str]) -> Optional[str]:
"""Validate LOT input based on type. """Validate LOT input based on type.
Args: Args:
@@ -99,23 +105,7 @@ def validate_lot_input(input_type: str, values: List[str]) -> Optional[str]:
Returns: Returns:
Error message if validation fails, None if valid. Error message if validation fails, None if valid.
""" """
if not values: return validate_resolution_request(input_type, values)
return '請輸入至少一個查詢條件'
limits = {
'lot_id': MAX_LOT_IDS,
'wafer_lot': MAX_LOT_IDS,
'gd_lot_id': MAX_LOT_IDS,
'serial_number': MAX_SERIAL_NUMBERS,
'work_order': MAX_WORK_ORDERS,
'gd_work_order': MAX_GD_WORK_ORDERS,
}
limit = limits.get(input_type, MAX_LOT_IDS)
if len(values) > limit:
return f'輸入數量超過上限 ({limit} 筆)'
return None
def validate_equipment_input(equipment_ids: List[str]) -> Optional[str]: def validate_equipment_input(equipment_ids: List[str]) -> Optional[str]:
@@ -344,27 +334,50 @@ def resolve_lots(input_type: str, values: List[str]) -> Dict[str, Any]:
return {'error': validation_error} return {'error': validation_error}
# Clean values # Clean values
cleaned = [v.strip() for v in values if v.strip()] cleaned = normalize_input_values(values)
if not cleaned: if not cleaned:
return {'error': '請輸入有效的查詢條件'} return {'error': '請輸入有效的查詢條件'}
try: try:
if input_type == 'lot_id': if input_type == 'lot_id':
return _resolve_by_lot_id(cleaned) result = _resolve_by_lot_id(cleaned)
elif input_type == 'wafer_lot': elif input_type == 'wafer_lot':
return _resolve_by_wafer_lot(cleaned) result = _resolve_by_wafer_lot(cleaned)
elif input_type == 'gd_lot_id': elif input_type == 'gd_lot_id':
return _resolve_by_gd_lot_id(cleaned) result = _resolve_by_gd_lot_id(cleaned)
elif input_type == 'serial_number': elif input_type == 'serial_number':
return _resolve_by_serial_number(cleaned) result = _resolve_by_serial_number(cleaned)
elif input_type == 'work_order': elif input_type == 'work_order':
return _resolve_by_work_order(cleaned) result = _resolve_by_work_order(cleaned)
elif input_type == 'gd_work_order': elif input_type == 'gd_work_order':
return _resolve_by_gd_work_order(cleaned) result = _resolve_by_gd_work_order(cleaned)
else: else:
return {'error': f'不支援的輸入類型: {input_type}'} return {'error': f'不支援的輸入類型: {input_type}'}
except Exception as exc: guard_assessment = assess_resolution_result(result)
overflow_tokens = guard_assessment.get("expansion_offenders") or []
overflow_total = bool(guard_assessment.get("over_container_limit"))
if overflow_tokens or overflow_total:
logger.warning(
"Resolution guardrail overflow (input_type=%s, offenders=%s, resolved=%s, max=%s); continuing with decompose path",
input_type,
len(overflow_tokens),
guard_assessment.get("resolved_container_ids"),
guard_assessment.get("max_container_ids"),
)
result["guardrail"] = {
"overflow": True,
"expansion_offenders": overflow_tokens,
"resolved_container_ids": guard_assessment.get("resolved_container_ids"),
"max_container_ids": guard_assessment.get("max_container_ids"),
}
# Keep compatibility: validation API remains available for strict call sites.
guard_error = validate_resolution_result(result, strict=False)
if guard_error:
return {'error': guard_error}
return result
except Exception as exc:
logger.error(f"LOT resolution failed: {exc}") logger.error(f"LOT resolution failed: {exc}")
return {'error': f'解析失敗: {str(exc)}'} return {'error': f'解析失敗: {str(exc)}'}

File diff suppressed because it is too large Load Diff

View File

@@ -30,6 +30,31 @@ WITH spec_map AS (
WHERE SPEC IS NOT NULL WHERE SPEC IS NOT NULL
GROUP BY SPEC GROUP BY SPEC
), ),
reject_scope AS (
SELECT DISTINCT
r.WIPTRACKINGGROUPKEYID
FROM DWH.DW_MES_LOTREJECTHISTORY r
WHERE {{ BASE_WHERE }}
AND r.WIPTRACKINGGROUPKEYID IS NOT NULL
),
wip_workflow_map AS (
SELECT
WIPTRACKINGGROUPKEYID,
WORKFLOWNAME
FROM (
SELECT
lwh.WIPTRACKINGGROUPKEYID,
lwh.WORKFLOWNAME,
ROW_NUMBER() OVER (
PARTITION BY lwh.WIPTRACKINGGROUPKEYID
ORDER BY lwh.MOVEOUTTIMESTAMP DESC NULLS LAST
) AS rn
FROM DWH.DW_MES_LOTWIPHISTORY lwh
INNER JOIN reject_scope rs
ON rs.WIPTRACKINGGROUPKEYID = lwh.WIPTRACKINGGROUPKEYID
)
WHERE rn = 1
),
reject_raw AS ( reject_raw AS (
SELECT SELECT
TRUNC(r.TXNDATE) AS TXN_DAY, TRUNC(r.TXNDATE) AS TXN_DAY,
@@ -105,7 +130,7 @@ reject_raw AS (
FROM DWH.DW_MES_LOTREJECTHISTORY r FROM DWH.DW_MES_LOTREJECTHISTORY r
LEFT JOIN DWH.DW_MES_CONTAINER c LEFT JOIN DWH.DW_MES_CONTAINER c
ON c.CONTAINERID = r.CONTAINERID ON c.CONTAINERID = r.CONTAINERID
LEFT JOIN DWH.DW_MES_LOTWIPHISTORY lwh LEFT JOIN wip_workflow_map lwh
ON lwh.WIPTRACKINGGROUPKEYID = r.WIPTRACKINGGROUPKEYID ON lwh.WIPTRACKINGGROUPKEYID = r.WIPTRACKINGGROUPKEYID
LEFT JOIN spec_map sm LEFT JOIN spec_map sm
ON sm.SPEC = TRIM(r.SPECNAME) ON sm.SPEC = TRIM(r.SPECNAME)

View File

@@ -6,8 +6,8 @@
-- :end_date - End date (YYYY-MM-DD) -- :end_date - End date (YYYY-MM-DD)
WITH spec_map AS ( WITH spec_map AS (
SELECT SELECT
SPEC, SPEC,
MIN(WORK_CENTER) KEEP ( MIN(WORK_CENTER) KEEP (
DENSE_RANK FIRST ORDER BY WORKCENTERSEQUENCE_GROUP DENSE_RANK FIRST ORDER BY WORKCENTERSEQUENCE_GROUP
) AS WORK_CENTER, ) AS WORK_CENTER,
@@ -15,9 +15,34 @@ WITH spec_map AS (
DENSE_RANK FIRST ORDER BY WORKCENTERSEQUENCE_GROUP DENSE_RANK FIRST ORDER BY WORKCENTERSEQUENCE_GROUP
) AS WORKCENTER_GROUP, ) AS WORKCENTER_GROUP,
MIN(WORKCENTERSEQUENCE_GROUP) AS WORKCENTERSEQUENCE_GROUP MIN(WORKCENTERSEQUENCE_GROUP) AS WORKCENTERSEQUENCE_GROUP
FROM DWH.DW_MES_SPEC_WORKCENTER_V FROM DWH.DW_MES_SPEC_WORKCENTER_V
WHERE SPEC IS NOT NULL WHERE SPEC IS NOT NULL
GROUP BY SPEC GROUP BY SPEC
),
reject_scope AS (
SELECT DISTINCT
r.WIPTRACKINGGROUPKEYID
FROM DWH.DW_MES_LOTREJECTHISTORY r
WHERE {{ BASE_WHERE }}
AND r.WIPTRACKINGGROUPKEYID IS NOT NULL
),
wip_workflow_map AS (
SELECT
WIPTRACKINGGROUPKEYID,
WORKFLOWNAME
FROM (
SELECT
lwh.WIPTRACKINGGROUPKEYID,
lwh.WORKFLOWNAME,
ROW_NUMBER() OVER (
PARTITION BY lwh.WIPTRACKINGGROUPKEYID
ORDER BY lwh.MOVEOUTTIMESTAMP DESC NULLS LAST
) AS rn
FROM DWH.DW_MES_LOTWIPHISTORY lwh
INNER JOIN reject_scope rs
ON rs.WIPTRACKINGGROUPKEYID = lwh.WIPTRACKINGGROUPKEYID
)
WHERE rn = 1
), ),
reject_raw AS ( reject_raw AS (
SELECT SELECT
@@ -99,7 +124,7 @@ reject_raw AS (
FROM DWH.DW_MES_LOTREJECTHISTORY r FROM DWH.DW_MES_LOTREJECTHISTORY r
LEFT JOIN DWH.DW_MES_CONTAINER c LEFT JOIN DWH.DW_MES_CONTAINER c
ON c.CONTAINERID = r.CONTAINERID ON c.CONTAINERID = r.CONTAINERID
LEFT JOIN DWH.DW_MES_LOTWIPHISTORY lwh LEFT JOIN wip_workflow_map lwh
ON lwh.WIPTRACKINGGROUPKEYID = r.WIPTRACKINGGROUPKEYID ON lwh.WIPTRACKINGGROUPKEYID = r.WIPTRACKINGGROUPKEYID
LEFT JOIN spec_map sm LEFT JOIN spec_map sm
ON sm.SPEC = TRIM(r.SPECNAME) ON sm.SPEC = TRIM(r.SPECNAME)

View File

@@ -1,6 +1,7 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
"""Unit tests for BatchQueryEngine module.""" """Unit tests for BatchQueryEngine module."""
import json
import pytest import pytest
from unittest.mock import patch, MagicMock, call from unittest.mock import patch, MagicMock, call
@@ -482,8 +483,8 @@ class TestChunkFailureResilience:
skip_cached=False, skip_cached=False,
) )
# All 3 chunks attempted # One chunk retried once due retryable timeout pattern.
assert call_count["n"] == 3 assert call_count["n"] == 4
# Final metadata should reflect partial failure # Final metadata should reflect partial failure
last = hset_calls[-1] last = hset_calls[-1]
assert last["status"] == "partial" assert last["status"] == "partial"
@@ -567,10 +568,147 @@ class TestShouldDecompose:
assert should_decompose_by_time("2025-01-01", "2025-12-31") assert should_decompose_by_time("2025-01-01", "2025-12-31")
def test_short_range_false(self): def test_short_range_false(self):
assert not should_decompose_by_time("2025-01-01", "2025-02-01") assert not should_decompose_by_time("2025-01-01", "2025-01-11")
def test_large_ids_true(self): def test_large_ids_true(self):
assert should_decompose_by_ids(list(range(2000))) assert should_decompose_by_ids(list(range(2000)))
def test_small_ids_false(self): def test_small_ids_false(self):
assert not should_decompose_by_ids(list(range(500))) assert not should_decompose_by_ids(list(range(500)))
class TestRetryAndFailedRanges:
def _mock_redis(self):
mock_client = MagicMock()
stored = {}
hashes = {}
mock_client.setex.side_effect = lambda k, t, v: stored.update({k: v})
mock_client.get.side_effect = lambda k: stored.get(k)
mock_client.exists.side_effect = lambda k: 1 if k in stored else 0
mock_client.hset.side_effect = lambda k, mapping=None: hashes.setdefault(k, {}).update(mapping or {})
mock_client.hgetall.side_effect = lambda k: hashes.get(k, {})
mock_client.expire.return_value = None
return mock_client
def test_transient_failure_retried_once(self):
import mes_dashboard.core.redis_df_store as rds
import mes_dashboard.services.batch_query_engine as bqe
mock_client = self._mock_redis()
call_count = {"n": 0}
def flaky_query_fn(chunk, max_rows_per_chunk=None):
call_count["n"] += 1
if call_count["n"] == 1:
raise TimeoutError("connection timed out")
return pd.DataFrame({"V": [1]})
with patch.object(rds, "REDIS_ENABLED", True), \
patch.object(rds, "get_redis_client", return_value=mock_client), \
patch.object(bqe, "get_redis_client", return_value=mock_client):
execute_plan(
[{"chunk_start": "2025-01-01", "chunk_end": "2025-01-10"}],
flaky_query_fn,
query_hash="retryonce",
cache_prefix="retry",
skip_cached=False,
)
progress = bqe.get_batch_progress("retry", "retryonce")
assert call_count["n"] == 2
assert progress is not None
assert progress.get("status") == "completed"
assert progress.get("failed") == "0"
def test_memory_guard_not_retried(self):
import mes_dashboard.core.redis_df_store as rds
import mes_dashboard.services.batch_query_engine as bqe
mock_client = self._mock_redis()
call_count = {"n": 0}
def large_df_query_fn(chunk, max_rows_per_chunk=None):
call_count["n"] += 1
return pd.DataFrame({"V": [1]})
with patch.object(rds, "REDIS_ENABLED", True), \
patch.object(rds, "get_redis_client", return_value=mock_client), \
patch.object(bqe, "get_redis_client", return_value=mock_client), \
patch.object(bqe, "BATCH_CHUNK_MAX_MEMORY_MB", 0):
execute_plan(
[{"chunk_start": "2025-01-01", "chunk_end": "2025-01-10"}],
large_df_query_fn,
query_hash="memnoretry",
cache_prefix="retry",
skip_cached=False,
)
assert call_count["n"] == 1
def test_failed_ranges_tracked(self):
import mes_dashboard.core.redis_df_store as rds
import mes_dashboard.services.batch_query_engine as bqe
mock_client = self._mock_redis()
def query_fn(chunk, max_rows_per_chunk=None):
if chunk["chunk_start"] == "2025-01-11":
raise RuntimeError("chunk failure")
return pd.DataFrame({"V": [1]})
chunks = [
{"chunk_start": "2025-01-01", "chunk_end": "2025-01-10"},
{"chunk_start": "2025-01-11", "chunk_end": "2025-01-20"},
{"chunk_start": "2025-01-21", "chunk_end": "2025-01-30"},
]
with patch.object(rds, "REDIS_ENABLED", True), \
patch.object(rds, "get_redis_client", return_value=mock_client), \
patch.object(bqe, "get_redis_client", return_value=mock_client):
execute_plan(
chunks,
query_fn,
query_hash="franges",
cache_prefix="retry",
skip_cached=False,
)
progress = bqe.get_batch_progress("retry", "franges")
assert progress is not None
assert progress.get("has_partial_failure") == "True"
assert progress.get("failed") == "1"
failed_ranges = json.loads(progress.get("failed_ranges", "[]"))
assert failed_ranges == [{"start": "2025-01-11", "end": "2025-01-20"}]
def test_id_batch_chunk_no_failed_ranges(self):
import mes_dashboard.core.redis_df_store as rds
import mes_dashboard.services.batch_query_engine as bqe
mock_client = self._mock_redis()
def query_fn(chunk, max_rows_per_chunk=None):
if chunk.get("ids") == ["B"]:
raise RuntimeError("id chunk failed")
return pd.DataFrame({"V": [1]})
chunks = [
{"ids": ["A"]},
{"ids": ["B"]},
]
with patch.object(rds, "REDIS_ENABLED", True), \
patch.object(rds, "get_redis_client", return_value=mock_client), \
patch.object(bqe, "get_redis_client", return_value=mock_client):
execute_plan(
chunks,
query_fn,
query_hash="idfail",
cache_prefix="retry",
skip_cached=False,
)
progress = bqe.get_batch_progress("retry", "idfail")
assert progress is not None
assert progress.get("has_partial_failure") == "True"
assert progress.get("failed") == "1"
failed_ranges = json.loads(progress.get("failed_ranges", "[]"))
assert failed_ranges == []

View File

@@ -0,0 +1,73 @@
# -*- coding: utf-8 -*-
"""Unit tests for shared container resolution policy helpers."""
from __future__ import annotations
from mes_dashboard.services import container_resolution_policy as policy
def test_validate_resolution_request_rejects_empty_values():
    """An empty values list must yield a validation error message."""
    outcome = policy.validate_resolution_request("lot_id", [])
    assert outcome is not None
def test_validate_resolution_request_rejects_broad_pattern(monkeypatch):
    """A lone '%' wildcard violates the minimum literal-prefix requirement."""
    monkeypatch.setenv("CONTAINER_RESOLVE_PATTERN_MIN_PREFIX_LEN", "2")
    message = policy.validate_resolution_request("lot_id", ["%"])
    assert message is not None
    assert "萬用字元條件過於寬鬆" in message
def test_validate_resolution_request_allows_pattern_with_prefix(monkeypatch):
    """A wildcard with a sufficiently long literal prefix passes validation."""
    monkeypatch.setenv("CONTAINER_RESOLVE_PATTERN_MIN_PREFIX_LEN", "2")
    assert policy.validate_resolution_request("lot_id", ["GA26%"]) is None
def test_validate_resolution_result_rejects_excessive_expansion(monkeypatch):
    """A single token expanding past the per-token cap produces an error."""
    monkeypatch.setenv("CONTAINER_RESOLVE_MAX_EXPANSION_PER_TOKEN", "3")
    payload = {
        "data": [{"container_id": "C1"}],
        "expansion_info": {"GA%": 10},
    }
    message = policy.validate_resolution_result(payload)
    assert message is not None
    assert "單一條件展開過大" in message
def test_validate_resolution_result_rejects_excessive_container_count(monkeypatch):
    """More resolved containers than the global cap yields an oversize error."""
    monkeypatch.setenv("CONTAINER_RESOLVE_MAX_CONTAINER_IDS", "2")
    rows = [{"container_id": f"C{i}"} for i in (1, 2, 3)]
    message = policy.validate_resolution_result({"data": rows, "expansion_info": {}})
    assert message is not None
    assert "解析結果過大" in message
def test_validate_resolution_result_non_strict_allows_overflow(monkeypatch):
    """With strict=False, limit overflows are tolerated (no error returned)."""
    monkeypatch.setenv("CONTAINER_RESOLVE_MAX_CONTAINER_IDS", "2")
    oversized = {
        "data": [{"container_id": f"C{i}"} for i in (1, 2, 3)],
        "expansion_info": {"GA%": 999},
    }
    assert policy.validate_resolution_result(oversized, strict=False) is None
def test_extract_container_ids_deduplicates_and_preserves_order():
    """Duplicates are dropped, mixed key casing is accepted, and order is kept."""
    rows = [
        {"container_id": "C1"},
        {"container_id": "C1"},
        {"CONTAINERID": "C2"},
        {"container_id": "C3"},
    ]
    extracted = policy.extract_container_ids(rows)
    assert extracted == ["C1", "C2", "C3"]

View File

@@ -198,3 +198,60 @@ def test_fetch_events_sanitizes_nan_values(
result = EventFetcher.fetch_events(["CID-1"], "upstream_history") result = EventFetcher.fetch_events(["CID-1"], "upstream_history")
assert result["CID-1"][0]["VALUE"] is None assert result["CID-1"][0]["VALUE"] is None
@patch("mes_dashboard.services.event_fetcher.cache_set")
@patch("mes_dashboard.services.event_fetcher.cache_get", return_value=None)
@patch("mes_dashboard.services.event_fetcher.read_sql_df_slow_iter")
@patch("mes_dashboard.services.event_fetcher.SQLLoader.load")
def test_fetch_events_raises_when_parallel_batch_fails_and_partial_disabled(
    mock_sql_load,
    mock_iter,
    _mock_cache_get,
    _mock_cache_set,
    monkeypatch,
):
    """With partial results disabled, one failing batch must abort the whole fetch."""
    mock_sql_load.return_value = "SELECT * FROM t WHERE h.CONTAINERID = :container_id {{ WORKCENTER_FILTER }}"
    monkeypatch.setattr("mes_dashboard.services.event_fetcher.EVENT_FETCHER_ALLOW_PARTIAL_RESULTS", False)
    monkeypatch.setattr("mes_dashboard.services.event_fetcher.EVENT_FETCHER_MAX_WORKERS", 2)

    def failing_reader(sql, params, timeout_seconds=60):
        # Only the batch containing CID-1000 blows up; others yield no rows.
        if "CID-1000" in params.values():
            raise RuntimeError("chunk fail")
        return iter([])

    mock_iter.side_effect = failing_reader
    container_ids = [f"CID-{i}" for i in range(1001)]  # force >1 batch

    caught = None
    try:
        EventFetcher.fetch_events(container_ids, "history")
    except RuntimeError as exc:
        caught = exc
    assert caught is not None, "expected RuntimeError"
    # NOTE(review): assumes the fetcher wraps the batch error in its own
    # "chunk failed" message rather than re-raising "chunk fail" verbatim.
    assert "chunk failed" in str(caught)
@patch("mes_dashboard.services.event_fetcher.cache_set")
@patch("mes_dashboard.services.event_fetcher.cache_get", return_value=None)
@patch("mes_dashboard.services.event_fetcher.read_sql_df_slow_iter")
@patch("mes_dashboard.services.event_fetcher.SQLLoader.load")
def test_fetch_events_allows_partial_when_enabled(
    mock_sql_load,
    mock_iter,
    _mock_cache_get,
    _mock_cache_set,
    monkeypatch,
):
    """With partial results enabled, a failing batch is tolerated and skipped."""
    mock_sql_load.return_value = "SELECT * FROM t WHERE h.CONTAINERID = :container_id {{ WORKCENTER_FILTER }}"
    monkeypatch.setattr("mes_dashboard.services.event_fetcher.EVENT_FETCHER_ALLOW_PARTIAL_RESULTS", True)
    monkeypatch.setattr("mes_dashboard.services.event_fetcher.EVENT_FETCHER_MAX_WORKERS", 2)

    def failing_reader(sql, params, timeout_seconds=60):
        if "CID-1000" in params.values():
            raise RuntimeError("chunk fail")
        return iter([])

    mock_iter.side_effect = failing_reader
    container_ids = [f"CID-{i}" for i in range(1001)]

    # Surviving batches produced no rows, so the merged result is empty.
    assert EventFetcher.fetch_events(container_ids, "history") == {}

View File

@@ -77,7 +77,7 @@ class TestJobQueryEngineDecomposition:
result = job_svc.get_jobs_by_resources( result = job_svc.get_jobs_by_resources(
resource_ids=["R1"], resource_ids=["R1"],
start_date="2025-06-01", start_date="2025-06-01",
end_date="2025-06-30", end_date="2025-06-05",
) )
assert engine_calls["execute"] == 0 # Engine NOT used assert engine_calls["execute"] == 0 # Engine NOT used

View File

@@ -191,7 +191,7 @@ class TestErrorLeakageProtection:
def test_query_error_masks_internal_details(self, mock_read): def test_query_error_masks_internal_details(self, mock_read):
mock_read.side_effect = RuntimeError("ORA-00942: table or view does not exist") mock_read.side_effect = RuntimeError("ORA-00942: table or view does not exist")
result = get_jobs_by_resources(["RES001"], "2024-01-01", "2024-01-31") result = get_jobs_by_resources(["RES001"], "2024-01-01", "2024-01-05")
assert result["error"] == QUERY_ERROR_MESSAGE assert result["error"] == QUERY_ERROR_MESSAGE
assert "ORA-00942" not in result["error"] assert "ORA-00942" not in result["error"]

View File

@@ -85,7 +85,7 @@ class TestDetectionEngineDecomposition:
df = msd_svc._fetch_station_detection_data( df = msd_svc._fetch_station_detection_data(
start_date="2025-06-01", start_date="2025-06-01",
end_date="2025-06-30", end_date="2025-06-05",
station="測試", station="測試",
) )

View File

@@ -14,7 +14,7 @@ from unittest.mock import patch, MagicMock
from mes_dashboard import create_app from mes_dashboard import create_app
from mes_dashboard.core.cache import NoOpCache from mes_dashboard.core.cache import NoOpCache
from mes_dashboard.core.rate_limit import reset_rate_limits_for_tests from mes_dashboard.core.rate_limit import reset_rate_limits_for_tests
from mes_dashboard.services.query_tool_service import MAX_DATE_RANGE_DAYS, MAX_LOT_IDS from mes_dashboard.services.query_tool_service import MAX_DATE_RANGE_DAYS
@pytest.fixture @pytest.fixture
@@ -118,20 +118,19 @@ class TestResolveEndpoint:
data = json.loads(response.data) data = json.loads(response.data)
assert 'error' in data assert 'error' in data
def test_values_over_limit(self, client): def test_rejects_too_broad_wildcard(self, client):
"""Should reject values exceeding limit.""" """Should reject wildcard patterns that are too broad."""
values = [f'GA{i:09d}' for i in range(MAX_LOT_IDS + 1)]
response = client.post( response = client.post(
'/api/query-tool/resolve', '/api/query-tool/resolve',
json={ json={
'input_type': 'lot_id', 'input_type': 'lot_id',
'values': values 'values': ['%']
} }
) )
assert response.status_code == 400 assert response.status_code == 400
data = json.loads(response.data) data = json.loads(response.data)
assert 'error' in data assert 'error' in data
assert '超過上限' in data['error'] or str(MAX_LOT_IDS) in data['error'] assert '萬用字元條件過於寬鬆' in data['error']
@patch('mes_dashboard.routes.query_tool_routes.resolve_lots') @patch('mes_dashboard.routes.query_tool_routes.resolve_lots')
def test_resolve_success(self, mock_resolve, client): def test_resolve_success(self, mock_resolve, client):

View File

@@ -90,7 +90,7 @@ class TestValidateDateRange:
assert '格式' in result or 'format' in result.lower() assert '格式' in result or 'format' in result.lower()
class TestValidateLotInput: class TestValidateLotInput:
"""Tests for validate_lot_input function.""" """Tests for validate_lot_input function."""
def test_valid_lot_ids(self): def test_valid_lot_ids(self):
@@ -117,53 +117,24 @@ class TestValidateLotInput:
assert result is not None assert result is not None
assert '至少一個' in result assert '至少一個' in result
def test_exceeds_lot_id_limit(self): def test_large_input_list_allowed_when_no_count_cap(self, monkeypatch):
"""Should reject LOT IDs exceeding limit.""" """Should allow large lists when count cap is disabled."""
values = [f'GA{i:09d}' for i in range(MAX_LOT_IDS + 1)] monkeypatch.setenv("CONTAINER_RESOLVE_INPUT_MAX_VALUES", "0")
result = validate_lot_input('lot_id', values) values = [f'GA{i:09d}' for i in range(MAX_LOT_IDS + 50)]
assert result is not None result = validate_lot_input('lot_id', values)
assert '超過上限' in result assert result is None
assert str(MAX_LOT_IDS) in result
def test_exceeds_serial_number_limit(self):
"""Should reject serial numbers exceeding limit."""
values = [f'SN{i:06d}' for i in range(MAX_SERIAL_NUMBERS + 1)]
result = validate_lot_input('serial_number', values)
assert result is not None
assert '超過上限' in result
assert str(MAX_SERIAL_NUMBERS) in result
def test_exceeds_work_order_limit(self):
"""Should reject work orders exceeding limit."""
values = [f'WO{i:06d}' for i in range(MAX_WORK_ORDERS + 1)]
result = validate_lot_input('work_order', values)
assert result is not None
assert '超過上限' in result
assert str(MAX_WORK_ORDERS) in result
def test_exceeds_gd_work_order_limit(self): def test_rejects_too_broad_wildcard_pattern(self, monkeypatch):
"""Should reject GD work orders exceeding limit.""" """Should reject broad wildcard like '%' to prevent full scan."""
values = [f'GD{i:06d}' for i in range(MAX_GD_WORK_ORDERS + 1)] monkeypatch.setenv("CONTAINER_RESOLVE_PATTERN_MIN_PREFIX_LEN", "2")
result = validate_lot_input('gd_work_order', values) result = validate_lot_input('lot_id', ['%'])
assert result is not None assert result is not None
assert '超過上限' in result assert '萬用字元條件過於寬鬆' in result
assert str(MAX_GD_WORK_ORDERS) in result
def test_accepts_wildcard_with_prefix(self, monkeypatch):
def test_exactly_at_limit(self): monkeypatch.setenv("CONTAINER_RESOLVE_PATTERN_MIN_PREFIX_LEN", "2")
"""Should accept values exactly at limit.""" result = validate_lot_input('lot_id', ['GA25%'])
values = [f'GA{i:09d}' for i in range(MAX_LOT_IDS)] assert result is None
result = validate_lot_input('lot_id', values)
assert result is None
def test_unknown_input_type_uses_default_limit(self):
"""Should use default limit for unknown input types."""
values = [f'X{i}' for i in range(MAX_LOT_IDS)]
result = validate_lot_input('unknown_type', values)
assert result is None
values_over = [f'X{i}' for i in range(MAX_LOT_IDS + 1)]
result = validate_lot_input('unknown_type', values_over)
assert result is not None
class TestValidateEquipmentInput: class TestValidateEquipmentInput:

View File

@@ -3,6 +3,7 @@
from __future__ import annotations from __future__ import annotations
import json
from decimal import Decimal from decimal import Decimal
from unittest.mock import MagicMock from unittest.mock import MagicMock
@@ -400,6 +401,72 @@ class TestEngineDecompositionDateRange:
assert engine_calls["parallel"] == cache_svc._REJECT_ENGINE_PARALLEL assert engine_calls["parallel"] == cache_svc._REJECT_ENGINE_PARALLEL
assert engine_calls["max_rows_per_chunk"] == cache_svc._REJECT_ENGINE_MAX_ROWS_PER_CHUNK assert engine_calls["max_rows_per_chunk"] == cache_svc._REJECT_ENGINE_MAX_ROWS_PER_CHUNK
    def test_engine_chunk_uses_paged_fetch_without_truncation(self, monkeypatch):
        """Engine chunk should fetch all pages (offset paging), not truncate at page size."""
        import mes_dashboard.services.batch_query_engine as engine_mod

        # Record every offset the paged reader is called with, and capture the
        # frame / merge kwargs flowing through the faked engine.
        offsets = []
        captured = {"df": pd.DataFrame(), "merge_kwargs": None}

        def fake_read_sql(sql, params):
            # Simulate a 5-row table served in pages of `limit` rows starting
            # at `offset`; an empty frame signals the final page.
            offset = int(params.get("offset", 0))
            limit = int(params.get("limit", 0))
            offsets.append(offset)
            total_rows = 5
            remaining = max(total_rows - offset, 0)
            take = min(limit, remaining)
            if take <= 0:
                return pd.DataFrame()
            return pd.DataFrame(
                {
                    "CONTAINERID": [f"C{offset + i}" for i in range(take)],
                    "LOSSREASONNAME": ["R1"] * take,
                    "REJECT_TOTAL_QTY": [1] * take,
                }
            )

        def fake_execute_plan(chunks, query_fn, **kwargs):
            # Invoke the real per-chunk query function once with the configured
            # page size and capture everything it fetched.
            page_size = kwargs.get("max_rows_per_chunk")
            captured["df"] = query_fn(chunks[0], max_rows_per_chunk=page_size)
            return kwargs.get("query_hash", "qh")

        def fake_merge_chunks(prefix, qhash, **kwargs):
            captured["merge_kwargs"] = kwargs
            return captured["df"]

        # Page size 2 over 5 rows -> three pages expected at offsets 0, 2, 4.
        monkeypatch.setattr(cache_svc, "_REJECT_ENGINE_MAX_ROWS_PER_CHUNK", 2)
        # Force the time-decomposition engine path with a single chunk.
        monkeypatch.setattr(engine_mod, "should_decompose_by_time", lambda *_a, **_kw: True)
        monkeypatch.setattr(
            engine_mod,
            "decompose_by_time_range",
            lambda *_a, **_kw: [{"chunk_start": "2025-01-01", "chunk_end": "2025-01-31"}],
        )
        monkeypatch.setattr(engine_mod, "execute_plan", fake_execute_plan)
        monkeypatch.setattr(engine_mod, "merge_chunks", fake_merge_chunks)
        monkeypatch.setattr(cache_svc, "read_sql_df", fake_read_sql)
        # Neutralize caching, SQL building, validation and response shaping so
        # only the paging behaviour under test is exercised.
        monkeypatch.setattr(cache_svc, "_get_cached_df", lambda _qid: None)
        monkeypatch.setattr(cache_svc, "_prepare_sql", lambda *a, **kw: "SELECT 1 FROM dual")
        monkeypatch.setattr(cache_svc, "_build_where_clause", lambda **kw: ("", {}, {}))
        monkeypatch.setattr(cache_svc, "_validate_range", lambda *_a, **_kw: None)
        monkeypatch.setattr(cache_svc, "_apply_policy_filters", lambda df, **kw: df)
        monkeypatch.setattr(cache_svc, "_store_query_result", lambda *_a, **_kw: None)
        monkeypatch.setattr(cache_svc, "redis_clear_batch", lambda *_a, **_kw: 0)
        monkeypatch.setattr(
            cache_svc,
            "_build_primary_response",
            lambda qid, df, meta, ri: {"query_id": qid, "rows": len(df)},
        )

        result = cache_svc.execute_primary_query(
            mode="date_range",
            start_date="2025-01-01",
            end_date="2025-03-01",
        )

        # All 5 rows fetched across three pages; merge_chunks got no extra kwargs.
        assert result["rows"] == 5
        assert offsets == [0, 2, 4]
        assert captured["merge_kwargs"] == {}
def test_short_range_skips_engine(self, monkeypatch): def test_short_range_skips_engine(self, monkeypatch):
"""Short date range (<= threshold) uses direct path, no engine.""" """Short date range (<= threshold) uses direct path, no engine."""
import mes_dashboard.services.batch_query_engine as engine_mod import mes_dashboard.services.batch_query_engine as engine_mod
@@ -453,7 +520,7 @@ class TestEngineDecompositionDateRange:
result = cache_svc.execute_primary_query( result = cache_svc.execute_primary_query(
mode="date_range", mode="date_range",
start_date="2025-06-01", start_date="2025-06-01",
end_date="2025-06-30", end_date="2025-06-10",
) )
assert engine_calls["decompose"] == 0 # Engine NOT used assert engine_calls["decompose"] == 0 # Engine NOT used
@@ -629,7 +696,7 @@ def test_large_result_spills_to_parquet_and_view_export_use_spool_fallback(monke
result = cache_svc.execute_primary_query( result = cache_svc.execute_primary_query(
mode="date_range", mode="date_range",
start_date="2025-01-01", start_date="2025-01-01",
end_date="2025-01-31", end_date="2025-01-05",
) )
query_id = result["query_id"] query_id = result["query_id"]
@@ -651,3 +718,185 @@ def test_large_result_spills_to_parquet_and_view_export_use_spool_fallback(monke
export_rows = cache_svc.export_csv_from_cache(query_id=query_id) export_rows = cache_svc.export_csv_from_cache(query_id=query_id)
assert export_rows is not None assert export_rows is not None
assert len(export_rows) == len(df) assert len(export_rows) == len(df)
def test_resolve_containers_deduplicates_container_ids(monkeypatch):
    """Resolver output with repeated container ids collapses to unique, ordered ids."""
    def fake_lot_resolver(values):
        return {
            "data": [
                {"container_id": "CID-1"},
                {"container_id": "CID-1"},
                {"container_id": "CID-2"},
            ],
            "input_count": len(values),
            "not_found": [],
            "expansion_info": {"LOT%": 2},
        }

    monkeypatch.setattr(cache_svc, "_RESOLVERS", {"lot": fake_lot_resolver})
    # Generous limits so no guardrail interferes with the dedup check.
    monkeypatch.setenv("CONTAINER_RESOLVE_MAX_EXPANSION_PER_TOKEN", "10")
    monkeypatch.setenv("CONTAINER_RESOLVE_MAX_CONTAINER_IDS", "10")

    resolved = cache_svc.resolve_containers("lot", ["LOT%"])

    assert resolved["container_ids"] == ["CID-1", "CID-2"]
    assert resolved["resolution_info"]["resolved_count"] == 2
def test_resolve_containers_allows_oversized_expansion_and_sets_guardrail(monkeypatch):
    """Oversized expansion is not fatal; guardrail metadata records the overflow."""
    def fake_lot_resolver(values):
        return {
            "data": [{"container_id": "CID-1"}],
            "input_count": len(values),
            "not_found": [],
            "expansion_info": {"GA%": 999},
        }

    monkeypatch.setattr(cache_svc, "_RESOLVERS", {"lot": fake_lot_resolver})
    monkeypatch.setenv("CONTAINER_RESOLVE_MAX_EXPANSION_PER_TOKEN", "50")
    monkeypatch.setenv("CONTAINER_RESOLVE_PATTERN_MIN_PREFIX_LEN", "2")

    resolved = cache_svc.resolve_containers("lot", ["GA%"])
    guardrail = resolved["resolution_info"].get("guardrail") or {}

    assert guardrail.get("overflow") is True
    assert len(guardrail.get("expansion_offenders") or []) == 1
def test_partial_failure_in_response_meta(monkeypatch):
    # Chunk failures recorded by the batch engine must be surfaced in the API
    # response meta (has_partial_failure / failed_chunk_count / failed_ranges)
    # when the engine path is taken.
    import mes_dashboard.services.batch_query_engine as engine_mod

    df = pd.DataFrame({"CONTAINERID": ["C1"], "LOSSREASONNAME": ["R1"], "REJECT_TOTAL_QTY": [1]})
    # Neutralize everything around the engine path: no cache hit, no range
    # validation, trivial SQL plumbing, passthrough policy filters.
    monkeypatch.setattr(cache_svc, "_get_cached_df", lambda _qid: None)
    monkeypatch.setattr(cache_svc, "_validate_range", lambda *_a, **_kw: None)
    monkeypatch.setattr(cache_svc, "_build_where_clause", lambda **kw: ("", {}, {}))
    monkeypatch.setattr(cache_svc, "_prepare_sql", lambda *a, **kw: "SELECT 1 FROM dual")
    monkeypatch.setattr(cache_svc, "_apply_policy_filters", lambda data, **kw: data)
    monkeypatch.setattr(cache_svc, "_store_query_result", lambda *_a, **_kw: False)
    monkeypatch.setattr(cache_svc, "redis_clear_batch", lambda *_a, **_kw: None)
    # Response builder echoes meta so the assertions can inspect it directly.
    monkeypatch.setattr(
        cache_svc,
        "_build_primary_response",
        lambda qid, result_df, meta, resolution_info: {"query_id": qid, "meta": meta},
    )
    monkeypatch.setattr(cache_svc, "_store_partial_failure_flag", lambda *_a, **_kw: None)
    # Force the time-decomposition engine path with a single chunk.
    monkeypatch.setattr(engine_mod, "should_decompose_by_time", lambda *_a, **_kw: True)
    monkeypatch.setattr(
        engine_mod,
        "decompose_by_time_range",
        lambda *_a, **_kw: [{"chunk_start": "2025-01-01", "chunk_end": "2025-01-10"}],
    )
    monkeypatch.setattr(engine_mod, "execute_plan", lambda *a, **kw: kw.get("query_hash"))
    monkeypatch.setattr(engine_mod, "merge_chunks", lambda *a, **kw: df.copy())
    # Simulate the Redis progress hash reporting two failed chunks; values are
    # strings because Redis hash fields are stored as strings.
    monkeypatch.setattr(
        engine_mod,
        "get_batch_progress",
        lambda *_a, **_kw: {
            "has_partial_failure": "True",
            "failed": "2",
            "failed_ranges": json.dumps([{"start": "2025-01-01", "end": "2025-01-10"}]),
        },
    )

    result = cache_svc.execute_primary_query(
        mode="date_range",
        start_date="2025-01-01",
        end_date="2025-03-01",
    )
    meta = result.get("meta") or {}

    # Meta should carry parsed, typed values, not the raw Redis strings.
    assert meta.get("has_partial_failure") is True
    assert meta.get("failed_chunk_count") == 2
    assert meta.get("failed_ranges") == [{"start": "2025-01-01", "end": "2025-01-10"}]
def test_cache_hit_restores_partial_failure(monkeypatch):
    """A cached result must re-surface the persisted partial-failure metadata."""
    cached = pd.DataFrame(
        {"CONTAINERID": ["C1"], "LOSSREASONNAME": ["R1"], "REJECT_TOTAL_QTY": [1]}
    )
    # Cache hit path: _get_cached_df returns data, engine is never consulted.
    monkeypatch.setattr(cache_svc, "_get_cached_df", lambda _qid: cached)
    monkeypatch.setattr(cache_svc, "_validate_range", lambda *_a, **_kw: None)
    monkeypatch.setattr(cache_svc, "_build_where_clause", lambda **kw: ("", {}, {}))
    monkeypatch.setattr(cache_svc, "_apply_policy_filters", lambda data, **kw: data)
    monkeypatch.setattr(
        cache_svc,
        "_load_partial_failure_flag",
        lambda _qid: {
            "has_partial_failure": True,
            "failed_chunk_count": 3,
            "failed_ranges": [],
        },
    )
    monkeypatch.setattr(
        cache_svc,
        "_build_primary_response",
        lambda qid, result_df, meta, resolution_info: {"query_id": qid, "meta": meta},
    )

    result = cache_svc.execute_primary_query(
        mode="date_range",
        start_date="2025-01-01",
        end_date="2025-01-31",
    )
    meta = result.get("meta") or {}

    assert meta.get("has_partial_failure") is True
    assert meta.get("failed_chunk_count") == 3
    assert meta.get("failed_ranges") == []
@pytest.mark.parametrize(
    "store_result,expected_ttl",
    [
        # Result spooled to disk -> flag TTL follows the spool TTL.
        (True, cache_svc._REJECT_ENGINE_SPOOL_TTL_SECONDS),
        # Result kept in the regular cache -> flag TTL follows the cache TTL.
        (False, cache_svc._CACHE_TTL),
    ],
)
def test_partial_failure_ttl_matches_spool(monkeypatch, store_result, expected_ttl):
    # The partial-failure flag must expire in lockstep with the stored data it
    # annotates, so a later cache hit can never outlive its failure metadata.
    import mes_dashboard.services.batch_query_engine as engine_mod

    df = pd.DataFrame({"CONTAINERID": ["C1"], "LOSSREASONNAME": ["R1"], "REJECT_TOTAL_QTY": [1]})
    captured = {"ttls": []}
    # Neutralize surrounding plumbing; _store_query_result reports whether the
    # result was spooled (parametrized above).
    monkeypatch.setattr(cache_svc, "_get_cached_df", lambda _qid: None)
    monkeypatch.setattr(cache_svc, "_validate_range", lambda *_a, **_kw: None)
    monkeypatch.setattr(cache_svc, "_build_where_clause", lambda **kw: ("", {}, {}))
    monkeypatch.setattr(cache_svc, "_prepare_sql", lambda *a, **kw: "SELECT 1 FROM dual")
    monkeypatch.setattr(cache_svc, "_apply_policy_filters", lambda data, **kw: data)
    monkeypatch.setattr(cache_svc, "_store_query_result", lambda *_a, **_kw: store_result)
    monkeypatch.setattr(cache_svc, "redis_clear_batch", lambda *_a, **_kw: None)
    monkeypatch.setattr(
        cache_svc,
        "_build_primary_response",
        lambda qid, result_df, meta, resolution_info: {"query_id": qid, "meta": meta},
    )
    # Capture the TTL the flag is persisted with.
    monkeypatch.setattr(
        cache_svc,
        "_store_partial_failure_flag",
        lambda _qid, _failed, _ranges, ttl: captured["ttls"].append(ttl),
    )
    # Force the engine path with a single chunk and a reported failure.
    monkeypatch.setattr(engine_mod, "should_decompose_by_time", lambda *_a, **_kw: True)
    monkeypatch.setattr(
        engine_mod,
        "decompose_by_time_range",
        lambda *_a, **_kw: [{"chunk_start": "2025-01-01", "chunk_end": "2025-01-10"}],
    )
    monkeypatch.setattr(engine_mod, "execute_plan", lambda *a, **kw: kw.get("query_hash"))
    monkeypatch.setattr(engine_mod, "merge_chunks", lambda *a, **kw: df.copy())
    monkeypatch.setattr(
        engine_mod,
        "get_batch_progress",
        lambda *_a, **_kw: {"has_partial_failure": "True", "failed": "1", "failed_ranges": "[]"},
    )

    cache_svc.execute_primary_query(
        mode="date_range",
        start_date="2025-01-01",
        end_date="2025-03-01",
    )

    # Exactly one persist call, with the TTL matching the storage layer used.
    assert captured["ttls"] == [expected_ttl]