feat(reject-history): fix silent data loss by propagating partial failure metadata to frontend

Chunk failures in BatchQueryEngine were silently discarded — `has_partial_failure` was tracked
in Redis but never surfaced to the API response or frontend. Users could see incomplete data
without any warning. This commit closes the gap end-to-end:

Backend:
- Track failed chunk time ranges (`failed_ranges`) in batch engine progress metadata
- Add single retry for transient Oracle errors (timeout, connection) in `_execute_single_chunk`
- Read `get_batch_progress()` after merge but before `redis_clear_batch()` cleanup
- Inject `has_partial_failure`, `failed_chunk_count`, `failed_ranges` into API response meta
- Persist partial failure flag to independent Redis key with TTL aligned to data storage layer
- Add shared container-resolution policy module with wildcard/expansion guardrails
- Refactor reason filter from single-value to multi-select (`reason` → `reasons`)

Frontend:
- Add client-side date range validation (730-day limit) before API submission
- Display amber warning banner on partial failure with specific failed date ranges
- Support generic fallback message for container-mode queries without date ranges
- Update FilterPanel to support multi-select reason chips

Specs & tests:
- Create batch-query-resilience spec; update reject-history-api and reject-history-page specs
- Add 7 new tests for retry, memory guard, failed ranges, partial failure propagation, TTL
- Cross-service regression verified (hold, resource, job, msd — 411 tests pass)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
egg
2026-03-03 14:00:07 +08:00
parent f1506787fb
commit a275c30c0e
35 changed files with 3028 additions and 1460 deletions

View File

@@ -59,6 +59,16 @@ QUERY_TOOL_MAX_CONTAINER_IDS=200
RESOURCE_DETAIL_DEFAULT_LIMIT=500 RESOURCE_DETAIL_DEFAULT_LIMIT=500
RESOURCE_DETAIL_MAX_LIMIT=500 RESOURCE_DETAIL_MAX_LIMIT=500
# Shared container-resolution guardrails
# 0 = disable raw input count cap (recommended: rely on expansion limits instead)
CONTAINER_RESOLVE_INPUT_MAX_VALUES=0
# Wildcard pattern must include this many literal-prefix chars before %/_ (e.g., GA25%)
CONTAINER_RESOLVE_PATTERN_MIN_PREFIX_LEN=4
# Per-token expansion guard (avoid one wildcard exploding into too many container IDs)
CONTAINER_RESOLVE_MAX_EXPANSION_PER_TOKEN=2000
# Total resolved container-ID guard for a single resolve request
CONTAINER_RESOLVE_MAX_CONTAINER_IDS=30000
# Trust boundary for forwarded headers (safe default: false) # Trust boundary for forwarded headers (safe default: false)
# Direct-exposure deployment (no reverse proxy): keep this false # Direct-exposure deployment (no reverse proxy): keep this false
TRUST_PROXY_HEADERS=false TRUST_PROXY_HEADERS=false
@@ -101,14 +111,14 @@ GUNICORN_WORKERS=2
GUNICORN_THREADS=4 GUNICORN_THREADS=4
# Worker timeout (seconds): should stay above DB/query-tool slow paths # Worker timeout (seconds): should stay above DB/query-tool slow paths
GUNICORN_TIMEOUT=130 GUNICORN_TIMEOUT=360
# Graceful shutdown timeout for worker reloads (seconds) # Graceful shutdown timeout for worker reloads (seconds)
GUNICORN_GRACEFUL_TIMEOUT=60 GUNICORN_GRACEFUL_TIMEOUT=300
# Worker recycle policy (set 0 to disable) # Worker recycle policy (set 0 to disable)
GUNICORN_MAX_REQUESTS=5000 GUNICORN_MAX_REQUESTS=1200
GUNICORN_MAX_REQUESTS_JITTER=500 GUNICORN_MAX_REQUESTS_JITTER=300
# ============================================================ # ============================================================
# Redis Configuration (for WIP cache) # Redis Configuration (for WIP cache)
@@ -201,6 +211,8 @@ TRACE_EVENTS_MAX_WORKERS=2
# Max parallel workers for EventFetcher batch queries (per domain) # Max parallel workers for EventFetcher batch queries (per domain)
# Recommend: 2 (peak concurrent slow queries = TRACE_EVENTS_MAX_WORKERS × this) # Recommend: 2 (peak concurrent slow queries = TRACE_EVENTS_MAX_WORKERS × this)
EVENT_FETCHER_MAX_WORKERS=2 EVENT_FETCHER_MAX_WORKERS=2
# false = any failed batch raises error (avoid silent partial data)
EVENT_FETCHER_ALLOW_PARTIAL_RESULTS=false
# Max parallel workers for forward pipeline WIP+rejects fetching # Max parallel workers for forward pipeline WIP+rejects fetching
FORWARD_PIPELINE_MAX_WORKERS=2 FORWARD_PIPELINE_MAX_WORKERS=2
@@ -351,7 +363,7 @@ REJECT_ENGINE_SPOOL_CLEANUP_INTERVAL_SECONDS=300
REJECT_ENGINE_SPOOL_ORPHAN_GRACE_SECONDS=600 REJECT_ENGINE_SPOOL_ORPHAN_GRACE_SECONDS=600
# Batch query engine thresholds # Batch query engine thresholds
BATCH_QUERY_TIME_THRESHOLD_DAYS=60 BATCH_QUERY_TIME_THRESHOLD_DAYS=10
BATCH_QUERY_ID_THRESHOLD=1000 BATCH_QUERY_ID_THRESHOLD=1000
BATCH_CHUNK_MAX_MEMORY_MB=256 BATCH_CHUNK_MAX_MEMORY_MB=256

View File

@@ -284,6 +284,15 @@ QUERY_TOOL_MAX_CONTAINER_IDS=200
RESOURCE_DETAIL_DEFAULT_LIMIT=500 RESOURCE_DETAIL_DEFAULT_LIMIT=500
RESOURCE_DETAIL_MAX_LIMIT=500 RESOURCE_DETAIL_MAX_LIMIT=500
# 共用解析防護(LOT/WAFER/工單)
CONTAINER_RESOLVE_INPUT_MAX_VALUES=0 # 0=不限制輸入筆數
CONTAINER_RESOLVE_PATTERN_MIN_PREFIX_LEN=4 # 萬用字元前最少字首長度(例如 GA25%)
CONTAINER_RESOLVE_MAX_EXPANSION_PER_TOKEN=2000
CONTAINER_RESOLVE_MAX_CONTAINER_IDS=30000
# EventFetcher 批次容錯策略
EVENT_FETCHER_ALLOW_PARTIAL_RESULTS=false # false=任一批次失敗即整體失敗,避免靜默缺資料
# 反向代理信任邊界(無反向代理時務必維持 false # 反向代理信任邊界(無反向代理時務必維持 false
TRUST_PROXY_HEADERS=false TRUST_PROXY_HEADERS=false
TRUSTED_PROXY_IPS=127.0.0.1 TRUSTED_PROXY_IPS=127.0.0.1

View File

@@ -35,7 +35,7 @@ export function toRejectFilterSnapshot(input = {}) {
endDate: normalizeText(input.endDate), endDate: normalizeText(input.endDate),
workcenterGroups: normalizeArray(input.workcenterGroups), workcenterGroups: normalizeArray(input.workcenterGroups),
packages: normalizeArray(input.packages), packages: normalizeArray(input.packages),
reason: normalizeText(input.reason), reasons: normalizeArray(input.reasons),
includeExcludedScrap: normalizeBoolean(input.includeExcludedScrap, false), includeExcludedScrap: normalizeBoolean(input.includeExcludedScrap, false),
excludeMaterialScrap: normalizeBoolean(input.excludeMaterialScrap, true), excludeMaterialScrap: normalizeBoolean(input.excludeMaterialScrap, true),
excludePbDiode: normalizeBoolean(input.excludePbDiode, true), excludePbDiode: normalizeBoolean(input.excludePbDiode, true),
@@ -77,7 +77,7 @@ export function pruneRejectFilterSelections(filters = {}, options = {}) {
const removed = { const removed = {
workcenterGroups: [], workcenterGroups: [],
packages: [], packages: [],
reason: '', reasons: [],
}; };
if (hasWorkcenterOptions) { if (hasWorkcenterOptions) {
@@ -100,9 +100,14 @@ export function pruneRejectFilterSelections(filters = {}, options = {}) {
}); });
} }
if (next.reason && hasReasonOptions && !validReasons.has(next.reason)) { if (hasReasonOptions) {
removed.reason = next.reason; next.reasons = next.reasons.filter((value) => {
next.reason = ''; if (validReasons.has(value)) {
return true;
}
removed.reasons.push(value);
return false;
});
} }
return { return {
@@ -111,7 +116,7 @@ export function pruneRejectFilterSelections(filters = {}, options = {}) {
removedCount: removedCount:
removed.workcenterGroups.length + removed.workcenterGroups.length +
removed.packages.length + removed.packages.length +
(removed.reason ? 1 : 0), removed.reasons.length,
}; };
} }
@@ -126,13 +131,13 @@ export function buildRejectOptionsRequestParams(filters = {}) {
exclude_material_scrap: next.excludeMaterialScrap, exclude_material_scrap: next.excludeMaterialScrap,
exclude_pb_diode: next.excludePbDiode, exclude_pb_diode: next.excludePbDiode,
}; };
if (next.reason) { if (next.reasons.length > 0) {
params.reason = next.reason; params.reasons = next.reasons;
} }
return params; return params;
} }
export function buildRejectCommonQueryParams(filters = {}, { reason = '' } = {}) { export function buildRejectCommonQueryParams(filters = {}, { reasons: extraReasons = [] } = {}) {
const next = toRejectFilterSnapshot(filters); const next = toRejectFilterSnapshot(filters);
const params = { const params = {
start_date: next.startDate, start_date: next.startDate,
@@ -143,9 +148,9 @@ export function buildRejectCommonQueryParams(filters = {}, { reason = '' } = {})
exclude_material_scrap: next.excludeMaterialScrap, exclude_material_scrap: next.excludeMaterialScrap,
exclude_pb_diode: next.excludePbDiode, exclude_pb_diode: next.excludePbDiode,
}; };
const effectiveReason = normalizeText(reason) || next.reason; const merged = normalizeArray([...next.reasons, ...normalizeArray(extraReasons)]);
if (effectiveReason) { if (merged.length > 0) {
params.reasons = [effectiveReason]; params.reasons = merged;
} }
return params; return params;
} }
@@ -168,6 +173,30 @@ export function parseMultiLineInput(text) {
return result; return result;
} }
export function validateDateRange(startDate, endDate) {
  // Client-side guard mirroring the backend 730-day query limit.
  // Returns a user-facing (zh-TW) error message, or '' when the range is valid.
  const MAX_QUERY_DAYS = 730;
  const start = normalizeText(startDate);
  const end = normalizeText(endDate);
  if (!start || !end) {
    return '請先設定開始與結束日期';
  }
  // Parse as local midnight so the comparison matches what the user picked.
  const startDt = new Date(`${start}T00:00:00`);
  const endDt = new Date(`${end}T00:00:00`);
  if (Number.isNaN(startDt.getTime()) || Number.isNaN(endDt.getTime())) {
    return '日期格式不正確';
  }
  if (endDt < startDt) {
    return '結束日期必須大於起始日期';
  }
  const dayMs = 24 * 60 * 60 * 1000;
  // Math.round (not Math.floor): two local-midnight dates straddling a DST
  // transition differ by n*24h ± 1h, and floor would undercount the inclusive
  // span by one day on the spring-forward side.
  const days = Math.round((endDt - startDt) / dayMs) + 1;
  if (days > MAX_QUERY_DAYS) {
    return '查詢範圍不可超過 730 天(約兩年)';
  }
  return '';
}
export function buildViewParams(queryId, { export function buildViewParams(queryId, {
supplementaryFilters = {}, supplementaryFilters = {},
metricFilter = 'all', metricFilter = 'all',
@@ -185,8 +214,8 @@ export function buildViewParams(queryId, {
if (supplementaryFilters.workcenterGroups?.length > 0) { if (supplementaryFilters.workcenterGroups?.length > 0) {
params.workcenter_groups = supplementaryFilters.workcenterGroups; params.workcenter_groups = supplementaryFilters.workcenterGroups;
} }
if (supplementaryFilters.reason) { if (supplementaryFilters.reasons?.length > 0) {
params.reason = supplementaryFilters.reason; params.reasons = supplementaryFilters.reasons;
} }
if (metricFilter && metricFilter !== 'all') { if (metricFilter && metricFilter !== 'all') {
params.metric_filter = metricFilter; params.metric_filter = metricFilter;

View File

@@ -5,6 +5,7 @@ import { apiGet, apiPost } from '../core/api.js';
import { import {
buildViewParams, buildViewParams,
parseMultiLineInput, parseMultiLineInput,
validateDateRange,
} from '../core/reject-history-filters.js'; } from '../core/reject-history-filters.js';
import { replaceRuntimeHistory } from '../core/shell-navigation.js'; import { replaceRuntimeHistory } from '../core/shell-navigation.js';
@@ -104,14 +105,14 @@ const availableFilters = ref({ workcenterGroups: [], packages: [], reasons: [] }
const supplementaryFilters = reactive({ const supplementaryFilters = reactive({
packages: [], packages: [],
workcenterGroups: [], workcenterGroups: [],
reason: '', reasons: [],
}); });
// ---- Interactive state ---- // ---- Interactive state ----
const page = ref(1); const page = ref(1);
const selectedTrendDates = ref([]); const selectedTrendDates = ref([]);
const trendLegendSelected = ref({ '扣帳報廢量': true, '不扣帳報廢量': true }); const trendLegendSelected = ref({ '扣帳報廢量': true, '不扣帳報廢量': true });
const paretoDisplayScope = ref('all'); const paretoDisplayScope = ref('top20');
const paretoSelections = reactive(createEmptyParetoSelections()); const paretoSelections = reactive(createEmptyParetoSelections());
const paretoData = reactive(createEmptyParetoData()); const paretoData = reactive(createEmptyParetoData());
@@ -146,6 +147,7 @@ const loading = reactive({
exporting: false, exporting: false,
}); });
const errorMessage = ref(''); const errorMessage = ref('');
const partialFailureWarning = ref('');
const lastQueryAt = ref(''); const lastQueryAt = ref('');
// ---- Request staleness tracking ---- // ---- Request staleness tracking ----
@@ -241,8 +243,8 @@ function buildBatchParetoParams() {
if (supplementaryFilters.workcenterGroups.length > 0) { if (supplementaryFilters.workcenterGroups.length > 0) {
params.workcenter_groups = supplementaryFilters.workcenterGroups; params.workcenter_groups = supplementaryFilters.workcenterGroups;
} }
if (supplementaryFilters.reason) { if (supplementaryFilters.reasons.length > 0) {
params.reason = supplementaryFilters.reason; params.reasons = supplementaryFilters.reasons;
} }
if (selectedTrendDates.value.length > 0) { if (selectedTrendDates.value.length > 0) {
params.trend_dates = selectedTrendDates.value; params.trend_dates = selectedTrendDates.value;
@@ -301,11 +303,20 @@ async function executePrimaryQuery() {
loading.querying = true; loading.querying = true;
loading.list = true; loading.list = true;
errorMessage.value = ''; errorMessage.value = '';
partialFailureWarning.value = '';
try { try {
const body = { mode: queryMode.value }; const body = { mode: queryMode.value };
if (queryMode.value === 'date_range') { if (queryMode.value === 'date_range') {
const dateValidationError = validateDateRange(
draftFilters.startDate,
draftFilters.endDate,
);
if (dateValidationError) {
errorMessage.value = dateValidationError;
return;
}
body.start_date = draftFilters.startDate; body.start_date = draftFilters.startDate;
body.end_date = draftFilters.endDate; body.end_date = draftFilters.endDate;
} else { } else {
@@ -321,6 +332,19 @@ async function executePrimaryQuery() {
if (isStaleRequest(requestId)) return; if (isStaleRequest(requestId)) return;
const result = unwrapApiResult(resp, '主查詢執行失敗'); const result = unwrapApiResult(resp, '主查詢執行失敗');
const meta = result.meta || {};
if (meta.has_partial_failure) {
const failedChunkCount = Number(meta.failed_chunk_count || 0);
const failedRanges = Array.isArray(meta.failed_ranges) ? meta.failed_ranges : [];
if (failedRanges.length > 0) {
const rangesText = failedRanges
.map((item) => `${item.start} ~ ${item.end}`)
.join('、');
partialFailureWarning.value = `警告:以下日期區間的資料擷取失敗(${failedChunkCount} 個批次):${rangesText}。目前顯示結果可能不完整。`;
} else {
partialFailureWarning.value = `警告:${failedChunkCount} 個查詢批次的資料擷取失敗。目前顯示結果可能不完整。`;
}
}
committedPrimary.mode = queryMode.value; committedPrimary.mode = queryMode.value;
committedPrimary.startDate = draftFilters.startDate; committedPrimary.startDate = draftFilters.startDate;
@@ -344,7 +368,7 @@ async function executePrimaryQuery() {
supplementaryFilters.packages = []; supplementaryFilters.packages = [];
supplementaryFilters.workcenterGroups = []; supplementaryFilters.workcenterGroups = [];
supplementaryFilters.reason = ''; supplementaryFilters.reasons = [];
page.value = 1; page.value = 1;
selectedTrendDates.value = []; selectedTrendDates.value = [];
resetParetoSelections(); resetParetoSelections();
@@ -445,7 +469,7 @@ function clearFilters() {
draftFilters.excludeMaterialScrap = true; draftFilters.excludeMaterialScrap = true;
draftFilters.excludePbDiode = true; draftFilters.excludePbDiode = true;
draftFilters.paretoTop80 = true; draftFilters.paretoTop80 = true;
paretoDisplayScope.value = 'all'; paretoDisplayScope.value = 'top20';
resetParetoSelections(); resetParetoSelections();
void executePrimaryQuery(); void executePrimaryQuery();
} }
@@ -520,7 +544,7 @@ function clearParetoSelection() {
function onSupplementaryChange(filters) { function onSupplementaryChange(filters) {
supplementaryFilters.packages = filters.packages || []; supplementaryFilters.packages = filters.packages || [];
supplementaryFilters.workcenterGroups = filters.workcenterGroups || []; supplementaryFilters.workcenterGroups = filters.workcenterGroups || [];
supplementaryFilters.reason = filters.reason || ''; supplementaryFilters.reasons = filters.reasons || [];
page.value = 1; page.value = 1;
selectedTrendDates.value = []; selectedTrendDates.value = [];
resetParetoSelections(); resetParetoSelections();
@@ -545,7 +569,7 @@ function removeFilterChip(chip) {
} }
if (chip.type === 'reason') { if (chip.type === 'reason') {
supplementaryFilters.reason = ''; supplementaryFilters.reasons = supplementaryFilters.reasons.filter((r) => r !== chip.value);
page.value = 1; page.value = 1;
updateUrlState(); updateUrlState();
void Promise.all([refreshView(), fetchBatchPareto()]); void Promise.all([refreshView(), fetchBatchPareto()]);
@@ -584,7 +608,7 @@ async function exportCsv() {
params.set('query_id', queryId.value); params.set('query_id', queryId.value);
for (const pkg of supplementaryFilters.packages) params.append('packages', pkg); for (const pkg of supplementaryFilters.packages) params.append('packages', pkg);
for (const wc of supplementaryFilters.workcenterGroups) params.append('workcenter_groups', wc); for (const wc of supplementaryFilters.workcenterGroups) params.append('workcenter_groups', wc);
if (supplementaryFilters.reason) params.set('reason', supplementaryFilters.reason); for (const r of supplementaryFilters.reasons) params.append('reasons', r);
params.set('metric_filter', metricFilterParam()); params.set('metric_filter', metricFilterParam());
for (const date of selectedTrendDates.value) params.append('trend_dates', date); for (const date of selectedTrendDates.value) params.append('trend_dates', date);
for (const [dimension, key] of Object.entries(PARETO_SELECTION_PARAM_MAP)) { for (const [dimension, key] of Object.entries(PARETO_SELECTION_PARAM_MAP)) {
@@ -760,13 +784,13 @@ const activeFilterChips = computed(() => {
value: '', value: '',
}); });
if (supplementaryFilters.reason) { for (const reason of supplementaryFilters.reasons) {
chips.push({ chips.push({
key: `reason:${supplementaryFilters.reason}`, key: `reason:${reason}`,
label: `原因: ${supplementaryFilters.reason}`, label: `原因: ${reason}`,
removable: true, removable: true,
type: 'reason', type: 'reason',
value: supplementaryFilters.reason, value: reason,
}); });
} }
@@ -866,16 +890,14 @@ function updateUrlState() {
appendArrayParams(params, 'packages', supplementaryFilters.packages); appendArrayParams(params, 'packages', supplementaryFilters.packages);
appendArrayParams(params, 'workcenter_groups', supplementaryFilters.workcenterGroups); appendArrayParams(params, 'workcenter_groups', supplementaryFilters.workcenterGroups);
if (supplementaryFilters.reason) { appendArrayParams(params, 'reasons', supplementaryFilters.reasons);
params.set('reason', supplementaryFilters.reason);
}
appendArrayParams(params, 'trend_dates', selectedTrendDates.value); appendArrayParams(params, 'trend_dates', selectedTrendDates.value);
for (const [dimension, key] of Object.entries(PARETO_SELECTION_PARAM_MAP)) { for (const [dimension, key] of Object.entries(PARETO_SELECTION_PARAM_MAP)) {
appendArrayParams(params, key, paretoSelections[dimension] || []); appendArrayParams(params, key, paretoSelections[dimension] || []);
} }
if (paretoDisplayScope.value !== 'all') { if (paretoDisplayScope.value !== 'top20') {
params.set('pareto_display_scope', paretoDisplayScope.value); params.set('pareto_display_scope', paretoDisplayScope.value);
} }
if (!committedPrimary.paretoTop80) { if (!committedPrimary.paretoTop80) {
@@ -945,7 +967,7 @@ function restoreFromUrl() {
supplementaryFilters.packages = readArrayParam(params, 'packages'); supplementaryFilters.packages = readArrayParam(params, 'packages');
supplementaryFilters.workcenterGroups = readArrayParam(params, 'workcenter_groups'); supplementaryFilters.workcenterGroups = readArrayParam(params, 'workcenter_groups');
supplementaryFilters.reason = String(params.get('reason') || '').trim(); supplementaryFilters.reasons = readArrayParam(params, 'reasons');
selectedTrendDates.value = readArrayParam(params, 'trend_dates'); selectedTrendDates.value = readArrayParam(params, 'trend_dates');
@@ -969,7 +991,7 @@ function restoreFromUrl() {
} }
const urlParetoDisplayScope = String(params.get('pareto_display_scope') || '').trim().toLowerCase(); const urlParetoDisplayScope = String(params.get('pareto_display_scope') || '').trim().toLowerCase();
paretoDisplayScope.value = urlParetoDisplayScope === 'top20' ? 'top20' : 'all'; paretoDisplayScope.value = urlParetoDisplayScope === 'all' ? 'all' : 'top20';
const parsedPage = Number(params.get('page') || '1'); const parsedPage = Number(params.get('page') || '1');
page.value = Number.isFinite(parsedPage) && parsedPage > 0 ? parsedPage : 1; page.value = Number.isFinite(parsedPage) && parsedPage > 0 ? parsedPage : 1;
@@ -1001,6 +1023,9 @@ onMounted(() => {
</header> </header>
<div v-if="errorMessage" class="error-banner">{{ errorMessage }}</div> <div v-if="errorMessage" class="error-banner">{{ errorMessage }}</div>
<div v-if="partialFailureWarning" class="warning-banner">
{{ partialFailureWarning }}
</div>
<FilterPanel <FilterPanel
:filters="draftFilters" :filters="draftFilters"

View File

@@ -8,23 +8,23 @@ const props = defineProps({
containerInput: { type: String, default: '' }, containerInput: { type: String, default: '' },
availableFilters: { type: Object, default: () => ({}) }, availableFilters: { type: Object, default: () => ({}) },
supplementaryFilters: { type: Object, default: () => ({}) }, supplementaryFilters: { type: Object, default: () => ({}) },
queryId: { type: String, default: '' }, queryId: { type: String, default: '' },
resolutionInfo: { type: Object, default: null }, resolutionInfo: { type: Object, default: null },
loading: { type: Object, required: true }, loading: { type: Object, required: true },
activeFilterChips: { type: Array, default: () => [] }, activeFilterChips: { type: Array, default: () => [] },
paretoDisplayScope: { type: String, default: 'all' }, paretoDisplayScope: { type: String, default: 'all' },
}); });
const emit = defineEmits([ const emit = defineEmits([
'apply', 'apply',
'clear', 'clear',
'export-csv', 'export-csv',
'remove-chip', 'remove-chip',
'pareto-scope-toggle', 'pareto-scope-toggle',
'pareto-display-scope-change', 'pareto-display-scope-change',
'update:queryMode', 'update:queryMode',
'update:containerInputType', 'update:containerInputType',
'update:containerInput', 'update:containerInput',
'supplementary-change', 'supplementary-change',
]); ]);
@@ -32,7 +32,7 @@ function emitSupplementary(patch) {
emit('supplementary-change', { emit('supplementary-change', {
packages: props.supplementaryFilters.packages || [], packages: props.supplementaryFilters.packages || [],
workcenterGroups: props.supplementaryFilters.workcenterGroups || [], workcenterGroups: props.supplementaryFilters.workcenterGroups || [],
reason: props.supplementaryFilters.reason || '', reasons: props.supplementaryFilters.reasons || [],
...patch, ...patch,
}); });
} }
@@ -86,23 +86,23 @@ function emitSupplementary(patch) {
<!-- Container mode --> <!-- Container mode -->
<template v-else> <template v-else>
<div class="filter-group"> <div class="filter-group filter-group-full container-input-group">
<label class="filter-label" for="container-type">輸入類型</label> <div class="container-label-row">
<select <label class="filter-label" for="container-type">輸入類型</label>
id="container-type" <select
class="filter-input" id="container-type"
:value="containerInputType" class="filter-input container-type-select"
@change="$emit('update:containerInputType', $event.target.value)" :value="containerInputType"
> @change="$emit('update:containerInputType', $event.target.value)"
<option value="lot">LOT</option> >
<option value="work_order">工單</option> <option value="lot">LOT</option>
<option value="wafer_lot">WAFER LOT</option> <option value="work_order">工單</option>
</select> <option value="wafer_lot">WAFER LOT</option>
</div> </select>
<div class="filter-group filter-group-wide"> <label class="filter-label" for="container-input"
<label class="filter-label" for="container-input" >輸入值 (每行一個支援 * % wildcard)</label
>輸入值 (每行一個支援 * % wildcard)</label >
> </div>
<textarea <textarea
id="container-input" id="container-input"
class="filter-input filter-textarea" class="filter-input filter-textarea"
@@ -124,12 +124,12 @@ function emitSupplementary(patch) {
<input v-model="filters.excludeMaterialScrap" type="checkbox" /> <input v-model="filters.excludeMaterialScrap" type="checkbox" />
排除原物料報廢 排除原物料報廢
</label> </label>
<label class="checkbox-pill"> <label class="checkbox-pill">
<input v-model="filters.excludePbDiode" type="checkbox" /> <input v-model="filters.excludePbDiode" type="checkbox" />
排除 PB_* 系列 排除 PB_* 系列
</label> </label>
</div> </div>
<div class="filter-actions"> <div class="filter-actions">
<button <button
class="btn btn-primary" class="btn btn-primary"
:disabled="loading.querying" :disabled="loading.querying"
@@ -181,30 +181,30 @@ function emitSupplementary(patch) {
</template> </template>
</div> </div>
<!-- Supplementary filters (only after primary query) --> <!-- Supplementary filters (only after primary query) -->
<div v-if="queryId" class="supplementary-panel"> <div v-if="queryId" class="supplementary-panel">
<div class="supplementary-header">補充篩選 (快取內篩選)</div> <div class="supplementary-header">補充篩選 (快取內篩選)</div>
<div class="supplementary-toolbar"> <div class="supplementary-toolbar">
<label class="checkbox-pill"> <label class="checkbox-pill">
<input <input
:checked="filters.paretoTop80" :checked="filters.paretoTop80"
type="checkbox" type="checkbox"
@change="$emit('pareto-scope-toggle', $event.target.checked)" @change="$emit('pareto-scope-toggle', $event.target.checked)"
/> />
Pareto 僅顯示累計前 80% Pareto 僅顯示累計前 80%
</label> </label>
<label class="filter-label">顯示範圍</label> <label class="filter-label">顯示範圍</label>
<select <select
class="dimension-select pareto-scope-select" class="dimension-select pareto-scope-select"
:value="paretoDisplayScope" :value="paretoDisplayScope"
@change="$emit('pareto-display-scope-change', $event.target.value)" @change="$emit('pareto-display-scope-change', $event.target.value)"
> >
<option value="all">全部顯示</option> <option value="all">全部顯示</option>
<option value="top20">只顯示 TOP 20</option> <option value="top20">只顯示 TOP 20</option>
</select> </select>
</div> </div>
<div class="supplementary-row"> <div class="supplementary-row">
<div class="filter-group"> <div class="filter-group">
<label class="filter-label">WORKCENTER GROUP</label> <label class="filter-label">WORKCENTER GROUP</label>
<MultiSelect <MultiSelect
:model-value="supplementaryFilters.workcenterGroups" :model-value="supplementaryFilters.workcenterGroups"
@@ -227,22 +227,14 @@ function emitSupplementary(patch) {
</div> </div>
<div class="filter-group"> <div class="filter-group">
<label class="filter-label" for="supp-reason">報廢原因</label> <label class="filter-label">報廢原因</label>
<select <MultiSelect
id="supp-reason" :model-value="supplementaryFilters.reasons"
class="filter-input" :options="availableFilters.reasons || []"
:value="supplementaryFilters.reason" placeholder="全部原因"
@change="emitSupplementary({ reason: $event.target.value })" searchable
> @update:model-value="emitSupplementary({ reasons: $event })"
<option value="">全部原因</option> />
<option
v-for="r in availableFilters.reasons || []"
:key="r"
:value="r"
>
{{ r }}
</option>
</select>
</div> </div>
</div> </div>
</div> </div>

View File

@@ -41,6 +41,19 @@
line-height: 1.5; line-height: 1.5;
} }
/* Inline header row for container mode: the type <select> and the input label
   share one line, wrapping onto a second line on narrow viewports. */
.container-label-row {
display: flex;
align-items: center;
gap: 8px;
flex-wrap: wrap;
}
/* Keep the input-type <select> compact instead of stretching to the full
   grid-cell width of other .filter-input controls. */
.container-type-select {
width: auto;
min-width: 120px;
max-width: 180px;
}
.supplementary-panel { .supplementary-panel {
border-top: 1px solid var(--border); border-top: 1px solid var(--border);
padding: 16px 18px; padding: 16px 18px;
@@ -119,6 +132,15 @@
font-size: 13px; font-size: 13px;
} }
/* Amber banner for partial-failure warnings (incomplete query results) —
   intentionally distinct from the red error banner used for hard failures. */
.warning-banner {
margin-bottom: 14px;
padding: 10px 12px;
border-radius: 6px;
background: #fffbeb;
color: #b45309;
font-size: 13px;
}
.filter-panel { .filter-panel {
display: grid; display: grid;
grid-template-columns: repeat(4, minmax(0, 1fr)); grid-template-columns: repeat(4, minmax(0, 1fr));

View File

@@ -0,0 +1,2 @@
schema: spec-driven
created: 2026-03-03

View File

@@ -0,0 +1,80 @@
## Context
報廢歷史查詢使用 `BatchQueryEngine` 將長日期範圍拆成 10 天 chunks 平行查詢 Oracle。每個 chunk 有記憶體上限(256 MB)和 timeout(300s)防護。當 chunk 失敗時,`has_partial_failure` 旗標寫入 Redis HSET(key: `batch:reject:{hash}:meta`),但此資訊**在三個斷點被丟失**:
1. `reject_dataset_cache.py``execute_primary_query()` 未讀取 batch progress metadata
2. API route 直接 `jsonify({"success": True, **result})`,在 partial chunk failure 路徑下仍回 HTTP 200 + `success: true`,不區分完整與不完整結果
3. 前端 `App.vue` 沒有任何 partial failure 處理邏輯
另一個問題:`redis_clear_batch()``execute_primary_query()` 的清理階段會刪除 metadata key所以讀取必須在清理之前。
前端的 730 天日期上限驗證只在後端 `_validate_range()` 做,前端缺乏即時回饋。
## Goals / Non-Goals
**Goals:**
-`has_partial_failure` 從 Redis metadata 傳遞到 API response `meta` 欄位
- 追蹤失敗 chunk 的時間範圍,讓前端可顯示具體的缺漏區間
- 前端顯示 amber warning banner告知使用者資料可能不完整
- 前端加入日期範圍即時驗證,避免無效 API 請求
- 對 transient error(Oracle timeout、連線失敗)加入單次重試,減少不必要的 partial failure
- 持久化 partial failure 旗標到獨立 Redis key讓 cache-hit 路徑也能還原警告狀態
**Non-Goals:**
- 不改變現有 chunk 分片策略或記憶體上限數值
- 不實作前端的自動重查/重試機制
- 不修改 `EVENT_FETCHER_ALLOW_PARTIAL_RESULTS` 的行為(預設已是安全的 false)
- 不加入 progress bar / 即時進度追蹤 UI
## Decisions
### D1: 在 `redis_clear_batch` 之前讀取 metadata
**決定**: 在 `execute_primary_query()` 中,`merge_chunks()` 之後、`redis_clear_batch()` 之前,呼叫 `get_batch_progress("reject", engine_hash)` 讀取 partial failure 狀態。
**理由**: `redis_clear_batch` 會刪除包含 metadata 的 key之後就讀不到了。此時 chunk 資料已合併完成,是最後可讀取 metadata 的時機點。
### D2: 用獨立 Redis key 持久化 partial failure flagTTL 對齊實際資料層
**決定**: 在 `_store_query_result()` 之後,將 partial failure 資訊存到 `reject_dataset:{query_id}:partial_failure` Redis HSET。**TTL 必須與資料實際存活的層一致**:若資料 spill 到 parquet spool`_REJECT_ENGINE_SPOOL_TTL_SECONDS = 21600s`partial failure flag 的 TTL 也要用 21600s若資料存在 L1/L2`_CACHE_TTL = 900s`flag TTL 用 900s。實作方式`_store_partial_failure_flag()` 接受 `ttl` 參數,由呼叫端根據 `should_spill` 判斷傳入 `_REJECT_ENGINE_SPOOL_TTL_SECONDS``_CACHE_TTL`。Cache-hit 路徑透過 `_load_partial_failure_flag(query_id)` 還原。
**替代方案 A**: 將 flag 嵌入 DataFrame 的 attrs 或另外 pickle。
**為何不採用**: DataFrame attrs 在 parquet 序列化時會丟失pickle 增加反序列化風險。
**替代方案 B**: 固定 TTL=900s。
**為何不採用**: 大查詢 spill 到 parquet spool21600s TTL資料還能讀 6 小時,但 partial failure flag 15 分鐘就過期,造成「資料讀得到但警告消失」。
### D3: 在 `_update_progress` 中追蹤 failed_ranges僅 time-range chunk
**決定**: 擴充 `_update_progress()` 接受 `failed_ranges: Optional[List[Dict]]` 參數,以 JSON 字串存入 Redis HSET。Sequential 和 parallel path 均從失敗的 chunk descriptor 提取 `chunk_start` / `chunk_end`。**僅當 chunk descriptor 包含 `chunk_start`/`chunk_end` 時才記錄**(即 `decompose_by_time_range` 產生的 time-range chunk
**container-id 分塊的情境**: reject 的 container 模式使用 `decompose_by_ids()`chunk 結構為 `{"ids": [...]}` 不含日期範圍。此時 `failed_ranges` 為空 list前端透過 `failed_chunk_count > 0` 顯示 generic 警告訊息「N 個查詢批次的資料擷取失敗」),不含日期區間。
**理由**: chunk descriptor 的結構由 decompose 函式決定engine 層不應假設所有 chunk 都有時間範圍。
### D4: Memory guard 失敗不重試
**決定**: `_execute_single_chunk()` 加入 `max_retries=1`,但只對 `_is_retryable_error()` 回傳 true 的 exception 重試。Memory guard記憶體超限和 Redis store 失敗直接 return False不重試。
**理由**: Memory guard 代表該時段資料量確實過大重試結果相同Oracle timeout 和連線錯誤則可能是暫態問題。
### D5: 前端 warning banner 使用既有 amber 色系
**決定**: 新增 `.warning-banner` CSS class使用 `background: #fffbeb; color: #b45309`,與既有 `.resolution-warn` 的 amber 色系一致。放在 `.error-banner` 之後。
**替代方案**: 使用 toast/notification 元件。
**為何不採用**: 此專案無 toast 系統amber banner 與 red error-banner 模式統一。
### D6: 前端日期驗證函式放在共用 filters module
**決定**: 在 `frontend/src/core/reject-history-filters.js` 新增 `validateDateRange()`,複用 `resource-history/App.vue:231-248` 的驗證模式。
**理由**: reject-history-filters.js 已是此頁面的 filter 工具模組validateDateRange 屬於 filter 驗證邏輯。
## Risks / Trade-offs
- **[中] 重試邏輯影響所有 execute_plan 呼叫端** — `_execute_single_chunk()` 是 shared function被 reject / hold / resource / job / msd 五個服務共用。重試邏輯為加法行為(新增 retry loop 包在既有 try/except 外),成功路徑不變。→ 需要對其他 4 個服務執行 smoke test既有測試通過即可。若需更保守可加入 `max_retries` 參數讓呼叫端控制(預設 1但目前判斷統一重試對所有服務都是正面效果。
- **[低] 重試增加 Oracle 負擔** — 單次重試最多增加 1 倍的失敗查詢量。→ 透過 `_is_retryable_error()` 嚴格過濾,只重試 transient error且 parallel path 最多 3 worker影響可控。
- **[低] failed_ranges JSON 大小** — 理論上 73 chunks730/10全部失敗會產生 73 筆 rangeJSON < 5 KB。→ 遠低於 Redis HSET 欄位限制

View File

@@ -0,0 +1,34 @@
## Why
報廢歷史查詢的防爆機制(時間分片 + 記憶體上限 256 MB + Oracle timeout 300s在 chunk 失敗時會丟棄該 chunk 的資料,`has_partial_failure` 旗標僅寫入 Redis metadata**從未傳遞到 API response 或前端**。使用者查到不完整資料卻毫不知情影響決策正確性。此外730 天日期上限僅在後端驗證,前端無即時提示,導致不必要的等待。
## What Changes
- 後端 `reject_dataset_cache`:於 `execute_plan()` 後讀取 batch progress metadata`has_partial_failure`、失敗 chunk 數量及失敗時間範圍注入 API response `meta` 欄位
- 後端 `batch_query_engine` 追蹤失敗 chunk 的時間區間描述,寫入 Redis metadata 的 `failed_ranges` 欄位
- 後端 `_execute_single_chunk()` 對 transient errorOracle timeout / 連線錯誤加入單次重試memory guard 失敗不重試
- 前端新增 amber warning banner`meta.has_partial_failure` 為 true 時顯示不完整資料警告及失敗的日期區間
- 前端新增日期範圍即時驗證730 天上限),在 API 發送前攔截無效範圍
## Capabilities
### New Capabilities
- `batch-query-resilience`: 批次查詢引擎的失敗範圍追蹤、partial failure metadata 傳遞、及 transient error 單次重試機制
### Modified Capabilities
- `reject-history-api`: API response `meta` 新增 `has_partial_failure``failed_chunk_count``failed_ranges` 欄位,讓前端得知查詢結果完整性
- `reject-history-page`: 新增 amber warning banner 顯示 partial failure 警告新增前端日期範圍即時驗證730 天上限)
## Impact
- **後端服務 — batch_query_engine.py共用模組影響所有使用 execute_plan 的服務)**:
- 追蹤 failed_ranges + 重試邏輯修改的是 `_execute_single_chunk()`,此函式被 **reject / hold / resource / job / msd** 五個 dataset cache 服務共用
- 重試邏輯為加法行為(新增 retry loop不改變既有成功路徑對其他服務向後相容
- `failed_ranges` 追蹤僅在 chunk descriptor 含 `chunk_start`/`chunk_end` 時才記錄container-id 分塊(僅 reject container 模式使用)不受影響
- 需對 hold / resource / job / msd 執行回歸 smoke test
- **後端服務 — reject_dataset_cache.py**: 讀取 metadata + 注入 response + 持久化 partial failure flag
- **前端**: `App.vue`warning banner + 日期驗證)、`reject-history-filters.js`validateDateRange 函式)、`style.css`.warning-banner 樣式)
- **API 契約**: response `meta` 新增可選欄位(向後相容,現有前端不受影響)
- **測試**: `test_batch_query_engine.py``test_reject_dataset_cache.py` 需新增對應測試案例hold / resource / job / msd 需回歸驗證

View File

@@ -0,0 +1,82 @@
## ADDED Requirements
### Requirement: BatchQueryEngine SHALL track failed chunk time ranges in progress metadata
The engine SHALL record the time ranges of failed chunks in Redis progress metadata so consumers can report which date intervals have missing data.
#### Scenario: Failed chunk range recorded in sequential path
- **WHEN** a chunk with `chunk_start` and `chunk_end` keys fails during sequential execution
- **THEN** `_update_progress()` SHALL store a `failed_ranges` field in the Redis HSET metadata
- **THEN** `failed_ranges` SHALL be a JSON array of objects, each with `start` and `end` string keys
- **THEN** the array SHALL contain one entry per failed chunk
#### Scenario: Failed chunk range recorded in parallel path
- **WHEN** a chunk with `chunk_start` and `chunk_end` keys fails during parallel execution
- **THEN** the failed chunk's time range SHALL be appended to `failed_ranges` in the same format as the sequential path
#### Scenario: No failed ranges when all chunks succeed
- **WHEN** all chunks complete successfully
- **THEN** the `failed_ranges` field SHALL NOT be present in Redis metadata
#### Scenario: ID-batch chunks produce no failed_ranges entries
- **WHEN** a chunk created by `decompose_by_ids()` (containing only an `ids` key, no `chunk_start`/`chunk_end`) fails
- **THEN** no entry SHALL be appended to `failed_ranges` for that chunk
- **THEN** `has_partial_failure` SHALL still be set to `True`
- **THEN** `failed` count SHALL still be incremented
#### Scenario: get_batch_progress returns failed_ranges
- **WHEN** `get_batch_progress()` is called after execution with failed chunks
- **THEN** the returned dict SHALL include `failed_ranges` as a JSON string parseable to a list of `{start, end}` objects
### Requirement: BatchQueryEngine SHALL retry transient chunk failures once
The engine SHALL retry chunk execution once for transient errors (Oracle timeout, connection errors) but SHALL NOT retry deterministic failures (memory guard, Redis store).
#### Scenario: Oracle timeout retried once
- **WHEN** `_execute_single_chunk()` raises an exception matching Oracle timeout patterns (`DPY-4024`, `ORA-01013`)
- **THEN** the chunk SHALL be retried exactly once
- **WHEN** the retry succeeds
- **THEN** the chunk SHALL be marked as successful
#### Scenario: Connection error retried once
- **WHEN** `_execute_single_chunk()` raises `TimeoutError`, `ConnectionError`, or `OSError`
- **THEN** the chunk SHALL be retried exactly once
#### Scenario: Retry exhausted marks chunk as failed
- **WHEN** a chunk fails on both the initial attempt and the retry
- **THEN** the chunk SHALL be marked as failed
- **THEN** `has_partial_failure` SHALL be set to `True`
#### Scenario: Memory guard failure NOT retried
- **WHEN** a chunk's DataFrame exceeds `BATCH_CHUNK_MAX_MEMORY_MB`
- **THEN** the chunk SHALL return `False` immediately without retry
- **THEN** the query function SHALL have been called exactly once for that chunk
#### Scenario: Redis store failure NOT retried
- **WHEN** `redis_store_chunk()` returns `False`
- **THEN** the chunk SHALL return `False` immediately without retry
### Requirement: reject_dataset_cache SHALL propagate partial failure metadata to API response
The cache service SHALL read batch execution metadata and include partial failure information in the API response `meta` field.
#### Scenario: Partial failure metadata included in response
- **WHEN** `execute_primary_query()` uses the batch engine path and `get_batch_progress()` returns `has_partial_failure=True`
- **THEN** the response `meta` dict SHALL include `has_partial_failure: true`
- **THEN** the response `meta` dict SHALL include `failed_chunk_count` as an integer
- **THEN** if `failed_ranges` is present, the response `meta` dict SHALL include `failed_ranges` as a list of `{start, end}` objects
#### Scenario: Metadata read before redis_clear_batch
- **WHEN** `execute_primary_query()` calls `get_batch_progress()`
- **THEN** the call SHALL occur after `merge_chunks()` and before `redis_clear_batch()`
#### Scenario: No partial failure on successful query
- **WHEN** all chunks complete successfully
- **THEN** the response `meta` dict SHALL NOT include `has_partial_failure`
#### Scenario: Cache-hit path restores partial failure flag
- **WHEN** a cached DataFrame is returned (cache hit) and a partial failure flag was stored during the original query
- **THEN** the response `meta` dict SHALL include the same `has_partial_failure`, `failed_chunk_count`, and `failed_ranges` as the original response
#### Scenario: Partial failure flag TTL matches data storage layer
- **WHEN** partial failure is detected and the query result is spilled to parquet spool
- **THEN** the partial failure flag SHALL be stored with TTL equal to `_REJECT_ENGINE_SPOOL_TTL_SECONDS` (default 21600 seconds)
- **WHEN** partial failure is detected and the query result is stored in L1/L2 Redis cache
- **THEN** the partial failure flag SHALL be stored with TTL equal to `_CACHE_TTL` (default 900 seconds)

View File

@@ -0,0 +1,36 @@
## MODIFIED Requirements
### Requirement: Reject History API SHALL validate required query parameters
The API SHALL validate date parameters and basic paging bounds before executing database work.
#### Scenario: Missing required dates
- **WHEN** a reject-history endpoint requiring date range is called without `start_date` or `end_date`
- **THEN** the API SHALL return HTTP 400 with a descriptive validation error
#### Scenario: Invalid date order
- **WHEN** `end_date` is earlier than `start_date`
- **THEN** the API SHALL return HTTP 400 and SHALL NOT run SQL queries
#### Scenario: Date range exceeds maximum
- **WHEN** the date range between `start_date` and `end_date` exceeds 730 days
- **THEN** the API SHALL return HTTP 400 with error message "日期範圍不可超過 730 天"
## ADDED Requirements
### Requirement: Reject History API primary query response SHALL include partial failure metadata
The primary query endpoint SHALL include batch execution completeness information in the response `meta` field when chunks fail during batch query execution.
#### Scenario: Partial failure metadata in response
- **WHEN** `POST /api/reject-history/query` completes with some chunks failing
- **THEN** the response SHALL include `meta.has_partial_failure: true`
- **THEN** the response SHALL include `meta.failed_chunk_count` as a positive integer
- **THEN** the response SHALL include `meta.failed_ranges` as an array of `{start, end}` date strings (if available)
- **THEN** the HTTP status SHALL still be 200 (data is partially available)
#### Scenario: No partial failure metadata on full success
- **WHEN** `POST /api/reject-history/query` completes with all chunks succeeding
- **THEN** the response `meta` SHALL NOT include `has_partial_failure`, `failed_chunk_count`, or `failed_ranges`
#### Scenario: Partial failure metadata preserved on cache hit
- **WHEN** `POST /api/reject-history/query` returns cached data that originally had partial failures
- **THEN** the response SHALL include the same `meta.has_partial_failure`, `meta.failed_chunk_count`, and `meta.failed_ranges` as the original response

View File

@@ -0,0 +1,58 @@
## ADDED Requirements
### Requirement: Reject History page SHALL display partial failure warning banner
The page SHALL display an amber warning banner when the query result contains partial failures, informing users that displayed data may be incomplete.
#### Scenario: Warning banner displayed on partial failure
- **WHEN** the primary query response includes `meta.has_partial_failure: true`
- **THEN** an amber warning banner SHALL be displayed below the error banner position
- **THEN** the warning message SHALL be in Traditional Chinese
#### Scenario: Warning banner shows failed date ranges
- **WHEN** `meta.failed_ranges` contains date range objects
- **THEN** the warning banner SHALL display the specific failed date ranges (e.g., "以下日期區間的資料擷取失敗2025-01-01 ~ 2025-01-10")
#### Scenario: Warning banner shows generic message without ranges (container mode or missing range data)
- **WHEN** `meta.has_partial_failure` is true but `meta.failed_ranges` is empty or absent (e.g., container-id batch query)
- **THEN** the warning banner SHALL display a generic message with the failed chunk count (e.g., "3 個查詢批次的資料擷取失敗")
#### Scenario: Warning banner cleared on new query
- **WHEN** user initiates a new primary query
- **THEN** the warning banner SHALL be cleared before the new query executes
- **THEN** if the new query also has partial failures, the warning SHALL update with new failure information
#### Scenario: Warning banner coexists with error banner
- **WHEN** both an error message and a partial failure warning exist
- **THEN** the error banner SHALL appear first, followed by the warning banner
#### Scenario: Warning banner visual style
- **WHEN** the warning banner is rendered
- **THEN** it SHALL use amber/orange color scheme (background `#fffbeb`, text `#b45309`)
- **THEN** the style SHALL be consistent with the existing `.resolution-warn` color pattern
### Requirement: Reject History page SHALL validate date range before query submission
The page SHALL validate the date range on the client side before sending the API request, providing immediate feedback for invalid ranges.
#### Scenario: Date range exceeds 730-day limit
- **WHEN** user selects a date range exceeding 730 days and clicks "查詢"
- **THEN** the page SHALL display an error message "查詢範圍不可超過 730 天(約兩年)"
- **THEN** the API request SHALL NOT be sent
#### Scenario: Missing start or end date
- **WHEN** user clicks "查詢" without setting both start_date and end_date (in date_range mode)
- **THEN** the page SHALL display an error message "請先設定開始與結束日期"
- **THEN** the API request SHALL NOT be sent
#### Scenario: End date before start date
- **WHEN** user selects an end_date earlier than start_date
- **THEN** the page SHALL display an error message "結束日期必須大於起始日期"
- **THEN** the API request SHALL NOT be sent
#### Scenario: Valid date range proceeds normally
- **WHEN** user selects a valid date range within 730 days and clicks "查詢"
- **THEN** no validation error SHALL be shown
- **THEN** the API request SHALL proceed normally
#### Scenario: Container mode skips date validation
- **WHEN** query mode is "container" (not "date_range")
- **THEN** date range validation SHALL be skipped

View File

@@ -0,0 +1,46 @@
## 1. 前端日期範圍即時驗證
- [x] 1.1 在 `frontend/src/core/reject-history-filters.js` 末尾新增 `validateDateRange(startDate, endDate)` 函式MAX_QUERY_DAYS=730回傳空字串表示通過、非空字串為錯誤訊息
- [x] 1.2 在 `frontend/src/reject-history/App.vue` import `validateDateRange`,在 `executePrimaryQuery()` 的 API 呼叫前(`errorMessage.value = ''` 重置之後)加入 date_range 模式的驗證邏輯,驗證失敗時設定 `errorMessage` 並 return
## 2. 後端追蹤失敗 chunk 時間範圍
- [x] 2.1 在 `batch_query_engine.py``_update_progress()` 簽名加入 `failed_ranges: Optional[List] = None` 參數,在 mapping dict 中條件性加入 `json.dumps(failed_ranges)` 欄位
- [x] 2.2 在 `execute_plan()` 的 sequential path`for idx, chunk in enumerate(chunks)` 迴圈區段)新增 `failed_range_list = []`chunk 失敗時從 chunk descriptor 條件性提取 `chunk_start`/`chunk_end` append 到 list僅 time-range chunk 才有),傳入每次 `_update_progress()` 呼叫
- [x] 2.3 在 `_execute_parallel()` 修改 `futures` dict 為 `futures[future] = (idx, chunk)` 以保留 chunk descriptor新增 `failed_range_list`,失敗時條件性 append range返回值改為 4-tuple `(completed, failed, has_partial_failure, failed_range_list)`;同步更新 `execute_plan()` 中呼叫 `_execute_parallel()` 的解構為 4-tuple
## 3. 後端 chunk 失敗單次重試
- [x] 3.1 在 `batch_query_engine.py` 新增 `_RETRYABLE_PATTERNS` 常數和 `_is_retryable_error(exc)` 函式,辨識 Oracle timeout / 連線錯誤
- [x] 3.2 修改 `_execute_single_chunk()` 加入 `max_retries: int = 1` 參數,將 try/except 包在 retry loop 中memory guard 和 Redis store 失敗直接 return False 不重試exception 中若 `_is_retryable_error()` 為 True 則 log warning 並 continue
## 4. 後端傳遞 partial failure 到 API response
- [x] 4.1 在 `reject_dataset_cache.py``execute_primary_query()` 內 batch_query_engine local import 區塊加入 `get_batch_progress`
- [x] 4.2 在 `execute_primary_query()``merge_chunks()` 呼叫之後、`redis_clear_batch()` 呼叫之前,呼叫 `get_batch_progress("reject", engine_hash)` 讀取 `has_partial_failure``failed``failed_ranges`
- [x] 4.3 在 `redis_clear_batch()` 之後、`_apply_policy_filters()` 之前,將 partial failure 資訊條件性注入 `meta` dict`has_partial_failure``failed_chunk_count``failed_ranges`
- [x] 4.4 新增 `_store_partial_failure_flag(query_id, failed_count, failed_ranges, ttl)``_load_partial_failure_flag(query_id)` 兩個 helper使用 Redis HSET 存取 `reject_dataset:{query_id}:partial_failure``ttl` 由呼叫端傳入
- [x] 4.5 在 `_store_query_result()` 呼叫之後呼叫 `_store_partial_failure_flag()`TTL 根據 `_store_query_result()` 內的 `should_spill` 判斷spill 到 spool 時用 `_REJECT_ENGINE_SPOOL_TTL_SECONDS`21600s否則用 `_CACHE_TTL`900s`_get_cached_df()` cache-hit 路徑呼叫 `_load_partial_failure_flag()``meta.update()`
## 5. 前端 partial failure 警告 banner
- [x] 5.1 在 `frontend/src/reject-history/App.vue` 新增 `partialFailureWarning` ref`executePrimaryQuery()` 開頭重置,在讀取 result 後根據 `result.meta.has_partial_failure` 設定警告訊息(含 failed_ranges 的日期區間文字;無 ranges 時用 failed_chunk_count 的 generic 訊息)
- [x] 5.2 在 App.vue template 的 error-banner `<div>` 之後加入 `<div v-if="partialFailureWarning" class="warning-banner">{{ partialFailureWarning }}</div>`
- [x] 5.3 在 `frontend/src/reject-history/style.css``.error-banner` 規則之後加入 `.warning-banner` 樣式background: #fffbeb, color: #b45309
## 6. 測試
- [x] 6.1 在 `tests/test_batch_query_engine.py` 新增 `test_transient_failure_retried_once`mock query_fn 第一次 raise TimeoutError、第二次成功assert chunk 最終成功且 query_fn 被呼叫 2 次
- [x] 6.2 在 `tests/test_batch_query_engine.py` 新增 `test_memory_guard_not_retried`mock query_fn 回傳超大 DataFrameassert query_fn 僅被呼叫 1 次
- [x] 6.3 在 `tests/test_batch_query_engine.py` 新增 `test_failed_ranges_tracked`3 chunks 其中 1 個失敗assert Redis metadata 含 `failed_ranges` JSON
- [x] 6.4 在 `tests/test_reject_dataset_cache.py` 新增 `test_partial_failure_in_response_meta`mock `get_batch_progress` 回傳 `has_partial_failure=True`assert response `meta` 包含旗標和 `failed_ranges`
- [x] 6.5 在 `tests/test_reject_dataset_cache.py` 新增 `test_cache_hit_restores_partial_failure`:先寫入 partial failure flagcache hit 時 assert meta 有旗標
- [x] 6.6 在 `tests/test_reject_dataset_cache.py` 新增 `test_partial_failure_ttl_matches_spool`:當 should_spill=True 時 assert flag TTL 為 `_REJECT_ENGINE_SPOOL_TTL_SECONDS`,否則為 `_CACHE_TTL`
- [x] 6.7 在 `tests/test_batch_query_engine.py` 新增 `test_id_batch_chunk_no_failed_ranges`container-id 分塊 chunk 失敗時 assert `failed_ranges` 為空 list 但 `has_partial_failure=True`
## 7. 跨服務回歸驗證
- [x] 7.1 執行 `pytest tests/test_batch_query_engine.py tests/test_reject_dataset_cache.py -v` 確認本次修改的測試全部通過
- [x] 7.2 執行 hold_dataset_cache 相關測試確認重試邏輯不影響 hold`pytest tests/ -k "hold" -v`
- [x] 7.3 執行 resource / job / msd 相關測試確認回歸:`pytest tests/ -k "resource or job or mid_section" -v`
- [x] 7.4 若任何跨服務測試失敗,檢查是否為 `_execute_single_chunk` 簽名變更(`max_retries` 參數)導致,確認 keyword-only 預設值不影響既有呼叫

View File

@@ -0,0 +1,86 @@
# batch-query-resilience Specification
## Purpose
Batch query engine resilience features: failed chunk range tracking, transient error retry, and partial failure metadata propagation to API consumers.
## Requirements
### Requirement: BatchQueryEngine SHALL track failed chunk time ranges in progress metadata
The engine SHALL record the time ranges of failed chunks in Redis progress metadata so consumers can report which date intervals have missing data.
#### Scenario: Failed chunk range recorded in sequential path
- **WHEN** a chunk with `chunk_start` and `chunk_end` keys fails during sequential execution
- **THEN** `_update_progress()` SHALL store a `failed_ranges` field in the Redis HSET metadata
- **THEN** `failed_ranges` SHALL be a JSON array of objects, each with `start` and `end` string keys
- **THEN** the array SHALL contain one entry per failed chunk
#### Scenario: Failed chunk range recorded in parallel path
- **WHEN** a chunk with `chunk_start` and `chunk_end` keys fails during parallel execution
- **THEN** the failed chunk's time range SHALL be appended to `failed_ranges` in the same format as the sequential path
#### Scenario: No failed ranges when all chunks succeed
- **WHEN** all chunks complete successfully
- **THEN** the `failed_ranges` field SHALL NOT be present in Redis metadata
#### Scenario: ID-batch chunks produce no failed_ranges entries
- **WHEN** a chunk created by `decompose_by_ids()` (containing only an `ids` key, no `chunk_start`/`chunk_end`) fails
- **THEN** no entry SHALL be appended to `failed_ranges` for that chunk
- **THEN** `has_partial_failure` SHALL still be set to `True`
- **THEN** `failed` count SHALL still be incremented
#### Scenario: get_batch_progress returns failed_ranges
- **WHEN** `get_batch_progress()` is called after execution with failed chunks
- **THEN** the returned dict SHALL include `failed_ranges` as a JSON string parseable to a list of `{start, end}` objects
### Requirement: BatchQueryEngine SHALL retry transient chunk failures once
The engine SHALL retry chunk execution once for transient errors (Oracle timeout, connection errors) but SHALL NOT retry deterministic failures (memory guard, Redis store).
#### Scenario: Oracle timeout retried once
- **WHEN** `_execute_single_chunk()` raises an exception matching Oracle timeout patterns (`DPY-4024`, `ORA-01013`)
- **THEN** the chunk SHALL be retried exactly once
- **WHEN** the retry succeeds
- **THEN** the chunk SHALL be marked as successful
#### Scenario: Connection error retried once
- **WHEN** `_execute_single_chunk()` raises `TimeoutError`, `ConnectionError`, or `OSError`
- **THEN** the chunk SHALL be retried exactly once
#### Scenario: Retry exhausted marks chunk as failed
- **WHEN** a chunk fails on both the initial attempt and the retry
- **THEN** the chunk SHALL be marked as failed
- **THEN** `has_partial_failure` SHALL be set to `True`
#### Scenario: Memory guard failure NOT retried
- **WHEN** a chunk's DataFrame exceeds `BATCH_CHUNK_MAX_MEMORY_MB`
- **THEN** the chunk SHALL return `False` immediately without retry
- **THEN** the query function SHALL have been called exactly once for that chunk
#### Scenario: Redis store failure NOT retried
- **WHEN** `redis_store_chunk()` returns `False`
- **THEN** the chunk SHALL return `False` immediately without retry
### Requirement: reject_dataset_cache SHALL propagate partial failure metadata to API response
The cache service SHALL read batch execution metadata and include partial failure information in the API response `meta` field.
#### Scenario: Partial failure metadata included in response
- **WHEN** `execute_primary_query()` uses the batch engine path and `get_batch_progress()` returns `has_partial_failure=True`
- **THEN** the response `meta` dict SHALL include `has_partial_failure: true`
- **THEN** the response `meta` dict SHALL include `failed_chunk_count` as an integer
- **THEN** if `failed_ranges` is present, the response `meta` dict SHALL include `failed_ranges` as a list of `{start, end}` objects
#### Scenario: Metadata read before redis_clear_batch
- **WHEN** `execute_primary_query()` calls `get_batch_progress()`
- **THEN** the call SHALL occur after `merge_chunks()` and before `redis_clear_batch()`
#### Scenario: No partial failure on successful query
- **WHEN** all chunks complete successfully
- **THEN** the response `meta` dict SHALL NOT include `has_partial_failure`
#### Scenario: Cache-hit path restores partial failure flag
- **WHEN** a cached DataFrame is returned (cache hit) and a partial failure flag was stored during the original query
- **THEN** the response `meta` dict SHALL include the same `has_partial_failure`, `failed_chunk_count`, and `failed_ranges` as the original response
#### Scenario: Partial failure flag TTL matches data storage layer
- **WHEN** partial failure is detected and the query result is spilled to parquet spool
- **THEN** the partial failure flag SHALL be stored with TTL equal to `_REJECT_ENGINE_SPOOL_TTL_SECONDS` (default 21600 seconds)
- **WHEN** partial failure is detected and the query result is stored in L1/L2 Redis cache
- **THEN** the partial failure flag SHALL be stored with TTL equal to `_CACHE_TTL` (default 900 seconds)

View File

@@ -14,6 +14,28 @@ The API SHALL validate date parameters and basic paging bounds before executing
- **WHEN** `end_date` is earlier than `start_date` - **WHEN** `end_date` is earlier than `start_date`
- **THEN** the API SHALL return HTTP 400 and SHALL NOT run SQL queries - **THEN** the API SHALL return HTTP 400 and SHALL NOT run SQL queries
#### Scenario: Date range exceeds maximum
- **WHEN** the date range between `start_date` and `end_date` exceeds 730 days
- **THEN** the API SHALL return HTTP 400 with error message "日期範圍不可超過 730 天"
### Requirement: Reject History API primary query response SHALL include partial failure metadata
The primary query endpoint SHALL include batch execution completeness information in the response `meta` field when chunks fail during batch query execution.
#### Scenario: Partial failure metadata in response
- **WHEN** `POST /api/reject-history/query` completes with some chunks failing
- **THEN** the response SHALL include `meta.has_partial_failure: true`
- **THEN** the response SHALL include `meta.failed_chunk_count` as a positive integer
- **THEN** the response SHALL include `meta.failed_ranges` as an array of `{start, end}` date strings (if available)
- **THEN** the HTTP status SHALL still be 200 (data is partially available)
#### Scenario: No partial failure metadata on full success
- **WHEN** `POST /api/reject-history/query` completes with all chunks succeeding
- **THEN** the response `meta` SHALL NOT include `has_partial_failure`, `failed_chunk_count`, or `failed_ranges`
#### Scenario: Partial failure metadata preserved on cache hit
- **WHEN** `POST /api/reject-history/query` returns cached data that originally had partial failures
- **THEN** the response SHALL include the same `meta.has_partial_failure`, `meta.failed_chunk_count`, and `meta.failed_ranges` as the original response
### Requirement: Reject History API SHALL provide summary metrics endpoint ### Requirement: Reject History API SHALL provide summary metrics endpoint
The API SHALL provide aggregated summary metrics for the selected filter context. The API SHALL provide aggregated summary metrics for the selected filter context.

View File

@@ -236,6 +236,63 @@ The page template SHALL delegate sections to focused sub-components, following t
- **THEN** `App.vue` SHALL hold all reactive state and API logic - **THEN** `App.vue` SHALL hold all reactive state and API logic
- **THEN** sub-components SHALL receive data via props and communicate via events - **THEN** sub-components SHALL receive data via props and communicate via events
### Requirement: Reject History page SHALL display partial failure warning banner
The page SHALL display an amber warning banner when the query result contains partial failures, informing users that displayed data may be incomplete.
#### Scenario: Warning banner displayed on partial failure
- **WHEN** the primary query response includes `meta.has_partial_failure: true`
- **THEN** an amber warning banner SHALL be displayed below the error banner position
- **THEN** the warning message SHALL be in Traditional Chinese
#### Scenario: Warning banner shows failed date ranges
- **WHEN** `meta.failed_ranges` contains date range objects
- **THEN** the warning banner SHALL display the specific failed date ranges (e.g., "以下日期區間的資料擷取失敗2025-01-01 ~ 2025-01-10")
#### Scenario: Warning banner shows generic message without ranges (container mode or missing range data)
- **WHEN** `meta.has_partial_failure` is true but `meta.failed_ranges` is empty or absent (e.g., container-id batch query)
- **THEN** the warning banner SHALL display a generic message with the failed chunk count (e.g., "3 個查詢批次的資料擷取失敗")
#### Scenario: Warning banner cleared on new query
- **WHEN** user initiates a new primary query
- **THEN** the warning banner SHALL be cleared before the new query executes
- **THEN** if the new query also has partial failures, the warning SHALL update with new failure information
#### Scenario: Warning banner coexists with error banner
- **WHEN** both an error message and a partial failure warning exist
- **THEN** the error banner SHALL appear first, followed by the warning banner
#### Scenario: Warning banner visual style
- **WHEN** the warning banner is rendered
- **THEN** it SHALL use amber/orange color scheme (background `#fffbeb`, text `#b45309`)
- **THEN** the style SHALL be consistent with the existing `.resolution-warn` color pattern
### Requirement: Reject History page SHALL validate date range before query submission
The page SHALL validate the date range on the client side before sending the API request, providing immediate feedback for invalid ranges.
#### Scenario: Date range exceeds 730-day limit
- **WHEN** user selects a date range exceeding 730 days and clicks "查詢"
- **THEN** the page SHALL display an error message "查詢範圍不可超過 730 天(約兩年)"
- **THEN** the API request SHALL NOT be sent
#### Scenario: Missing start or end date
- **WHEN** user clicks "查詢" without setting both start_date and end_date (in date_range mode)
- **THEN** the page SHALL display an error message "請先設定開始與結束日期"
- **THEN** the API request SHALL NOT be sent
#### Scenario: End date before start date
- **WHEN** user selects an end_date earlier than start_date
- **THEN** the page SHALL display an error message "結束日期必須大於起始日期"
- **THEN** the API request SHALL NOT be sent
#### Scenario: Valid date range proceeds normally
- **WHEN** user selects a valid date range within 730 days and clicks "查詢"
- **THEN** no validation error SHALL be shown
- **THEN** the API request SHALL proceed normally
#### Scenario: Container mode skips date validation
- **WHEN** query mode is "container" (not "date_range")
- **THEN** date range validation SHALL be skipped
### Requirement: Frontend API timeout ### Requirement: Frontend API timeout
The reject-history page SHALL use a 360-second API timeout (up from 60 seconds) for all Oracle-backed API calls. The reject-history page SHALL use a 360-second API timeout (up from 60 seconds) for all Oracle-backed API calls.

View File

@@ -11,6 +11,7 @@ from flask import Blueprint, Response, jsonify, request
from mes_dashboard.core.cache import cache_get, cache_set, make_cache_key from mes_dashboard.core.cache import cache_get, cache_set, make_cache_key
from mes_dashboard.core.rate_limit import configured_rate_limit from mes_dashboard.core.rate_limit import configured_rate_limit
from mes_dashboard.core.request_validation import parse_json_payload
from mes_dashboard.core.utils import parse_bool_query from mes_dashboard.core.utils import parse_bool_query
from mes_dashboard.services.reject_dataset_cache import ( from mes_dashboard.services.reject_dataset_cache import (
apply_view, apply_view,
@@ -344,7 +345,7 @@ def api_reject_history_reason_pareto():
pareto_scope=pareto_scope, pareto_scope=pareto_scope,
packages=_parse_multi_param("packages") or None, packages=_parse_multi_param("packages") or None,
workcenter_groups=_parse_multi_param("workcenter_groups") or None, workcenter_groups=_parse_multi_param("workcenter_groups") or None,
reason=request.args.get("reason", "").strip() or None, reasons=_parse_multi_param("reasons") or None,
trend_dates=_parse_multi_param("trend_dates") or None, trend_dates=_parse_multi_param("trend_dates") or None,
include_excluded_scrap=include_excluded_scrap, include_excluded_scrap=include_excluded_scrap,
exclude_material_scrap=exclude_material_scrap, exclude_material_scrap=exclude_material_scrap,
@@ -404,7 +405,7 @@ def api_reject_history_batch_pareto():
pareto_display_scope=pareto_display_scope, pareto_display_scope=pareto_display_scope,
packages=_parse_multi_param("packages") or None, packages=_parse_multi_param("packages") or None,
workcenter_groups=_parse_multi_param("workcenter_groups") or None, workcenter_groups=_parse_multi_param("workcenter_groups") or None,
reason=request.args.get("reason", "").strip() or None, reasons=_parse_multi_param("reasons") or None,
trend_dates=_parse_multi_param("trend_dates") or None, trend_dates=_parse_multi_param("trend_dates") or None,
pareto_selections=_parse_multi_pareto_selections(), pareto_selections=_parse_multi_pareto_selections(),
include_excluded_scrap=include_excluded_scrap, include_excluded_scrap=include_excluded_scrap,
@@ -548,7 +549,9 @@ def api_reject_history_analytics():
@reject_history_bp.route("/api/reject-history/query", methods=["POST"]) @reject_history_bp.route("/api/reject-history/query", methods=["POST"])
def api_reject_history_query(): def api_reject_history_query():
"""Primary query: execute Oracle → cache DataFrame → return results.""" """Primary query: execute Oracle → cache DataFrame → return results."""
body = request.get_json(silent=True) or {} body, payload_error = parse_json_payload(require_non_empty_object=True)
if payload_error is not None:
return jsonify({"success": False, "error": payload_error.message}), payload_error.status_code
mode = str(body.get("mode", "")).strip() mode = str(body.get("mode", "")).strip()
if mode not in ("date_range", "container"): if mode not in ("date_range", "container"):
@@ -599,7 +602,7 @@ def api_reject_history_view():
page = request.args.get("page", 1, type=int) or 1 page = request.args.get("page", 1, type=int) or 1
per_page = request.args.get("per_page", 50, type=int) or 50 per_page = request.args.get("per_page", 50, type=int) or 50
metric_filter = request.args.get("metric_filter", "all").strip().lower() or "all" metric_filter = request.args.get("metric_filter", "all").strip().lower() or "all"
reason = request.args.get("reason", "").strip() or None reasons = _parse_multi_param("reasons") or None
detail_reason = request.args.get("detail_reason", "").strip() or None detail_reason = request.args.get("detail_reason", "").strip() or None
pareto_selections = _parse_multi_pareto_selections() pareto_selections = _parse_multi_pareto_selections()
pareto_dimension = None pareto_dimension = None
@@ -618,7 +621,7 @@ def api_reject_history_view():
query_id=query_id, query_id=query_id,
packages=_parse_multi_param("packages") or None, packages=_parse_multi_param("packages") or None,
workcenter_groups=_parse_multi_param("workcenter_groups") or None, workcenter_groups=_parse_multi_param("workcenter_groups") or None,
reason=reason, reasons=reasons,
metric_filter=metric_filter, metric_filter=metric_filter,
trend_dates=_parse_multi_param("trend_dates") or None, trend_dates=_parse_multi_param("trend_dates") or None,
detail_reason=detail_reason, detail_reason=detail_reason,
@@ -653,7 +656,7 @@ def api_reject_history_export_cached():
return jsonify({"success": False, "error": "缺少必要參數: query_id"}), 400 return jsonify({"success": False, "error": "缺少必要參數: query_id"}), 400
metric_filter = request.args.get("metric_filter", "all").strip().lower() or "all" metric_filter = request.args.get("metric_filter", "all").strip().lower() or "all"
reason = request.args.get("reason", "").strip() or None reasons = _parse_multi_param("reasons") or None
detail_reason = request.args.get("detail_reason", "").strip() or None detail_reason = request.args.get("detail_reason", "").strip() or None
pareto_selections = _parse_multi_pareto_selections() pareto_selections = _parse_multi_pareto_selections()
pareto_dimension = None pareto_dimension = None
@@ -672,7 +675,7 @@ def api_reject_history_export_cached():
query_id=query_id, query_id=query_id,
packages=_parse_multi_param("packages") or None, packages=_parse_multi_param("packages") or None,
workcenter_groups=_parse_multi_param("workcenter_groups") or None, workcenter_groups=_parse_multi_param("workcenter_groups") or None,
reason=reason, reasons=reasons,
metric_filter=metric_filter, metric_filter=metric_filter,
trend_dates=_parse_multi_param("trend_dates") or None, trend_dates=_parse_multi_param("trend_dates") or None,
detail_reason=detail_reason, detail_reason=detail_reason,

View File

@@ -56,6 +56,18 @@ from mes_dashboard.core.redis_df_store import (
logger = logging.getLogger("mes_dashboard.batch_query_engine") logger = logging.getLogger("mes_dashboard.batch_query_engine")
_RETRYABLE_PATTERNS = (
"dpy-4024",
"ora-01013",
"ora-03113",
"ora-03135",
"ora-12514",
"ora-12541",
"timeout",
"timed out",
)
# ============================================================ # ============================================================
# Configuration (env-overridable) # Configuration (env-overridable)
# ============================================================ # ============================================================
@@ -65,7 +77,7 @@ BATCH_CHUNK_MAX_MEMORY_MB: int = int(
) )
BATCH_QUERY_TIME_THRESHOLD_DAYS: int = int( BATCH_QUERY_TIME_THRESHOLD_DAYS: int = int(
os.getenv("BATCH_QUERY_TIME_THRESHOLD_DAYS", "60") os.getenv("BATCH_QUERY_TIME_THRESHOLD_DAYS", "10")
) )
BATCH_QUERY_ID_THRESHOLD: int = int( BATCH_QUERY_ID_THRESHOLD: int = int(
@@ -196,6 +208,7 @@ def _update_progress(
failed: int, failed: int,
status: str = "running", status: str = "running",
has_partial_failure: bool = False, has_partial_failure: bool = False,
failed_ranges: Optional[List[Dict[str, str]]] = None,
ttl: int = 900, ttl: int = 900,
) -> None: ) -> None:
"""Write/update batch progress metadata to Redis.""" """Write/update batch progress metadata to Redis."""
@@ -212,6 +225,10 @@ def _update_progress(
"status": status, "status": status,
"has_partial_failure": str(has_partial_failure), "has_partial_failure": str(has_partial_failure),
} }
if failed_ranges is not None:
mapping["failed_ranges"] = json.dumps(
failed_ranges, ensure_ascii=False, default=str
)
try: try:
client.hset(key, mapping=mapping) client.hset(key, mapping=mapping)
client.expire(key, ttl) client.expire(key, ttl)
@@ -279,6 +296,7 @@ def execute_plan(
completed = 0 completed = 0
failed = 0 failed = 0
has_partial_failure = False has_partial_failure = False
failed_range_list: Optional[List[Dict[str, str]]] = None
_update_progress( _update_progress(
cache_prefix, query_hash, cache_prefix, query_hash,
@@ -296,7 +314,9 @@ def execute_plan(
_update_progress( _update_progress(
cache_prefix, query_hash, cache_prefix, query_hash,
total=total, completed=completed, failed=failed, total=total, completed=completed, failed=failed,
has_partial_failure=has_partial_failure, ttl=chunk_ttl, has_partial_failure=has_partial_failure,
failed_ranges=failed_range_list,
ttl=chunk_ttl,
) )
continue continue
ok = _execute_single_chunk( ok = _execute_single_chunk(
@@ -308,14 +328,24 @@ def execute_plan(
else: else:
failed += 1 failed += 1
has_partial_failure = True has_partial_failure = True
if failed_range_list is None:
failed_range_list = []
chunk_start = chunk.get("chunk_start")
chunk_end = chunk.get("chunk_end")
if chunk_start and chunk_end:
failed_range_list.append(
{"start": str(chunk_start), "end": str(chunk_end)}
)
_update_progress( _update_progress(
cache_prefix, query_hash, cache_prefix, query_hash,
total=total, completed=completed, failed=failed, total=total, completed=completed, failed=failed,
has_partial_failure=has_partial_failure, ttl=chunk_ttl, has_partial_failure=has_partial_failure,
failed_ranges=failed_range_list,
ttl=chunk_ttl,
) )
else: else:
# --- Parallel path --- # --- Parallel path ---
completed, failed, has_partial_failure = _execute_parallel( completed, failed, has_partial_failure, failed_range_list = _execute_parallel(
chunks, query_fn, cache_prefix, query_hash, chunks, query_fn, cache_prefix, query_hash,
chunk_ttl, max_rows_per_chunk, skip_cached, chunk_ttl, max_rows_per_chunk, skip_cached,
effective_parallel, effective_parallel,
@@ -327,6 +357,7 @@ def execute_plan(
total=total, completed=completed, failed=failed, total=total, completed=completed, failed=failed,
status=final_status, status=final_status,
has_partial_failure=has_partial_failure, has_partial_failure=has_partial_failure,
failed_ranges=failed_range_list,
ttl=chunk_ttl, ttl=chunk_ttl,
) )
@@ -366,53 +397,59 @@ def _execute_single_chunk(
query_hash: str, query_hash: str,
chunk_ttl: int, chunk_ttl: int,
max_rows_per_chunk: Optional[int], max_rows_per_chunk: Optional[int],
max_retries: int = 1,
) -> bool: ) -> bool:
"""Run one chunk through *query_fn*, apply guards, store result. """Run one chunk through *query_fn*, apply guards, store result.
Returns True on success, False on failure. Returns True on success, False on failure.
""" """
try: attempts = max(0, int(max_retries)) + 1
df = query_fn(chunk, max_rows_per_chunk=max_rows_per_chunk) for attempt in range(attempts):
if df is None: try:
df = pd.DataFrame() df = query_fn(chunk, max_rows_per_chunk=max_rows_per_chunk)
if df is None:
df = pd.DataFrame()
# ---- Memory guard ---- # ---- Memory guard ----
mem_bytes = df.memory_usage(deep=True).sum() mem_bytes = df.memory_usage(deep=True).sum()
mem_mb = mem_bytes / (1024 * 1024) mem_mb = mem_bytes / (1024 * 1024)
if mem_mb > BATCH_CHUNK_MAX_MEMORY_MB: if mem_mb > BATCH_CHUNK_MAX_MEMORY_MB:
logger.warning( logger.warning(
"Chunk %d memory %.1f MB exceeds limit %d MB — discarded", "Chunk %d memory %.1f MB exceeds limit %d MB — discarded",
idx, mem_mb, BATCH_CHUNK_MAX_MEMORY_MB, idx, mem_mb, BATCH_CHUNK_MAX_MEMORY_MB,
)
return False
# ---- Store to Redis ----
stored = redis_store_chunk(cache_prefix, query_hash, idx, df, ttl=chunk_ttl)
if not stored:
logger.warning(
"Chunk %d failed to persist into Redis, marking as failed", idx
)
return False
logger.debug(
"Chunk %d completed: %d rows, %.1f MB",
idx, len(df), mem_mb,
)
return True
except Exception as exc:
should_retry = attempt < attempts - 1 and _is_retryable_error(exc)
if should_retry:
logger.warning(
"Chunk %d transient failure on attempt %d/%d: %s; retrying",
idx,
attempt + 1,
attempts,
exc,
)
continue
logger.error(
"Chunk %d failed: %s", idx, exc, exc_info=True,
) )
return False return False
return False
# ---- Truncation flag ----
truncated = (
max_rows_per_chunk is not None
and len(df) == max_rows_per_chunk
)
if truncated:
logger.info("Chunk %d returned exactly max_rows_per_chunk=%d (truncated)", idx, max_rows_per_chunk)
# ---- Store to Redis ----
stored = redis_store_chunk(cache_prefix, query_hash, idx, df, ttl=chunk_ttl)
if not stored:
logger.warning(
"Chunk %d failed to persist into Redis, marking as failed", idx
)
return False
logger.debug(
"Chunk %d completed: %d rows, %.1f MB",
idx, len(df), mem_mb,
)
return True
except Exception as exc:
logger.error(
"Chunk %d failed: %s", idx, exc, exc_info=True,
)
return False
def _execute_parallel( def _execute_parallel(
@@ -427,12 +464,13 @@ def _execute_parallel(
) -> tuple: ) -> tuple:
"""Execute chunks in parallel via ThreadPoolExecutor. """Execute chunks in parallel via ThreadPoolExecutor.
Returns (completed, failed, has_partial_failure). Returns (completed, failed, has_partial_failure, failed_ranges).
""" """
total = len(chunks) total = len(chunks)
completed = 0 completed = 0
failed = 0 failed = 0
has_partial_failure = False has_partial_failure = False
failed_range_list: Optional[List[Dict[str, str]]] = None
futures = {} futures = {}
with ThreadPoolExecutor(max_workers=max_workers) as executor: with ThreadPoolExecutor(max_workers=max_workers) as executor:
@@ -445,10 +483,10 @@ def _execute_parallel(
idx, chunk, query_fn, idx, chunk, query_fn,
cache_prefix, query_hash, chunk_ttl, max_rows_per_chunk, cache_prefix, query_hash, chunk_ttl, max_rows_per_chunk,
) )
futures[future] = idx futures[future] = (idx, chunk)
for future in as_completed(futures): for future in as_completed(futures):
idx = futures[future] idx, chunk = futures[future]
try: try:
ok = future.result() ok = future.result()
if ok: if ok:
@@ -456,18 +494,46 @@ def _execute_parallel(
else: else:
failed += 1 failed += 1
has_partial_failure = True has_partial_failure = True
if failed_range_list is None:
failed_range_list = []
chunk_start = chunk.get("chunk_start")
chunk_end = chunk.get("chunk_end")
if chunk_start and chunk_end:
failed_range_list.append(
{"start": str(chunk_start), "end": str(chunk_end)}
)
except Exception as exc: except Exception as exc:
logger.error("Chunk %d future error: %s", idx, exc) logger.error("Chunk %d future error: %s", idx, exc)
failed += 1 failed += 1
has_partial_failure = True has_partial_failure = True
if failed_range_list is None:
failed_range_list = []
chunk_start = chunk.get("chunk_start")
chunk_end = chunk.get("chunk_end")
if chunk_start and chunk_end:
failed_range_list.append(
{"start": str(chunk_start), "end": str(chunk_end)}
)
_update_progress( _update_progress(
cache_prefix, query_hash, cache_prefix, query_hash,
total=total, completed=completed, failed=failed, total=total, completed=completed, failed=failed,
has_partial_failure=has_partial_failure, ttl=chunk_ttl, has_partial_failure=has_partial_failure,
failed_ranges=failed_range_list,
ttl=chunk_ttl,
) )
return completed, failed, has_partial_failure return completed, failed, has_partial_failure, failed_range_list
def _is_retryable_error(exc: Exception) -> bool:
"""Return True for transient Oracle/network timeout errors."""
if isinstance(exc, (TimeoutError, ConnectionError, OSError)):
return True
text = str(exc).strip().lower()
if not text:
return False
return any(pattern in text for pattern in _RETRYABLE_PATTERNS)
# ============================================================ # ============================================================

View File

@@ -0,0 +1,152 @@
# -*- coding: utf-8 -*-
"""Shared guardrails for LOT/WAFER/工單 container resolution."""
from __future__ import annotations
import os
from typing import Any, Dict, Iterable, List, Optional
def _env_int(name: str, default: int) -> int:
raw = os.getenv(name)
if raw is None:
return int(default)
try:
return int(raw)
except (TypeError, ValueError):
return int(default)
def _normalize_wildcard_token(value: str) -> str:
return str(value or "").replace("*", "%")
def _is_pattern_token(value: str) -> bool:
token = _normalize_wildcard_token(value)
return "%" in token or "_" in token
def _literal_prefix_before_wildcard(value: str) -> str:
token = _normalize_wildcard_token(value)
for idx, ch in enumerate(token):
if ch in ("%", "_"):
return token[:idx]
return token
def normalize_input_values(values: Iterable[Any]) -> List[str]:
normalized: List[str] = []
seen = set()
for raw in values or []:
token = str(raw or "").strip()
if not token or token in seen:
continue
seen.add(token)
normalized.append(token)
return normalized
def validate_resolution_request(input_type: str, values: Iterable[Any]) -> Optional[str]:
"""Validate resolver request without hard-capping raw input count."""
tokens = normalize_input_values(values)
if not tokens:
return "請輸入至少一個查詢條件"
# Compatibility switch. Default 0 means "no count cap".
max_values = max(_env_int("CONTAINER_RESOLVE_INPUT_MAX_VALUES", 0), 0)
if max_values and len(tokens) > max_values:
return f"輸入數量超過上限 ({max_values} 筆)"
# Wildcard safety: avoid full-table scans like "%" or "_".
min_prefix_len = max(_env_int("CONTAINER_RESOLVE_PATTERN_MIN_PREFIX_LEN", 2), 0)
if min_prefix_len > 0:
invalid_patterns: List[str] = []
for token in tokens:
if not _is_pattern_token(token):
continue
if len(_literal_prefix_before_wildcard(token).strip()) < min_prefix_len:
invalid_patterns.append(token)
if invalid_patterns:
sample = ", ".join(invalid_patterns[:3])
suffix = "..." if len(invalid_patterns) > 3 else ""
return (
f"{input_type} 萬用字元條件過於寬鬆(需至少 {min_prefix_len} 碼前綴): "
f"{sample}{suffix}"
)
return None
def extract_container_ids(rows: Iterable[Dict[str, Any]]) -> List[str]:
ids: List[str] = []
seen = set()
for row in rows or []:
cid = str(
row.get("container_id")
or row.get("CONTAINERID")
or ""
).strip()
if not cid or cid in seen:
continue
seen.add(cid)
ids.append(cid)
return ids
def assess_resolution_result(result: Dict[str, Any]) -> Dict[str, Any]:
"""Assess expansion result against guardrails."""
expansion_info = result.get("expansion_info") or {}
max_expand_per_token = max(
_env_int("CONTAINER_RESOLVE_MAX_EXPANSION_PER_TOKEN", 2000),
1,
)
offenders: List[Dict[str, Any]] = []
for token, count in expansion_info.items():
try:
c = int(count)
except (TypeError, ValueError):
continue
if c > max_expand_per_token:
offenders.append({"token": str(token), "count": c})
unique_ids = extract_container_ids(result.get("data") or [])
max_container_ids = max(
_env_int("CONTAINER_RESOLVE_MAX_CONTAINER_IDS", 30000),
1,
)
return {
"max_expansion_per_token": max_expand_per_token,
"expansion_offenders": offenders,
"max_container_ids": max_container_ids,
"resolved_container_ids": len(unique_ids),
"over_container_limit": len(unique_ids) > max_container_ids,
}
def validate_resolution_result(
result: Dict[str, Any],
*,
strict: bool = True,
) -> Optional[str]:
"""Validate expansion result guardrails.
strict=True: exceed guardrail -> return error message.
strict=False: exceed guardrail -> allow caller to continue (split/decompose path).
"""
assessment = assess_resolution_result(result)
offenders = assessment.get("expansion_offenders") or []
if offenders and strict:
first = offenders[0]
token = str(first.get("token") or "")
count = int(first.get("count") or 0)
return (
f"單一條件展開過大 ({count} 筆,限制 {assessment['max_expansion_per_token']})"
f"請縮小範圍: {token}"
)
if bool(assessment.get("over_container_limit")) and strict:
return (
f"解析結果過大({assessment['resolved_container_ids']} 筆 CONTAINERID限制 {assessment['max_container_ids']}"
",請縮小查詢條件"
)
return None

View File

@@ -21,6 +21,10 @@ logger = logging.getLogger("mes_dashboard.event_fetcher")
ORACLE_IN_BATCH_SIZE = 1000 ORACLE_IN_BATCH_SIZE = 1000
EVENT_FETCHER_MAX_WORKERS = int(os.getenv('EVENT_FETCHER_MAX_WORKERS', '2')) EVENT_FETCHER_MAX_WORKERS = int(os.getenv('EVENT_FETCHER_MAX_WORKERS', '2'))
CACHE_SKIP_CID_THRESHOLD = int(os.getenv('EVENT_FETCHER_CACHE_SKIP_CID_THRESHOLD', '10000')) CACHE_SKIP_CID_THRESHOLD = int(os.getenv('EVENT_FETCHER_CACHE_SKIP_CID_THRESHOLD', '10000'))
EVENT_FETCHER_ALLOW_PARTIAL_RESULTS = (
os.getenv('EVENT_FETCHER_ALLOW_PARTIAL_RESULTS', 'false').strip().lower()
in {'1', 'true', 'yes', 'on'}
)
_DOMAIN_SPECS: Dict[str, Dict[str, Any]] = { _DOMAIN_SPECS: Dict[str, Dict[str, Any]] = {
"history": { "history": {
@@ -280,16 +284,23 @@ class EventFetcher:
for batch in batches: for batch in batches:
_fetch_and_group_batch(batch) _fetch_and_group_batch(batch)
else: else:
failures = []
with ThreadPoolExecutor(max_workers=min(len(batches), EVENT_FETCHER_MAX_WORKERS)) as executor: with ThreadPoolExecutor(max_workers=min(len(batches), EVENT_FETCHER_MAX_WORKERS)) as executor:
futures = {executor.submit(_fetch_and_group_batch, b): b for b in batches} futures = {executor.submit(_fetch_and_group_batch, b): b for b in batches}
for future in as_completed(futures): for future in as_completed(futures):
try: try:
future.result() future.result()
except Exception: except Exception as exc:
failures.append((futures[future], exc))
logger.error( logger.error(
"EventFetcher batch query failed domain=%s batch_size=%s", "EventFetcher batch query failed domain=%s batch_size=%s",
domain, len(futures[future]), exc_info=True, domain, len(futures[future]), exc_info=True,
) )
if failures and not EVENT_FETCHER_ALLOW_PARTIAL_RESULTS:
failed_cids = sum(len(batch) for batch, _ in failures)
raise RuntimeError(
f"EventFetcher chunk failed (domain={domain}, failed_chunks={len(failures)}, failed_cids={failed_cids})"
)
result = dict(grouped) result = dict(grouped)
del grouped del grouped

View File

@@ -150,7 +150,7 @@ def get_jobs_by_resources(
) -> Dict[str, Any]: ) -> Dict[str, Any]:
"""Query jobs for selected resources within date range. """Query jobs for selected resources within date range.
For date ranges exceeding BATCH_QUERY_TIME_THRESHOLD_DAYS (default 60), For date ranges exceeding BATCH_QUERY_TIME_THRESHOLD_DAYS (default 10),
the query is decomposed into monthly chunks via BatchQueryEngine. the query is decomposed into monthly chunks via BatchQueryEngine.
Results are cached in Redis to avoid redundant Oracle queries. Results are cached in Redis to avoid redundant Oracle queries.

View File

@@ -863,7 +863,7 @@ def _fetch_station_detection_data(
) -> Optional[pd.DataFrame]: ) -> Optional[pd.DataFrame]:
"""Execute station_detection.sql and return raw DataFrame. """Execute station_detection.sql and return raw DataFrame.
For date ranges exceeding BATCH_QUERY_TIME_THRESHOLD_DAYS (default 60), For date ranges exceeding BATCH_QUERY_TIME_THRESHOLD_DAYS (default 10),
the query is decomposed into monthly chunks via BatchQueryEngine to the query is decomposed into monthly chunks via BatchQueryEngine to
prevent Oracle timeout on high-volume stations. prevent Oracle timeout on high-volume stations.
""" """

View File

@@ -26,9 +26,15 @@ from typing import Any, Dict, List, Optional, Generator, Iterable, Tuple
import pandas as pd import pandas as pd
from mes_dashboard.core.database import read_sql_df from mes_dashboard.core.database import read_sql_df
from mes_dashboard.sql import QueryBuilder, SQLLoader from mes_dashboard.sql import QueryBuilder, SQLLoader
from mes_dashboard.services.event_fetcher import EventFetcher from mes_dashboard.services.container_resolution_policy import (
assess_resolution_result,
normalize_input_values,
validate_resolution_request,
validate_resolution_result,
)
from mes_dashboard.services.event_fetcher import EventFetcher
try: try:
from mes_dashboard.core.database import read_sql_df_slow from mes_dashboard.core.database import read_sql_df_slow
@@ -89,7 +95,7 @@ def validate_date_range(start_date: str, end_date: str, max_days: int = MAX_DATE
return f'日期格式錯誤: {e}' return f'日期格式錯誤: {e}'
def validate_lot_input(input_type: str, values: List[str]) -> Optional[str]: def validate_lot_input(input_type: str, values: List[str]) -> Optional[str]:
"""Validate LOT input based on type. """Validate LOT input based on type.
Args: Args:
@@ -99,23 +105,7 @@ def validate_lot_input(input_type: str, values: List[str]) -> Optional[str]:
Returns: Returns:
Error message if validation fails, None if valid. Error message if validation fails, None if valid.
""" """
if not values: return validate_resolution_request(input_type, values)
return '請輸入至少一個查詢條件'
limits = {
'lot_id': MAX_LOT_IDS,
'wafer_lot': MAX_LOT_IDS,
'gd_lot_id': MAX_LOT_IDS,
'serial_number': MAX_SERIAL_NUMBERS,
'work_order': MAX_WORK_ORDERS,
'gd_work_order': MAX_GD_WORK_ORDERS,
}
limit = limits.get(input_type, MAX_LOT_IDS)
if len(values) > limit:
return f'輸入數量超過上限 ({limit} 筆)'
return None
def validate_equipment_input(equipment_ids: List[str]) -> Optional[str]: def validate_equipment_input(equipment_ids: List[str]) -> Optional[str]:
@@ -344,27 +334,50 @@ def resolve_lots(input_type: str, values: List[str]) -> Dict[str, Any]:
return {'error': validation_error} return {'error': validation_error}
# Clean values # Clean values
cleaned = [v.strip() for v in values if v.strip()] cleaned = normalize_input_values(values)
if not cleaned: if not cleaned:
return {'error': '請輸入有效的查詢條件'} return {'error': '請輸入有效的查詢條件'}
try: try:
if input_type == 'lot_id': if input_type == 'lot_id':
return _resolve_by_lot_id(cleaned) result = _resolve_by_lot_id(cleaned)
elif input_type == 'wafer_lot': elif input_type == 'wafer_lot':
return _resolve_by_wafer_lot(cleaned) result = _resolve_by_wafer_lot(cleaned)
elif input_type == 'gd_lot_id': elif input_type == 'gd_lot_id':
return _resolve_by_gd_lot_id(cleaned) result = _resolve_by_gd_lot_id(cleaned)
elif input_type == 'serial_number': elif input_type == 'serial_number':
return _resolve_by_serial_number(cleaned) result = _resolve_by_serial_number(cleaned)
elif input_type == 'work_order': elif input_type == 'work_order':
return _resolve_by_work_order(cleaned) result = _resolve_by_work_order(cleaned)
elif input_type == 'gd_work_order': elif input_type == 'gd_work_order':
return _resolve_by_gd_work_order(cleaned) result = _resolve_by_gd_work_order(cleaned)
else: else:
return {'error': f'不支援的輸入類型: {input_type}'} return {'error': f'不支援的輸入類型: {input_type}'}
except Exception as exc: guard_assessment = assess_resolution_result(result)
overflow_tokens = guard_assessment.get("expansion_offenders") or []
overflow_total = bool(guard_assessment.get("over_container_limit"))
if overflow_tokens or overflow_total:
logger.warning(
"Resolution guardrail overflow (input_type=%s, offenders=%s, resolved=%s, max=%s); continuing with decompose path",
input_type,
len(overflow_tokens),
guard_assessment.get("resolved_container_ids"),
guard_assessment.get("max_container_ids"),
)
result["guardrail"] = {
"overflow": True,
"expansion_offenders": overflow_tokens,
"resolved_container_ids": guard_assessment.get("resolved_container_ids"),
"max_container_ids": guard_assessment.get("max_container_ids"),
}
# Keep compatibility: validation API remains available for strict call sites.
guard_error = validate_resolution_result(result, strict=False)
if guard_error:
return {'error': guard_error}
return result
except Exception as exc:
logger.error(f"LOT resolution failed: {exc}") logger.error(f"LOT resolution failed: {exc}")
return {'error': f'解析失敗: {str(exc)}'} return {'error': f'解析失敗: {str(exc)}'}

File diff suppressed because it is too large Load Diff

View File

@@ -30,6 +30,31 @@ WITH spec_map AS (
WHERE SPEC IS NOT NULL WHERE SPEC IS NOT NULL
GROUP BY SPEC GROUP BY SPEC
), ),
reject_scope AS (
SELECT DISTINCT
r.WIPTRACKINGGROUPKEYID
FROM DWH.DW_MES_LOTREJECTHISTORY r
WHERE {{ BASE_WHERE }}
AND r.WIPTRACKINGGROUPKEYID IS NOT NULL
),
wip_workflow_map AS (
SELECT
WIPTRACKINGGROUPKEYID,
WORKFLOWNAME
FROM (
SELECT
lwh.WIPTRACKINGGROUPKEYID,
lwh.WORKFLOWNAME,
ROW_NUMBER() OVER (
PARTITION BY lwh.WIPTRACKINGGROUPKEYID
ORDER BY lwh.MOVEOUTTIMESTAMP DESC NULLS LAST
) AS rn
FROM DWH.DW_MES_LOTWIPHISTORY lwh
INNER JOIN reject_scope rs
ON rs.WIPTRACKINGGROUPKEYID = lwh.WIPTRACKINGGROUPKEYID
)
WHERE rn = 1
),
reject_raw AS ( reject_raw AS (
SELECT SELECT
TRUNC(r.TXNDATE) AS TXN_DAY, TRUNC(r.TXNDATE) AS TXN_DAY,
@@ -105,7 +130,7 @@ reject_raw AS (
FROM DWH.DW_MES_LOTREJECTHISTORY r FROM DWH.DW_MES_LOTREJECTHISTORY r
LEFT JOIN DWH.DW_MES_CONTAINER c LEFT JOIN DWH.DW_MES_CONTAINER c
ON c.CONTAINERID = r.CONTAINERID ON c.CONTAINERID = r.CONTAINERID
LEFT JOIN DWH.DW_MES_LOTWIPHISTORY lwh LEFT JOIN wip_workflow_map lwh
ON lwh.WIPTRACKINGGROUPKEYID = r.WIPTRACKINGGROUPKEYID ON lwh.WIPTRACKINGGROUPKEYID = r.WIPTRACKINGGROUPKEYID
LEFT JOIN spec_map sm LEFT JOIN spec_map sm
ON sm.SPEC = TRIM(r.SPECNAME) ON sm.SPEC = TRIM(r.SPECNAME)

View File

@@ -6,8 +6,8 @@
-- :end_date - End date (YYYY-MM-DD) -- :end_date - End date (YYYY-MM-DD)
WITH spec_map AS ( WITH spec_map AS (
SELECT SELECT
SPEC, SPEC,
MIN(WORK_CENTER) KEEP ( MIN(WORK_CENTER) KEEP (
DENSE_RANK FIRST ORDER BY WORKCENTERSEQUENCE_GROUP DENSE_RANK FIRST ORDER BY WORKCENTERSEQUENCE_GROUP
) AS WORK_CENTER, ) AS WORK_CENTER,
@@ -15,9 +15,34 @@ WITH spec_map AS (
DENSE_RANK FIRST ORDER BY WORKCENTERSEQUENCE_GROUP DENSE_RANK FIRST ORDER BY WORKCENTERSEQUENCE_GROUP
) AS WORKCENTER_GROUP, ) AS WORKCENTER_GROUP,
MIN(WORKCENTERSEQUENCE_GROUP) AS WORKCENTERSEQUENCE_GROUP MIN(WORKCENTERSEQUENCE_GROUP) AS WORKCENTERSEQUENCE_GROUP
FROM DWH.DW_MES_SPEC_WORKCENTER_V FROM DWH.DW_MES_SPEC_WORKCENTER_V
WHERE SPEC IS NOT NULL WHERE SPEC IS NOT NULL
GROUP BY SPEC GROUP BY SPEC
),
reject_scope AS (
SELECT DISTINCT
r.WIPTRACKINGGROUPKEYID
FROM DWH.DW_MES_LOTREJECTHISTORY r
WHERE {{ BASE_WHERE }}
AND r.WIPTRACKINGGROUPKEYID IS NOT NULL
),
wip_workflow_map AS (
SELECT
WIPTRACKINGGROUPKEYID,
WORKFLOWNAME
FROM (
SELECT
lwh.WIPTRACKINGGROUPKEYID,
lwh.WORKFLOWNAME,
ROW_NUMBER() OVER (
PARTITION BY lwh.WIPTRACKINGGROUPKEYID
ORDER BY lwh.MOVEOUTTIMESTAMP DESC NULLS LAST
) AS rn
FROM DWH.DW_MES_LOTWIPHISTORY lwh
INNER JOIN reject_scope rs
ON rs.WIPTRACKINGGROUPKEYID = lwh.WIPTRACKINGGROUPKEYID
)
WHERE rn = 1
), ),
reject_raw AS ( reject_raw AS (
SELECT SELECT
@@ -99,7 +124,7 @@ reject_raw AS (
FROM DWH.DW_MES_LOTREJECTHISTORY r FROM DWH.DW_MES_LOTREJECTHISTORY r
LEFT JOIN DWH.DW_MES_CONTAINER c LEFT JOIN DWH.DW_MES_CONTAINER c
ON c.CONTAINERID = r.CONTAINERID ON c.CONTAINERID = r.CONTAINERID
LEFT JOIN DWH.DW_MES_LOTWIPHISTORY lwh LEFT JOIN wip_workflow_map lwh
ON lwh.WIPTRACKINGGROUPKEYID = r.WIPTRACKINGGROUPKEYID ON lwh.WIPTRACKINGGROUPKEYID = r.WIPTRACKINGGROUPKEYID
LEFT JOIN spec_map sm LEFT JOIN spec_map sm
ON sm.SPEC = TRIM(r.SPECNAME) ON sm.SPEC = TRIM(r.SPECNAME)

View File

@@ -1,6 +1,7 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
"""Unit tests for BatchQueryEngine module.""" """Unit tests for BatchQueryEngine module."""
import json
import pytest import pytest
from unittest.mock import patch, MagicMock, call from unittest.mock import patch, MagicMock, call
@@ -482,8 +483,8 @@ class TestChunkFailureResilience:
skip_cached=False, skip_cached=False,
) )
# All 3 chunks attempted # One chunk retried once due retryable timeout pattern.
assert call_count["n"] == 3 assert call_count["n"] == 4
# Final metadata should reflect partial failure # Final metadata should reflect partial failure
last = hset_calls[-1] last = hset_calls[-1]
assert last["status"] == "partial" assert last["status"] == "partial"
@@ -567,10 +568,147 @@ class TestShouldDecompose:
assert should_decompose_by_time("2025-01-01", "2025-12-31") assert should_decompose_by_time("2025-01-01", "2025-12-31")
def test_short_range_false(self): def test_short_range_false(self):
assert not should_decompose_by_time("2025-01-01", "2025-02-01") assert not should_decompose_by_time("2025-01-01", "2025-01-11")
def test_large_ids_true(self): def test_large_ids_true(self):
assert should_decompose_by_ids(list(range(2000))) assert should_decompose_by_ids(list(range(2000)))
def test_small_ids_false(self): def test_small_ids_false(self):
assert not should_decompose_by_ids(list(range(500))) assert not should_decompose_by_ids(list(range(500)))
class TestRetryAndFailedRanges:
def _mock_redis(self):
mock_client = MagicMock()
stored = {}
hashes = {}
mock_client.setex.side_effect = lambda k, t, v: stored.update({k: v})
mock_client.get.side_effect = lambda k: stored.get(k)
mock_client.exists.side_effect = lambda k: 1 if k in stored else 0
mock_client.hset.side_effect = lambda k, mapping=None: hashes.setdefault(k, {}).update(mapping or {})
mock_client.hgetall.side_effect = lambda k: hashes.get(k, {})
mock_client.expire.return_value = None
return mock_client
def test_transient_failure_retried_once(self):
import mes_dashboard.core.redis_df_store as rds
import mes_dashboard.services.batch_query_engine as bqe
mock_client = self._mock_redis()
call_count = {"n": 0}
def flaky_query_fn(chunk, max_rows_per_chunk=None):
call_count["n"] += 1
if call_count["n"] == 1:
raise TimeoutError("connection timed out")
return pd.DataFrame({"V": [1]})
with patch.object(rds, "REDIS_ENABLED", True), \
patch.object(rds, "get_redis_client", return_value=mock_client), \
patch.object(bqe, "get_redis_client", return_value=mock_client):
execute_plan(
[{"chunk_start": "2025-01-01", "chunk_end": "2025-01-10"}],
flaky_query_fn,
query_hash="retryonce",
cache_prefix="retry",
skip_cached=False,
)
progress = bqe.get_batch_progress("retry", "retryonce")
assert call_count["n"] == 2
assert progress is not None
assert progress.get("status") == "completed"
assert progress.get("failed") == "0"
def test_memory_guard_not_retried(self):
import mes_dashboard.core.redis_df_store as rds
import mes_dashboard.services.batch_query_engine as bqe
mock_client = self._mock_redis()
call_count = {"n": 0}
def large_df_query_fn(chunk, max_rows_per_chunk=None):
call_count["n"] += 1
return pd.DataFrame({"V": [1]})
with patch.object(rds, "REDIS_ENABLED", True), \
patch.object(rds, "get_redis_client", return_value=mock_client), \
patch.object(bqe, "get_redis_client", return_value=mock_client), \
patch.object(bqe, "BATCH_CHUNK_MAX_MEMORY_MB", 0):
execute_plan(
[{"chunk_start": "2025-01-01", "chunk_end": "2025-01-10"}],
large_df_query_fn,
query_hash="memnoretry",
cache_prefix="retry",
skip_cached=False,
)
assert call_count["n"] == 1
def test_failed_ranges_tracked(self):
import mes_dashboard.core.redis_df_store as rds
import mes_dashboard.services.batch_query_engine as bqe
mock_client = self._mock_redis()
def query_fn(chunk, max_rows_per_chunk=None):
if chunk["chunk_start"] == "2025-01-11":
raise RuntimeError("chunk failure")
return pd.DataFrame({"V": [1]})
chunks = [
{"chunk_start": "2025-01-01", "chunk_end": "2025-01-10"},
{"chunk_start": "2025-01-11", "chunk_end": "2025-01-20"},
{"chunk_start": "2025-01-21", "chunk_end": "2025-01-30"},
]
with patch.object(rds, "REDIS_ENABLED", True), \
patch.object(rds, "get_redis_client", return_value=mock_client), \
patch.object(bqe, "get_redis_client", return_value=mock_client):
execute_plan(
chunks,
query_fn,
query_hash="franges",
cache_prefix="retry",
skip_cached=False,
)
progress = bqe.get_batch_progress("retry", "franges")
assert progress is not None
assert progress.get("has_partial_failure") == "True"
assert progress.get("failed") == "1"
failed_ranges = json.loads(progress.get("failed_ranges", "[]"))
assert failed_ranges == [{"start": "2025-01-11", "end": "2025-01-20"}]
def test_id_batch_chunk_no_failed_ranges(self):
import mes_dashboard.core.redis_df_store as rds
import mes_dashboard.services.batch_query_engine as bqe
mock_client = self._mock_redis()
def query_fn(chunk, max_rows_per_chunk=None):
if chunk.get("ids") == ["B"]:
raise RuntimeError("id chunk failed")
return pd.DataFrame({"V": [1]})
chunks = [
{"ids": ["A"]},
{"ids": ["B"]},
]
with patch.object(rds, "REDIS_ENABLED", True), \
patch.object(rds, "get_redis_client", return_value=mock_client), \
patch.object(bqe, "get_redis_client", return_value=mock_client):
execute_plan(
chunks,
query_fn,
query_hash="idfail",
cache_prefix="retry",
skip_cached=False,
)
progress = bqe.get_batch_progress("retry", "idfail")
assert progress is not None
assert progress.get("has_partial_failure") == "True"
assert progress.get("failed") == "1"
failed_ranges = json.loads(progress.get("failed_ranges", "[]"))
assert failed_ranges == []

View File

@@ -0,0 +1,73 @@
# -*- coding: utf-8 -*-
"""Unit tests for shared container resolution policy helpers."""
from __future__ import annotations
from mes_dashboard.services import container_resolution_policy as policy
def test_validate_resolution_request_rejects_empty_values():
    """An empty values list must yield a validation error message."""
    outcome = policy.validate_resolution_request("lot_id", [])
    assert outcome is not None
def test_validate_resolution_request_rejects_broad_pattern(monkeypatch):
    """A lone '%' wildcard violates the minimum literal-prefix requirement."""
    monkeypatch.setenv("CONTAINER_RESOLVE_PATTERN_MIN_PREFIX_LEN", "2")
    message = policy.validate_resolution_request("lot_id", ["%"])
    assert message is not None
    assert "萬用字元條件過於寬鬆" in message
def test_validate_resolution_request_allows_pattern_with_prefix(monkeypatch):
    """A wildcard with a sufficiently long literal prefix passes validation."""
    monkeypatch.setenv("CONTAINER_RESOLVE_PATTERN_MIN_PREFIX_LEN", "2")
    assert policy.validate_resolution_request("lot_id", ["GA26%"]) is None
def test_validate_resolution_result_rejects_excessive_expansion(monkeypatch):
    """A single token expanding past the per-token cap produces an error."""
    monkeypatch.setenv("CONTAINER_RESOLVE_MAX_EXPANSION_PER_TOKEN", "3")
    payload = {
        "data": [{"container_id": "C1"}],
        "expansion_info": {"GA%": 10},
    }
    message = policy.validate_resolution_result(payload)
    assert message is not None
    assert "單一條件展開過大" in message
def test_validate_resolution_result_rejects_excessive_container_count(monkeypatch):
    """More resolved containers than the global cap yields an oversize error."""
    monkeypatch.setenv("CONTAINER_RESOLVE_MAX_CONTAINER_IDS", "2")
    rows = [{"container_id": f"C{i}"} for i in (1, 2, 3)]
    message = policy.validate_resolution_result({"data": rows, "expansion_info": {}})
    assert message is not None
    assert "解析結果過大" in message
def test_validate_resolution_result_non_strict_allows_overflow(monkeypatch):
    """With strict=False, limit overflows are tolerated (no error returned)."""
    monkeypatch.setenv("CONTAINER_RESOLVE_MAX_CONTAINER_IDS", "2")
    oversized = {
        "data": [{"container_id": f"C{i}"} for i in (1, 2, 3)],
        "expansion_info": {"GA%": 999},
    }
    assert policy.validate_resolution_result(oversized, strict=False) is None
def test_extract_container_ids_deduplicates_and_preserves_order():
    """Duplicates are dropped, mixed key casing is accepted, and order is kept."""
    rows = [
        {"container_id": "C1"},
        {"container_id": "C1"},
        {"CONTAINERID": "C2"},
        {"container_id": "C3"},
    ]
    extracted = policy.extract_container_ids(rows)
    assert extracted == ["C1", "C2", "C3"]

View File

@@ -198,3 +198,60 @@ def test_fetch_events_sanitizes_nan_values(
result = EventFetcher.fetch_events(["CID-1"], "upstream_history") result = EventFetcher.fetch_events(["CID-1"], "upstream_history")
assert result["CID-1"][0]["VALUE"] is None assert result["CID-1"][0]["VALUE"] is None
@patch("mes_dashboard.services.event_fetcher.cache_set")
@patch("mes_dashboard.services.event_fetcher.cache_get", return_value=None)
@patch("mes_dashboard.services.event_fetcher.read_sql_df_slow_iter")
@patch("mes_dashboard.services.event_fetcher.SQLLoader.load")
def test_fetch_events_raises_when_parallel_batch_fails_and_partial_disabled(
    mock_sql_load,
    mock_iter,
    _mock_cache_get,
    _mock_cache_set,
    monkeypatch,
):
    """With partial results disabled, one failing batch must abort the whole fetch."""
    mock_sql_load.return_value = "SELECT * FROM t WHERE h.CONTAINERID = :container_id {{ WORKCENTER_FILTER }}"
    monkeypatch.setattr("mes_dashboard.services.event_fetcher.EVENT_FETCHER_ALLOW_PARTIAL_RESULTS", False)
    monkeypatch.setattr("mes_dashboard.services.event_fetcher.EVENT_FETCHER_MAX_WORKERS", 2)

    def failing_reader(sql, params, timeout_seconds=60):
        # Only the batch containing CID-1000 blows up; others yield no rows.
        if "CID-1000" in params.values():
            raise RuntimeError("chunk fail")
        return iter([])

    mock_iter.side_effect = failing_reader
    container_ids = [f"CID-{i}" for i in range(1001)]  # force >1 batch

    caught = None
    try:
        EventFetcher.fetch_events(container_ids, "history")
    except RuntimeError as exc:
        caught = exc
    assert caught is not None, "expected RuntimeError"
    # NOTE(review): assumes the fetcher wraps the batch error in its own
    # "chunk failed" message rather than re-raising "chunk fail" verbatim.
    assert "chunk failed" in str(caught)
@patch("mes_dashboard.services.event_fetcher.cache_set")
@patch("mes_dashboard.services.event_fetcher.cache_get", return_value=None)
@patch("mes_dashboard.services.event_fetcher.read_sql_df_slow_iter")
@patch("mes_dashboard.services.event_fetcher.SQLLoader.load")
def test_fetch_events_allows_partial_when_enabled(
    mock_sql_load,
    mock_iter,
    _mock_cache_get,
    _mock_cache_set,
    monkeypatch,
):
    """With partial results enabled, a failing batch is tolerated and skipped."""
    mock_sql_load.return_value = "SELECT * FROM t WHERE h.CONTAINERID = :container_id {{ WORKCENTER_FILTER }}"
    monkeypatch.setattr("mes_dashboard.services.event_fetcher.EVENT_FETCHER_ALLOW_PARTIAL_RESULTS", True)
    monkeypatch.setattr("mes_dashboard.services.event_fetcher.EVENT_FETCHER_MAX_WORKERS", 2)

    def failing_reader(sql, params, timeout_seconds=60):
        if "CID-1000" in params.values():
            raise RuntimeError("chunk fail")
        return iter([])

    mock_iter.side_effect = failing_reader
    container_ids = [f"CID-{i}" for i in range(1001)]

    # Surviving batches produced no rows, so the merged result is empty.
    assert EventFetcher.fetch_events(container_ids, "history") == {}

View File

@@ -77,7 +77,7 @@ class TestJobQueryEngineDecomposition:
result = job_svc.get_jobs_by_resources( result = job_svc.get_jobs_by_resources(
resource_ids=["R1"], resource_ids=["R1"],
start_date="2025-06-01", start_date="2025-06-01",
end_date="2025-06-30", end_date="2025-06-05",
) )
assert engine_calls["execute"] == 0 # Engine NOT used assert engine_calls["execute"] == 0 # Engine NOT used

View File

@@ -191,7 +191,7 @@ class TestErrorLeakageProtection:
def test_query_error_masks_internal_details(self, mock_read): def test_query_error_masks_internal_details(self, mock_read):
mock_read.side_effect = RuntimeError("ORA-00942: table or view does not exist") mock_read.side_effect = RuntimeError("ORA-00942: table or view does not exist")
result = get_jobs_by_resources(["RES001"], "2024-01-01", "2024-01-31") result = get_jobs_by_resources(["RES001"], "2024-01-01", "2024-01-05")
assert result["error"] == QUERY_ERROR_MESSAGE assert result["error"] == QUERY_ERROR_MESSAGE
assert "ORA-00942" not in result["error"] assert "ORA-00942" not in result["error"]

View File

@@ -85,7 +85,7 @@ class TestDetectionEngineDecomposition:
df = msd_svc._fetch_station_detection_data( df = msd_svc._fetch_station_detection_data(
start_date="2025-06-01", start_date="2025-06-01",
end_date="2025-06-30", end_date="2025-06-05",
station="測試", station="測試",
) )

View File

@@ -14,7 +14,7 @@ from unittest.mock import patch, MagicMock
from mes_dashboard import create_app from mes_dashboard import create_app
from mes_dashboard.core.cache import NoOpCache from mes_dashboard.core.cache import NoOpCache
from mes_dashboard.core.rate_limit import reset_rate_limits_for_tests from mes_dashboard.core.rate_limit import reset_rate_limits_for_tests
from mes_dashboard.services.query_tool_service import MAX_DATE_RANGE_DAYS, MAX_LOT_IDS from mes_dashboard.services.query_tool_service import MAX_DATE_RANGE_DAYS
@pytest.fixture @pytest.fixture
@@ -118,20 +118,19 @@ class TestResolveEndpoint:
data = json.loads(response.data) data = json.loads(response.data)
assert 'error' in data assert 'error' in data
def test_values_over_limit(self, client): def test_rejects_too_broad_wildcard(self, client):
"""Should reject values exceeding limit.""" """Should reject wildcard patterns that are too broad."""
values = [f'GA{i:09d}' for i in range(MAX_LOT_IDS + 1)]
response = client.post( response = client.post(
'/api/query-tool/resolve', '/api/query-tool/resolve',
json={ json={
'input_type': 'lot_id', 'input_type': 'lot_id',
'values': values 'values': ['%']
} }
) )
assert response.status_code == 400 assert response.status_code == 400
data = json.loads(response.data) data = json.loads(response.data)
assert 'error' in data assert 'error' in data
assert '超過上限' in data['error'] or str(MAX_LOT_IDS) in data['error'] assert '萬用字元條件過於寬鬆' in data['error']
@patch('mes_dashboard.routes.query_tool_routes.resolve_lots') @patch('mes_dashboard.routes.query_tool_routes.resolve_lots')
def test_resolve_success(self, mock_resolve, client): def test_resolve_success(self, mock_resolve, client):

View File

@@ -90,7 +90,7 @@ class TestValidateDateRange:
assert '格式' in result or 'format' in result.lower() assert '格式' in result or 'format' in result.lower()
class TestValidateLotInput: class TestValidateLotInput:
"""Tests for validate_lot_input function.""" """Tests for validate_lot_input function."""
def test_valid_lot_ids(self): def test_valid_lot_ids(self):
@@ -117,53 +117,24 @@ class TestValidateLotInput:
assert result is not None assert result is not None
assert '至少一個' in result assert '至少一個' in result
def test_exceeds_lot_id_limit(self): def test_large_input_list_allowed_when_no_count_cap(self, monkeypatch):
"""Should reject LOT IDs exceeding limit.""" """Should allow large lists when count cap is disabled."""
values = [f'GA{i:09d}' for i in range(MAX_LOT_IDS + 1)] monkeypatch.setenv("CONTAINER_RESOLVE_INPUT_MAX_VALUES", "0")
result = validate_lot_input('lot_id', values) values = [f'GA{i:09d}' for i in range(MAX_LOT_IDS + 50)]
assert result is not None result = validate_lot_input('lot_id', values)
assert '超過上限' in result assert result is None
assert str(MAX_LOT_IDS) in result
def test_exceeds_serial_number_limit(self):
"""Should reject serial numbers exceeding limit."""
values = [f'SN{i:06d}' for i in range(MAX_SERIAL_NUMBERS + 1)]
result = validate_lot_input('serial_number', values)
assert result is not None
assert '超過上限' in result
assert str(MAX_SERIAL_NUMBERS) in result
def test_exceeds_work_order_limit(self):
"""Should reject work orders exceeding limit."""
values = [f'WO{i:06d}' for i in range(MAX_WORK_ORDERS + 1)]
result = validate_lot_input('work_order', values)
assert result is not None
assert '超過上限' in result
assert str(MAX_WORK_ORDERS) in result
def test_exceeds_gd_work_order_limit(self): def test_rejects_too_broad_wildcard_pattern(self, monkeypatch):
"""Should reject GD work orders exceeding limit.""" """Should reject broad wildcard like '%' to prevent full scan."""
values = [f'GD{i:06d}' for i in range(MAX_GD_WORK_ORDERS + 1)] monkeypatch.setenv("CONTAINER_RESOLVE_PATTERN_MIN_PREFIX_LEN", "2")
result = validate_lot_input('gd_work_order', values) result = validate_lot_input('lot_id', ['%'])
assert result is not None assert result is not None
assert '超過上限' in result assert '萬用字元條件過於寬鬆' in result
assert str(MAX_GD_WORK_ORDERS) in result
def test_accepts_wildcard_with_prefix(self, monkeypatch):
def test_exactly_at_limit(self): monkeypatch.setenv("CONTAINER_RESOLVE_PATTERN_MIN_PREFIX_LEN", "2")
"""Should accept values exactly at limit.""" result = validate_lot_input('lot_id', ['GA25%'])
values = [f'GA{i:09d}' for i in range(MAX_LOT_IDS)] assert result is None
result = validate_lot_input('lot_id', values)
assert result is None
def test_unknown_input_type_uses_default_limit(self):
"""Should use default limit for unknown input types."""
values = [f'X{i}' for i in range(MAX_LOT_IDS)]
result = validate_lot_input('unknown_type', values)
assert result is None
values_over = [f'X{i}' for i in range(MAX_LOT_IDS + 1)]
result = validate_lot_input('unknown_type', values_over)
assert result is not None
class TestValidateEquipmentInput: class TestValidateEquipmentInput:

View File

@@ -3,6 +3,7 @@
from __future__ import annotations from __future__ import annotations
import json
from decimal import Decimal from decimal import Decimal
from unittest.mock import MagicMock from unittest.mock import MagicMock
@@ -400,6 +401,72 @@ class TestEngineDecompositionDateRange:
assert engine_calls["parallel"] == cache_svc._REJECT_ENGINE_PARALLEL assert engine_calls["parallel"] == cache_svc._REJECT_ENGINE_PARALLEL
assert engine_calls["max_rows_per_chunk"] == cache_svc._REJECT_ENGINE_MAX_ROWS_PER_CHUNK assert engine_calls["max_rows_per_chunk"] == cache_svc._REJECT_ENGINE_MAX_ROWS_PER_CHUNK
    def test_engine_chunk_uses_paged_fetch_without_truncation(self, monkeypatch):
        """Engine chunk should fetch all pages (offset paging), not truncate at page size."""
        import mes_dashboard.services.batch_query_engine as engine_mod

        # Record every offset the paged reader is called with, and capture the
        # frame / merge kwargs flowing through the faked engine.
        offsets = []
        captured = {"df": pd.DataFrame(), "merge_kwargs": None}

        def fake_read_sql(sql, params):
            # Simulate a 5-row table served in pages of `limit` rows starting
            # at `offset`; an empty frame signals the final page.
            offset = int(params.get("offset", 0))
            limit = int(params.get("limit", 0))
            offsets.append(offset)
            total_rows = 5
            remaining = max(total_rows - offset, 0)
            take = min(limit, remaining)
            if take <= 0:
                return pd.DataFrame()
            return pd.DataFrame(
                {
                    "CONTAINERID": [f"C{offset + i}" for i in range(take)],
                    "LOSSREASONNAME": ["R1"] * take,
                    "REJECT_TOTAL_QTY": [1] * take,
                }
            )

        def fake_execute_plan(chunks, query_fn, **kwargs):
            # Invoke the real per-chunk query function once with the configured
            # page size and capture everything it fetched.
            page_size = kwargs.get("max_rows_per_chunk")
            captured["df"] = query_fn(chunks[0], max_rows_per_chunk=page_size)
            return kwargs.get("query_hash", "qh")

        def fake_merge_chunks(prefix, qhash, **kwargs):
            captured["merge_kwargs"] = kwargs
            return captured["df"]

        # Page size 2 over 5 rows -> three pages expected at offsets 0, 2, 4.
        monkeypatch.setattr(cache_svc, "_REJECT_ENGINE_MAX_ROWS_PER_CHUNK", 2)
        # Force the time-decomposition engine path with a single chunk.
        monkeypatch.setattr(engine_mod, "should_decompose_by_time", lambda *_a, **_kw: True)
        monkeypatch.setattr(
            engine_mod,
            "decompose_by_time_range",
            lambda *_a, **_kw: [{"chunk_start": "2025-01-01", "chunk_end": "2025-01-31"}],
        )
        monkeypatch.setattr(engine_mod, "execute_plan", fake_execute_plan)
        monkeypatch.setattr(engine_mod, "merge_chunks", fake_merge_chunks)
        monkeypatch.setattr(cache_svc, "read_sql_df", fake_read_sql)
        # Neutralize caching, SQL building, validation and response shaping so
        # only the paging behaviour under test is exercised.
        monkeypatch.setattr(cache_svc, "_get_cached_df", lambda _qid: None)
        monkeypatch.setattr(cache_svc, "_prepare_sql", lambda *a, **kw: "SELECT 1 FROM dual")
        monkeypatch.setattr(cache_svc, "_build_where_clause", lambda **kw: ("", {}, {}))
        monkeypatch.setattr(cache_svc, "_validate_range", lambda *_a, **_kw: None)
        monkeypatch.setattr(cache_svc, "_apply_policy_filters", lambda df, **kw: df)
        monkeypatch.setattr(cache_svc, "_store_query_result", lambda *_a, **_kw: None)
        monkeypatch.setattr(cache_svc, "redis_clear_batch", lambda *_a, **_kw: 0)
        monkeypatch.setattr(
            cache_svc,
            "_build_primary_response",
            lambda qid, df, meta, ri: {"query_id": qid, "rows": len(df)},
        )

        result = cache_svc.execute_primary_query(
            mode="date_range",
            start_date="2025-01-01",
            end_date="2025-03-01",
        )

        # All 5 rows fetched across three pages; merge_chunks got no extra kwargs.
        assert result["rows"] == 5
        assert offsets == [0, 2, 4]
        assert captured["merge_kwargs"] == {}
def test_short_range_skips_engine(self, monkeypatch): def test_short_range_skips_engine(self, monkeypatch):
"""Short date range (<= threshold) uses direct path, no engine.""" """Short date range (<= threshold) uses direct path, no engine."""
import mes_dashboard.services.batch_query_engine as engine_mod import mes_dashboard.services.batch_query_engine as engine_mod
@@ -453,7 +520,7 @@ class TestEngineDecompositionDateRange:
result = cache_svc.execute_primary_query( result = cache_svc.execute_primary_query(
mode="date_range", mode="date_range",
start_date="2025-06-01", start_date="2025-06-01",
end_date="2025-06-30", end_date="2025-06-10",
) )
assert engine_calls["decompose"] == 0 # Engine NOT used assert engine_calls["decompose"] == 0 # Engine NOT used
@@ -629,7 +696,7 @@ def test_large_result_spills_to_parquet_and_view_export_use_spool_fallback(monke
result = cache_svc.execute_primary_query( result = cache_svc.execute_primary_query(
mode="date_range", mode="date_range",
start_date="2025-01-01", start_date="2025-01-01",
end_date="2025-01-31", end_date="2025-01-05",
) )
query_id = result["query_id"] query_id = result["query_id"]
@@ -651,3 +718,185 @@ def test_large_result_spills_to_parquet_and_view_export_use_spool_fallback(monke
export_rows = cache_svc.export_csv_from_cache(query_id=query_id) export_rows = cache_svc.export_csv_from_cache(query_id=query_id)
assert export_rows is not None assert export_rows is not None
assert len(export_rows) == len(df) assert len(export_rows) == len(df)
def test_resolve_containers_deduplicates_container_ids(monkeypatch):
    """Resolver output with repeated container ids collapses to unique, ordered ids."""
    def fake_lot_resolver(values):
        return {
            "data": [
                {"container_id": "CID-1"},
                {"container_id": "CID-1"},
                {"container_id": "CID-2"},
            ],
            "input_count": len(values),
            "not_found": [],
            "expansion_info": {"LOT%": 2},
        }

    monkeypatch.setattr(cache_svc, "_RESOLVERS", {"lot": fake_lot_resolver})
    # Generous limits so no guardrail interferes with the dedup check.
    monkeypatch.setenv("CONTAINER_RESOLVE_MAX_EXPANSION_PER_TOKEN", "10")
    monkeypatch.setenv("CONTAINER_RESOLVE_MAX_CONTAINER_IDS", "10")

    resolved = cache_svc.resolve_containers("lot", ["LOT%"])

    assert resolved["container_ids"] == ["CID-1", "CID-2"]
    assert resolved["resolution_info"]["resolved_count"] == 2
def test_resolve_containers_allows_oversized_expansion_and_sets_guardrail(monkeypatch):
    """Oversized expansion is not fatal; guardrail metadata records the overflow."""
    def fake_lot_resolver(values):
        return {
            "data": [{"container_id": "CID-1"}],
            "input_count": len(values),
            "not_found": [],
            "expansion_info": {"GA%": 999},
        }

    monkeypatch.setattr(cache_svc, "_RESOLVERS", {"lot": fake_lot_resolver})
    monkeypatch.setenv("CONTAINER_RESOLVE_MAX_EXPANSION_PER_TOKEN", "50")
    monkeypatch.setenv("CONTAINER_RESOLVE_PATTERN_MIN_PREFIX_LEN", "2")

    resolved = cache_svc.resolve_containers("lot", ["GA%"])
    guardrail = resolved["resolution_info"].get("guardrail") or {}

    assert guardrail.get("overflow") is True
    assert len(guardrail.get("expansion_offenders") or []) == 1
def test_partial_failure_in_response_meta(monkeypatch):
    # Chunk failures recorded by the batch engine must be surfaced in the API
    # response meta (has_partial_failure / failed_chunk_count / failed_ranges)
    # when the engine path is taken.
    import mes_dashboard.services.batch_query_engine as engine_mod

    df = pd.DataFrame({"CONTAINERID": ["C1"], "LOSSREASONNAME": ["R1"], "REJECT_TOTAL_QTY": [1]})
    # Neutralize everything around the engine path: no cache hit, no range
    # validation, trivial SQL plumbing, passthrough policy filters.
    monkeypatch.setattr(cache_svc, "_get_cached_df", lambda _qid: None)
    monkeypatch.setattr(cache_svc, "_validate_range", lambda *_a, **_kw: None)
    monkeypatch.setattr(cache_svc, "_build_where_clause", lambda **kw: ("", {}, {}))
    monkeypatch.setattr(cache_svc, "_prepare_sql", lambda *a, **kw: "SELECT 1 FROM dual")
    monkeypatch.setattr(cache_svc, "_apply_policy_filters", lambda data, **kw: data)
    monkeypatch.setattr(cache_svc, "_store_query_result", lambda *_a, **_kw: False)
    monkeypatch.setattr(cache_svc, "redis_clear_batch", lambda *_a, **_kw: None)
    # Response builder echoes meta so the assertions can inspect it directly.
    monkeypatch.setattr(
        cache_svc,
        "_build_primary_response",
        lambda qid, result_df, meta, resolution_info: {"query_id": qid, "meta": meta},
    )
    monkeypatch.setattr(cache_svc, "_store_partial_failure_flag", lambda *_a, **_kw: None)
    # Force the time-decomposition engine path with a single chunk.
    monkeypatch.setattr(engine_mod, "should_decompose_by_time", lambda *_a, **_kw: True)
    monkeypatch.setattr(
        engine_mod,
        "decompose_by_time_range",
        lambda *_a, **_kw: [{"chunk_start": "2025-01-01", "chunk_end": "2025-01-10"}],
    )
    monkeypatch.setattr(engine_mod, "execute_plan", lambda *a, **kw: kw.get("query_hash"))
    monkeypatch.setattr(engine_mod, "merge_chunks", lambda *a, **kw: df.copy())
    # Simulate the Redis progress hash reporting two failed chunks; values are
    # strings because Redis hash fields are stored as strings.
    monkeypatch.setattr(
        engine_mod,
        "get_batch_progress",
        lambda *_a, **_kw: {
            "has_partial_failure": "True",
            "failed": "2",
            "failed_ranges": json.dumps([{"start": "2025-01-01", "end": "2025-01-10"}]),
        },
    )

    result = cache_svc.execute_primary_query(
        mode="date_range",
        start_date="2025-01-01",
        end_date="2025-03-01",
    )
    meta = result.get("meta") or {}

    # Meta should carry parsed, typed values, not the raw Redis strings.
    assert meta.get("has_partial_failure") is True
    assert meta.get("failed_chunk_count") == 2
    assert meta.get("failed_ranges") == [{"start": "2025-01-01", "end": "2025-01-10"}]
def test_cache_hit_restores_partial_failure(monkeypatch):
    """A cached result must re-surface the persisted partial-failure metadata."""
    cached = pd.DataFrame(
        {"CONTAINERID": ["C1"], "LOSSREASONNAME": ["R1"], "REJECT_TOTAL_QTY": [1]}
    )
    # Cache hit path: _get_cached_df returns data, engine is never consulted.
    monkeypatch.setattr(cache_svc, "_get_cached_df", lambda _qid: cached)
    monkeypatch.setattr(cache_svc, "_validate_range", lambda *_a, **_kw: None)
    monkeypatch.setattr(cache_svc, "_build_where_clause", lambda **kw: ("", {}, {}))
    monkeypatch.setattr(cache_svc, "_apply_policy_filters", lambda data, **kw: data)
    monkeypatch.setattr(
        cache_svc,
        "_load_partial_failure_flag",
        lambda _qid: {
            "has_partial_failure": True,
            "failed_chunk_count": 3,
            "failed_ranges": [],
        },
    )
    monkeypatch.setattr(
        cache_svc,
        "_build_primary_response",
        lambda qid, result_df, meta, resolution_info: {"query_id": qid, "meta": meta},
    )

    result = cache_svc.execute_primary_query(
        mode="date_range",
        start_date="2025-01-01",
        end_date="2025-01-31",
    )
    meta = result.get("meta") or {}

    assert meta.get("has_partial_failure") is True
    assert meta.get("failed_chunk_count") == 3
    assert meta.get("failed_ranges") == []
@pytest.mark.parametrize(
    "store_result,expected_ttl",
    [
        # Result spooled to disk -> flag TTL follows the spool TTL.
        (True, cache_svc._REJECT_ENGINE_SPOOL_TTL_SECONDS),
        # Result kept in the regular cache -> flag TTL follows the cache TTL.
        (False, cache_svc._CACHE_TTL),
    ],
)
def test_partial_failure_ttl_matches_spool(monkeypatch, store_result, expected_ttl):
    # The partial-failure flag must expire in lockstep with the stored data it
    # annotates, so a later cache hit can never outlive its failure metadata.
    import mes_dashboard.services.batch_query_engine as engine_mod

    df = pd.DataFrame({"CONTAINERID": ["C1"], "LOSSREASONNAME": ["R1"], "REJECT_TOTAL_QTY": [1]})
    captured = {"ttls": []}
    # Neutralize surrounding plumbing; _store_query_result reports whether the
    # result was spooled (parametrized above).
    monkeypatch.setattr(cache_svc, "_get_cached_df", lambda _qid: None)
    monkeypatch.setattr(cache_svc, "_validate_range", lambda *_a, **_kw: None)
    monkeypatch.setattr(cache_svc, "_build_where_clause", lambda **kw: ("", {}, {}))
    monkeypatch.setattr(cache_svc, "_prepare_sql", lambda *a, **kw: "SELECT 1 FROM dual")
    monkeypatch.setattr(cache_svc, "_apply_policy_filters", lambda data, **kw: data)
    monkeypatch.setattr(cache_svc, "_store_query_result", lambda *_a, **_kw: store_result)
    monkeypatch.setattr(cache_svc, "redis_clear_batch", lambda *_a, **_kw: None)
    monkeypatch.setattr(
        cache_svc,
        "_build_primary_response",
        lambda qid, result_df, meta, resolution_info: {"query_id": qid, "meta": meta},
    )
    # Capture the TTL the flag is persisted with.
    monkeypatch.setattr(
        cache_svc,
        "_store_partial_failure_flag",
        lambda _qid, _failed, _ranges, ttl: captured["ttls"].append(ttl),
    )
    # Force the engine path with a single chunk and a reported failure.
    monkeypatch.setattr(engine_mod, "should_decompose_by_time", lambda *_a, **_kw: True)
    monkeypatch.setattr(
        engine_mod,
        "decompose_by_time_range",
        lambda *_a, **_kw: [{"chunk_start": "2025-01-01", "chunk_end": "2025-01-10"}],
    )
    monkeypatch.setattr(engine_mod, "execute_plan", lambda *a, **kw: kw.get("query_hash"))
    monkeypatch.setattr(engine_mod, "merge_chunks", lambda *a, **kw: df.copy())
    monkeypatch.setattr(
        engine_mod,
        "get_batch_progress",
        lambda *_a, **_kw: {"has_partial_failure": "True", "failed": "1", "failed_ranges": "[]"},
    )

    cache_svc.execute_primary_query(
        mode="date_range",
        start_date="2025-01-01",
        end_date="2025-03-01",
    )

    # Exactly one persist call, with the TTL matching the storage layer used.
    assert captured["ttls"] == [expected_ttl]