feat(reject-history): fix silent data loss by propagating partial failure metadata to frontend
Chunk failures in BatchQueryEngine were silently discarded — `has_partial_failure` was tracked in Redis but never surfaced to the API response or frontend. Users could see incomplete data without any warning. This commit closes the gap end-to-end:

Backend:
- Track failed chunk time ranges (`failed_ranges`) in batch engine progress metadata
- Add single retry for transient Oracle errors (timeout, connection) in `_execute_single_chunk`
- Read `get_batch_progress()` after merge but before `redis_clear_batch()` cleanup
- Inject `has_partial_failure`, `failed_chunk_count`, `failed_ranges` into API response meta
- Persist partial failure flag to independent Redis key with TTL aligned to data storage layer
- Add shared container-resolution policy module with wildcard/expansion guardrails
- Refactor reason filter from single-value to multi-select (`reason` → `reasons`)

Frontend:
- Add client-side date range validation (730-day limit) before API submission
- Display amber warning banner on partial failure with specific failed date ranges
- Support generic fallback message for container-mode queries without date ranges
- Update FilterPanel to support multi-select reason chips

Specs & tests:
- Create batch-query-resilience spec; update reject-history-api and reject-history-page specs
- Add 7 new tests for retry, memory guard, failed ranges, partial failure propagation, TTL
- Cross-service regression verified (hold, resource, job, msd — 411 tests pass)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""Unit tests for BatchQueryEngine module."""
|
||||
|
||||
import json
|
||||
import pytest
|
||||
from unittest.mock import patch, MagicMock, call
|
||||
|
||||
@@ -482,8 +483,8 @@ class TestChunkFailureResilience:
|
||||
skip_cached=False,
|
||||
)
|
||||
|
||||
# All 3 chunks attempted
|
||||
assert call_count["n"] == 3
|
||||
# One chunk is retried once because its error matches the retryable timeout pattern.
|
||||
assert call_count["n"] == 4
|
||||
# Final metadata should reflect partial failure
|
||||
last = hset_calls[-1]
|
||||
assert last["status"] == "partial"
|
||||
@@ -567,10 +568,147 @@ class TestShouldDecompose:
|
||||
assert should_decompose_by_time("2025-01-01", "2025-12-31")
|
||||
|
||||
def test_short_range_false(self):
|
||||
assert not should_decompose_by_time("2025-01-01", "2025-02-01")
|
||||
assert not should_decompose_by_time("2025-01-01", "2025-01-11")
|
||||
|
||||
def test_large_ids_true(self):
|
||||
assert should_decompose_by_ids(list(range(2000)))
|
||||
|
||||
def test_small_ids_false(self):
|
||||
assert not should_decompose_by_ids(list(range(500)))
|
||||
|
||||
|
||||
class TestRetryAndFailedRanges:
    """Exercise ``execute_plan`` retry handling and failed-range metadata.

    Each test swaps the Redis client used by ``redis_df_store`` and
    ``batch_query_engine`` for the in-memory fake built by ``_mock_redis`` and
    then inspects what ``get_batch_progress`` reports for the finished plan.
    """

    def _mock_redis(self):
        """Return a ``MagicMock`` Redis client backed by plain dicts.

        ``setex``/``get``/``exists`` share one dict of values (the TTL argument
        is ignored); ``hset``/``hgetall`` share a dict of hashes; ``expire``
        simply returns ``None``.
        """
        values = {}
        hash_tables = {}

        def _setex(k, t, v):
            values[k] = v

        def _get(k):
            return values.get(k)

        def _exists(k):
            return 1 if k in values else 0

        def _hset(k, mapping=None):
            hash_tables.setdefault(k, {}).update(mapping or {})

        def _hgetall(k):
            return hash_tables.get(k, {})

        client = MagicMock()
        client.setex.side_effect = _setex
        client.get.side_effect = _get
        client.exists.side_effect = _exists
        client.hset.side_effect = _hset
        client.hgetall.side_effect = _hgetall
        client.expire.return_value = None
        return client

    def test_transient_failure_retried_once(self):
        """A chunk whose first attempt raises ``TimeoutError`` is retried.

        The query function fails only on its first call, so two calls are
        expected and the progress record must end up "completed" with zero
        failed chunks.
        """
        import mes_dashboard.core.redis_df_store as rds
        import mes_dashboard.services.batch_query_engine as bqe

        client = self._mock_redis()
        attempts = []

        def flaky_query_fn(chunk, max_rows_per_chunk=None):
            attempts.append(chunk)
            if len(attempts) == 1:
                raise TimeoutError("connection timed out")
            return pd.DataFrame({"V": [1]})

        redis_on = patch.object(rds, "REDIS_ENABLED", True)
        store_client = patch.object(rds, "get_redis_client", return_value=client)
        engine_client = patch.object(bqe, "get_redis_client", return_value=client)
        with redis_on, store_client, engine_client:
            execute_plan(
                [{"chunk_start": "2025-01-01", "chunk_end": "2025-01-10"}],
                flaky_query_fn,
                query_hash="retryonce",
                cache_prefix="retry",
                skip_cached=False,
            )
            progress = bqe.get_batch_progress("retry", "retryonce")

        assert len(attempts) == 2
        assert progress is not None
        assert progress.get("status") == "completed"
        assert progress.get("failed") == "0"

    def test_memory_guard_not_retried(self):
        """With ``BATCH_CHUNK_MAX_MEMORY_MB`` patched to 0 the query runs once.

        NOTE(review): presumably the zero limit makes the engine's memory guard
        reject the chunk and that rejection is not treated as retryable —
        inferred from the test name; confirm against the engine.
        """
        import mes_dashboard.core.redis_df_store as rds
        import mes_dashboard.services.batch_query_engine as bqe

        client = self._mock_redis()
        attempts = []

        def large_df_query_fn(chunk, max_rows_per_chunk=None):
            attempts.append(chunk)
            return pd.DataFrame({"V": [1]})

        redis_on = patch.object(rds, "REDIS_ENABLED", True)
        store_client = patch.object(rds, "get_redis_client", return_value=client)
        engine_client = patch.object(bqe, "get_redis_client", return_value=client)
        zero_memory_limit = patch.object(bqe, "BATCH_CHUNK_MAX_MEMORY_MB", 0)
        with redis_on, store_client, engine_client, zero_memory_limit:
            execute_plan(
                [{"chunk_start": "2025-01-01", "chunk_end": "2025-01-10"}],
                large_df_query_fn,
                query_hash="memnoretry",
                cache_prefix="retry",
                skip_cached=False,
            )

        assert len(attempts) == 1

    def test_failed_ranges_tracked(self):
        """A failing time chunk is reported in ``failed_ranges``.

        The middle of three chunks always raises ``RuntimeError``; progress
        must flag a partial failure, count one failed chunk and list exactly
        that chunk's start/end dates.
        """
        import mes_dashboard.core.redis_df_store as rds
        import mes_dashboard.services.batch_query_engine as bqe

        client = self._mock_redis()
        failing_start = "2025-01-11"

        def query_fn(chunk, max_rows_per_chunk=None):
            if chunk["chunk_start"] != failing_start:
                return pd.DataFrame({"V": [1]})
            raise RuntimeError("chunk failure")

        windows = (
            ("2025-01-01", "2025-01-10"),
            ("2025-01-11", "2025-01-20"),
            ("2025-01-21", "2025-01-30"),
        )
        chunks = [{"chunk_start": begin, "chunk_end": finish} for begin, finish in windows]

        redis_on = patch.object(rds, "REDIS_ENABLED", True)
        store_client = patch.object(rds, "get_redis_client", return_value=client)
        engine_client = patch.object(bqe, "get_redis_client", return_value=client)
        with redis_on, store_client, engine_client:
            execute_plan(
                chunks,
                query_fn,
                query_hash="franges",
                cache_prefix="retry",
                skip_cached=False,
            )
            progress = bqe.get_batch_progress("retry", "franges")

        assert progress is not None
        assert progress.get("has_partial_failure") == "True"
        assert progress.get("failed") == "1"
        reported = json.loads(progress.get("failed_ranges", "[]"))
        assert reported == [{"start": "2025-01-11", "end": "2025-01-20"}]

    def test_id_batch_chunk_no_failed_ranges(self):
        """A failing ID-batch chunk counts as failed but adds no date range.

        Chunks here carry only ``ids`` (no ``chunk_start``/``chunk_end``), so
        ``failed_ranges`` is expected to decode to an empty list.
        """
        import mes_dashboard.core.redis_df_store as rds
        import mes_dashboard.services.batch_query_engine as bqe

        client = self._mock_redis()

        def query_fn(chunk, max_rows_per_chunk=None):
            if chunk.get("ids") == ["B"]:
                raise RuntimeError("id chunk failed")
            return pd.DataFrame({"V": [1]})

        chunks = [{"ids": [single_id]} for single_id in ("A", "B")]

        redis_on = patch.object(rds, "REDIS_ENABLED", True)
        store_client = patch.object(rds, "get_redis_client", return_value=client)
        engine_client = patch.object(bqe, "get_redis_client", return_value=client)
        with redis_on, store_client, engine_client:
            execute_plan(
                chunks,
                query_fn,
                query_hash="idfail",
                cache_prefix="retry",
                skip_cached=False,
            )
            progress = bqe.get_batch_progress("retry", "idfail")

        assert progress is not None
        assert progress.get("has_partial_failure") == "True"
        assert progress.get("failed") == "1"
        reported = json.loads(progress.get("failed_ranges", "[]"))
        assert reported == []
|
||||
|
||||
73
tests/test_container_resolution_policy.py
Normal file
73
tests/test_container_resolution_policy.py
Normal file
@@ -0,0 +1,73 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""Unit tests for shared container resolution policy helpers."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from mes_dashboard.services import container_resolution_policy as policy
|
||||
|
||||
|
||||
def test_validate_resolution_request_rejects_empty_values():
    """An empty value list must produce a (non-``None``) validation error."""
    outcome = policy.validate_resolution_request("lot_id", [])
    assert outcome is not None
|
||||
|
||||
|
||||
def test_validate_resolution_request_rejects_broad_pattern(monkeypatch):
    """A bare ``%`` wildcard is rejected when the minimum prefix length is 2."""
    monkeypatch.setenv("CONTAINER_RESOLVE_PATTERN_MIN_PREFIX_LEN", "2")

    message = policy.validate_resolution_request("lot_id", ["%"])

    assert message is not None
    # The expected fragment is the policy's user-facing "wildcard too broad" text.
    assert "萬用字元條件過於寬鬆" in message
|
||||
|
||||
|
||||
def test_validate_resolution_request_allows_pattern_with_prefix(monkeypatch):
    """A wildcard with a four-character prefix passes a minimum length of 2."""
    monkeypatch.setenv("CONTAINER_RESOLVE_PATTERN_MIN_PREFIX_LEN", "2")

    message = policy.validate_resolution_request("lot_id", ["GA26%"])

    assert message is None
|
||||
|
||||
|
||||
def test_validate_resolution_result_rejects_excessive_expansion(monkeypatch):
    """One token expanding to 10 entries is rejected when the per-token cap is 3."""
    monkeypatch.setenv("CONTAINER_RESOLVE_MAX_EXPANSION_PER_TOKEN", "3")

    resolved_rows = [{"container_id": "C1"}]
    expansion_counts = {"GA%": 10}
    message = policy.validate_resolution_result(
        {"data": resolved_rows, "expansion_info": expansion_counts}
    )

    assert message is not None
    # Expected fragment: the policy's "single condition expands too far" text.
    assert "單一條件展開過大" in message
|
||||
|
||||
|
||||
def test_validate_resolution_result_rejects_excessive_container_count(monkeypatch):
    """Three resolved containers are rejected when the container cap is 2."""
    monkeypatch.setenv("CONTAINER_RESOLVE_MAX_CONTAINER_IDS", "2")

    resolved_rows = [{"container_id": cid} for cid in ("C1", "C2", "C3")]
    message = policy.validate_resolution_result(
        {"data": resolved_rows, "expansion_info": {}}
    )

    assert message is not None
    # Expected fragment: the policy's "resolution result too large" text.
    assert "解析結果過大" in message
|
||||
|
||||
|
||||
def test_validate_resolution_result_non_strict_allows_overflow(monkeypatch):
    """With ``strict=False`` an oversized result yields no error.

    The payload holds three containers against a cap of 2 and reports an
    expansion count of 999 for one token, yet validation must return ``None``.
    """
    monkeypatch.setenv("CONTAINER_RESOLVE_MAX_CONTAINER_IDS", "2")

    resolved_rows = [{"container_id": cid} for cid in ("C1", "C2", "C3")]
    payload = {"data": resolved_rows, "expansion_info": {"GA%": 999}}

    message = policy.validate_resolution_result(payload, strict=False)

    assert message is None
|
||||
|
||||
|
||||
def test_extract_container_ids_deduplicates_and_preserves_order():
    """Duplicate IDs collapse, first-seen order is kept, both key spellings work.

    The rows mix the ``container_id`` and ``CONTAINERID`` keys and repeat
    ``C1``; the expected output is ``["C1", "C2", "C3"]``.
    """
    keyed_ids = (
        ("container_id", "C1"),
        ("container_id", "C1"),
        ("CONTAINERID", "C2"),
        ("container_id", "C3"),
    )
    rows = [{key: cid} for key, cid in keyed_ids]

    extracted = policy.extract_container_ids(rows)

    assert extracted == ["C1", "C2", "C3"]
|
||||
@@ -198,3 +198,60 @@ def test_fetch_events_sanitizes_nan_values(
|
||||
result = EventFetcher.fetch_events(["CID-1"], "upstream_history")
|
||||
|
||||
assert result["CID-1"][0]["VALUE"] is None
|
||||
|
||||
|
||||
@patch("mes_dashboard.services.event_fetcher.cache_set")
@patch("mes_dashboard.services.event_fetcher.cache_get", return_value=None)
@patch("mes_dashboard.services.event_fetcher.read_sql_df_slow_iter")
@patch("mes_dashboard.services.event_fetcher.SQLLoader.load")
def test_fetch_events_raises_when_parallel_batch_fails_and_partial_disabled(
    mock_sql_load,
    mock_iter,
    _mock_cache_get,
    _mock_cache_set,
    monkeypatch,
):
    """A failing batch must surface as ``RuntimeError`` when partials are off.

    ``EVENT_FETCHER_ALLOW_PARTIAL_RESULTS`` is forced to ``False`` and the SQL
    iterator stub raises for any call whose bind values include ``CID-1000``.
    The error reaching the caller must mention "chunk failed".
    """
    mock_sql_load.return_value = "SELECT * FROM t WHERE h.CONTAINERID = :container_id {{ WORKCENTER_FILTER }}"
    monkeypatch.setattr("mes_dashboard.services.event_fetcher.EVENT_FETCHER_ALLOW_PARTIAL_RESULTS", False)
    monkeypatch.setattr("mes_dashboard.services.event_fetcher.EVENT_FETCHER_MAX_WORKERS", 2)

    def _side_effect(sql, params, timeout_seconds=60):
        # Fail only the call that carries the last container ID.
        if "CID-1000" in params.values():
            raise RuntimeError("chunk fail")
        return iter([])

    mock_iter.side_effect = _side_effect
    cids = [f"CID-{i}" for i in range(1001)]  # force >1 batch

    try:
        EventFetcher.fetch_events(cids, "history")
    except RuntimeError as exc:
        assert "chunk failed" in str(exc)
    else:
        # Raise explicitly: a bare ``assert False`` is stripped under
        # ``python -O`` and would let a missing exception go unnoticed.
        raise AssertionError("expected RuntimeError")
|
||||
|
||||
|
||||
@patch("mes_dashboard.services.event_fetcher.cache_set")
@patch("mes_dashboard.services.event_fetcher.cache_get", return_value=None)
@patch("mes_dashboard.services.event_fetcher.read_sql_df_slow_iter")
@patch("mes_dashboard.services.event_fetcher.SQLLoader.load")
def test_fetch_events_allows_partial_when_enabled(
    mock_sql_load,
    mock_iter,
    _mock_cache_get,
    _mock_cache_set,
    monkeypatch,
):
    """With partial results allowed, a failing batch does not raise.

    The SQL iterator stub raises for the call whose bind values include
    ``CID-1000`` and yields nothing otherwise, so the expected result is an
    empty dict.
    """
    fetcher_module = "mes_dashboard.services.event_fetcher"
    monkeypatch.setattr(f"{fetcher_module}.EVENT_FETCHER_ALLOW_PARTIAL_RESULTS", True)
    monkeypatch.setattr(f"{fetcher_module}.EVENT_FETCHER_MAX_WORKERS", 2)
    mock_sql_load.return_value = "SELECT * FROM t WHERE h.CONTAINERID = :container_id {{ WORKCENTER_FILTER }}"

    def _fail_on_last_cid(sql, params, timeout_seconds=60):
        if "CID-1000" not in params.values():
            return iter([])
        raise RuntimeError("chunk fail")

    mock_iter.side_effect = _fail_on_last_cid
    container_ids = [f"CID-{index}" for index in range(1001)]

    events = EventFetcher.fetch_events(container_ids, "history")

    assert events == {}
|
||||
|
||||
@@ -77,7 +77,7 @@ class TestJobQueryEngineDecomposition:
|
||||
result = job_svc.get_jobs_by_resources(
|
||||
resource_ids=["R1"],
|
||||
start_date="2025-06-01",
|
||||
end_date="2025-06-30",
|
||||
end_date="2025-06-05",
|
||||
)
|
||||
|
||||
assert engine_calls["execute"] == 0 # Engine NOT used
|
||||
|
||||
@@ -191,7 +191,7 @@ class TestErrorLeakageProtection:
|
||||
def test_query_error_masks_internal_details(self, mock_read):
|
||||
mock_read.side_effect = RuntimeError("ORA-00942: table or view does not exist")
|
||||
|
||||
result = get_jobs_by_resources(["RES001"], "2024-01-01", "2024-01-31")
|
||||
result = get_jobs_by_resources(["RES001"], "2024-01-01", "2024-01-05")
|
||||
|
||||
assert result["error"] == QUERY_ERROR_MESSAGE
|
||||
assert "ORA-00942" not in result["error"]
|
||||
|
||||
@@ -85,7 +85,7 @@ class TestDetectionEngineDecomposition:
|
||||
|
||||
df = msd_svc._fetch_station_detection_data(
|
||||
start_date="2025-06-01",
|
||||
end_date="2025-06-30",
|
||||
end_date="2025-06-05",
|
||||
station="測試",
|
||||
)
|
||||
|
||||
|
||||
@@ -14,7 +14,7 @@ from unittest.mock import patch, MagicMock
|
||||
from mes_dashboard import create_app
|
||||
from mes_dashboard.core.cache import NoOpCache
|
||||
from mes_dashboard.core.rate_limit import reset_rate_limits_for_tests
|
||||
from mes_dashboard.services.query_tool_service import MAX_DATE_RANGE_DAYS, MAX_LOT_IDS
|
||||
from mes_dashboard.services.query_tool_service import MAX_DATE_RANGE_DAYS
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
@@ -118,20 +118,19 @@ class TestResolveEndpoint:
|
||||
data = json.loads(response.data)
|
||||
assert 'error' in data
|
||||
|
||||
def test_values_over_limit(self, client):
|
||||
"""Should reject values exceeding limit."""
|
||||
values = [f'GA{i:09d}' for i in range(MAX_LOT_IDS + 1)]
|
||||
def test_rejects_too_broad_wildcard(self, client):
|
||||
"""Should reject wildcard patterns that are too broad."""
|
||||
response = client.post(
|
||||
'/api/query-tool/resolve',
|
||||
json={
|
||||
'input_type': 'lot_id',
|
||||
'values': values
|
||||
}
|
||||
)
|
||||
'values': ['%']
|
||||
}
|
||||
)
|
||||
assert response.status_code == 400
|
||||
data = json.loads(response.data)
|
||||
assert 'error' in data
|
||||
assert '超過上限' in data['error'] or str(MAX_LOT_IDS) in data['error']
|
||||
assert '萬用字元條件過於寬鬆' in data['error']
|
||||
|
||||
@patch('mes_dashboard.routes.query_tool_routes.resolve_lots')
|
||||
def test_resolve_success(self, mock_resolve, client):
|
||||
|
||||
@@ -90,7 +90,7 @@ class TestValidateDateRange:
|
||||
assert '格式' in result or 'format' in result.lower()
|
||||
|
||||
|
||||
class TestValidateLotInput:
|
||||
class TestValidateLotInput:
|
||||
"""Tests for validate_lot_input function."""
|
||||
|
||||
def test_valid_lot_ids(self):
|
||||
@@ -117,53 +117,24 @@ class TestValidateLotInput:
|
||||
assert result is not None
|
||||
assert '至少一個' in result
|
||||
|
||||
def test_exceeds_lot_id_limit(self):
|
||||
"""Should reject LOT IDs exceeding limit."""
|
||||
values = [f'GA{i:09d}' for i in range(MAX_LOT_IDS + 1)]
|
||||
result = validate_lot_input('lot_id', values)
|
||||
assert result is not None
|
||||
assert '超過上限' in result
|
||||
assert str(MAX_LOT_IDS) in result
|
||||
|
||||
def test_exceeds_serial_number_limit(self):
|
||||
"""Should reject serial numbers exceeding limit."""
|
||||
values = [f'SN{i:06d}' for i in range(MAX_SERIAL_NUMBERS + 1)]
|
||||
result = validate_lot_input('serial_number', values)
|
||||
assert result is not None
|
||||
assert '超過上限' in result
|
||||
assert str(MAX_SERIAL_NUMBERS) in result
|
||||
|
||||
def test_exceeds_work_order_limit(self):
|
||||
"""Should reject work orders exceeding limit."""
|
||||
values = [f'WO{i:06d}' for i in range(MAX_WORK_ORDERS + 1)]
|
||||
result = validate_lot_input('work_order', values)
|
||||
assert result is not None
|
||||
assert '超過上限' in result
|
||||
assert str(MAX_WORK_ORDERS) in result
|
||||
def test_large_input_list_allowed_when_no_count_cap(self, monkeypatch):
    """Should allow large lists when count cap is disabled."""
    # "0" for CONTAINER_RESOLVE_INPUT_MAX_VALUES is the "cap disabled" setting
    # this test exercises.
    monkeypatch.setenv("CONTAINER_RESOLVE_INPUT_MAX_VALUES", "0")
    oversized_count = MAX_LOT_IDS + 50
    lot_ids = [f'GA{index:09d}' for index in range(oversized_count)]

    verdict = validate_lot_input('lot_id', lot_ids)

    assert verdict is None
|
||||
|
||||
def test_exceeds_gd_work_order_limit(self):
|
||||
"""Should reject GD work orders exceeding limit."""
|
||||
values = [f'GD{i:06d}' for i in range(MAX_GD_WORK_ORDERS + 1)]
|
||||
result = validate_lot_input('gd_work_order', values)
|
||||
def test_rejects_too_broad_wildcard_pattern(self, monkeypatch):
|
||||
"""Should reject broad wildcard like '%' to prevent full scan."""
|
||||
monkeypatch.setenv("CONTAINER_RESOLVE_PATTERN_MIN_PREFIX_LEN", "2")
|
||||
result = validate_lot_input('lot_id', ['%'])
|
||||
assert result is not None
|
||||
assert '超過上限' in result
|
||||
assert str(MAX_GD_WORK_ORDERS) in result
|
||||
|
||||
def test_exactly_at_limit(self):
|
||||
"""Should accept values exactly at limit."""
|
||||
values = [f'GA{i:09d}' for i in range(MAX_LOT_IDS)]
|
||||
result = validate_lot_input('lot_id', values)
|
||||
assert result is None
|
||||
|
||||
def test_unknown_input_type_uses_default_limit(self):
|
||||
"""Should use default limit for unknown input types."""
|
||||
values = [f'X{i}' for i in range(MAX_LOT_IDS)]
|
||||
result = validate_lot_input('unknown_type', values)
|
||||
assert result is None
|
||||
|
||||
values_over = [f'X{i}' for i in range(MAX_LOT_IDS + 1)]
|
||||
result = validate_lot_input('unknown_type', values_over)
|
||||
assert result is not None
|
||||
assert '萬用字元條件過於寬鬆' in result
|
||||
|
||||
def test_accepts_wildcard_with_prefix(self, monkeypatch):
    """A wildcard with a four-character prefix passes a minimum length of 2."""
    monkeypatch.setenv("CONTAINER_RESOLVE_PATTERN_MIN_PREFIX_LEN", "2")

    verdict = validate_lot_input('lot_id', ['GA25%'])

    assert verdict is None
|
||||
|
||||
|
||||
class TestValidateEquipmentInput:
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from decimal import Decimal
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
@@ -400,6 +401,72 @@ class TestEngineDecompositionDateRange:
|
||||
assert engine_calls["parallel"] == cache_svc._REJECT_ENGINE_PARALLEL
|
||||
assert engine_calls["max_rows_per_chunk"] == cache_svc._REJECT_ENGINE_MAX_ROWS_PER_CHUNK
|
||||
|
||||
def test_engine_chunk_uses_paged_fetch_without_truncation(self, monkeypatch):
    """Engine chunk should fetch all pages (offset paging), not truncate at page size."""
    import mes_dashboard.services.batch_query_engine as engine_mod

    total_rows = 5
    offsets = []
    captured = {"df": pd.DataFrame(), "merge_kwargs": None}

    def fake_read_sql(sql, params):
        # Serve `total_rows` synthetic rows, sliced by the offset/limit binds,
        # and record every offset that was requested.
        offset = int(params.get("offset", 0))
        limit = int(params.get("limit", 0))
        offsets.append(offset)
        take = min(limit, max(total_rows - offset, 0))
        if take <= 0:
            return pd.DataFrame()
        return pd.DataFrame(
            {
                "CONTAINERID": [f"C{row}" for row in range(offset, offset + take)],
                "LOSSREASONNAME": ["R1"] * take,
                "REJECT_TOTAL_QTY": [1] * take,
            }
        )

    def fake_execute_plan(chunks, query_fn, **kwargs):
        # Run the first chunk inline so its frame can be handed to the merge stub.
        captured["df"] = query_fn(
            chunks[0], max_rows_per_chunk=kwargs.get("max_rows_per_chunk")
        )
        return kwargs.get("query_hash", "qh")

    def fake_merge_chunks(prefix, qhash, **kwargs):
        captured["merge_kwargs"] = kwargs
        return captured["df"]

    service_stubs = {
        "_REJECT_ENGINE_MAX_ROWS_PER_CHUNK": 2,
        "read_sql_df": fake_read_sql,
        "_get_cached_df": lambda _qid: None,
        "_prepare_sql": lambda *a, **kw: "SELECT 1 FROM dual",
        "_build_where_clause": lambda **kw: ("", {}, {}),
        "_validate_range": lambda *_a, **_kw: None,
        "_apply_policy_filters": lambda df, **kw: df,
        "_store_query_result": lambda *_a, **_kw: None,
        "redis_clear_batch": lambda *_a, **_kw: 0,
        "_build_primary_response": lambda qid, df, meta, ri: {"query_id": qid, "rows": len(df)},
    }
    engine_stubs = {
        "should_decompose_by_time": lambda *_a, **_kw: True,
        "decompose_by_time_range": lambda *_a, **_kw: [
            {"chunk_start": "2025-01-01", "chunk_end": "2025-01-31"}
        ],
        "execute_plan": fake_execute_plan,
        "merge_chunks": fake_merge_chunks,
    }
    for attr_name, stub in service_stubs.items():
        monkeypatch.setattr(cache_svc, attr_name, stub)
    for attr_name, stub in engine_stubs.items():
        monkeypatch.setattr(engine_mod, attr_name, stub)

    result = cache_svc.execute_primary_query(
        mode="date_range",
        start_date="2025-01-01",
        end_date="2025-03-01",
    )

    # Five rows at page size 2 means three pages, and merge got no extra kwargs.
    assert result["rows"] == 5
    assert offsets == [0, 2, 4]
    assert captured["merge_kwargs"] == {}
|
||||
|
||||
def test_short_range_skips_engine(self, monkeypatch):
|
||||
"""Short date range (<= threshold) uses direct path, no engine."""
|
||||
import mes_dashboard.services.batch_query_engine as engine_mod
|
||||
@@ -453,7 +520,7 @@ class TestEngineDecompositionDateRange:
|
||||
result = cache_svc.execute_primary_query(
|
||||
mode="date_range",
|
||||
start_date="2025-06-01",
|
||||
end_date="2025-06-30",
|
||||
end_date="2025-06-10",
|
||||
)
|
||||
|
||||
assert engine_calls["decompose"] == 0 # Engine NOT used
|
||||
@@ -629,7 +696,7 @@ def test_large_result_spills_to_parquet_and_view_export_use_spool_fallback(monke
|
||||
result = cache_svc.execute_primary_query(
|
||||
mode="date_range",
|
||||
start_date="2025-01-01",
|
||||
end_date="2025-01-31",
|
||||
end_date="2025-01-05",
|
||||
)
|
||||
|
||||
query_id = result["query_id"]
|
||||
@@ -651,3 +718,185 @@ def test_large_result_spills_to_parquet_and_view_export_use_spool_fallback(monke
|
||||
export_rows = cache_svc.export_csv_from_cache(query_id=query_id)
|
||||
assert export_rows is not None
|
||||
assert len(export_rows) == len(df)
|
||||
|
||||
|
||||
def test_resolve_containers_deduplicates_container_ids(monkeypatch):
    """Repeated container IDs from the resolver are collapsed to unique ones.

    The stub resolver returns ``CID-1`` twice and ``CID-2`` once; the resolved
    ID list and ``resolved_count`` must reflect two distinct containers.
    """

    def fake_lot_resolver(values):
        return {
            "data": [{"container_id": cid} for cid in ("CID-1", "CID-1", "CID-2")],
            "input_count": len(values),
            "not_found": [],
            "expansion_info": {"LOT%": 2},
        }

    monkeypatch.setattr(cache_svc, "_RESOLVERS", {"lot": fake_lot_resolver})
    # Limits set well above what the stub returns.
    monkeypatch.setenv("CONTAINER_RESOLVE_MAX_EXPANSION_PER_TOKEN", "10")
    monkeypatch.setenv("CONTAINER_RESOLVE_MAX_CONTAINER_IDS", "10")

    outcome = cache_svc.resolve_containers("lot", ["LOT%"])

    assert outcome["container_ids"] == ["CID-1", "CID-2"]
    assert outcome["resolution_info"]["resolved_count"] == 2
|
||||
|
||||
|
||||
def test_resolve_containers_allows_oversized_expansion_and_sets_guardrail(monkeypatch):
    """An over-limit expansion is reported via ``guardrail`` instead of failing.

    The stub resolver reports 999 expansions for ``GA%`` against a per-token
    cap of 50; ``resolution_info["guardrail"]`` must flag ``overflow`` and list
    exactly one expansion offender.
    """

    def fake_lot_resolver(values):
        return {
            "data": [{"container_id": "CID-1"}],
            "input_count": len(values),
            "not_found": [],
            "expansion_info": {"GA%": 999},
        }

    monkeypatch.setattr(cache_svc, "_RESOLVERS", {"lot": fake_lot_resolver})
    monkeypatch.setenv("CONTAINER_RESOLVE_MAX_EXPANSION_PER_TOKEN", "50")
    monkeypatch.setenv("CONTAINER_RESOLVE_PATTERN_MIN_PREFIX_LEN", "2")

    outcome = cache_svc.resolve_containers("lot", ["GA%"])

    guardrail = outcome["resolution_info"].get("guardrail") or {}
    offenders = guardrail.get("expansion_offenders") or []
    assert guardrail.get("overflow") is True
    assert len(offenders) == 1
|
||||
|
||||
|
||||
def test_partial_failure_in_response_meta(monkeypatch):
    """Engine progress with a partial failure is copied into the response meta.

    ``get_batch_progress`` is stubbed to return string-encoded fields, as a
    Redis hash would; the response meta must expose them as a ``True`` flag,
    an integer count and a decoded list of ranges.
    """
    import mes_dashboard.services.batch_query_engine as engine_mod

    merged_df = pd.DataFrame(
        {"CONTAINERID": ["C1"], "LOSSREASONNAME": ["R1"], "REJECT_TOTAL_QTY": [1]}
    )

    def fake_progress(*_a, **_kw):
        return {
            "has_partial_failure": "True",
            "failed": "2",
            "failed_ranges": json.dumps([{"start": "2025-01-01", "end": "2025-01-10"}]),
        }

    service_stubs = {
        "_get_cached_df": lambda _qid: None,
        "_validate_range": lambda *_a, **_kw: None,
        "_build_where_clause": lambda **kw: ("", {}, {}),
        "_prepare_sql": lambda *a, **kw: "SELECT 1 FROM dual",
        "_apply_policy_filters": lambda data, **kw: data,
        "_store_query_result": lambda *_a, **_kw: False,
        "redis_clear_batch": lambda *_a, **_kw: None,
        "_build_primary_response": lambda qid, result_df, meta, resolution_info: {
            "query_id": qid,
            "meta": meta,
        },
        "_store_partial_failure_flag": lambda *_a, **_kw: None,
    }
    engine_stubs = {
        "should_decompose_by_time": lambda *_a, **_kw: True,
        "decompose_by_time_range": lambda *_a, **_kw: [
            {"chunk_start": "2025-01-01", "chunk_end": "2025-01-10"}
        ],
        "execute_plan": lambda *a, **kw: kw.get("query_hash"),
        "merge_chunks": lambda *a, **kw: merged_df.copy(),
        "get_batch_progress": fake_progress,
    }
    for attr_name, stub in service_stubs.items():
        monkeypatch.setattr(cache_svc, attr_name, stub)
    for attr_name, stub in engine_stubs.items():
        monkeypatch.setattr(engine_mod, attr_name, stub)

    response = cache_svc.execute_primary_query(
        mode="date_range",
        start_date="2025-01-01",
        end_date="2025-03-01",
    )

    meta = response.get("meta") or {}
    assert meta.get("has_partial_failure") is True
    assert meta.get("failed_chunk_count") == 2
    assert meta.get("failed_ranges") == [{"start": "2025-01-01", "end": "2025-01-10"}]
|
||||
|
||||
|
||||
def test_cache_hit_restores_partial_failure(monkeypatch):
    """On a cache hit the stored partial-failure flag is put back into meta.

    ``_get_cached_df`` returns a frame and ``_load_partial_failure_flag``
    returns a flag with three failed chunks and no ranges; both must appear
    unchanged in the response meta.
    """
    cached_frame = pd.DataFrame(
        {"CONTAINERID": ["C1"], "LOSSREASONNAME": ["R1"], "REJECT_TOTAL_QTY": [1]}
    )

    def fake_load_flag(_qid):
        return {
            "has_partial_failure": True,
            "failed_chunk_count": 3,
            "failed_ranges": [],
        }

    service_stubs = {
        "_get_cached_df": lambda _qid: cached_frame,
        "_validate_range": lambda *_a, **_kw: None,
        "_build_where_clause": lambda **kw: ("", {}, {}),
        "_apply_policy_filters": lambda data, **kw: data,
        "_load_partial_failure_flag": fake_load_flag,
        "_build_primary_response": lambda qid, result_df, meta, resolution_info: {
            "query_id": qid,
            "meta": meta,
        },
    }
    for attr_name, stub in service_stubs.items():
        monkeypatch.setattr(cache_svc, attr_name, stub)

    response = cache_svc.execute_primary_query(
        mode="date_range",
        start_date="2025-01-01",
        end_date="2025-01-31",
    )

    meta = response.get("meta") or {}
    assert meta.get("has_partial_failure") is True
    assert meta.get("failed_chunk_count") == 3
    assert meta.get("failed_ranges") == []
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "store_result,expected_ttl",
    [
        (True, cache_svc._REJECT_ENGINE_SPOOL_TTL_SECONDS),
        (False, cache_svc._CACHE_TTL),
    ],
)
def test_partial_failure_ttl_matches_spool(monkeypatch, store_result, expected_ttl):
    """The partial-failure flag is stored with the TTL of the data it describes.

    ``store_result`` is the value the stubbed ``_store_query_result`` returns:
    ``True`` must lead to the spool TTL and ``False`` to the regular cache TTL.
    The TTL is captured from the stubbed ``_store_partial_failure_flag`` call.
    """
    import mes_dashboard.services.batch_query_engine as engine_mod

    merged_df = pd.DataFrame(
        {"CONTAINERID": ["C1"], "LOSSREASONNAME": ["R1"], "REJECT_TOTAL_QTY": [1]}
    )
    recorded_ttls = []

    service_stubs = {
        "_get_cached_df": lambda _qid: None,
        "_validate_range": lambda *_a, **_kw: None,
        "_build_where_clause": lambda **kw: ("", {}, {}),
        "_prepare_sql": lambda *a, **kw: "SELECT 1 FROM dual",
        "_apply_policy_filters": lambda data, **kw: data,
        "_store_query_result": lambda *_a, **_kw: store_result,
        "redis_clear_batch": lambda *_a, **_kw: None,
        "_build_primary_response": lambda qid, result_df, meta, resolution_info: {
            "query_id": qid,
            "meta": meta,
        },
        "_store_partial_failure_flag": lambda _qid, _failed, _ranges, ttl: recorded_ttls.append(ttl),
    }
    engine_stubs = {
        "should_decompose_by_time": lambda *_a, **_kw: True,
        "decompose_by_time_range": lambda *_a, **_kw: [
            {"chunk_start": "2025-01-01", "chunk_end": "2025-01-10"}
        ],
        "execute_plan": lambda *a, **kw: kw.get("query_hash"),
        "merge_chunks": lambda *a, **kw: merged_df.copy(),
        "get_batch_progress": lambda *_a, **_kw: {
            "has_partial_failure": "True",
            "failed": "1",
            "failed_ranges": "[]",
        },
    }
    for attr_name, stub in service_stubs.items():
        monkeypatch.setattr(cache_svc, attr_name, stub)
    for attr_name, stub in engine_stubs.items():
        monkeypatch.setattr(engine_mod, attr_name, stub)

    cache_svc.execute_primary_query(
        mode="date_range",
        start_date="2025-01-01",
        end_date="2025-03-01",
    )

    assert recorded_ttls == [expected_ttl]
|
||||
|
||||
Reference in New Issue
Block a user