Files
DashBoard/tests/test_lineage_engine.py
egg 519f8ae2f4 feat(lineage): unified LineageEngine, EventFetcher, and progressive trace API
Introduce a unified Seed→Lineage→Event pipeline replacing per-page Python
BFS with Oracle CONNECT BY NOCYCLE queries, add staged /api/trace/*
endpoints with rate limiting and L2 Redis caching, and wire progressive
frontend loading via useTraceProgress composable.

Key changes:
- Add LineageEngine (split ancestors / merge sources / full genealogy)
  with QueryBuilder bind-param safety and batched IN clauses
- Add EventFetcher with 6-domain support and L2 Redis cache
- Add trace_routes Blueprint (seed-resolve, lineage, events) with
  profile dispatch, rate limiting, and Redis TTL=300s caching
- Refactor query_tool_service to use LineageEngine and QueryBuilder,
  removing raw string interpolation (SQL injection fix)
- Add rate limits and resolve cache to query_tool_routes
- Integrate useTraceProgress into mid-section-defect with skeleton
  placeholders and fade-in transitions
- Add lineageCache and on-demand lot lineage to query-tool
- Add TraceProgressBar shared component
- Remove legacy query-tool.js static script (3k lines)
- Fix MatrixTable package column truncation (.slice(0,15) removed)
- Archive unified-lineage-engine change, add trace-progressive-ui specs

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-12 16:30:24 +08:00

232 lines
7.4 KiB
Python

# -*- coding: utf-8 -*-
"""Unit tests for LineageEngine."""
from __future__ import annotations
from unittest.mock import patch
import pandas as pd
from mes_dashboard.services.lineage_engine import LineageEngine
@patch("mes_dashboard.services.lineage_engine.read_sql_df")
def test_resolve_split_ancestors_batches_and_enforces_max_depth(mock_read_sql_df):
    """1001 container IDs must be split into a 1000-bind batch plus a 1-bind batch,
    the generated SQL must cap recursion at LEVEL <= 20, and any returned row whose
    SPLIT_DEPTH exceeds the cap must be dropped from the child->parent mapping."""
    container_ids = [f"C{i:04d}" for i in range(1001)]
    # First batch: a two-level chain C0000 -> P0000 (P0000 has no parent).
    batch_one = pd.DataFrame(
        [
            {
                "CONTAINERID": "C0000",
                "SPLITFROMID": "P0000",
                "CONTAINERNAME": "LOT-0000",
                "SPLIT_DEPTH": 1,
            },
            {
                "CONTAINERID": "P0000",
                "SPLITFROMID": None,
                "CONTAINERNAME": "LOT-P0000",
                "SPLIT_DEPTH": 2,
            },
        ]
    )
    # Second batch: one valid link plus a row past the depth limit that must be ignored.
    batch_two = pd.DataFrame(
        [
            {
                "CONTAINERID": "C1000",
                "SPLITFROMID": "P1000",
                "CONTAINERNAME": "LOT-1000",
                "SPLIT_DEPTH": 1,
            },
            {
                "CONTAINERID": "C-TOO-DEEP",
                "SPLITFROMID": "P-TOO-DEEP",
                "CONTAINERNAME": "LOT-DEEP",
                "SPLIT_DEPTH": 21,
            },
        ]
    )
    mock_read_sql_df.side_effect = [batch_one, batch_two]

    result = LineageEngine.resolve_split_ancestors(container_ids, {"INIT": "LOT-INIT"})

    # Exactly two queries: one per IN-clause batch of at most 1000 binds.
    assert mock_read_sql_df.call_count == 2
    sql_a, params_a = mock_read_sql_df.call_args_list[0].args
    sql_b, params_b = mock_read_sql_df.call_args_list[1].args
    assert "LEVEL <= 20" in sql_a
    assert "LEVEL <= 20" in sql_b
    assert len(params_a) == 1000
    assert len(params_b) == 1

    # Valid links are mapped; the over-depth row is filtered out post-query.
    assert result["child_to_parent"]["C0000"] == "P0000"
    assert result["child_to_parent"]["C1000"] == "P1000"
    assert "C-TOO-DEEP" not in result["child_to_parent"]
    # Names from query rows are merged with the caller-provided seed name map.
    assert result["cid_to_name"]["C0000"] == "LOT-0000"
    assert result["cid_to_name"]["INIT"] == "LOT-INIT"
@patch("mes_dashboard.services.lineage_engine.read_sql_df")
def test_resolve_merge_sources_batches_and_returns_mapping(mock_read_sql_df):
    """1001 finished names must be queried in two batches (1000 + 1), the
    template placeholder must be rendered out of the SQL, duplicate source CIDs
    must be deduplicated, and rows with a null FINISHEDNAME must be skipped."""
    finished_names = [f"FN{i:04d}" for i in range(1001)]
    # Batch one: two distinct sources for the same finished name.
    batch_one = pd.DataFrame(
        [
            {"FINISHEDNAME": "FN0000", "SOURCE_CID": "SRC-A"},
            {"FINISHEDNAME": "FN0000", "SOURCE_CID": "SRC-B"},
        ]
    )
    # Batch two: a duplicated source and an invalid null-name row.
    batch_two = pd.DataFrame(
        [
            {"FINISHEDNAME": "FN1000", "SOURCE_CID": "SRC-C"},
            {"FINISHEDNAME": "FN1000", "SOURCE_CID": "SRC-C"},
            {"FINISHEDNAME": None, "SOURCE_CID": "SRC-INVALID"},
        ]
    )
    mock_read_sql_df.side_effect = [batch_one, batch_two]

    result = LineageEngine.resolve_merge_sources(finished_names)

    assert mock_read_sql_df.call_count == 2
    sql_a, params_a = mock_read_sql_df.call_args_list[0].args
    sql_b, params_b = mock_read_sql_df.call_args_list[1].args
    # The Jinja-style filter placeholder must have been substituted away.
    assert "{{ FINISHED_NAME_FILTER }}" not in sql_a
    assert "{{ FINISHED_NAME_FILTER }}" not in sql_b
    assert len(params_a) == 1000
    assert len(params_b) == 1

    assert result["FN0000"] == ["SRC-A", "SRC-B"]
    # Duplicate SRC-C collapses to one entry; the None-name row contributes nothing.
    assert result["FN1000"] == ["SRC-C"]
@patch("mes_dashboard.services.lineage_engine.LineageEngine.resolve_merge_sources")
@patch("mes_dashboard.services.lineage_engine.LineageEngine.resolve_split_ancestors")
def test_resolve_full_genealogy_combines_split_and_merge(
    mock_resolve_split_ancestors,
    mock_resolve_merge_sources,
):
    """Full genealogy must union the split-ancestor chain of the seed with the
    merge sources found along that chain, and with the split ancestors of those
    merge sources (a second resolve_split_ancestors pass)."""
    # Pass 1: seed A splits from B, which splits from C.
    first_pass = {
        "child_to_parent": {"A": "B", "B": "C"},
        "cid_to_name": {"A": "LOT-A", "B": "LOT-B", "C": "LOT-C"},
    }
    # Pass 2: the merge source M1 itself splits from M0.
    second_pass = {
        "child_to_parent": {"M1": "M0"},
        "cid_to_name": {"M1": "LOT-M1", "M0": "LOT-M0"},
    }
    mock_resolve_split_ancestors.side_effect = [first_pass, second_pass]
    # LOT-B (an ancestor name) was produced by merging source container M1.
    mock_resolve_merge_sources.return_value = {"LOT-B": ["M1"]}

    result = LineageEngine.resolve_full_genealogy(["A"], {"A": "LOT-A"})

    # Seed A's lineage = split chain {B, C} + merge source M1 + M1's ancestor M0.
    assert result == {"A": {"B", "C", "M1", "M0"}}
    assert mock_resolve_split_ancestors.call_count == 2
    mock_resolve_merge_sources.assert_called_once()
@patch("mes_dashboard.services.lineage_engine.read_sql_df")
def test_split_ancestors_matches_legacy_bfs_for_five_known_lots(mock_read_sql_df):
    """Golden-master check: the CONNECT BY based resolver must produce exactly
    the same child->parent and cid->name maps as the legacy per-round BFS did,
    across five fixture lots covering a chain, a short link, a leaf, a deep
    chain, and a parent cycle (L5P1 <-> L5P2)."""
    parent_by_cid = {
        "L1": "L1P1",
        "L1P1": "L1P2",
        "L2": "L2P1",
        "L3": None,
        "L4": "L4P1",
        "L4P1": "L4P2",
        "L4P2": "L4P3",
        "L5": "L5P1",
        "L5P1": "L5P2",
        "L5P2": "L5P1",
    }
    name_by_cid = {
        "L1": "LOT-1",
        "L1P1": "LOT-1-P1",
        "L1P2": "LOT-1-P2",
        "L2": "LOT-2",
        "L2P1": "LOT-2-P1",
        "L3": "LOT-3",
        "L4": "LOT-4",
        "L4P1": "LOT-4-P1",
        "L4P2": "LOT-4-P2",
        "L4P3": "LOT-4-P3",
        "L5": "LOT-5",
        "L5P1": "LOT-5-P1",
        "L5P2": "LOT-5-P2",
    }
    seed_lots = ["L1", "L2", "L3", "L4", "L5"]

    def _connect_by_rows(start_cids):
        # Emulates Oracle CONNECT BY NOCYCLE: walk each chain up to depth 20,
        # stopping when a node repeats (cycle) or has no parent.
        emitted = []
        for seed in start_cids:
            node, depth, walked = seed, 1, set()
            while node and depth <= 20 and node not in walked:
                walked.add(node)
                emitted.append(
                    {
                        "CONTAINERID": node,
                        "SPLITFROMID": parent_by_cid.get(node),
                        "CONTAINERNAME": name_by_cid.get(node),
                        "SPLIT_DEPTH": depth,
                    }
                )
                node = parent_by_cid.get(node)
                depth += 1
        return pd.DataFrame(emitted)

    def _mock_read_sql(_sql, params):
        # Bind-param values are the requested container IDs for this batch.
        return _connect_by_rows(list(params.values()))

    mock_read_sql_df.side_effect = _mock_read_sql
    connect_by_result = LineageEngine.resolve_split_ancestors(seed_lots)

    # Legacy BFS reference implementation from previous mid_section_defect_service.
    legacy_child_to_parent = {}
    legacy_cid_to_name = {}
    frontier = list(seed_lots)
    seen = set(seed_lots)
    rounds = 0
    while frontier:
        rounds += 1
        batch_rows = [
            {
                "CONTAINERID": cid,
                "SPLITFROMID": parent_by_cid.get(cid),
                "CONTAINERNAME": name_by_cid.get(cid),
            }
            for cid in frontier
        ]
        discovered = set()
        for row in batch_rows:
            cid, split_from, name = (
                row["CONTAINERID"],
                row["SPLITFROMID"],
                row["CONTAINERNAME"],
            )
            if isinstance(name, str) and name:
                legacy_cid_to_name[cid] = name
            # Self-links and non-string parents are ignored, mirroring the old service.
            if isinstance(split_from, str) and split_from and split_from != cid:
                legacy_child_to_parent[cid] = split_from
                if split_from not in seen:
                    seen.add(split_from)
                    discovered.add(split_from)
        frontier = list(discovered)
        if rounds > 20:
            break

    assert connect_by_result["child_to_parent"] == legacy_child_to_parent
    assert connect_by_result["cid_to_name"] == legacy_cid_to_name