# DashBoard/src/mes_dashboard/services/hold_dataset_cache.py
# -*- coding: utf-8 -*-
"""Two-phase hold-history dataset cache.

Primary query (POST /query) → Oracle → cache full hold/release DataFrame.
Supplementary view (GET /view) → read cache → pandas filter/derive.

Cache layers:
    L1: ProcessLevelCache (in-process, per-worker)
    L2: Redis (cross-worker, parquet bytes encoded as base64 string)
"""
from __future__ import annotations

import hashlib
import json
import logging
from functools import lru_cache
from pathlib import Path
from typing import Any, Dict, List, Optional

import pandas as pd

from mes_dashboard.core.cache import ProcessLevelCache, register_process_cache
from mes_dashboard.core.database import read_sql_df_slow as read_sql_df
from mes_dashboard.core.redis_df_store import redis_load_df, redis_store_df
from mes_dashboard.services.filter_cache import get_workcenter_group as _get_wc_group
from mes_dashboard.services.hold_history_service import (
    _clean_text,
    _format_datetime,
    _safe_float,
    _safe_int,
)
from mes_dashboard.sql.filters import CommonFilters

logger = logging.getLogger("mes_dashboard.hold_dataset_cache")
_CACHE_TTL = 900 # 15 minutes
_CACHE_MAX_SIZE = 8
_REDIS_NAMESPACE = "hold_dataset"
_dataset_cache = ProcessLevelCache(ttl_seconds=_CACHE_TTL, max_size=_CACHE_MAX_SIZE)
register_process_cache("hold_dataset", _dataset_cache, "Hold Dataset (L1, 15min)")
_SQL_DIR = Path(__file__).resolve().parent.parent / "sql" / "hold_history"
# ============================================================
# SQL loading
# ============================================================
@lru_cache(maxsize=4)
def _load_sql(name: str) -> str:
    """Load a SQL file from sql/hold_history and expand known placeholders."""
    path = _SQL_DIR / f"{name}.sql"
    sql = path.read_text(encoding="utf-8")
    if "{{ NON_QUALITY_REASONS }}" in sql:
        sql = sql.replace(
            "{{ NON_QUALITY_REASONS }}",
            CommonFilters.get_non_quality_reasons_sql(),
        )
    return sql

# ============================================================
# Query ID
# ============================================================
def _make_query_id(params: dict) -> str:
    """Deterministic hash from primary query params."""
    canonical = json.dumps(params, sort_keys=True, ensure_ascii=False, default=str)
    return hashlib.sha256(canonical.encode("utf-8")).hexdigest()[:16]

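
# For example (illustrative only): because the params dict is serialised with
# sort_keys=True, _make_query_id({"end_date": e, "start_date": s}) and
# _make_query_id({"start_date": s, "end_date": e}) yield the same 16-character id.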
# ============================================================
# Redis L2 helpers (delegated to shared redis_df_store)
# ============================================================
def _redis_store_df(query_id: str, df: pd.DataFrame) -> None:
    redis_store_df(f"{_REDIS_NAMESPACE}:{query_id}", df, ttl=_CACHE_TTL)


def _redis_load_df(query_id: str) -> Optional[pd.DataFrame]:
    return redis_load_df(f"{_REDIS_NAMESPACE}:{query_id}")

# ============================================================
# Cache read (L1 -> L2 -> None)
# ============================================================
def _get_cached_df(query_id: str) -> Optional[pd.DataFrame]:
    """Read cache: L1 hit -> return, L1 miss -> L2 -> write L1 -> return."""
    df = _dataset_cache.get(query_id)
    if df is not None:
        return df
    df = _redis_load_df(query_id)
    if df is not None:
        _dataset_cache.set(query_id, df)
    return df


def _store_df(query_id: str, df: pd.DataFrame) -> None:
    """Write to L1 and L2."""
    _dataset_cache.set(query_id, df)
    _redis_store_df(query_id, df)

# ============================================================
# Primary query
# ============================================================
def execute_primary_query(
    *,
    start_date: str,
    end_date: str,
    hold_type: str = "quality",
    record_type: str = "new",
) -> Dict[str, Any]:
    """Execute Oracle query -> cache DataFrame -> return structured result."""
    query_id = _make_query_id({"start_date": start_date, "end_date": end_date})
    cached_df = _get_cached_df(query_id)
    if cached_df is not None:
        logger.info("Hold dataset cache hit for query_id=%s", query_id)
    else:
        logger.info(
            "Hold dataset cache miss for query_id=%s, querying Oracle", query_id
        )
        from mes_dashboard.services.batch_query_engine import (
            decompose_by_time_range,
            execute_plan,
            merge_chunks,
            compute_query_hash,
            should_decompose_by_time,
        )

        if should_decompose_by_time(start_date, end_date):
            # --- Engine path for long date ranges ---
            engine_chunks = decompose_by_time_range(start_date, end_date)
            engine_hash = compute_query_hash(
                {"start_date": start_date, "end_date": end_date}
            )
            base_sql = _load_sql("base_facts")

            def _run_hold_chunk(chunk, max_rows_per_chunk=None):
                params = {
                    "start_date": chunk["chunk_start"],
                    "end_date": chunk["chunk_end"],
                }
                result = read_sql_df(base_sql, params)
                return result if result is not None else pd.DataFrame()

            logger.info(
                "Engine activated for hold: %d chunks (query_id=%s)",
                len(engine_chunks), query_id,
            )
            execute_plan(
                engine_chunks, _run_hold_chunk,
                query_hash=engine_hash,
                cache_prefix="hold",
                chunk_ttl=_CACHE_TTL,
            )
            df = merge_chunks("hold", engine_hash)
        else:
            # --- Direct path (short query) ---
            sql = _load_sql("base_facts")
            params = {"start_date": start_date, "end_date": end_date}
            df = read_sql_df(sql, params)

        if df is None:
            df = pd.DataFrame()
        if not df.empty:
            df["_QUERY_START"] = pd.Timestamp(start_date)
            df["_QUERY_END"] = pd.Timestamp(end_date)
        _store_df(query_id, df)
        cached_df = df

    views = _derive_all_views(
        cached_df,
        hold_type=hold_type,
        record_type=record_type,
        page=1,
        per_page=50,
    )
    return {"query_id": query_id, **views}

# ============================================================
# View (supplementary filtering on cache)
# ============================================================
def apply_view(
    *,
    query_id: str,
    hold_type: str = "quality",
    reason: Optional[str] = None,
    record_type: str = "new",
    duration_range: Optional[str] = None,
    page: int = 1,
    per_page: int = 50,
) -> Optional[Dict[str, Any]]:
    """Read cache -> apply filters -> return derived data. Returns None if expired."""
    df = _get_cached_df(query_id)
    if df is None:
        return None
    return _derive_all_views(
        df,
        hold_type=hold_type,
        reason=reason,
        record_type=record_type,
        duration_range=duration_range,
        page=page,
        per_page=per_page,
    )

# ============================================================
# Master derivation
# ============================================================
def _derive_all_views(
    df: pd.DataFrame,
    *,
    hold_type: str = "quality",
    reason: Optional[str] = None,
    record_type: str = "new",
    duration_range: Optional[str] = None,
    page: int = 1,
    per_page: int = 50,
) -> Dict[str, Any]:
    """Derive trend, reason_pareto, duration, and list from cached DataFrame."""
    if df is None or df.empty:
        return _empty_views()
    # Trend uses full DF (no record_type/reason/duration filter, all hold_types)
    trend = _derive_trend(df)
    # Apply record_type filter for pareto, duration, list
    filtered = _apply_record_type_filter(df, record_type)
    # Apply hold_type filter
    if hold_type != "all":
        ht_value = "non-quality" if hold_type == "non-quality" else "quality"
        filtered = filtered[filtered["HOLD_TYPE"] == ht_value]
    reason_pareto = _derive_reason_pareto(filtered)
    duration = _derive_duration(filtered)
    # List: additional reason + duration_range filters
    list_df = filtered
    if reason:
        list_df = list_df[
            list_df["HOLDREASONNAME"].str.strip() == reason.strip()
        ]
    if duration_range:
        list_df = _apply_duration_range_filter(list_df, duration_range)
    detail = _derive_list(list_df, page=page, per_page=per_page)
    return {
        "trend": trend,
        "reason_pareto": reason_pareto,
        "duration": duration,
        "list": detail,
    }


def _empty_views() -> Dict[str, Any]:
    return {
        "trend": {"days": []},
        "reason_pareto": {"items": []},
        "duration": {"items": []},
        "list": {
            "items": [],
            "pagination": {
                "page": 1,
                "perPage": 50,
                "total": 0,
                "totalPages": 1,
            },
        },
    }

# ============================================================
# Record-type & duration-range filters
# ============================================================
def _apply_record_type_filter(
    df: pd.DataFrame, record_type: str
) -> pd.DataFrame:
    """Keep rows matching any of the comma-separated record types (new/on_hold/released)."""
    if df is None or df.empty:
        return df
    types = {t.strip().lower() for t in str(record_type or "new").split(",")}
    if types >= {"new", "on_hold", "released"}:
        return df
    mask = pd.Series(False, index=df.index)
    if "new" in types:
        if "_QUERY_START" in df.columns and "_QUERY_END" in df.columns:
            qs = df["_QUERY_START"].iloc[0]
            qe = df["_QUERY_END"].iloc[0]
            mask |= (df["HOLD_DAY"] >= qs) & (df["HOLD_DAY"] <= qe)
        else:
            mask |= pd.Series(True, index=df.index)
    if "on_hold" in types:
        mask |= df["RELEASETXNDATE"].isna()
    if "released" in types:
        mask |= df["RELEASETXNDATE"].notna()
    return df[mask]
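
# Examples (illustrative): record_type="released" keeps only rows with a
# RELEASETXNDATE, while record_type="new,on_hold" keeps the union of holds placed
# inside the queried window and holds that have not been released yet.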

def _apply_duration_range_filter(
    df: pd.DataFrame, duration_range: str
) -> pd.DataFrame:
    """Filter rows by HOLD_HOURS bucket: <4h, 4-24h, 1-3d, or >3d."""
    if df is None or df.empty or not duration_range:
        return df
    hours = df["HOLD_HOURS"]
    if duration_range == "<4h":
        return df[hours < 4]
    if duration_range == "4-24h":
        return df[(hours >= 4) & (hours < 24)]
    if duration_range == "1-3d":
        return df[(hours >= 24) & (hours < 72)]
    if duration_range == ">3d":
        return df[hours >= 72]
    return df

# ============================================================
# Derivation: Trend
# ============================================================
def _derive_trend(df: pd.DataFrame) -> Dict[str, Any]:
    """Derive daily trend with quality / non_quality / all variants."""
    if df is None or df.empty:
        return {"days": []}
    if "_QUERY_START" in df.columns:
        start = pd.Timestamp(df["_QUERY_START"].iloc[0])
        end = pd.Timestamp(df["_QUERY_END"].iloc[0])
    else:
        start = df["HOLD_DAY"].min()
        end = df["HOLD_DAY"].max()
    dates = pd.date_range(start, end, freq="D")
    type_map: List[tuple] = [
        ("quality", "quality"),
        ("non_quality", "non-quality"),
        ("all", None),
    ]
    days: List[Dict[str, Any]] = []
    for d in dates:
        day_data: Dict[str, Any] = {"date": d.strftime("%Y-%m-%d")}
        for key, type_filter in type_map:
            tdf = df if type_filter is None else df[df["HOLD_TYPE"] == type_filter]
            if tdf.empty:
                day_data[key] = _empty_trend_metrics()
                continue
            # holdQty: total QTY on hold as of this day
            on_hold = (tdf["HOLD_DAY"] <= d) & (
                tdf["RELEASE_DAY"].isna() | (tdf["RELEASE_DAY"] > d)
            )
            hold_qty = _safe_int(tdf.loc[on_hold, "QTY"].sum())
            # newHoldQty: QTY of new holds arriving this day (dedup)
            new_mask = (tdf["HOLD_DAY"] == d) & (tdf["RN_HOLD_DAY"] == 1)
            new_hold_qty = _safe_int(tdf.loc[new_mask, "QTY"].sum())
            # releaseQty: QTY released on this day
            release_mask = tdf["RELEASE_DAY"] == d
            release_qty = _safe_int(tdf.loc[release_mask, "QTY"].sum())
            # futureHoldQty: QTY of future holds on this day
            future_mask = (
                (tdf["HOLD_DAY"] == d)
                & (tdf["IS_FUTURE_HOLD"] == 1)
                & (tdf["FUTURE_HOLD_FLAG"] == 1)
            )
            future_hold_qty = _safe_int(tdf.loc[future_mask, "QTY"].sum())
            day_data[key] = {
                "holdQty": hold_qty,
                "newHoldQty": new_hold_qty,
                "releaseQty": release_qty,
                "futureHoldQty": future_hold_qty,
            }
        days.append(day_data)
    return {"days": days}


def _empty_trend_metrics() -> Dict[str, int]:
    return {"holdQty": 0, "newHoldQty": 0, "releaseQty": 0, "futureHoldQty": 0}

# ============================================================
# Derivation: Reason Pareto
# ============================================================
def _derive_reason_pareto(df: pd.DataFrame) -> Dict[str, Any]:
    """Group by HOLDREASONNAME -> count, qty, pct, cumPct."""
    if df is None or df.empty:
        return {"items": []}
    grouped = (
        df.groupby("HOLDREASONNAME", sort=False)
        .agg(count=("CONTAINERID", "count"), qty=("QTY", "sum"))
        .reset_index()
    )
    grouped = grouped.sort_values("qty", ascending=False)
    total_qty = grouped["qty"].sum()
    items: List[Dict[str, Any]] = []
    cumulative = 0.0
    for _, row in grouped.iterrows():
        count = _safe_int(row["count"])
        qty = _safe_int(row["qty"])
        pct = round((qty / total_qty * 100) if total_qty > 0 else 0, 2)
        cumulative += pct
        items.append(
            {
                # Fallback label means "(not filled in)"
                "reason": _clean_text(row["HOLDREASONNAME"]) or "(未填寫)",
                "count": count,
                "qty": qty,
                "pct": pct,
                "cumPct": round(cumulative, 2),
            }
        )
    return {"items": items}

# ============================================================
# Derivation: Duration
# ============================================================
def _derive_duration(df: pd.DataFrame) -> Dict[str, Any]:
    """Bucket released holds into <4h / 4-24h / 1-3d / >3d."""
    if df is None or df.empty:
        return {"items": []}
    released = df[df["RELEASETXNDATE"].notna()]
    if released.empty:
        return {"items": []}
    hours = released["HOLD_HOURS"]
    total_qty = _safe_int(released["QTY"].sum())
    buckets = [
        ("<4h", hours < 4),
        ("4-24h", (hours >= 4) & (hours < 24)),
        ("1-3d", (hours >= 24) & (hours < 72)),
        (">3d", hours >= 72),
    ]
    items: List[Dict[str, Any]] = []
    for label, mask in buckets:
        count = int(mask.sum())
        qty = _safe_int(released.loc[mask, "QTY"].sum())
        pct = round((qty / total_qty * 100) if total_qty > 0 else 0, 2)
        items.append({"range": label, "count": count, "qty": qty, "pct": pct})
    return {"items": items}

# ============================================================
# Derivation: Paginated list
# ============================================================
def _derive_list(
    df: pd.DataFrame,
    *,
    page: int = 1,
    per_page: int = 50,
) -> Dict[str, Any]:
    """Sort by HOLDTXNDATE desc and paginate."""
    if df is None or df.empty:
        return {
            "items": [],
            "pagination": {
                "page": 1,
                "perPage": per_page,
                "total": 0,
                "totalPages": 1,
            },
        }
    page = max(int(page), 1)
    per_page = min(max(int(per_page), 1), 200)
    sorted_df = df.sort_values("HOLDTXNDATE", ascending=False)
    total = len(sorted_df)
    total_pages = max((total + per_page - 1) // per_page, 1)
    offset = (page - 1) * per_page
    page_df = sorted_df.iloc[offset : offset + per_page]
    items: List[Dict[str, Any]] = []
    for _, row in page_df.iterrows():
        wc_name = _clean_text(row.get("WORKCENTERNAME"))
        wc_group = _get_wc_group(wc_name) if wc_name else None
        items.append(
            {
                "lotId": _clean_text(row.get("LOT_ID")),
                "workorder": _clean_text(row.get("PJ_WORKORDER")),
                "product": _clean_text(row.get("PRODUCTNAME")),
                "workcenter": wc_group or wc_name,
                "holdReason": _clean_text(row.get("HOLDREASONNAME")),
                "qty": _safe_int(row.get("QTY")),
                "holdDate": _format_datetime(row.get("HOLDTXNDATE")),
                "holdEmp": _clean_text(row.get("HOLDEMP")),
                "holdComment": _clean_text(row.get("HOLDCOMMENTS")),
                "releaseDate": _format_datetime(row.get("RELEASETXNDATE")),
                "releaseEmp": _clean_text(row.get("RELEASEEMP")),
                "releaseComment": _clean_text(row.get("RELEASECOMMENTS")),
                "holdHours": round(_safe_float(row.get("HOLD_HOURS")), 2),
                "ncr": _clean_text(row.get("NCRID")),
                "futureHoldComment": _clean_text(row.get("FUTUREHOLDCOMMENTS")),
            }
        )
    return {
        "items": items,
        "pagination": {
            "page": page,
            "perPage": per_page,
            "total": total,
            "totalPages": total_pages,
        },
    }
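
# Worked example of the pagination arithmetic above (illustrative numbers):
# total=101, per_page=50 -> total_pages = (101 + 50 - 1) // 50 = 3, and page=3
# starts at offset (3 - 1) * 50 = 100, i.e. the final single-row page.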