# -*- coding: utf-8 -*- """Two-phase hold-history dataset cache. Primary query (POST /query) → Oracle → cache full hold/release DataFrame. Supplementary view (GET /view) → read cache → pandas filter/derive. Cache layers: L1: ProcessLevelCache (in-process, per-worker) L2: Redis (cross-worker, parquet bytes encoded as base64 string) """ from __future__ import annotations import hashlib import json import logging from functools import lru_cache from pathlib import Path from typing import Any, Dict, List, Optional import pandas as pd from mes_dashboard.core.cache import ProcessLevelCache, register_process_cache from mes_dashboard.core.database import read_sql_df_slow as read_sql_df from mes_dashboard.core.redis_df_store import redis_load_df, redis_store_df from mes_dashboard.services.filter_cache import get_workcenter_group as _get_wc_group from mes_dashboard.services.hold_history_service import ( _clean_text, _format_datetime, _safe_float, _safe_int, ) from mes_dashboard.sql.filters import CommonFilters logger = logging.getLogger("mes_dashboard.hold_dataset_cache") _CACHE_TTL = 900 # 15 minutes _CACHE_MAX_SIZE = 8 _REDIS_NAMESPACE = "hold_dataset" _dataset_cache = ProcessLevelCache(ttl_seconds=_CACHE_TTL, max_size=_CACHE_MAX_SIZE) register_process_cache("hold_dataset", _dataset_cache, "Hold Dataset (L1, 15min)") _SQL_DIR = Path(__file__).resolve().parent.parent / "sql" / "hold_history" # ============================================================ # SQL loading # ============================================================ @lru_cache(maxsize=4) def _load_sql(name: str) -> str: path = _SQL_DIR / f"{name}.sql" sql = path.read_text(encoding="utf-8") if "{{ NON_QUALITY_REASONS }}" in sql: sql = sql.replace( "{{ NON_QUALITY_REASONS }}", CommonFilters.get_non_quality_reasons_sql(), ) return sql # ============================================================ # Query ID # ============================================================ def _make_query_id(params: dict) -> str: """Deterministic hash from primary query params.""" canonical = json.dumps(params, sort_keys=True, ensure_ascii=False, default=str) return hashlib.sha256(canonical.encode("utf-8")).hexdigest()[:16] # ============================================================ # Redis L2 helpers (delegated to shared redis_df_store) # ============================================================ def _redis_store_df(query_id: str, df: pd.DataFrame) -> None: redis_store_df(f"{_REDIS_NAMESPACE}:{query_id}", df, ttl=_CACHE_TTL) def _redis_load_df(query_id: str) -> Optional[pd.DataFrame]: return redis_load_df(f"{_REDIS_NAMESPACE}:{query_id}") # ============================================================ # Cache read (L1 -> L2 -> None) # ============================================================ def _get_cached_df(query_id: str) -> Optional[pd.DataFrame]: """Read cache: L1 hit -> return, L1 miss -> L2 -> write L1 -> return.""" df = _dataset_cache.get(query_id) if df is not None: return df df = _redis_load_df(query_id) if df is not None: _dataset_cache.set(query_id, df) return df def _store_df(query_id: str, df: pd.DataFrame) -> None: """Write to L1 and L2.""" _dataset_cache.set(query_id, df) _redis_store_df(query_id, df) # ============================================================ # Primary query # ============================================================ def execute_primary_query( *, start_date: str, end_date: str, hold_type: str = "quality", record_type: str = "new", ) -> Dict[str, Any]: """Execute Oracle query -> cache DataFrame -> return structured result.""" query_id = _make_query_id({"start_date": start_date, "end_date": end_date}) cached_df = _get_cached_df(query_id) if cached_df is not None: logger.info("Hold dataset cache hit for query_id=%s", query_id) else: logger.info( "Hold dataset cache miss for query_id=%s, querying Oracle", query_id ) from mes_dashboard.services.batch_query_engine import ( decompose_by_time_range, execute_plan, merge_chunks, compute_query_hash, should_decompose_by_time, ) if should_decompose_by_time(start_date, end_date): # --- Engine path for long date ranges --- engine_chunks = decompose_by_time_range(start_date, end_date) engine_hash = compute_query_hash( {"start_date": start_date, "end_date": end_date} ) base_sql = _load_sql("base_facts") def _run_hold_chunk(chunk, max_rows_per_chunk=None): params = { "start_date": chunk["chunk_start"], "end_date": chunk["chunk_end"], } result = read_sql_df(base_sql, params) return result if result is not None else pd.DataFrame() logger.info( "Engine activated for hold: %d chunks (query_id=%s)", len(engine_chunks), query_id, ) execute_plan( engine_chunks, _run_hold_chunk, query_hash=engine_hash, cache_prefix="hold", chunk_ttl=_CACHE_TTL, ) df = merge_chunks("hold", engine_hash) else: # --- Direct path (short query) --- sql = _load_sql("base_facts") params = {"start_date": start_date, "end_date": end_date} df = read_sql_df(sql, params) if df is None: df = pd.DataFrame() if not df.empty: df["_QUERY_START"] = pd.Timestamp(start_date) df["_QUERY_END"] = pd.Timestamp(end_date) _store_df(query_id, df) cached_df = df views = _derive_all_views( cached_df, hold_type=hold_type, record_type=record_type, page=1, per_page=50, ) return {"query_id": query_id, **views} # ============================================================ # View (supplementary filtering on cache) # ============================================================ def apply_view( *, query_id: str, hold_type: str = "quality", reason: Optional[str] = None, record_type: str = "new", duration_range: Optional[str] = None, page: int = 1, per_page: int = 50, ) -> Optional[Dict[str, Any]]: """Read cache -> apply filters -> return derived data. Returns None if expired.""" df = _get_cached_df(query_id) if df is None: return None return _derive_all_views( df, hold_type=hold_type, reason=reason, record_type=record_type, duration_range=duration_range, page=page, per_page=per_page, ) # ============================================================ # Master derivation # ============================================================ def _derive_all_views( df: pd.DataFrame, *, hold_type: str = "quality", reason: Optional[str] = None, record_type: str = "new", duration_range: Optional[str] = None, page: int = 1, per_page: int = 50, ) -> Dict[str, Any]: """Derive trend, reason_pareto, duration, and list from cached DataFrame.""" if df is None or df.empty: return _empty_views() # Trend uses full DF (no record_type/reason/duration filter, all hold_types) trend = _derive_trend(df) # Apply record_type filter for pareto, duration, list filtered = _apply_record_type_filter(df, record_type) # Apply hold_type filter if hold_type != "all": ht_value = "non-quality" if hold_type == "non-quality" else "quality" filtered = filtered[filtered["HOLD_TYPE"] == ht_value] reason_pareto = _derive_reason_pareto(filtered) duration = _derive_duration(filtered) # List: additional reason + duration_range filters list_df = filtered if reason: list_df = list_df[ list_df["HOLDREASONNAME"].str.strip() == reason.strip() ] if duration_range: list_df = _apply_duration_range_filter(list_df, duration_range) detail = _derive_list(list_df, page=page, per_page=per_page) return { "trend": trend, "reason_pareto": reason_pareto, "duration": duration, "list": detail, } def _empty_views() -> Dict[str, Any]: return { "trend": {"days": []}, "reason_pareto": {"items": []}, "duration": {"items": []}, "list": { "items": [], "pagination": { "page": 1, "perPage": 50, "total": 0, "totalPages": 1, }, }, } # ============================================================ # Record-type & duration-range filters # ============================================================ def _apply_record_type_filter( df: pd.DataFrame, record_type: str ) -> pd.DataFrame: if df is None or df.empty: return df types = {t.strip().lower() for t in str(record_type or "new").split(",")} if types >= {"new", "on_hold", "released"}: return df mask = pd.Series(False, index=df.index) if "new" in types: if "_QUERY_START" in df.columns and "_QUERY_END" in df.columns: qs = df["_QUERY_START"].iloc[0] qe = df["_QUERY_END"].iloc[0] mask |= (df["HOLD_DAY"] >= qs) & (df["HOLD_DAY"] <= qe) else: mask |= pd.Series(True, index=df.index) if "on_hold" in types: mask |= df["RELEASETXNDATE"].isna() if "released" in types: mask |= df["RELEASETXNDATE"].notna() return df[mask] def _apply_duration_range_filter( df: pd.DataFrame, duration_range: str ) -> pd.DataFrame: if df is None or df.empty or not duration_range: return df hours = df["HOLD_HOURS"] if duration_range == "<4h": return df[hours < 4] if duration_range == "4-24h": return df[(hours >= 4) & (hours < 24)] if duration_range == "1-3d": return df[(hours >= 24) & (hours < 72)] if duration_range == ">3d": return df[hours >= 72] return df # ============================================================ # Derivation: Trend # ============================================================ def _derive_trend(df: pd.DataFrame) -> Dict[str, Any]: """Derive daily trend with quality / non_quality / all variants.""" if df is None or df.empty: return {"days": []} if "_QUERY_START" in df.columns: start = pd.Timestamp(df["_QUERY_START"].iloc[0]) end = pd.Timestamp(df["_QUERY_END"].iloc[0]) else: start = df["HOLD_DAY"].min() end = df["HOLD_DAY"].max() dates = pd.date_range(start, end, freq="D") type_map: List[tuple] = [ ("quality", "quality"), ("non_quality", "non-quality"), ("all", None), ] days: List[Dict[str, Any]] = [] for d in dates: day_data: Dict[str, Any] = {"date": d.strftime("%Y-%m-%d")} for key, type_filter in type_map: tdf = df if type_filter is None else df[df["HOLD_TYPE"] == type_filter] if tdf.empty: day_data[key] = _empty_trend_metrics() continue # holdQty: total QTY on hold as of this day on_hold = (tdf["HOLD_DAY"] <= d) & ( tdf["RELEASE_DAY"].isna() | (tdf["RELEASE_DAY"] > d) ) hold_qty = _safe_int(tdf.loc[on_hold, "QTY"].sum()) # newHoldQty: QTY of new holds arriving this day (dedup) new_mask = (tdf["HOLD_DAY"] == d) & (tdf["RN_HOLD_DAY"] == 1) new_hold_qty = _safe_int(tdf.loc[new_mask, "QTY"].sum()) # releaseQty: QTY released on this day release_mask = tdf["RELEASE_DAY"] == d release_qty = _safe_int(tdf.loc[release_mask, "QTY"].sum()) # futureHoldQty: QTY of future holds on this day future_mask = ( (tdf["HOLD_DAY"] == d) & (tdf["IS_FUTURE_HOLD"] == 1) & (tdf["FUTURE_HOLD_FLAG"] == 1) ) future_hold_qty = _safe_int(tdf.loc[future_mask, "QTY"].sum()) day_data[key] = { "holdQty": hold_qty, "newHoldQty": new_hold_qty, "releaseQty": release_qty, "futureHoldQty": future_hold_qty, } days.append(day_data) return {"days": days} def _empty_trend_metrics() -> Dict[str, int]: return {"holdQty": 0, "newHoldQty": 0, "releaseQty": 0, "futureHoldQty": 0} # ============================================================ # Derivation: Reason Pareto # ============================================================ def _derive_reason_pareto(df: pd.DataFrame) -> Dict[str, Any]: """Group by HOLDREASONNAME -> count, qty, pct, cumPct.""" if df is None or df.empty: return {"items": []} grouped = ( df.groupby("HOLDREASONNAME", sort=False) .agg(count=("CONTAINERID", "count"), qty=("QTY", "sum")) .reset_index() ) grouped = grouped.sort_values("qty", ascending=False) total_qty = grouped["qty"].sum() items: List[Dict[str, Any]] = [] cumulative = 0.0 for _, row in grouped.iterrows(): count = _safe_int(row["count"]) qty = _safe_int(row["qty"]) pct = round((qty / total_qty * 100) if total_qty > 0 else 0, 2) cumulative += pct items.append( { "reason": _clean_text(row["HOLDREASONNAME"]) or "(未填寫)", "count": count, "qty": qty, "pct": pct, "cumPct": round(cumulative, 2), } ) return {"items": items} # ============================================================ # Derivation: Duration # ============================================================ def _derive_duration(df: pd.DataFrame) -> Dict[str, Any]: """Bucket released holds into <4h / 4-24h / 1-3d / >3d.""" if df is None or df.empty: return {"items": []} released = df[df["RELEASETXNDATE"].notna()] if released.empty: return {"items": []} hours = released["HOLD_HOURS"] total_qty = _safe_int(released["QTY"].sum()) buckets = [ ("<4h", hours < 4), ("4-24h", (hours >= 4) & (hours < 24)), ("1-3d", (hours >= 24) & (hours < 72)), (">3d", hours >= 72), ] items: List[Dict[str, Any]] = [] for label, mask in buckets: count = int(mask.sum()) qty = _safe_int(released.loc[mask, "QTY"].sum()) pct = round((qty / total_qty * 100) if total_qty > 0 else 0, 2) items.append({"range": label, "count": count, "qty": qty, "pct": pct}) return {"items": items} # ============================================================ # Derivation: Paginated list # ============================================================ def _derive_list( df: pd.DataFrame, *, page: int = 1, per_page: int = 50, ) -> Dict[str, Any]: """Sort by HOLDTXNDATE desc and paginate.""" if df is None or df.empty: return { "items": [], "pagination": { "page": 1, "perPage": per_page, "total": 0, "totalPages": 1, }, } page = max(int(page), 1) per_page = min(max(int(per_page), 1), 200) sorted_df = df.sort_values("HOLDTXNDATE", ascending=False) total = len(sorted_df) total_pages = max((total + per_page - 1) // per_page, 1) offset = (page - 1) * per_page page_df = sorted_df.iloc[offset : offset + per_page] items: List[Dict[str, Any]] = [] for _, row in page_df.iterrows(): wc_name = _clean_text(row.get("WORKCENTERNAME")) wc_group = _get_wc_group(wc_name) if wc_name else None items.append( { "lotId": _clean_text(row.get("LOT_ID")), "workorder": _clean_text(row.get("PJ_WORKORDER")), "product": _clean_text(row.get("PRODUCTNAME")), "workcenter": wc_group or wc_name, "holdReason": _clean_text(row.get("HOLDREASONNAME")), "qty": _safe_int(row.get("QTY")), "holdDate": _format_datetime(row.get("HOLDTXNDATE")), "holdEmp": _clean_text(row.get("HOLDEMP")), "holdComment": _clean_text(row.get("HOLDCOMMENTS")), "releaseDate": _format_datetime(row.get("RELEASETXNDATE")), "releaseEmp": _clean_text(row.get("RELEASEEMP")), "releaseComment": _clean_text(row.get("RELEASECOMMENTS")), "holdHours": round(_safe_float(row.get("HOLD_HOURS")), 2), "ncr": _clean_text(row.get("NCRID")), "futureHoldComment": _clean_text(row.get("FUTUREHOLDCOMMENTS")), } ) return { "items": items, "pagination": { "page": page, "perPage": per_page, "total": total, "totalPages": total_pages, }, }