feat: dataset cache for hold/resource history + slow connection migration

Two changes combined:

1. historical-query-slow-connection: Migrate all historical query pages
   to read_sql_df_slow with semaphore concurrency control (max 3
   concurrent slow queries), raise the slow-query DB timeout to 300s and
   the gunicorn timeout to 360s, and unify the frontend timeout at 360s
   for all historical pages (sketched below).

2. hold-resource-history-dataset-cache: Convert hold-history and
   resource-history from a multi-query flow to a single-query + dataset
   cache pattern (L1 ProcessLevelCache + L2 Redis parquet/base64,
   TTL=900s). Replace the old GET endpoints with a two-phase
   POST /query + GET /view API (sketched below). The frontend
   auto-retries on 410 cache_expired.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
egg
2026-02-25 13:15:02 +08:00
parent cd061e0cfd
commit 71c8102de6
64 changed files with 3806 additions and 1442 deletions


@@ -0,0 +1,685 @@
# -*- coding: utf-8 -*-
"""Two-phase resource-history dataset cache.
Primary query (POST /query) → Oracle → cache full per-resource × per-day DataFrame.
Supplementary view (GET /view) → read cache → pandas derive kpi/trend/heatmap/comparison/detail.
Cache layers:
L1: ProcessLevelCache (in-process, per-worker)
L2: Redis (cross-worker, parquet bytes encoded as base64 string)
"""
from __future__ import annotations
import base64
import hashlib
import io
import json
import logging
from functools import lru_cache
from pathlib import Path
from typing import Any, Dict, List, Optional
import pandas as pd
from mes_dashboard.core.cache import ProcessLevelCache, register_process_cache
from mes_dashboard.core.database import read_sql_df_slow as read_sql_df
from mes_dashboard.core.redis_client import (
REDIS_ENABLED,
get_key,
get_redis_client,
)
logger = logging.getLogger("mes_dashboard.resource_dataset_cache")
_CACHE_TTL = 900 # 15 minutes
_CACHE_MAX_SIZE = 8
_REDIS_NAMESPACE = "resource_dataset"
_dataset_cache = ProcessLevelCache(ttl_seconds=_CACHE_TTL, max_size=_CACHE_MAX_SIZE)
register_process_cache(
"resource_dataset", _dataset_cache, "Resource Dataset (L1, 15min)"
)
_SQL_DIR = Path(__file__).resolve().parent.parent / "sql" / "resource_history"
# ============================================================
# SQL loading
# ============================================================
@lru_cache(maxsize=4)
def _load_sql(name: str) -> str:
path = _SQL_DIR / f"{name}.sql"
return path.read_text(encoding="utf-8")
# ============================================================
# Query ID
# ============================================================
def _make_query_id(params: dict) -> str:
"""Deterministic hash from primary query params."""
canonical = json.dumps(params, sort_keys=True, ensure_ascii=False, default=str)
return hashlib.sha256(canonical.encode("utf-8")).hexdigest()[:16]
# ============================================================
# Redis L2 helpers (parquet <-> base64 string)
# ============================================================
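# Parquet bytes are base64-encoded before SETEX, presumably so the payload survives a
# Redis client configured with decode_responses=True (an assumption; a binary-safe client
# could store the raw bytes directly).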
def _redis_key(query_id: str) -> str:
return get_key(f"{_REDIS_NAMESPACE}:{query_id}")
def _redis_store_df(query_id: str, df: pd.DataFrame) -> None:
if not REDIS_ENABLED:
return
client = get_redis_client()
if client is None:
return
try:
buf = io.BytesIO()
df.to_parquet(buf, engine="pyarrow", index=False)
encoded = base64.b64encode(buf.getvalue()).decode("ascii")
client.setex(_redis_key(query_id), _CACHE_TTL, encoded)
except Exception as exc:
logger.warning("Failed to store DataFrame in Redis: %s", exc)
def _redis_load_df(query_id: str) -> Optional[pd.DataFrame]:
if not REDIS_ENABLED:
return None
client = get_redis_client()
if client is None:
return None
try:
encoded = client.get(_redis_key(query_id))
if encoded is None:
return None
raw = base64.b64decode(encoded)
return pd.read_parquet(io.BytesIO(raw), engine="pyarrow")
except Exception as exc:
logger.warning("Failed to load DataFrame from Redis: %s", exc)
return None
# ============================================================
# Cache read (L1 -> L2 -> None)
# ============================================================
def _get_cached_df(query_id: str) -> Optional[pd.DataFrame]:
df = _dataset_cache.get(query_id)
if df is not None:
return df
df = _redis_load_df(query_id)
if df is not None:
_dataset_cache.set(query_id, df)
return df
def _store_df(query_id: str, df: pd.DataFrame) -> None:
_dataset_cache.set(query_id, df)
_redis_store_df(query_id, df)
# ============================================================
# Resource cache integration (reuse existing service)
# ============================================================
def _get_filtered_resources_and_lookup(
*,
workcenter_groups: Optional[List[str]] = None,
families: Optional[List[str]] = None,
resource_ids: Optional[List[str]] = None,
is_production: bool = False,
is_key: bool = False,
is_monitor: bool = False,
) -> tuple:
"""Returns (resources_list, resource_lookup_dict, historyid_filter_sql)."""
from mes_dashboard.services.resource_history_service import (
_get_filtered_resources,
_build_resource_lookup,
_build_historyid_filter,
)
resources = _get_filtered_resources(
workcenter_groups=workcenter_groups,
families=families,
resource_ids=resource_ids,
is_production=is_production,
is_key=is_key,
is_monitor=is_monitor,
)
lookup = _build_resource_lookup(resources)
historyid_filter = _build_historyid_filter(resources)
return resources, lookup, historyid_filter
def _get_resource_lookup() -> Dict[str, Dict[str, Any]]:
"""Get current resource lookup from cache (for view-time dimension merge)."""
from mes_dashboard.services.resource_history_service import (
_get_filtered_resources,
_build_resource_lookup,
)
resources = _get_filtered_resources()
return _build_resource_lookup(resources)
def _get_workcenter_mapping() -> Dict[str, Dict[str, Any]]:
from mes_dashboard.services.filter_cache import get_workcenter_mapping
return get_workcenter_mapping() or {}
# ============================================================
# Primary query
# ============================================================
def execute_primary_query(
*,
start_date: str,
end_date: str,
granularity: str = "day",
workcenter_groups: Optional[List[str]] = None,
families: Optional[List[str]] = None,
resource_ids: Optional[List[str]] = None,
is_production: bool = False,
is_key: bool = False,
is_monitor: bool = False,
) -> Dict[str, Any]:
"""Execute single Oracle query -> cache DataFrame -> return structured result."""
query_id_input = {
"start_date": start_date,
"end_date": end_date,
"workcenter_groups": sorted(workcenter_groups or []),
"families": sorted(families or []),
"resource_ids": sorted(resource_ids or []),
"is_production": is_production,
"is_key": is_key,
"is_monitor": is_monitor,
}
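# Note: granularity is not part of the cache key; the same cached DataFrame serves
# day/week/month/year views, which are re-derived per request (see apply_view).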
query_id = _make_query_id(query_id_input)
cached_df = _get_cached_df(query_id)
if cached_df is not None:
logger.info("Resource dataset cache hit for query_id=%s", query_id)
else:
logger.info(
"Resource dataset cache miss for query_id=%s, querying Oracle", query_id
)
resources, lookup, historyid_filter = _get_filtered_resources_and_lookup(
workcenter_groups=workcenter_groups,
families=families,
resource_ids=resource_ids,
is_production=is_production,
is_key=is_key,
is_monitor=is_monitor,
)
if not resources:
return {
"query_id": query_id,
"summary": _empty_summary(),
"detail": _empty_detail(),
}
sql = _load_sql("base_facts")
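# {{ HISTORYID_FILTER }} is a textual template placeholder in base_facts.sql, filled with
# the fragment built by _build_historyid_filter above; start_date/end_date stay bind params.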
sql = sql.replace("{{ HISTORYID_FILTER }}", historyid_filter)
params = {"start_date": start_date, "end_date": end_date}
df = read_sql_df(sql, params)
if df is None:
df = pd.DataFrame()
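# Only non-empty results are cached; an empty Oracle result is simply re-queried next time.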
if not df.empty:
_store_df(query_id, df)
cached_df = df
resource_lookup = _get_resource_lookup()
wc_mapping = _get_workcenter_mapping()
summary = _derive_summary(cached_df, resource_lookup, wc_mapping, granularity)
detail = _derive_detail(cached_df, resource_lookup, wc_mapping)
return {
"query_id": query_id,
"summary": summary,
"detail": detail,
}
# ============================================================
# View (supplementary — cache only)
# ============================================================
def apply_view(
*,
query_id: str,
granularity: str = "day",
) -> Optional[Dict[str, Any]]:
"""Read cache -> derive views. Returns None if expired."""
df = _get_cached_df(query_id)
if df is None:
return None
resource_lookup = _get_resource_lookup()
wc_mapping = _get_workcenter_mapping()
summary = _derive_summary(df, resource_lookup, wc_mapping, granularity)
detail = _derive_detail(df, resource_lookup, wc_mapping)
return {
"summary": summary,
"detail": detail,
}
# ============================================================
# Master derivation
# ============================================================
def _derive_summary(
df: pd.DataFrame,
resource_lookup: Dict[str, Dict[str, Any]],
wc_mapping: Dict[str, Dict[str, Any]],
granularity: str,
) -> Dict[str, Any]:
if df is None or df.empty:
return _empty_summary()
return {
"kpi": _derive_kpi(df),
"trend": _derive_trend(df, granularity),
"heatmap": _derive_heatmap(df, resource_lookup, wc_mapping, granularity),
"workcenter_comparison": _derive_comparison(df, resource_lookup, wc_mapping),
}
def _empty_summary() -> Dict[str, Any]:
return {
"kpi": _empty_kpi(),
"trend": [],
"heatmap": [],
"workcenter_comparison": [],
}
def _empty_detail() -> Dict[str, Any]:
return {"data": [], "total": 0, "truncated": False, "max_records": None}
def _empty_kpi() -> Dict[str, Any]:
return {
"ou_pct": 0,
"availability_pct": 0,
"prd_hours": 0,
"prd_pct": 0,
"sby_hours": 0,
"sby_pct": 0,
"udt_hours": 0,
"udt_pct": 0,
"sdt_hours": 0,
"sdt_pct": 0,
"egt_hours": 0,
"egt_pct": 0,
"nst_hours": 0,
"nst_pct": 0,
"machine_count": 0,
}
# ============================================================
# Helpers (reuse existing formulas)
# ============================================================
def _sf(value, default=0.0) -> float:
"""Safe float."""
if value is None or pd.isna(value):
return default
return float(value)
def _calc_ou_pct(prd, sby, udt, sdt, egt) -> float:
denom = prd + sby + udt + sdt + egt
return round(prd / denom * 100, 1) if denom > 0 else 0
def _calc_avail_pct(prd, sby, udt, sdt, egt, nst) -> float:
num = prd + sby + egt
denom = prd + sby + egt + sdt + udt + nst
return round(num / denom * 100, 1) if denom > 0 else 0
def _status_pct(val, total) -> float:
return round(val / total * 100, 1) if total > 0 else 0
def _trunc_date(dt, granularity: str) -> str:
"""Truncate a date value to the given granularity period string."""
if pd.isna(dt):
return ""
ts = pd.Timestamp(dt)
if granularity == "year":
return ts.strftime("%Y")
if granularity == "month":
return ts.strftime("%Y-%m")
if granularity == "week":
return (ts - pd.Timedelta(days=ts.weekday())).strftime("%Y-%m-%d")
return ts.strftime("%Y-%m-%d")
# ============================================================
# Derivation: KPI
# ============================================================
def _derive_kpi(df: pd.DataFrame) -> Dict[str, Any]:
if df is None or df.empty:
return _empty_kpi()
prd = _sf(df["PRD_HOURS"].sum())
sby = _sf(df["SBY_HOURS"].sum())
udt = _sf(df["UDT_HOURS"].sum())
sdt = _sf(df["SDT_HOURS"].sum())
egt = _sf(df["EGT_HOURS"].sum())
nst = _sf(df["NST_HOURS"].sum())
total = prd + sby + udt + sdt + egt + nst
machine_count = int(df["HISTORYID"].nunique())
return {
"ou_pct": _calc_ou_pct(prd, sby, udt, sdt, egt),
"availability_pct": _calc_avail_pct(prd, sby, udt, sdt, egt, nst),
"prd_hours": round(prd, 1),
"prd_pct": _status_pct(prd, total),
"sby_hours": round(sby, 1),
"sby_pct": _status_pct(sby, total),
"udt_hours": round(udt, 1),
"udt_pct": _status_pct(udt, total),
"sdt_hours": round(sdt, 1),
"sdt_pct": _status_pct(sdt, total),
"egt_hours": round(egt, 1),
"egt_pct": _status_pct(egt, total),
"nst_hours": round(nst, 1),
"nst_pct": _status_pct(nst, total),
"machine_count": machine_count,
}
# ============================================================
# Derivation: Trend
# ============================================================
def _derive_trend(df: pd.DataFrame, granularity: str) -> List[Dict[str, Any]]:
if df is None or df.empty:
return []
df = df.copy()
df["_period"] = df["DATA_DATE"].apply(lambda d: _trunc_date(d, granularity))
grouped = (
df.groupby("_period", sort=True)
.agg(
PRD_HOURS=("PRD_HOURS", "sum"),
SBY_HOURS=("SBY_HOURS", "sum"),
UDT_HOURS=("UDT_HOURS", "sum"),
SDT_HOURS=("SDT_HOURS", "sum"),
EGT_HOURS=("EGT_HOURS", "sum"),
NST_HOURS=("NST_HOURS", "sum"),
)
.reset_index()
)
items: List[Dict[str, Any]] = []
for _, row in grouped.iterrows():
prd = _sf(row["PRD_HOURS"])
sby = _sf(row["SBY_HOURS"])
udt = _sf(row["UDT_HOURS"])
sdt = _sf(row["SDT_HOURS"])
egt = _sf(row["EGT_HOURS"])
nst = _sf(row["NST_HOURS"])
items.append(
{
"date": row["_period"],
"ou_pct": _calc_ou_pct(prd, sby, udt, sdt, egt),
"availability_pct": _calc_avail_pct(prd, sby, udt, sdt, egt, nst),
"prd_hours": round(prd, 1),
"sby_hours": round(sby, 1),
"udt_hours": round(udt, 1),
"sdt_hours": round(sdt, 1),
"egt_hours": round(egt, 1),
"nst_hours": round(nst, 1),
}
)
return items
# ============================================================
# Derivation: Heatmap
# ============================================================
def _derive_heatmap(
df: pd.DataFrame,
resource_lookup: Dict[str, Dict[str, Any]],
wc_mapping: Dict[str, Dict[str, Any]],
granularity: str,
) -> List[Dict[str, Any]]:
if df is None or df.empty:
return []
rows: List[Dict[str, Any]] = []
wc_seq_map: Dict[str, int] = {}
for _, row in df.iterrows():
historyid = row["HISTORYID"]
resource_info = resource_lookup.get(historyid, {})
if not resource_info:
continue
wc_name = resource_info.get("WORKCENTERNAME", "")
if not wc_name:
continue
wc_info = wc_mapping.get(wc_name, {})
wc_group = wc_info.get("group", wc_name)
wc_seq = wc_info.get("sequence", 999)
wc_seq_map[wc_group] = wc_seq
date_str = _trunc_date(row["DATA_DATE"], granularity)
rows.append(
{
"wc": wc_group,
"date": date_str,
"prd": _sf(row["PRD_HOURS"]),
"sby": _sf(row["SBY_HOURS"]),
"udt": _sf(row["UDT_HOURS"]),
"sdt": _sf(row["SDT_HOURS"]),
"egt": _sf(row["EGT_HOURS"]),
}
)
if not rows:
return []
tmp = pd.DataFrame(rows)
agg = (
tmp.groupby(["wc", "date"], sort=False)
.agg(prd=("prd", "sum"), sby=("sby", "sum"), udt=("udt", "sum"), sdt=("sdt", "sum"), egt=("egt", "sum"))
.reset_index()
)
items: List[Dict[str, Any]] = []
for _, r in agg.iterrows():
items.append(
{
"workcenter": r["wc"],
"workcenter_seq": wc_seq_map.get(r["wc"], 999),
"date": r["date"],
"ou_pct": _calc_ou_pct(r["prd"], r["sby"], r["udt"], r["sdt"], r["egt"]),
}
)
items.sort(key=lambda x: (x["workcenter_seq"], x["date"] or ""))
return items
# ============================================================
# Derivation: Workcenter Comparison
# ============================================================
def _derive_comparison(
df: pd.DataFrame,
resource_lookup: Dict[str, Dict[str, Any]],
wc_mapping: Dict[str, Dict[str, Any]],
) -> List[Dict[str, Any]]:
if df is None or df.empty:
return []
# Aggregate by HISTORYID first
by_resource = (
df.groupby("HISTORYID", sort=False)
.agg(
prd=("PRD_HOURS", "sum"),
sby=("SBY_HOURS", "sum"),
udt=("UDT_HOURS", "sum"),
sdt=("SDT_HOURS", "sum"),
egt=("EGT_HOURS", "sum"),
)
.reset_index()
)
# Then aggregate by workcenter group
agg: Dict[str, Dict[str, float]] = {}
for _, row in by_resource.iterrows():
historyid = row["HISTORYID"]
resource_info = resource_lookup.get(historyid, {})
if not resource_info:
continue
wc_name = resource_info.get("WORKCENTERNAME", "")
if not wc_name:
continue
wc_info = wc_mapping.get(wc_name, {})
wc_group = wc_info.get("group", wc_name)
if wc_group not in agg:
agg[wc_group] = {"prd": 0, "sby": 0, "udt": 0, "sdt": 0, "egt": 0, "mc": 0}
agg[wc_group]["prd"] += _sf(row["prd"])
agg[wc_group]["sby"] += _sf(row["sby"])
agg[wc_group]["udt"] += _sf(row["udt"])
agg[wc_group]["sdt"] += _sf(row["sdt"])
agg[wc_group]["egt"] += _sf(row["egt"])
agg[wc_group]["mc"] += 1
items = [
{
"workcenter": wc,
"ou_pct": _calc_ou_pct(d["prd"], d["sby"], d["udt"], d["sdt"], d["egt"]),
"prd_hours": round(d["prd"], 1),
"machine_count": d["mc"],
}
for wc, d in agg.items()
]
items.sort(key=lambda x: x["ou_pct"], reverse=True)
return items
# ============================================================
# Derivation: Detail
# ============================================================
def _derive_detail(
df: pd.DataFrame,
resource_lookup: Dict[str, Dict[str, Any]],
wc_mapping: Dict[str, Dict[str, Any]],
) -> Dict[str, Any]:
if df is None or df.empty:
return _empty_detail()
by_resource = (
df.groupby("HISTORYID", sort=False)
.agg(
PRD_HOURS=("PRD_HOURS", "sum"),
SBY_HOURS=("SBY_HOURS", "sum"),
UDT_HOURS=("UDT_HOURS", "sum"),
SDT_HOURS=("SDT_HOURS", "sum"),
EGT_HOURS=("EGT_HOURS", "sum"),
NST_HOURS=("NST_HOURS", "sum"),
TOTAL_HOURS=("TOTAL_HOURS", "sum"),
)
.reset_index()
)
data: List[Dict[str, Any]] = []
for _, row in by_resource.iterrows():
historyid = row["HISTORYID"]
resource_info = resource_lookup.get(historyid, {})
if not resource_info:
continue
prd = _sf(row["PRD_HOURS"])
sby = _sf(row["SBY_HOURS"])
udt = _sf(row["UDT_HOURS"])
sdt = _sf(row["SDT_HOURS"])
egt = _sf(row["EGT_HOURS"])
nst = _sf(row["NST_HOURS"])
total = _sf(row["TOTAL_HOURS"])
wc_name = resource_info.get("WORKCENTERNAME", "")
wc_info = wc_mapping.get(wc_name, {})
wc_group = wc_info.get("group", wc_name)
wc_seq = wc_info.get("sequence", 999)
family = resource_info.get("RESOURCEFAMILYNAME", "")
resource_name = resource_info.get("RESOURCENAME", "")
data.append(
{
"workcenter": wc_group,
"workcenter_seq": wc_seq,
"family": family or "",
"resource": resource_name or "",
"ou_pct": _calc_ou_pct(prd, sby, udt, sdt, egt),
"availability_pct": _calc_avail_pct(prd, sby, udt, sdt, egt, nst),
"prd_hours": round(prd, 1),
"prd_pct": _status_pct(prd, total),
"sby_hours": round(sby, 1),
"sby_pct": _status_pct(sby, total),
"udt_hours": round(udt, 1),
"udt_pct": _status_pct(udt, total),
"sdt_hours": round(sdt, 1),
"sdt_pct": _status_pct(sdt, total),
"egt_hours": round(egt, 1),
"egt_pct": _status_pct(egt, total),
"nst_hours": round(nst, 1),
"nst_pct": _status_pct(nst, total),
"machine_count": 1,
}
)
data.sort(key=lambda x: (x["workcenter_seq"], x["family"], x["resource"]))
return {
"data": data,
"total": len(data),
"truncated": False,
"max_records": None,
}