perf: 移除舊版 resource summary API 並加入 WIP DataFrame 記憶體快取
問題診斷: - 舊版 /api/resource/summary API 使用慢速 SQL (JOIN + ROW_NUMBER),導致 55s 逾時 - 壓力測試持續呼叫此 API,佔滿所有 worker threads - 每次 WIP API 請求都解析 14.8MB JSON,8 個並發請求造成 GIL 競爭 變更內容: - 移除舊版 /api/resource/summary 路由和 query_resource_status_summary 函數 - 刪除未使用的 status_summary.sql - 更新壓力測試和整合測試使用新版 /api/resource/status/summary - 加入 ProcessLevelCache 類別實作 process-level DataFrame 快取 (30s TTL) - 使用 double-check locking 確保只有一個 thread 解析 JSON 效能改善: - 新版 API 使用 Redis 三層快取,回應時間 < 100ms - Process-level 快取避免重複解析 14MB JSON,大幅改善並發效能 Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -10,7 +10,9 @@ from __future__ import annotations
|
||||
import io
|
||||
import json
|
||||
import logging
|
||||
from typing import Any, Optional, Protocol
|
||||
import threading
|
||||
import time
|
||||
from typing import Any, Optional, Protocol, Tuple
|
||||
|
||||
import pandas as pd
|
||||
from flask import current_app
|
||||
@@ -25,6 +27,54 @@ from mes_dashboard.core.redis_client import (
|
||||
|
||||
logger = logging.getLogger('mes_dashboard.cache')
|
||||
|
||||
|
||||
# ============================================================
|
||||
# Process-Level DataFrame Cache (Prevents redundant JSON parsing)
|
||||
# ============================================================
|
||||
|
||||
class ProcessLevelCache:
    """Thread-safe, process-level cache for parsed DataFrames.

    Prevents redundant JSON parsing across concurrent requests: once one
    thread has parsed the payload into a DataFrame, other threads in the
    same worker process reuse the parsed object until the TTL expires.

    NOTE: ``get()`` returns the cached DataFrame itself (no defensive
    copy, to avoid re-copying 14+ MB of data) — callers must treat the
    result as read-only.
    """

    def __init__(self, ttl_seconds: int = 30):
        # Maps key -> (DataFrame, insertion timestamp). Timestamps come
        # from time.monotonic(), which is immune to wall-clock jumps
        # (NTP sync, DST) that would make time.time()-based TTLs misfire.
        self._cache: dict[str, Tuple[pd.DataFrame, float]] = {}
        self._lock = threading.Lock()
        self._ttl = ttl_seconds

    def get(self, key: str) -> Optional[pd.DataFrame]:
        """Return the cached DataFrame for *key*, or None if absent/expired.

        Expired entries are evicted on access.
        """
        with self._lock:
            entry = self._cache.get(key)
            if entry is None:
                return None
            df, timestamp = entry
            if time.monotonic() - timestamp > self._ttl:
                # Entry is stale — drop it so memory is reclaimed promptly.
                del self._cache[key]
                return None
            return df

    def set(self, key: str, df: pd.DataFrame) -> None:
        """Cache *df* under *key*, stamped with the current monotonic time."""
        with self._lock:
            self._cache[key] = (df, time.monotonic())

    def invalidate(self, key: str) -> None:
        """Remove *key* from the cache if present (no-op otherwise)."""
        with self._lock:
            self._cache.pop(key, None)

    def clear(self) -> None:
        """Drop all cached entries."""
        with self._lock:
            self._cache.clear()
|
||||
|
||||
|
||||
# Global process-level cache for WIP DataFrame (30s TTL).
# Shared by all request threads within this worker process.
_wip_df_cache = ProcessLevelCache(ttl_seconds=30)
# Serializes the expensive JSON -> DataFrame parse so only one thread at a
# time parses the 14+ MB Redis payload; other threads re-check the cache
# after acquiring the lock.
_wip_parse_lock = threading.Lock()


# ============================================================
# Legacy Cache Backend Interface (for backwards compatibility)
# ============================================================
||||
@@ -81,11 +131,27 @@ def make_cache_key(prefix: str, days_back: Optional[int] = None, filters: Option
|
||||
|
||||
|
||||
def get_cached_wip_data() -> Optional[pd.DataFrame]:
|
||||
"""Get cached WIP data from Redis.
|
||||
"""Get cached WIP data from Redis with process-level caching.
|
||||
|
||||
Uses a two-tier cache strategy:
|
||||
1. Process-level cache: Parsed DataFrame (30s TTL) - fast, no parsing
|
||||
2. Redis cache: Raw JSON data - shared across workers
|
||||
|
||||
This prevents redundant JSON parsing of 14+ MB data across
|
||||
concurrent requests, significantly improving response times.
|
||||
|
||||
Returns:
|
||||
DataFrame with full DWH.DW_MES_LOT_V data, or None if cache miss.
|
||||
"""
|
||||
cache_key = "wip_dataframe"
|
||||
|
||||
# Tier 1: Check process-level cache first (fast path)
|
||||
cached_df = _wip_df_cache.get(cache_key)
|
||||
if cached_df is not None:
|
||||
logger.debug(f"Process cache hit: {len(cached_df)} rows")
|
||||
return cached_df
|
||||
|
||||
# Tier 2: Parse from Redis (slow path - needs lock)
|
||||
if not REDIS_ENABLED:
|
||||
return None
|
||||
|
||||
@@ -93,19 +159,33 @@ def get_cached_wip_data() -> Optional[pd.DataFrame]:
|
||||
if client is None:
|
||||
return None
|
||||
|
||||
try:
|
||||
data_json = client.get(get_key("data"))
|
||||
if data_json is None:
|
||||
logger.debug("Cache miss: no data in Redis")
|
||||
return None
|
||||
# Use lock to prevent multiple threads from parsing simultaneously
|
||||
with _wip_parse_lock:
|
||||
# Double-check after acquiring lock (another thread may have parsed)
|
||||
cached_df = _wip_df_cache.get(cache_key)
|
||||
if cached_df is not None:
|
||||
logger.debug(f"Process cache hit (after lock): {len(cached_df)} rows")
|
||||
return cached_df
|
||||
|
||||
# Use StringIO to wrap the JSON string for pd.read_json
|
||||
df = pd.read_json(io.StringIO(data_json), orient='records')
|
||||
logger.debug(f"Cache hit: loaded {len(df)} rows from Redis")
|
||||
return df
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to read cache: {e}")
|
||||
return None
|
||||
try:
|
||||
start_time = time.time()
|
||||
data_json = client.get(get_key("data"))
|
||||
if data_json is None:
|
||||
logger.debug("Cache miss: no data in Redis")
|
||||
return None
|
||||
|
||||
# Parse JSON to DataFrame
|
||||
df = pd.read_json(io.StringIO(data_json), orient='records')
|
||||
parse_time = time.time() - start_time
|
||||
|
||||
# Store in process-level cache
|
||||
_wip_df_cache.set(cache_key, df)
|
||||
|
||||
logger.debug(f"Cache hit: loaded {len(df)} rows from Redis (parsed in {parse_time:.2f}s)")
|
||||
return df
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to read cache: {e}")
|
||||
return None
|
||||
|
||||
|
||||
def get_cached_sys_date() -> Optional[str]:
|
||||
|
||||
@@ -43,7 +43,6 @@ def _clean_nan_values(data):
|
||||
return data
|
||||
from mes_dashboard.core.utils import get_days_back
|
||||
from mes_dashboard.services.resource_service import (
|
||||
query_resource_status_summary,
|
||||
query_resource_by_status,
|
||||
query_resource_by_workcenter,
|
||||
query_resource_detail,
|
||||
@@ -60,21 +59,6 @@ from mes_dashboard.config.constants import STATUS_CATEGORIES
|
||||
resource_bp = Blueprint('resource', __name__, url_prefix='/api/resource')
|
||||
|
||||
|
||||
@resource_bp.route('/summary')
def api_resource_summary():
    """API: Resource status summary (legacy endpoint).

    Serves from the shared cache when possible; on a cache miss, queries
    the database and caches any non-empty result.
    """
    days_back = request.args.get('days_back', 30, type=int)
    key = make_cache_key("resource_summary", days_back)

    summary = cache_get(key)
    if summary is None:
        # Cache miss — hit the database and cache only successful results.
        summary = query_resource_status_summary(days_back)
        if summary:
            cache_set(key, summary)

    if not summary:
        # Query failed (or returned nothing usable).
        return jsonify({'success': False, 'error': '查詢失敗'}), 500
    return jsonify({'success': True, 'data': summary})
|
||||
|
||||
|
||||
@resource_bp.route('/by_status')
|
||||
def api_resource_by_status():
|
||||
"""API: Resource count by status."""
|
||||
|
||||
@@ -105,38 +105,6 @@ def get_resource_latest_status_subquery(days_back: int = 30) -> str:
|
||||
# Resource Summary Queries
|
||||
# ============================================================
|
||||
|
||||
def query_resource_status_summary(days_back: int = 30) -> Optional[Dict]:
    """Query resource status summary statistics.

    Args:
        days_back: Number of days to look back.

    Returns:
        Dict with 'total_count', 'workcenter_count', 'family_count' and
        'dept_count', or None if the query fails or returns no rows.
    """
    def _to_count(value) -> int:
        # A NULL aggregate arrives as NaN, which is truthy — so the old
        # `int(value or 0)` raised ValueError and the broad except below
        # silently discarded the entire summary. Treat missing as 0.
        return 0 if value is None or pd.isna(value) else int(value)

    try:
        base_sql = get_resource_latest_status_subquery(days_back)

        # Load SQL from file and inject the latest-status subquery.
        sql = SQLLoader.load("resource/status_summary")
        sql = sql.replace("{{ LATEST_STATUS_SUBQUERY }}", base_sql)

        df = read_sql_df(sql)
        if df is None or df.empty:
            return None

        row = df.iloc[0]
        return {
            'total_count': _to_count(row['TOTAL_COUNT']),
            'workcenter_count': _to_count(row['WORKCENTER_COUNT']),
            'family_count': _to_count(row['FAMILY_COUNT']),
            'dept_count': _to_count(row['DEPT_COUNT']),
        }
    except Exception as exc:
        # NOTE(review): module appears to report errors via print —
        # consider switching to the logging module project-wide.
        print(f"Resource summary query failed: {exc}")
        return None
|
||||
|
||||
|
||||
def query_resource_by_status(days_back: int = 30) -> Optional[pd.DataFrame]:
|
||||
"""Query resource count grouped by status.
|
||||
|
||||
|
||||
@@ -23,7 +23,6 @@ Directory Structure:
|
||||
│ └── resource_detail_with_job.sql
|
||||
├── resource/ # Resource status SQL files
|
||||
│ ├── latest_status.sql
|
||||
│ ├── status_summary.sql
|
||||
│ ├── by_status.sql
|
||||
│ ├── by_workcenter.sql
|
||||
│ ├── detail.sql
|
||||
|
||||
@@ -1,11 +0,0 @@
|
||||
-- Resource Status Summary Query
-- Returns aggregate statistics for resources
--
-- This query wraps the latest_status subquery.
-- {{ LATEST_STATUS_SUBQUERY }} is substituted by the caller before execution.

SELECT
    COUNT(*) as TOTAL_COUNT,
    COUNT(DISTINCT WORKCENTERNAME) as WORKCENTER_COUNT,
    COUNT(DISTINCT RESOURCEFAMILYNAME) as FAMILY_COUNT,
    COUNT(DISTINCT PJ_DEPARTMENT) as DEPT_COUNT
FROM ({{ LATEST_STATUS_SUBQUERY }}) rs
|
||||
@@ -109,9 +109,9 @@ class TestAPILoadConcurrent:
|
||||
assert result.avg_response_time < 15.0, f"Avg response time {result.avg_response_time:.2f}s exceeds 15s"
|
||||
|
||||
def test_resource_summary_concurrent_load(self, base_url: str, stress_config: dict, stress_result):
|
||||
"""Test resource summary API under concurrent load."""
|
||||
result = stress_result("Resource Summary Concurrent Load")
|
||||
url = f"{base_url}/api/resource/summary"
|
||||
"""Test resource status summary API under concurrent load."""
|
||||
result = stress_result("Resource Status Summary Concurrent Load")
|
||||
url = f"{base_url}/api/resource/status/summary"
|
||||
concurrent_users = stress_config['concurrent_users']
|
||||
requests_per_user = stress_config['requests_per_user']
|
||||
timeout = stress_config['timeout']
|
||||
@@ -146,7 +146,7 @@ class TestAPILoadConcurrent:
|
||||
f"{base_url}/api/wip/overview/matrix",
|
||||
f"{base_url}/api/wip/overview/hold",
|
||||
f"{base_url}/api/wip/meta/workcenters",
|
||||
f"{base_url}/api/resource/summary",
|
||||
f"{base_url}/api/resource/status/summary",
|
||||
]
|
||||
concurrent_users = stress_config['concurrent_users']
|
||||
timeout = stress_config['timeout']
|
||||
|
||||
@@ -221,19 +221,21 @@ class TestResourceAPIIntegration(unittest.TestCase):
|
||||
self.app.config['TESTING'] = True
|
||||
self.client = self.app.test_client()
|
||||
|
||||
@patch('mes_dashboard.routes.resource_routes.query_resource_status_summary')
|
||||
def test_resource_summary_response_format(self, mock_summary):
|
||||
"""Resource summary should return consistent JSON structure."""
|
||||
@patch('mes_dashboard.routes.resource_routes.get_resource_status_summary')
|
||||
def test_resource_status_summary_response_format(self, mock_summary):
|
||||
"""Resource status summary should return consistent JSON structure."""
|
||||
mock_summary.return_value = {
|
||||
'total_equipment': 100,
|
||||
'by_status': {
|
||||
'RUN': 60,
|
||||
'IDLE': 30,
|
||||
'DOWN': 10
|
||||
}
|
||||
'total_count': 100,
|
||||
'by_status_category': {'PRODUCTIVE': 60, 'STANDBY': 30, 'DOWN': 10},
|
||||
'by_status': {'PRD': 60, 'SBY': 30, 'UDT': 5, 'SDT': 5, 'EGT': 0, 'NST': 0, 'OTHER': 0},
|
||||
'by_workcenter_group': {'焊接': 50, '成型': 50},
|
||||
'with_active_job': 40,
|
||||
'with_wip': 35,
|
||||
'ou_pct': 63.2,
|
||||
'availability_pct': 90.0,
|
||||
}
|
||||
|
||||
response = self.client.get('/api/resource/summary')
|
||||
response = self.client.get('/api/resource/status/summary')
|
||||
|
||||
self.assertEqual(response.status_code, 200)
|
||||
data = json.loads(response.data)
|
||||
@@ -241,6 +243,8 @@ class TestResourceAPIIntegration(unittest.TestCase):
|
||||
# Verify response structure
|
||||
self.assertIn('success', data)
|
||||
self.assertTrue(data['success'])
|
||||
self.assertIn('data', data)
|
||||
self.assertIn('total_count', data['data'])
|
||||
|
||||
|
||||
class TestAPIContentType(unittest.TestCase):
|
||||
|
||||
@@ -42,7 +42,7 @@ class AppFactoryTests(unittest.TestCase):
|
||||
"/api/wip/detail/<workcenter>",
|
||||
"/api/wip/meta/workcenters",
|
||||
"/api/wip/meta/packages",
|
||||
"/api/resource/summary",
|
||||
"/api/resource/status/summary",
|
||||
"/api/dashboard/kpi",
|
||||
"/api/excel-query/upload",
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user