Files
DashBoard/src/mes_dashboard/services/job_query_service.py
commit a275c30c0e feat(reject-history): fix silent data loss by propagating partial failure metadata to frontend
Chunk failures in BatchQueryEngine were silently discarded — `has_partial_failure` was tracked
in Redis but never surfaced to the API response or frontend. Users could see incomplete data
without any warning. This commit closes the gap end-to-end:

Backend:
- Track failed chunk time ranges (`failed_ranges`) in batch engine progress metadata
- Add single retry for transient Oracle errors (timeout, connection) in `_execute_single_chunk` (see the sketch after this list)
- Read `get_batch_progress()` after merge but before `redis_clear_batch()` cleanup
- Inject `has_partial_failure`, `failed_chunk_count`, `failed_ranges` into API response meta
- Persist partial failure flag to independent Redis key with TTL aligned to data storage layer
- Add shared container-resolution policy module with wildcard/expansion guardrails
- Refactor reason filter from single-value to multi-select (`reason` → `reasons`)
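
A minimal sketch of the retry-and-record pattern, assuming hypothetical names
(`_is_transient_oracle_error` and the `progress` dict layout are illustrative;
only `_execute_single_chunk`, `has_partial_failure`, `failed_ranges`, and
`failed_chunk_count` come from this change):

    def _is_transient_oracle_error(exc: Exception) -> bool:
        """Hypothetical classifier: timeouts and connection drops are retryable."""
        text = str(exc).lower()
        return 'timeout' in text or 'connection' in text

    def _execute_single_chunk(chunk, run_chunk, progress):
        for attempt in (1, 2):  # at most one retry
            try:
                return run_chunk(chunk)
            except Exception as exc:
                if attempt == 1 and _is_transient_oracle_error(exc):
                    continue  # transient error: retry once
                # Permanent failure: record the time range instead of dropping it.
                progress['has_partial_failure'] = True
                progress.setdefault('failed_ranges', []).append(
                    (chunk['chunk_start'], chunk['chunk_end'])
                )
                progress['failed_chunk_count'] = len(progress['failed_ranges'])
                return None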

Frontend:
- Add client-side date range validation (730-day limit) before API submission
- Display amber warning banner on partial failure with specific failed date ranges (meta shape sketched after this list)
- Support generic fallback message for container-mode queries without date ranges
- Update FilterPanel to support multi-select reason chips
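
The warning banner consumes the meta block injected by the backend; an
illustrative shape (field names from this change, values hypothetical):

    meta = {
        'has_partial_failure': True,
        'failed_chunk_count': 2,
        'failed_ranges': [
            ('2025-03-01', '2025-03-31'),
            ('2025-07-01', '2025-07-31'),
        ],
    }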

Specs & tests:
- Create batch-query-resilience spec; update reject-history-api and reject-history-page specs
- Add 7 new tests for retry, memory guard, failed ranges, partial failure propagation, TTL
- Cross-service regression verified (hold, resource, job, msd — 411 tests pass)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-03 14:00:07 +08:00


# -*- coding: utf-8 -*-
"""Job Query Service.

Provides functions for querying maintenance job data:

- Job list by resource IDs
- Job transaction history detail
- CSV export with full history

Architecture:

- Uses resource_cache as the source for equipment master data
- Queries DW_MES_JOB for job current status
- Queries DW_MES_JOBTXNHISTORY for transaction history
- Supports batching for large resource lists (Oracle IN clause limit)
"""
import csv
import io
import logging
from datetime import datetime
from typing import Dict, List, Any, Optional, Generator, Tuple

import pandas as pd

from mes_dashboard.core.database import read_sql_df_slow as read_sql_df, get_db_connection
from mes_dashboard.sql import SQLLoader, QueryBuilder
from mes_dashboard.config.field_contracts import get_export_headers, get_export_api_keys

logger = logging.getLogger('mes_dashboard.job_query')

# Constants
BATCH_SIZE = 1000  # Oracle IN clause limit
MAX_DATE_RANGE_DAYS = 365
QUERY_ERROR_MESSAGE = "查詢服務暫時無法使用"
EXPORT_ERROR_MESSAGE = "匯出服務暫時無法使用"


# ============================================================
# Validation Functions
# ============================================================

def validate_date_range(start_date: str, end_date: str) -> Optional[str]:
    """Validate date range.

    Args:
        start_date: Start date in YYYY-MM-DD format
        end_date: End date in YYYY-MM-DD format

    Returns:
        Error message if validation fails, None if valid.
    """
    try:
        start = datetime.strptime(start_date, '%Y-%m-%d')
        end = datetime.strptime(end_date, '%Y-%m-%d')
        if end < start:
            return '結束日期不可早於起始日期'
        diff = (end - start).days
        if diff > MAX_DATE_RANGE_DAYS:
            return f'日期範圍不可超過 {MAX_DATE_RANGE_DAYS} 天'
        return None
    except ValueError as e:
        return f'日期格式錯誤: {e}'
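
# Illustrative behaviour (hypothetical dates):
#   validate_date_range('2026-01-01', '2026-01-31')  -> None (valid)
#   validate_date_range('2026-01-31', '2026-01-01')  -> '結束日期不可早於起始日期'
#   validate_date_range('2026-01-01', '2027-06-01')  -> '日期範圍不可超過 365 天'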


# ============================================================
# Resource Filter Helpers
# ============================================================

def _build_resource_filter(
    resource_ids: List[str], max_chunk_size: int = BATCH_SIZE
) -> List[List[str]]:
    """Build chunked resource ID lists for Oracle IN clause limits.

    Args:
        resource_ids: List of resource IDs.
        max_chunk_size: Maximum items per IN clause.

    Returns:
        Chunked resource ID values.
    """
    normalized_ids: List[str] = []
    for rid in resource_ids:
        if rid is None:
            continue
        text = str(rid).strip()
        if text:
            normalized_ids.append(text)
    if not normalized_ids:
        return []
    chunks: List[List[str]] = []
    for i in range(0, len(normalized_ids), max_chunk_size):
        chunk = normalized_ids[i:i + max_chunk_size]
        chunks.append(chunk)
    return chunks
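
# Illustrative example: 2,500 IDs split under the default BATCH_SIZE of 1,000
# (hypothetical values):
#   _build_resource_filter([f'R{i}' for i in range(2500)])
#   -> [['R0', ..., 'R999'], ['R1000', ..., 'R1999'], ['R2000', ..., 'R2499']]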


def _build_resource_filter_sql(
    resource_ids: List[str],
    column: str = 'j.RESOURCEID',
    max_chunk_size: int = BATCH_SIZE,
    return_params: bool = False,
) -> str | Tuple[str, Dict[str, Any]]:
    """Build parameterized SQL condition for resource ID filtering.

    Uses bind variables via QueryBuilder and chunks values to satisfy Oracle
    IN-clause limits.

    Args:
        resource_ids: List of resource IDs.
        column: Column name to filter on.
        max_chunk_size: Maximum items per IN clause.
        return_params: If True, return (condition_sql, params).

    Returns:
        Condition SQL string, or tuple of condition SQL and parameters.
    """
    chunks = _build_resource_filter(resource_ids, max_chunk_size=max_chunk_size)
    if not chunks:
        result: Tuple[str, Dict[str, Any]] = ("1=0", {})
        return result if return_params else result[0]
    builder = QueryBuilder()
    for chunk in chunks:
        builder.add_in_condition(column, chunk)
    if len(builder.conditions) == 1:
        condition_sql = builder.conditions[0]
    else:
        condition_sql = "(" + " OR ".join(builder.conditions) + ")"
    result = (condition_sql, builder.params.copy())
    return result if return_params else result[0]
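
# Illustrative output for two chunks (bind-variable names depend on
# QueryBuilder internals and are assumed here):
#   condition_sql: "(j.RESOURCEID IN (:rid_0, :rid_1, ...) OR j.RESOURCEID IN (:rid_1000, ...))"
#   params:        {'rid_0': 'R0', 'rid_1': 'R1', ...}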


# ============================================================
# Query Functions
# ============================================================

_JOB_CACHE_TTL = 600  # 10 min for job query results


def get_jobs_by_resources(
    resource_ids: List[str],
    start_date: str,
    end_date: str
) -> Dict[str, Any]:
    """Query jobs for selected resources within date range.

    For date ranges exceeding BATCH_QUERY_TIME_THRESHOLD_DAYS (default 10),
    the query is decomposed into monthly chunks via BatchQueryEngine.
    Results are cached in Redis to avoid redundant Oracle queries.

    Args:
        resource_ids: List of RESOURCEID values to query
        start_date: Start date in YYYY-MM-DD format
        end_date: End date in YYYY-MM-DD format

    Returns:
        Dict with 'data' (list of job records) and 'total' (count),
        or 'error' if query fails.
    """
    # Validate inputs
    if not resource_ids:
        return {'error': '請選擇至少一台設備'}
    validation_error = validate_date_range(start_date, end_date)
    if validation_error:
        return {'error': validation_error}
    try:
        from mes_dashboard.services.batch_query_engine import (
            decompose_by_time_range,
            execute_plan,
            merge_chunks,
            compute_query_hash,
            should_decompose_by_time,
        )
        from mes_dashboard.core.redis_df_store import redis_load_df, redis_store_df

        # Check Redis cache first
        cache_hash = compute_query_hash({
            "resource_ids": sorted(resource_ids),
            "start_date": start_date,
            "end_date": end_date,
        })
        cache_key = f"job_query:{cache_hash}"
        cached_df = redis_load_df(cache_key)
        if cached_df is not None:
            logger.info("Job query cache hit (hash=%s)", cache_hash)
            df = cached_df
        elif should_decompose_by_time(start_date, end_date):
            # --- Engine path for long date ranges ---
            engine_chunks = decompose_by_time_range(start_date, end_date)
            # Build resource filter once (reused across all chunks)
            resource_filter, resource_params = _build_resource_filter_sql(
                resource_ids, return_params=True
            )
            sql = SQLLoader.load("job_query/job_list")
            sql = sql.replace("{{ RESOURCE_FILTER }}", resource_filter)
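
            # Each engine chunk is assumed to carry its own time window as
            # {'chunk_start': ..., 'chunk_end': ...}; the shape is inferred
            # from the usage in _run_job_chunk below.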
            def _run_job_chunk(chunk, max_rows_per_chunk=None):
                chunk_params = {
                    'start_date': chunk['chunk_start'],
                    'end_date': chunk['chunk_end'],
                    **resource_params,
                }
                result = read_sql_df(sql, chunk_params)
                return result if result is not None else pd.DataFrame()

            logger.info(
                "Engine activated for job query: %d chunks, %d resources",
                len(engine_chunks), len(resource_ids),
            )
            execute_plan(
                engine_chunks, _run_job_chunk,
                query_hash=cache_hash,
                cache_prefix="job",
                chunk_ttl=_JOB_CACHE_TTL,
            )
            df = merge_chunks("job", cache_hash)
            # Store merged result for fast re-access
            if not df.empty:
                redis_store_df(cache_key, df, ttl=_JOB_CACHE_TTL)
        else:
            # --- Direct path (short query) ---
            resource_filter, resource_params = _build_resource_filter_sql(
                resource_ids, return_params=True
            )
            sql = SQLLoader.load("job_query/job_list")
            sql = sql.replace("{{ RESOURCE_FILTER }}", resource_filter)
            params = {
                'start_date': start_date,
                'end_date': end_date,
                **resource_params,
            }
            df = read_sql_df(sql, params)
            if df is None:
                df = pd.DataFrame()
            # Cache the result
            if not df.empty:
                redis_store_df(cache_key, df, ttl=_JOB_CACHE_TTL)

        # Convert to records
        data = []
        for _, row in df.iterrows():
            record = {}
            for col in df.columns:
                value = row[col]
                if pd.isna(value):
                    record[col] = None
                elif isinstance(value, datetime):
                    record[col] = value.strftime('%Y-%m-%d %H:%M:%S')
                else:
                    record[col] = value
            data.append(record)
        logger.info("Job query returned %d records for %d resources", len(data), len(resource_ids))
        return {
            'data': data,
            'total': len(data),
            'resource_count': len(resource_ids)
        }
    except Exception as exc:
        logger.exception("Job query failed: %s", exc)
        return {'error': QUERY_ERROR_MESSAGE}
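
# Illustrative call (hypothetical IDs):
#   get_jobs_by_resources(['EQP-001', 'EQP-002'], '2026-01-01', '2026-02-28')
#   -> {'data': [...], 'total': 123, 'resource_count': 2}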


def get_job_txn_history(job_id: str) -> Dict[str, Any]:
    """Query transaction history for a single job.

    Args:
        job_id: The JOBID to query

    Returns:
        Dict with 'data' (list of transaction records) and 'total' (count),
        or 'error' if query fails.
    """
    if not job_id:
        return {'error': '請指定工單 ID'}
    try:
        # Load SQL template
        sql = SQLLoader.load("job_query/job_txn_detail")
        # Execute query
        params = {'job_id': job_id}
        df = read_sql_df(sql, params)
        if df is None:
            df = pd.DataFrame()
        # Convert to records
        data = []
        for _, row in df.iterrows():
            record = {}
            for col in df.columns:
                value = row[col]
                if pd.isna(value):
                    record[col] = None
                elif isinstance(value, datetime):
                    record[col] = value.strftime('%Y-%m-%d %H:%M:%S')
                else:
                    record[col] = value
            data.append(record)
        logger.debug("Transaction history query returned %d records for job %s", len(data), job_id)
        return {
            'data': data,
            'total': len(data),
            'job_id': job_id
        }
    except Exception as exc:
        logger.exception("Transaction history query failed for job %s: %s", job_id, exc)
        return {'error': QUERY_ERROR_MESSAGE}


# ============================================================
# Export Functions
# ============================================================

def export_jobs_with_history(
    resource_ids: List[str],
    start_date: str,
    end_date: str
) -> Generator[str, None, None]:
    """Generate CSV content for jobs with full transaction history.

    Uses streaming to handle large datasets without memory issues.

    Args:
        resource_ids: List of RESOURCEID values to export
        start_date: Start date in YYYY-MM-DD format
        end_date: End date in YYYY-MM-DD format

    Yields:
        CSV rows as strings (including header row first)
    """
    # Validate inputs
    if not resource_ids:
        yield "Error: 請選擇至少一台設備\n"
        return
    validation_error = validate_date_range(start_date, end_date)
    if validation_error:
        yield f"Error: {validation_error}\n"
        return
    try:
        # Build resource filter
        resource_filter, resource_params = _build_resource_filter_sql(
            resource_ids, return_params=True
        )
        # Load SQL template
        sql = SQLLoader.load("job_query/job_txn_export")
        sql = sql.replace("{{ RESOURCE_FILTER }}", resource_filter)
        # Execute query
        params = {
            'start_date': start_date,
            'end_date': end_date,
            **resource_params,
        }
        df = read_sql_df(sql, params)
        if df is None or len(df) == 0:
            yield "Error: 無符合條件的資料\n"
            return
        # Write CSV header with BOM for Excel UTF-8 compatibility
        output = io.StringIO()
        output.write('\ufeff')  # UTF-8 BOM
        export_keys = get_export_api_keys('job_query')
        headers = get_export_headers('job_query')
        if not export_keys or not headers or len(export_keys) != len(headers):
            export_keys = [
                'RESOURCENAME', 'JOBID', 'JOB_FINAL_STATUS', 'JOBMODELNAME', 'JOBORDERNAME',
                'JOB_CREATEDATE', 'JOB_COMPLETEDATE', 'JOB_CAUSECODENAME', 'JOB_REPAIRCODENAME', 'JOB_SYMPTOMCODENAME',
                'TXNDATE', 'FROMJOBSTATUS', 'TXN_JOBSTATUS', 'STAGENAME',
                'TXN_CAUSECODENAME', 'TXN_REPAIRCODENAME', 'TXN_SYMPTOMCODENAME',
                'USER_NAME', 'EMP_NAME', 'COMMENTS'
            ]
            headers = export_keys
        writer = csv.writer(output)
        writer.writerow(headers)
        yield output.getvalue()
        output.truncate(0)
        output.seek(0)
        # Write data rows
        for _, row in df.iterrows():
            csv_row = []
            for col in export_keys:
                value = row.get(col)
                if pd.isna(value):
                    csv_row.append('')
                elif isinstance(value, datetime):
                    csv_row.append(value.strftime('%Y-%m-%d %H:%M:%S'))
                else:
                    csv_row.append(str(value))
            writer.writerow(csv_row)
            yield output.getvalue()
            output.truncate(0)
            output.seek(0)
        logger.info("CSV export completed: %d records", len(df))
    except Exception as exc:
        logger.exception("CSV export failed: %s", exc)
        yield f"Error: {EXPORT_ERROR_MESSAGE}\n"


def get_export_data(
    resource_ids: List[str],
    start_date: str,
    end_date: str
) -> Dict[str, Any]:
    """Get export data as a dict (for non-streaming use cases).

    Args:
        resource_ids: List of RESOURCEID values to export
        start_date: Start date in YYYY-MM-DD format
        end_date: End date in YYYY-MM-DD format

    Returns:
        Dict with 'data', 'columns', 'total', or 'error' if query fails.
    """
    # Validate inputs
    if not resource_ids:
        return {'error': '請選擇至少一台設備'}
    validation_error = validate_date_range(start_date, end_date)
    if validation_error:
        return {'error': validation_error}
    try:
        # Build resource filter
        resource_filter, resource_params = _build_resource_filter_sql(
            resource_ids, return_params=True
        )
        # Load SQL template
        sql = SQLLoader.load("job_query/job_txn_export")
        sql = sql.replace("{{ RESOURCE_FILTER }}", resource_filter)
        # Execute query
        params = {
            'start_date': start_date,
            'end_date': end_date,
            **resource_params,
        }
        df = read_sql_df(sql, params)
        if df is None:
            df = pd.DataFrame()
        # Convert to records
        data = []
        for _, row in df.iterrows():
            record = {}
            for col in df.columns:
                value = row[col]
                if pd.isna(value):
                    record[col] = None
                elif isinstance(value, datetime):
                    record[col] = value.strftime('%Y-%m-%d %H:%M:%S')
                else:
                    record[col] = value
            data.append(record)
        return {
            'data': data,
            'columns': list(df.columns),
            'total': len(data)
        }
    except Exception as exc:
        logger.exception("Export data query failed: %s", exc)
        return {'error': QUERY_ERROR_MESSAGE}
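
# Illustrative call (hypothetical IDs):
#   get_export_data(['EQP-001'], '2026-01-01', '2026-01-31')
#   -> {'data': [...], 'columns': [...], 'total': 42}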