Files
DashBoard/src/mes_dashboard/services/reject_history_service.py
egg cbb943dfe5 feat(trace-pool-isolation): migrate event_fetcher/lineage_engine to slow connections + fix 51 test failures
Trace pipeline pool isolation:
- Switch event_fetcher and lineage_engine to read_sql_df_slow (non-pooled)
- Reduce EVENT_FETCHER_MAX_WORKERS 4→2, TRACE_EVENTS_MAX_WORKERS 4→2
- Add 60s timeout per batch query, cache skip for CID>10K
- Early del raw_domain_results + gc.collect() for large queries
- Increase DB_SLOW_MAX_CONCURRENT: base 3→5, dev 2→3, prod 3→5

Test fixes (51 pre-existing failures → 0):
- reject_history: WORKFLOW CSV header, strict bool validation, pareto mock path
- portal shell: remove non-existent /tmtt-defect route from tests
- conftest: add --run-stress option to skip stress/load tests by default
- migration tests: skipif baseline directory missing
- performance test: update Vite asset assertion
- wip hold: add firstname/waferdesc mock params
- template integration: add /reject-history canonical route

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-25 16:13:19 +08:00

1044 lines
38 KiB
Python

# -*- coding: utf-8 -*-
"""Service layer for reject-history page APIs."""
from __future__ import annotations
import csv
import io
import logging
from datetime import date, datetime
from typing import Any, Dict, Generator, Iterable, Optional
import pandas as pd
from mes_dashboard.core.database import read_sql_df_slow as read_sql_df
from mes_dashboard.services.scrap_reason_exclusion_cache import get_excluded_reasons
from mes_dashboard.sql import QueryBuilder, SQLLoader
logger = logging.getLogger("mes_dashboard.reject_history_service")
MAX_QUERY_DAYS = 730
VALID_GRANULARITY = {"day", "week", "month"}
VALID_METRIC_MODE = {"reject_total", "defect"}
MATERIAL_REASON_OPTION = "原物料報廢"
def _parse_date(value: str) -> date:
return datetime.strptime(value, "%Y-%m-%d").date()
def _validate_range(start_date: str, end_date: str) -> None:
    """Ensure the window is chronologically ordered and within MAX_QUERY_DAYS."""
    start, end = _parse_date(start_date), _parse_date(end_date)
    if end < start:
        raise ValueError("end_date 不可早於 start_date")
    if (end - start).days > MAX_QUERY_DAYS:
        raise ValueError(f"日期範圍不可超過 {MAX_QUERY_DAYS}")
def _normalize_text(value: Any) -> str:
return str(value or "").strip()
def _as_int(value: Any) -> int:
if value is None:
return 0
try:
if pd.isna(value):
return 0
except Exception:
pass
try:
return int(float(value))
except (TypeError, ValueError):
return 0
def _as_float(value: Any) -> float:
if value is None:
return 0.0
try:
if pd.isna(value):
return 0.0
except Exception:
pass
try:
return float(value)
except (TypeError, ValueError):
return 0.0
def _to_date_str(value: Any) -> str:
if isinstance(value, datetime):
return value.strftime("%Y-%m-%d")
if isinstance(value, date):
return value.strftime("%Y-%m-%d")
if isinstance(value, pd.Timestamp):
return value.to_pydatetime().strftime("%Y-%m-%d")
text = _normalize_text(value)
if not text:
return ""
try:
return pd.to_datetime(text).strftime("%Y-%m-%d")
except Exception:
return text
def _to_datetime_str(value: Any) -> str:
if isinstance(value, datetime):
return value.strftime("%Y-%m-%d %H:%M:%S")
if isinstance(value, pd.Timestamp):
return value.to_pydatetime().strftime("%Y-%m-%d %H:%M:%S")
if isinstance(value, date):
return value.strftime("%Y-%m-%d")
text = _normalize_text(value)
if not text:
return ""
try:
return pd.to_datetime(text).strftime("%Y-%m-%d %H:%M:%S")
except Exception:
return text
def _date_bucket_expr(granularity: str) -> str:
if granularity == "week":
return "TRUNC(b.TXN_DAY, 'IW')"
if granularity == "month":
return "TRUNC(b.TXN_DAY, 'MM')"
return "TRUNC(b.TXN_DAY)"
def _metric_column(metric_mode: str) -> str:
if metric_mode == "defect":
return "b.DEFECT_QTY"
return "b.REJECT_TOTAL_QTY"
def _load_sql(name: str) -> str:
    """Load a SQL template from the reject_history template folder."""
    template_path = f"reject_history/{name}"
    return SQLLoader.load(template_path)
def _base_query_sql(variant: str = "") -> str:
    """Return the daily (or per-lot) base SQL with leading comment lines removed.

    Stripping leading "--" comments and blank lines matters because the
    WITH-splitting logic downstream must see the first real SQL token first.
    """
    sql_name = "performance_daily_lot" if variant == "lot" else "performance_daily"
    raw = _load_sql(sql_name).strip().rstrip(";")
    lines = raw.splitlines()
    start = 0
    for position, line in enumerate(lines):
        stripped = line.strip()
        if stripped and not stripped.startswith("--"):
            start = position
            break
    return "\n".join(lines[start:]).strip()
def _split_with_query(sql: str) -> tuple[str, str] | None:
"""Split a top-level WITH query into (cte_segment, final_select)."""
text = (sql or "").strip()
if not text.lower().startswith("with "):
return None
depth = 0
in_string = False
i = 0
while i < len(text):
ch = text[i]
if ch == "'":
if in_string and i + 1 < len(text) and text[i + 1] == "'":
i += 2
continue
in_string = not in_string
i += 1
continue
if in_string:
i += 1
continue
if ch == "(":
depth += 1
elif ch == ")":
depth = max(depth - 1, 0)
elif depth == 0:
head = text[i : i + 6]
if head.lower() == "select":
prev_ok = i == 0 or not (text[i - 1].isalnum() or text[i - 1] == "_")
next_idx = i + 6
next_ok = next_idx >= len(text) or not (
text[next_idx].isalnum() or text[next_idx] == "_"
)
if prev_ok and next_ok:
cte_segment = text[5:i].strip().rstrip(",")
final_select = text[i:].strip()
if cte_segment and final_select:
return cte_segment, final_select
return None
i += 1
return None
def _base_with_cte_sql(alias: str = "base", variant: str = "") -> str:
    """Wrap the base query as a named CTE, merging into an existing WITH if present."""
    base_sql = _base_query_sql(variant)
    parts = _split_with_query(base_sql)
    if parts is None:
        # Plain SELECT: wrap the whole statement as a single CTE.
        return f"WITH {alias} AS (\n{base_sql}\n)"
    cte_segment, final_select = parts
    # Already a WITH query: append our alias as one more CTE after the others.
    return f"WITH {cte_segment},\n{alias} AS (\n{final_select}\n)"
def _build_where_clause(
    *,
    workcenter_groups: Optional[list[str]] = None,
    packages: Optional[list[str]] = None,
    reasons: Optional[list[str]] = None,
    categories: Optional[list[str]] = None,
    include_excluded_scrap: bool = False,
    exclude_material_scrap: bool = True,
    exclude_pb_diode: bool = True,
) -> tuple[str, dict[str, Any], dict[str, Any]]:
    """Translate UI filter selections into (where_clause, bind_params, meta).

    The generated predicates target the ``b`` alias of the base CTE. ``meta``
    records which filters/policies were actually applied so API responses can
    explain the data scope to the UI.
    """
    builder = QueryBuilder()
    # Normalize every selection: strip, drop empties, dedupe, sort for stable SQL.
    normalized_wc_groups = sorted({_normalize_text(v) for v in (workcenter_groups or []) if _normalize_text(v)})
    normalized_packages = sorted({_normalize_text(v) for v in (packages or []) if _normalize_text(v)})
    normalized_reasons = sorted({_normalize_text(v) for v in (reasons or []) if _normalize_text(v)})
    # The "material scrap" reason option is virtual: it maps to SCRAP_OBJECTTYPE,
    # not to LOSSREASONNAME, so it is filtered out of the reason-name list.
    material_reason_selected = MATERIAL_REASON_OPTION in normalized_reasons
    reason_name_filters = [value for value in normalized_reasons if value != MATERIAL_REASON_OPTION]
    normalized_categories = sorted({_normalize_text(v) for v in (categories or []) if _normalize_text(v)})
    if normalized_wc_groups:
        # Local import: avoids a module-level import cycle with filter_cache.
        from mes_dashboard.services.filter_cache import get_specs_for_groups
        specs_in_groups = get_specs_for_groups(normalized_wc_groups)
        if specs_in_groups:
            # Specs in cache are uppercase; use UPPER() for case-insensitive match
            builder.add_in_condition("UPPER(b.SPECNAME)", specs_in_groups)
        else:
            # Fallback: cache not ready or no specs found for these groups
            builder.add_in_condition("b.WORKCENTER_GROUP", normalized_wc_groups)
    if normalized_packages:
        builder.add_in_condition("b.PRODUCTLINENAME", normalized_packages)
    if reason_name_filters:
        builder.add_in_condition("b.LOSSREASONNAME", reason_name_filters)
    if material_reason_selected:
        builder.add_condition("UPPER(NVL(TRIM(b.SCRAP_OBJECTTYPE), '-')) = 'MATERIAL'")
    material_exclusion_applied = False
    # Exclude material scrap rows unless the user explicitly asked for them.
    if exclude_material_scrap and not material_reason_selected:
        builder.add_condition("UPPER(NVL(TRIM(b.SCRAP_OBJECTTYPE), '-')) <> 'MATERIAL'")
        material_exclusion_applied = True
    pb_diode_exclusion_applied = False
    # Skip the PB_ exclusion when the user explicitly selected a PB_ package.
    if exclude_pb_diode and not any(p.startswith("PB_") for p in normalized_packages):
        builder.add_condition("b.PRODUCTLINENAME NOT LIKE 'PB\\_%' ESCAPE '\\'")
        pb_diode_exclusion_applied = True
    if normalized_categories:
        builder.add_in_condition("b.REJECTCATEGORYNAME", normalized_categories)
    exclusions_applied = False
    excluded_reason_codes = []
    reason_name_prefix_policy_applied = False
    if not include_excluded_scrap:
        excluded_reason_codes = sorted(get_excluded_reasons())
        reason_name_prefix_policy_applied = True
        if excluded_reason_codes:
            exclusions_applied = True
            # Support exclusion matching by both normalized reason code and full reason text.
            builder.add_not_in_condition(
                "UPPER(NVL(TRIM(b.LOSSREASON_CODE), '-'))",
                excluded_reason_codes,
            )
            builder.add_not_in_condition(
                "UPPER(NVL(TRIM(b.LOSSREASONNAME), '-'))",
                excluded_reason_codes,
            )
        # Exclude reason labels that are not "NNN_*", and always exclude XXX_/ZZZ_ prefixes.
        builder.add_condition(
            "REGEXP_LIKE(UPPER(NVL(TRIM(b.LOSSREASONNAME), '')), '^[0-9]{3}_')"
        )
        builder.add_condition(
            "NOT REGEXP_LIKE(UPPER(NVL(TRIM(b.LOSSREASONNAME), '')), '^(XXX|ZZZ)_')"
        )
        exclusions_applied = True
    where_clause, params = builder.build_where_only()
    # Meta mirrors what was actually applied for transparency in API responses.
    meta = {
        "include_excluded_scrap": bool(include_excluded_scrap),
        "exclusion_applied": exclusions_applied,
        "reason_name_prefix_policy_applied": reason_name_prefix_policy_applied,
        "exclude_material_scrap": bool(exclude_material_scrap),
        "material_exclusion_applied": material_exclusion_applied,
        "excluded_reason_count": len(excluded_reason_codes),
        "workcenter_group_count": len(normalized_wc_groups),
        "package_filter_count": len(normalized_packages),
        "reason_filter_count": len(reason_name_filters),
        "material_reason_selected": material_reason_selected,
        "exclude_pb_diode": bool(exclude_pb_diode),
        "pb_diode_exclusion_applied": pb_diode_exclusion_applied,
    }
    return where_clause, params, meta
# Default date-window predicate substituted into the base query (``r`` alias).
# The end bound is exclusive ("< end_date + 1") so the final day is fully included.
_DEFAULT_BASE_WHERE = (
    "r.TXNDATE >= TO_DATE(:start_date, 'YYYY-MM-DD')"
    " AND r.TXNDATE < TO_DATE(:end_date, 'YYYY-MM-DD') + 1"
)
def _prepare_sql(
    name: str,
    *,
    where_clause: str = "",
    bucket_expr: str = "",
    metric_column: str = "",
    base_variant: str = "",
    base_where: str = "",
    dimension_column: str = "",
) -> str:
    """Load a SQL template and substitute every ``{{ ... }}`` placeholder.

    Replacements are applied in a fixed order so placeholders introduced by
    an earlier substitution (e.g. inside the base query) still get resolved.
    """
    replacements = {
        "{{ BASE_QUERY }}": _base_query_sql(base_variant),
        "{{ BASE_WITH_CTE }}": _base_with_cte_sql("base", base_variant),
        "{{ BASE_WHERE }}": base_where or _DEFAULT_BASE_WHERE,
        "{{ WHERE_CLAUSE }}": where_clause or "",
        "{{ BUCKET_EXPR }}": bucket_expr or "TRUNC(b.TXN_DAY)",
        "{{ METRIC_COLUMN }}": metric_column or "b.REJECT_TOTAL_QTY",
        "{{ DIMENSION_COLUMN }}": dimension_column or "b.LOSSREASONNAME",
    }
    sql = _load_sql(name)
    for placeholder, replacement in replacements.items():
        sql = sql.replace(placeholder, replacement)
    return sql
def _common_params(start_date: str, end_date: str, extra: Optional[dict[str, Any]] = None) -> dict[str, Any]:
params = {"start_date": start_date, "end_date": end_date}
if extra:
params.update(extra)
return params
def _list_to_csv(
rows: Iterable[dict[str, Any]],
headers: list[str],
) -> Generator[str, None, None]:
# BOM for UTF-8 so Excel opens the CSV with correct encoding
yield "\ufeff"
buffer = io.StringIO()
writer = csv.DictWriter(buffer, fieldnames=headers)
writer.writeheader()
yield buffer.getvalue()
buffer.seek(0)
buffer.truncate(0)
for row in rows:
writer.writerow(row)
yield buffer.getvalue()
buffer.seek(0)
buffer.truncate(0)
def _extract_distinct_text_values(df: pd.DataFrame, column: str) -> list[str]:
if df is None or df.empty or column not in df.columns:
return []
return sorted({
_normalize_text(value)
for value in df[column].dropna()
if _normalize_text(value)
})
def _extract_workcenter_group_options(df: pd.DataFrame) -> list[dict[str, Any]]:
    """Workcenter-group options ordered by their minimum sequence number, then name."""
    if df is None or df.empty or "WORKCENTER_GROUP" not in df.columns:
        return []
    best_sequence: dict[str, int] = {}
    for _, row in df.iterrows():
        name = _normalize_text(row.get("WORKCENTER_GROUP"))
        if not name:
            continue
        sequence = _as_int(row.get("WORKCENTERSEQUENCE_GROUP"))
        # Keep the smallest sequence seen for each group name.
        current = best_sequence.get(name)
        best_sequence[name] = sequence if current is None else min(current, sequence)
    ordered = sorted(best_sequence, key=lambda item: (best_sequence[item], item))
    return [{"name": name, "sequence": best_sequence[name]} for name in ordered]
def _has_material_scrap(df: pd.DataFrame) -> bool:
if df is None or df.empty or "SCRAP_OBJECTTYPE" not in df.columns:
return False
return (
df["SCRAP_OBJECTTYPE"]
.map(lambda value: _normalize_text(value).upper())
.eq("MATERIAL")
.any()
)
def get_filter_options(
    *,
    start_date: str,
    end_date: str,
    workcenter_groups: Optional[list[str]] = None,
    packages: Optional[list[str]] = None,
    reasons: Optional[list[str]] = None,
    categories: Optional[list[str]] = None,
    include_excluded_scrap: bool = False,
    exclude_material_scrap: bool = True,
    exclude_pb_diode: bool = True,
) -> dict[str, Any]:
    """Return filter options for the current draft context with one SQL round-trip."""
    _validate_range(start_date, end_date)
    where_clause, params, meta = _build_where_clause(
        workcenter_groups=workcenter_groups,
        packages=packages,
        reasons=reasons,
        categories=categories,
        include_excluded_scrap=include_excluded_scrap,
        exclude_material_scrap=exclude_material_scrap,
        exclude_pb_diode=exclude_pb_diode,
    )
    df = read_sql_df(
        _prepare_sql("filter_options", where_clause=where_clause),
        _common_params(start_date, end_date, params),
    )
    if df is None:
        df = pd.DataFrame()
    reason_options = _extract_distinct_text_values(df, "REASON")
    # Surface the virtual "material scrap" reason when such rows exist in range.
    if _has_material_scrap(df) and MATERIAL_REASON_OPTION not in reason_options:
        reason_options.append(MATERIAL_REASON_OPTION)
    return {
        "workcenter_groups": _extract_workcenter_group_options(df),
        "packages": _extract_distinct_text_values(df, "PACKAGE"),
        "reasons": reason_options,
        "meta": meta,
    }
def query_summary(
    *,
    start_date: str,
    end_date: str,
    workcenter_groups: Optional[list[str]] = None,
    packages: Optional[list[str]] = None,
    reasons: Optional[list[str]] = None,
    categories: Optional[list[str]] = None,
    include_excluded_scrap: bool = False,
    exclude_material_scrap: bool = True,
    exclude_pb_diode: bool = True,
) -> dict[str, Any]:
    """One-row KPI summary for the current filter context."""
    _validate_range(start_date, end_date)
    where_clause, params, meta = _build_where_clause(
        workcenter_groups=workcenter_groups,
        packages=packages,
        reasons=reasons,
        categories=categories,
        include_excluded_scrap=include_excluded_scrap,
        exclude_material_scrap=exclude_material_scrap,
        exclude_pb_diode=exclude_pb_diode,
    )
    sql = _prepare_sql("summary", where_clause=where_clause)
    df = read_sql_df(sql, _common_params(start_date, end_date, params))
    # The summary query yields a single row; fall back to an empty mapping.
    if df is None or df.empty:
        row = {}
    else:
        row = df.iloc[0]
    return {
        "MOVEIN_QTY": _as_int(row.get("MOVEIN_QTY")),
        "REJECT_TOTAL_QTY": _as_int(row.get("REJECT_TOTAL_QTY")),
        "DEFECT_QTY": _as_int(row.get("DEFECT_QTY")),
        "REJECT_RATE_PCT": round(_as_float(row.get("REJECT_RATE_PCT")), 4),
        "DEFECT_RATE_PCT": round(_as_float(row.get("DEFECT_RATE_PCT")), 4),
        "REJECT_SHARE_PCT": round(_as_float(row.get("REJECT_SHARE_PCT")), 4),
        "AFFECTED_LOT_COUNT": _as_int(row.get("AFFECTED_LOT_COUNT")),
        "AFFECTED_WORKORDER_COUNT": _as_int(row.get("AFFECTED_WORKORDER_COUNT")),
        "meta": meta,
    }
def query_trend(
    *,
    start_date: str,
    end_date: str,
    granularity: str = "day",
    workcenter_groups: Optional[list[str]] = None,
    packages: Optional[list[str]] = None,
    reasons: Optional[list[str]] = None,
    categories: Optional[list[str]] = None,
    include_excluded_scrap: bool = False,
    exclude_material_scrap: bool = True,
    exclude_pb_diode: bool = True,
) -> dict[str, Any]:
    """Time-bucketed reject/defect trend for the current filter context.

    Raises:
        ValueError: on an invalid date range or granularity.
    """
    _validate_range(start_date, end_date)
    normalized_granularity = _normalize_text(granularity).lower() or "day"
    if normalized_granularity not in VALID_GRANULARITY:
        raise ValueError("Invalid granularity. Use day, week, or month")
    where_clause, params, meta = _build_where_clause(
        workcenter_groups=workcenter_groups,
        packages=packages,
        reasons=reasons,
        categories=categories,
        include_excluded_scrap=include_excluded_scrap,
        exclude_material_scrap=exclude_material_scrap,
        exclude_pb_diode=exclude_pb_diode,
    )
    sql = _prepare_sql(
        "trend",
        where_clause=where_clause,
        bucket_expr=_date_bucket_expr(normalized_granularity),
    )
    df = read_sql_df(sql, _common_params(start_date, end_date, params))
    source_rows = [] if df is None or df.empty else [row for _, row in df.iterrows()]
    items = [
        {
            "bucket_date": _to_date_str(row.get("BUCKET_DATE")),
            "MOVEIN_QTY": _as_int(row.get("MOVEIN_QTY")),
            "REJECT_TOTAL_QTY": _as_int(row.get("REJECT_TOTAL_QTY")),
            "DEFECT_QTY": _as_int(row.get("DEFECT_QTY")),
            "REJECT_RATE_PCT": round(_as_float(row.get("REJECT_RATE_PCT")), 4),
            "DEFECT_RATE_PCT": round(_as_float(row.get("DEFECT_RATE_PCT")), 4),
        }
        for row in source_rows
    ]
    return {
        "items": items,
        "granularity": normalized_granularity,
        "meta": meta,
    }
def query_reason_pareto(
    *,
    start_date: str,
    end_date: str,
    metric_mode: str = "reject_total",
    pareto_scope: str = "top80",
    workcenter_groups: Optional[list[str]] = None,
    packages: Optional[list[str]] = None,
    reasons: Optional[list[str]] = None,
    categories: Optional[list[str]] = None,
    include_excluded_scrap: bool = False,
    exclude_material_scrap: bool = True,
    exclude_pb_diode: bool = True,
) -> dict[str, Any]:
    """Pareto of scrap reasons ranked by the chosen metric.

    Args:
        metric_mode: "reject_total" or "defect" — selects the SQL metric column.
        pareto_scope: "top80" keeps items with cumulative share <= 80%;
            "all" returns every row.

    Returns:
        Dict with "items", the echoed mode/scope, and query "meta"
        (including item counts before/after the top-80 cut).

    Raises:
        ValueError: on invalid date range, metric_mode, or pareto_scope.
    """
    _validate_range(start_date, end_date)
    normalized_metric = _normalize_text(metric_mode).lower() or "reject_total"
    if normalized_metric not in VALID_METRIC_MODE:
        raise ValueError("Invalid metric_mode. Use reject_total or defect")
    normalized_scope = _normalize_text(pareto_scope).lower() or "top80"
    if normalized_scope not in {"top80", "all"}:
        raise ValueError("Invalid pareto_scope. Use top80 or all")
    where_clause, params, meta = _build_where_clause(
        workcenter_groups=workcenter_groups,
        packages=packages,
        reasons=reasons,
        categories=categories,
        include_excluded_scrap=include_excluded_scrap,
        exclude_material_scrap=exclude_material_scrap,
        exclude_pb_diode=exclude_pb_diode,
    )
    sql = _prepare_sql(
        "reason_pareto",
        where_clause=where_clause,
        metric_column=_metric_column(normalized_metric),
    )
    df = read_sql_df(sql, _common_params(start_date, end_date, params))
    all_items = []
    if df is not None and not df.empty:
        # PCT/CUM_PCT are computed by the SQL template; rows arrive pre-ranked.
        for _, row in df.iterrows():
            all_items.append(
                {
                    "reason": _normalize_text(row.get("REASON")) or "(未填寫)",
                    "metric_value": _as_float(row.get("METRIC_VALUE")),
                    "MOVEIN_QTY": _as_int(row.get("MOVEIN_QTY")),
                    "REJECT_TOTAL_QTY": _as_int(row.get("REJECT_TOTAL_QTY")),
                    "DEFECT_QTY": _as_int(row.get("DEFECT_QTY")),
                    "count": _as_int(row.get("AFFECTED_LOT_COUNT")),
                    "pct": round(_as_float(row.get("PCT")), 4),
                    "cumPct": round(_as_float(row.get("CUM_PCT")), 4),
                }
            )
    items = list(all_items)
    if normalized_scope == "top80" and items:
        top_items = [item for item in items if _as_float(item.get("cumPct")) <= 80.0]
        # Keep strict top-80% behavior, but still return one row when first item already exceeds 80%.
        if not top_items:
            top_items = [items[0]]
        items = top_items
    return {
        "items": items,
        "metric_mode": normalized_metric,
        "pareto_scope": normalized_scope,
        "meta": {
            **meta,
            "total_items_after_filter": len(all_items),
            "displayed_items": len(items),
        },
    }
# Allowed dimension → SQL column mapping for dimension_pareto.
# Keys are the public API dimension names; values are columns on the base CTE
# alias ``b``. Acts as the whitelist that blocks arbitrary column injection.
_DIMENSION_COLUMN_MAP = {
    "reason": "b.LOSSREASONNAME",
    "package": "b.PRODUCTLINENAME",
    "type": "b.PJ_TYPE",
    "workflow": "b.WORKFLOWNAME",
    "workcenter": "b.WORKCENTER_GROUP",
    "equipment": "b.PRIMARY_EQUIPMENTNAME",
}
def query_dimension_pareto(
    *,
    start_date: str,
    end_date: str,
    dimension: str = "reason",
    metric_mode: str = "reject_total",
    pareto_scope: str = "top80",
    workcenter_groups: Optional[list[str]] = None,
    packages: Optional[list[str]] = None,
    reasons: Optional[list[str]] = None,
    categories: Optional[list[str]] = None,
    include_excluded_scrap: bool = False,
    exclude_material_scrap: bool = True,
    exclude_pb_diode: bool = True,
) -> dict[str, Any]:
    """Pareto chart grouped by an arbitrary dimension (reason, package, type, workcenter, equipment).

    Args:
        dimension: one of the keys in _DIMENSION_COLUMN_MAP.
        metric_mode: "reject_total" or "defect".
        pareto_scope: "top80" or "all".

    Returns:
        Dict with "items" (grouped values keyed under "reason" for frontend
        compatibility), the echoed dimension/mode/scope, and query "meta".

    Raises:
        ValueError: on invalid date range, dimension, metric_mode, or pareto_scope.
    """
    _validate_range(start_date, end_date)
    normalized_dim = _normalize_text(dimension).lower() or "reason"
    # Whitelist lookup also protects against SQL injection via the dimension name.
    if normalized_dim not in _DIMENSION_COLUMN_MAP:
        raise ValueError(f"Invalid dimension '{dimension}'. Use: {', '.join(_DIMENSION_COLUMN_MAP)}")
    # For reason dimension, delegate to existing optimized function
    if normalized_dim == "reason":
        return query_reason_pareto(
            start_date=start_date, end_date=end_date,
            metric_mode=metric_mode, pareto_scope=pareto_scope,
            workcenter_groups=workcenter_groups, packages=packages,
            reasons=reasons, categories=categories,
            include_excluded_scrap=include_excluded_scrap,
            exclude_material_scrap=exclude_material_scrap,
            exclude_pb_diode=exclude_pb_diode,
        )
    normalized_metric = _normalize_text(metric_mode).lower() or "reject_total"
    if normalized_metric not in VALID_METRIC_MODE:
        raise ValueError("Invalid metric_mode. Use reject_total or defect")
    normalized_scope = _normalize_text(pareto_scope).lower() or "top80"
    if normalized_scope not in {"top80", "all"}:
        raise ValueError("Invalid pareto_scope. Use top80 or all")
    dim_col = _DIMENSION_COLUMN_MAP[normalized_dim]
    where_clause, params, meta = _build_where_clause(
        workcenter_groups=workcenter_groups,
        packages=packages,
        reasons=reasons,
        categories=categories,
        include_excluded_scrap=include_excluded_scrap,
        exclude_material_scrap=exclude_material_scrap,
        exclude_pb_diode=exclude_pb_diode,
    )
    sql = _prepare_sql(
        "dimension_pareto",
        where_clause=where_clause,
        metric_column=_metric_column(normalized_metric),
        dimension_column=dim_col,
    )
    df = read_sql_df(sql, _common_params(start_date, end_date, params))
    all_items = []
    if df is not None and not df.empty:
        for _, row in df.iterrows():
            # Grouped value is emitted under "reason" so the frontend chart
            # component can render any dimension without changes.
            all_items.append({
                "reason": _normalize_text(row.get("DIMENSION_VALUE")) or "(未知)",
                "metric_value": _as_float(row.get("METRIC_VALUE")),
                "MOVEIN_QTY": _as_int(row.get("MOVEIN_QTY")),
                "REJECT_TOTAL_QTY": _as_int(row.get("REJECT_TOTAL_QTY")),
                "DEFECT_QTY": _as_int(row.get("DEFECT_QTY")),
                "count": _as_int(row.get("AFFECTED_LOT_COUNT")),
                "pct": round(_as_float(row.get("PCT")), 4),
                "cumPct": round(_as_float(row.get("CUM_PCT")), 4),
            })
    items = list(all_items)
    if normalized_scope == "top80" and items:
        top_items = [item for item in items if _as_float(item.get("cumPct")) <= 80.0]
        # Always return at least the largest item, even if it alone exceeds 80%.
        if not top_items:
            top_items = [items[0]]
        items = top_items
    return {
        "items": items,
        "dimension": normalized_dim,
        "metric_mode": normalized_metric,
        "pareto_scope": normalized_scope,
        "meta": {
            **meta,
            "total_items_after_filter": len(all_items),
            "displayed_items": len(items),
        },
    }
def _apply_metric_filter(where_clause: str, metric_filter: str) -> str:
"""Append metric-type filter (reject / defect) to an existing WHERE clause."""
if metric_filter == "reject":
cond = "b.REJECT_TOTAL_QTY > 0"
elif metric_filter == "defect":
cond = "b.DEFECT_QTY > 0"
else:
return where_clause
if where_clause.strip():
return f"{where_clause} AND {cond}"
return f"WHERE {cond}"
def query_list(
    *,
    start_date: str,
    end_date: str,
    page: int = 1,
    per_page: int = 50,
    workcenter_groups: Optional[list[str]] = None,
    packages: Optional[list[str]] = None,
    reasons: Optional[list[str]] = None,
    categories: Optional[list[str]] = None,
    include_excluded_scrap: bool = False,
    exclude_material_scrap: bool = True,
    exclude_pb_diode: bool = True,
    metric_filter: str = "all",
) -> dict[str, Any]:
    """Paginated per-lot reject rows for the detail table view.

    Args:
        page: 1-based page index (clamped to >= 1).
        per_page: rows per page (clamped to 1..200).
        metric_filter: "reject", "defect", or "all" (see _apply_metric_filter).

    Returns:
        Dict with "items", "pagination" (page/perPage/total/totalPages), "meta".

    Raises:
        ValueError: when the date range is invalid.
    """
    _validate_range(start_date, end_date)
    # Clamp paging inputs to sane bounds before computing the OFFSET bind.
    page = max(int(page or 1), 1)
    per_page = min(max(int(per_page or 50), 1), 200)
    offset = (page - 1) * per_page
    where_clause, params, meta = _build_where_clause(
        workcenter_groups=workcenter_groups,
        packages=packages,
        reasons=reasons,
        categories=categories,
        include_excluded_scrap=include_excluded_scrap,
        exclude_material_scrap=exclude_material_scrap,
        exclude_pb_diode=exclude_pb_diode,
    )
    where_clause = _apply_metric_filter(where_clause, metric_filter)
    # "lot" variant: per-lot transaction rows rather than daily aggregates.
    sql = _prepare_sql("list", where_clause=where_clause, base_variant="lot")
    query_params = _common_params(
        start_date,
        end_date,
        {
            **params,
            "offset": offset,
            "limit": per_page,
        },
    )
    df = read_sql_df(sql, query_params)
    items = []
    total = 0
    if df is not None and not df.empty:
        # TOTAL_COUNT is read from the first row only — presumably a window
        # total repeated on every row; verify against list.sql if changed.
        total = _as_int(df.iloc[0].get("TOTAL_COUNT"))
        for _, row in df.iterrows():
            items.append(
                {
                    "TXN_TIME": _to_datetime_str(row.get("TXN_TIME")),
                    "TXN_DAY": _to_date_str(row.get("TXN_DAY")),
                    "TXN_MONTH": _normalize_text(row.get("TXN_MONTH")),
                    "WORKCENTER_GROUP": _normalize_text(row.get("WORKCENTER_GROUP")),
                    "WORKCENTERNAME": _normalize_text(row.get("WORKCENTERNAME")),
                    "SPECNAME": _normalize_text(row.get("SPECNAME")),
                    "WORKFLOWNAME": _normalize_text(row.get("WORKFLOWNAME")),
                    "EQUIPMENTNAME": _normalize_text(row.get("EQUIPMENTNAME")),
                    "PRODUCTLINENAME": _normalize_text(row.get("PRODUCTLINENAME")),
                    "PJ_TYPE": _normalize_text(row.get("PJ_TYPE")),
                    "CONTAINERNAME": _normalize_text(row.get("CONTAINERNAME")),
                    "PJ_FUNCTION": _normalize_text(row.get("PJ_FUNCTION")),
                    "PRODUCTNAME": _normalize_text(row.get("PRODUCTNAME")),
                    "LOSSREASONNAME": _normalize_text(row.get("LOSSREASONNAME")),
                    "LOSSREASON_CODE": _normalize_text(row.get("LOSSREASON_CODE")),
                    "REJECTCOMMENT": _normalize_text(row.get("REJECTCOMMENT")),
                    "MOVEIN_QTY": _as_int(row.get("MOVEIN_QTY")),
                    "REJECT_QTY": _as_int(row.get("REJECT_QTY")),
                    "STANDBY_QTY": _as_int(row.get("STANDBY_QTY")),
                    "QTYTOPROCESS_QTY": _as_int(row.get("QTYTOPROCESS_QTY")),
                    "INPROCESS_QTY": _as_int(row.get("INPROCESS_QTY")),
                    "PROCESSED_QTY": _as_int(row.get("PROCESSED_QTY")),
                    "REJECT_TOTAL_QTY": _as_int(row.get("REJECT_TOTAL_QTY")),
                    "DEFECT_QTY": _as_int(row.get("DEFECT_QTY")),
                    "REJECT_RATE_PCT": round(_as_float(row.get("REJECT_RATE_PCT")), 4),
                    "DEFECT_RATE_PCT": round(_as_float(row.get("DEFECT_RATE_PCT")), 4),
                    "REJECT_SHARE_PCT": round(_as_float(row.get("REJECT_SHARE_PCT")), 4),
                    "AFFECTED_WORKORDER_COUNT": _as_int(row.get("AFFECTED_WORKORDER_COUNT")),
                }
            )
    # Ceiling division; an empty result still reports one (empty) page.
    total_pages = max((total + per_page - 1) // per_page, 1) if total else 1
    return {
        "items": items,
        "pagination": {
            "page": page,
            "perPage": per_page,
            "total": total,
            "totalPages": total_pages,
        },
        "meta": meta,
    }
def export_csv(
    *,
    start_date: str,
    end_date: str,
    workcenter_groups: Optional[list[str]] = None,
    packages: Optional[list[str]] = None,
    reasons: Optional[list[str]] = None,
    categories: Optional[list[str]] = None,
    include_excluded_scrap: bool = False,
    exclude_material_scrap: bool = True,
    exclude_pb_diode: bool = True,
    metric_filter: str = "all",
) -> Generator[str, None, None]:
    """Stream the full (non-paginated) reject export as CSV text chunks.

    Column labels mix English and Chinese business terms; the ``headers``
    list below must stay in sync with the row-dict keys built here.

    Raises:
        ValueError: when the date range is invalid.
    """
    _validate_range(start_date, end_date)
    where_clause, params, _meta = _build_where_clause(
        workcenter_groups=workcenter_groups,
        packages=packages,
        reasons=reasons,
        categories=categories,
        include_excluded_scrap=include_excluded_scrap,
        exclude_material_scrap=exclude_material_scrap,
        exclude_pb_diode=exclude_pb_diode,
    )
    where_clause = _apply_metric_filter(where_clause, metric_filter)
    # Per-lot variant so each CSV row corresponds to one lot transaction.
    sql = _prepare_sql("export", where_clause=where_clause, base_variant="lot")
    df = read_sql_df(sql, _common_params(start_date, end_date, params))
    rows = []
    if df is not None and not df.empty:
        for _, row in df.iterrows():
            rows.append(
                {
                    "LOT": _normalize_text(row.get("CONTAINERNAME")),
                    "WORKCENTER": _normalize_text(row.get("WORKCENTERNAME")),
                    "WORKCENTER_GROUP": _normalize_text(row.get("WORKCENTER_GROUP")),
                    "Package": _normalize_text(row.get("PRODUCTLINENAME")),
                    "FUNCTION": _normalize_text(row.get("PJ_FUNCTION")),
                    "TYPE": _normalize_text(row.get("PJ_TYPE")),
                    "WORKFLOW": _normalize_text(row.get("WORKFLOWNAME")),
                    "PRODUCT": _normalize_text(row.get("PRODUCTNAME")),
                    "原因": _normalize_text(row.get("LOSSREASONNAME")),
                    "EQUIPMENT": _normalize_text(row.get("EQUIPMENTNAME")),
                    "COMMENT": _normalize_text(row.get("REJECTCOMMENT")),
                    "SPEC": _normalize_text(row.get("SPECNAME")),
                    "REJECT_QTY": _as_int(row.get("REJECT_QTY")),
                    "STANDBY_QTY": _as_int(row.get("STANDBY_QTY")),
                    "QTYTOPROCESS_QTY": _as_int(row.get("QTYTOPROCESS_QTY")),
                    "INPROCESS_QTY": _as_int(row.get("INPROCESS_QTY")),
                    "PROCESSED_QTY": _as_int(row.get("PROCESSED_QTY")),
                    "扣帳報廢量": _as_int(row.get("REJECT_TOTAL_QTY")),
                    "不扣帳報廢量": _as_int(row.get("DEFECT_QTY")),
                    "MOVEIN_QTY": _as_int(row.get("MOVEIN_QTY")),
                    "報廢時間": _to_datetime_str(row.get("TXN_TIME")),
                    "日期": _to_date_str(row.get("TXN_DAY")),
                }
            )
    # Header order defines CSV column order; keys must match the dicts above.
    headers = [
        "LOT",
        "WORKCENTER",
        "WORKCENTER_GROUP",
        "Package",
        "FUNCTION",
        "TYPE",
        "WORKFLOW",
        "PRODUCT",
        "原因",
        "EQUIPMENT",
        "COMMENT",
        "SPEC",
        "REJECT_QTY",
        "STANDBY_QTY",
        "QTYTOPROCESS_QTY",
        "INPROCESS_QTY",
        "PROCESSED_QTY",
        "扣帳報廢量",
        "不扣帳報廢量",
        "MOVEIN_QTY",
        "報廢時間",
        "日期",
    ]
    return _list_to_csv(rows, headers=headers)
def _derive_summary(df: pd.DataFrame) -> dict[str, Any]:
    """Collapse analytics rows into one KPI summary dict (zeros when empty)."""
    if df is None or df.empty:
        return {
            "MOVEIN_QTY": 0,
            "REJECT_TOTAL_QTY": 0,
            "DEFECT_QTY": 0,
            "REJECT_RATE_PCT": 0,
            "DEFECT_RATE_PCT": 0,
            "REJECT_SHARE_PCT": 0,
            "AFFECTED_LOT_COUNT": 0,
            "AFFECTED_WORKORDER_COUNT": 0,
        }
    totals = {
        column: _as_int(df[column].sum())
        for column in (
            "MOVEIN_QTY",
            "REJECT_TOTAL_QTY",
            "DEFECT_QTY",
            "AFFECTED_LOT_COUNT",
            "AFFECTED_WORKORDER_COUNT",
        )
    }
    movein = totals["MOVEIN_QTY"]
    reject_total = totals["REJECT_TOTAL_QTY"]
    defect = totals["DEFECT_QTY"]
    total_scrap = reject_total + defect
    return {
        "MOVEIN_QTY": movein,
        "REJECT_TOTAL_QTY": reject_total,
        "DEFECT_QTY": defect,
        "REJECT_RATE_PCT": round((reject_total / movein * 100) if movein else 0, 4),
        "DEFECT_RATE_PCT": round((defect / movein * 100) if movein else 0, 4),
        "REJECT_SHARE_PCT": round((reject_total / total_scrap * 100) if total_scrap else 0, 4),
        "AFFECTED_LOT_COUNT": totals["AFFECTED_LOT_COUNT"],
        "AFFECTED_WORKORDER_COUNT": totals["AFFECTED_WORKORDER_COUNT"],
    }
def _derive_trend(df: pd.DataFrame) -> list[dict[str, Any]]:
    """Group analytics rows by BUCKET_DATE into daily trend items."""
    if df is None or df.empty:
        return []
    daily = (
        df.groupby("BUCKET_DATE", sort=True)
        .agg(
            MOVEIN_QTY=("MOVEIN_QTY", "sum"),
            REJECT_TOTAL_QTY=("REJECT_TOTAL_QTY", "sum"),
            DEFECT_QTY=("DEFECT_QTY", "sum"),
        )
        .reset_index()
    )
    items = []
    for _, bucket in daily.iterrows():
        movein = _as_int(bucket["MOVEIN_QTY"])
        reject_total = _as_int(bucket["REJECT_TOTAL_QTY"])
        defect = _as_int(bucket["DEFECT_QTY"])
        # Rates are recomputed from the bucket totals, guarding div-by-zero.
        items.append({
            "bucket_date": _to_date_str(bucket["BUCKET_DATE"]),
            "MOVEIN_QTY": movein,
            "REJECT_TOTAL_QTY": reject_total,
            "DEFECT_QTY": defect,
            "REJECT_RATE_PCT": round((reject_total / movein * 100) if movein else 0, 4),
            "DEFECT_RATE_PCT": round((defect / movein * 100) if movein else 0, 4),
        })
    return items
def _derive_pareto(df: pd.DataFrame, metric_mode: str = "reject_total") -> list[dict[str, Any]]:
    """Group analytics rows by REASON into pareto items with PCT/CUM_PCT."""
    if df is None or df.empty:
        return []
    metric_col = "DEFECT_QTY" if metric_mode != "reject_total" else "REJECT_TOTAL_QTY"
    grouped = (
        df.groupby("REASON", sort=False)
        .agg(
            MOVEIN_QTY=("MOVEIN_QTY", "sum"),
            REJECT_TOTAL_QTY=("REJECT_TOTAL_QTY", "sum"),
            DEFECT_QTY=("DEFECT_QTY", "sum"),
            AFFECTED_LOT_COUNT=("AFFECTED_LOT_COUNT", "sum"),
        )
        .reset_index()
        .sort_values(metric_col, ascending=False)
        .reset_index(drop=True)
    )
    total_metric = _as_float(grouped[metric_col].sum())
    items = []
    cumulative = 0.0
    for _, entry in grouped.iterrows():
        metric_value = _as_float(entry[metric_col])
        share = round((metric_value / total_metric * 100) if total_metric else 0, 4)
        cumulative += share
        items.append({
            "reason": _normalize_text(entry["REASON"]) or "(未填寫)",
            "metric_value": metric_value,
            "MOVEIN_QTY": _as_int(entry["MOVEIN_QTY"]),
            "REJECT_TOTAL_QTY": _as_int(entry["REJECT_TOTAL_QTY"]),
            "DEFECT_QTY": _as_int(entry["DEFECT_QTY"]),
            "count": _as_int(entry["AFFECTED_LOT_COUNT"]),
            "pct": share,
            "cumPct": round(cumulative, 4),
        })
    return items
def _derive_raw_items(df: pd.DataFrame) -> list[dict[str, Any]]:
    """Per-(date, reason) rows so the client can re-derive views locally."""
    if df is None or df.empty:
        return []
    return [
        {
            "bucket_date": _to_date_str(row["BUCKET_DATE"]),
            "reason": _normalize_text(row["REASON"]) or "(未填寫)",
            "MOVEIN_QTY": _as_int(row["MOVEIN_QTY"]),
            "REJECT_TOTAL_QTY": _as_int(row["REJECT_TOTAL_QTY"]),
            "DEFECT_QTY": _as_int(row["DEFECT_QTY"]),
            "AFFECTED_LOT_COUNT": _as_int(row["AFFECTED_LOT_COUNT"]),
            "AFFECTED_WORKORDER_COUNT": _as_int(row["AFFECTED_WORKORDER_COUNT"]),
        }
        for _, row in df.iterrows()
    ]
def query_analytics(
    *,
    start_date: str,
    end_date: str,
    metric_mode: str = "reject_total",
    workcenter_groups: Optional[list[str]] = None,
    packages: Optional[list[str]] = None,
    reasons: Optional[list[str]] = None,
    categories: Optional[list[str]] = None,
    include_excluded_scrap: bool = False,
    exclude_material_scrap: bool = True,
    exclude_pb_diode: bool = True,
) -> dict[str, Any]:
    """Single DB query → summary + trend + pareto (replaces 3 separate queries).

    The per-(date, reason) analytics rows are fetched once, then aggregated in
    pandas into the summary, a daily trend, and a reason pareto; the raw rows
    are also returned so the client can re-derive views without new queries.

    Raises:
        ValueError: on an invalid date range or metric_mode.
    """
    _validate_range(start_date, end_date)
    normalized_metric = _normalize_text(metric_mode).lower() or "reject_total"
    if normalized_metric not in VALID_METRIC_MODE:
        raise ValueError("Invalid metric_mode. Use reject_total or defect")
    where_clause, params, meta = _build_where_clause(
        workcenter_groups=workcenter_groups,
        packages=packages,
        reasons=reasons,
        categories=categories,
        include_excluded_scrap=include_excluded_scrap,
        exclude_material_scrap=exclude_material_scrap,
        exclude_pb_diode=exclude_pb_diode,
    )
    sql = _prepare_sql("analytics", where_clause=where_clause)
    df = read_sql_df(sql, _common_params(start_date, end_date, params))
    return {
        "summary": _derive_summary(df),
        "trend": {
            "items": _derive_trend(df),
            # The analytics query buckets by day; other granularities go
            # through query_trend instead.
            "granularity": "day",
        },
        "pareto": {
            "items": _derive_pareto(df, normalized_metric),
            "metric_mode": normalized_metric,
        },
        "raw_items": _derive_raw_items(df),
        "meta": meta,
    }