Files
DashBoard/src/mes_dashboard/services/reject_history_service.py
egg cbb943dfe5 feat(trace-pool-isolation): migrate event_fetcher/lineage_engine to slow connections + fix 51 test failures
Trace pipeline pool isolation:
- Switch event_fetcher and lineage_engine to read_sql_df_slow (non-pooled)
- Reduce EVENT_FETCHER_MAX_WORKERS 4→2, TRACE_EVENTS_MAX_WORKERS 4→2
- Add 60s timeout per batch query, cache skip for CID>10K
- Early del raw_domain_results + gc.collect() for large queries
- Increase DB_SLOW_MAX_CONCURRENT: base 3→5, dev 2→3, prod 3→5

Test fixes (51 pre-existing failures → 0):
- reject_history: WORKFLOW CSV header, strict bool validation, pareto mock path
- portal shell: remove non-existent /tmtt-defect route from tests
- conftest: add --run-stress option to skip stress/load tests by default
- migration tests: skipif baseline directory missing
- performance test: update Vite asset assertion
- wip hold: add firstname/waferdesc mock params
- template integration: add /reject-history canonical route

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-25 16:13:19 +08:00

1044 lines
38 KiB
Python

# -*- coding: utf-8 -*-
"""Service layer for reject-history page APIs."""
from __future__ import annotations
import csv
import io
import logging
from datetime import date, datetime
from typing import Any, Dict, Generator, Iterable, Optional
import pandas as pd
from mes_dashboard.core.database import read_sql_df_slow as read_sql_df
from mes_dashboard.services.scrap_reason_exclusion_cache import get_excluded_reasons
from mes_dashboard.sql import QueryBuilder, SQLLoader
logger = logging.getLogger("mes_dashboard.reject_history_service")
MAX_QUERY_DAYS = 730
VALID_GRANULARITY = {"day", "week", "month"}
VALID_METRIC_MODE = {"reject_total", "defect"}
MATERIAL_REASON_OPTION = "原物料報廢"
def _parse_date(value: str) -> date:
return datetime.strptime(value, "%Y-%m-%d").date()
def _validate_range(start_date: str, end_date: str) -> None:
    """Ensure the window is chronologically ordered and within MAX_QUERY_DAYS."""
    start, end = _parse_date(start_date), _parse_date(end_date)
    if end < start:
        raise ValueError("end_date 不可早於 start_date")
    if (end - start).days > MAX_QUERY_DAYS:
        raise ValueError(f"日期範圍不可超過 {MAX_QUERY_DAYS}")
def _normalize_text(value: Any) -> str:
return str(value or "").strip()
def _as_int(value: Any) -> int:
if value is None:
return 0
try:
if pd.isna(value):
return 0
except Exception:
pass
try:
return int(float(value))
except (TypeError, ValueError):
return 0
def _as_float(value: Any) -> float:
if value is None:
return 0.0
try:
if pd.isna(value):
return 0.0
except Exception:
pass
try:
return float(value)
except (TypeError, ValueError):
return 0.0
def _to_date_str(value: Any) -> str:
if isinstance(value, datetime):
return value.strftime("%Y-%m-%d")
if isinstance(value, date):
return value.strftime("%Y-%m-%d")
if isinstance(value, pd.Timestamp):
return value.to_pydatetime().strftime("%Y-%m-%d")
text = _normalize_text(value)
if not text:
return ""
try:
return pd.to_datetime(text).strftime("%Y-%m-%d")
except Exception:
return text
def _to_datetime_str(value: Any) -> str:
if isinstance(value, datetime):
return value.strftime("%Y-%m-%d %H:%M:%S")
if isinstance(value, pd.Timestamp):
return value.to_pydatetime().strftime("%Y-%m-%d %H:%M:%S")
if isinstance(value, date):
return value.strftime("%Y-%m-%d")
text = _normalize_text(value)
if not text:
return ""
try:
return pd.to_datetime(text).strftime("%Y-%m-%d %H:%M:%S")
except Exception:
return text
def _date_bucket_expr(granularity: str) -> str:
if granularity == "week":
return "TRUNC(b.TXN_DAY, 'IW')"
if granularity == "month":
return "TRUNC(b.TXN_DAY, 'MM')"
return "TRUNC(b.TXN_DAY)"
def _metric_column(metric_mode: str) -> str:
if metric_mode == "defect":
return "b.DEFECT_QTY"
return "b.REJECT_TOTAL_QTY"
def _load_sql(name: str) -> str:
    """Load a SQL template from the reject_history template folder."""
    template_path = f"reject_history/{name}"
    return SQLLoader.load(template_path)
def _base_query_sql(variant: str = "") -> str:
    """Return the daily (or per-lot) base SQL with leading comment lines removed.

    Stripping leading "--" comments and blank lines matters because the
    WITH-splitting logic downstream must see the first real SQL token first.
    """
    sql_name = "performance_daily_lot" if variant == "lot" else "performance_daily"
    raw = _load_sql(sql_name).strip().rstrip(";")
    lines = raw.splitlines()
    start = 0
    for position, line in enumerate(lines):
        stripped = line.strip()
        if stripped and not stripped.startswith("--"):
            start = position
            break
    return "\n".join(lines[start:]).strip()
def _split_with_query(sql: str) -> tuple[str, str] | None:
"""Split a top-level WITH query into (cte_segment, final_select)."""
text = (sql or "").strip()
if not text.lower().startswith("with "):
return None
depth = 0
in_string = False
i = 0
while i < len(text):
ch = text[i]
if ch == "'":
if in_string and i + 1 < len(text) and text[i + 1] == "'":
i += 2
continue
in_string = not in_string
i += 1
continue
if in_string:
i += 1
continue
if ch == "(":
depth += 1
elif ch == ")":
depth = max(depth - 1, 0)
elif depth == 0:
head = text[i : i + 6]
if head.lower() == "select":
prev_ok = i == 0 or not (text[i - 1].isalnum() or text[i - 1] == "_")
next_idx = i + 6
next_ok = next_idx >= len(text) or not (
text[next_idx].isalnum() or text[next_idx] == "_"
)
if prev_ok and next_ok:
cte_segment = text[5:i].strip().rstrip(",")
final_select = text[i:].strip()
if cte_segment and final_select:
return cte_segment, final_select
return None
i += 1
return None
def _base_with_cte_sql(alias: str = "base", variant: str = "") -> str:
    """Wrap the base query as a named CTE, merging into an existing WITH if present."""
    base_sql = _base_query_sql(variant)
    parts = _split_with_query(base_sql)
    if parts is None:
        # Plain SELECT: wrap the whole statement as a single CTE.
        return f"WITH {alias} AS (\n{base_sql}\n)"
    cte_segment, final_select = parts
    # Already a WITH query: append our alias as one more CTE after the others.
    return f"WITH {cte_segment},\n{alias} AS (\n{final_select}\n)"
def _build_where_clause(
    *,
    workcenter_groups: Optional[list[str]] = None,
    packages: Optional[list[str]] = None,
    reasons: Optional[list[str]] = None,
    categories: Optional[list[str]] = None,
    include_excluded_scrap: bool = False,
    exclude_material_scrap: bool = True,
    exclude_pb_diode: bool = True,
) -> tuple[str, dict[str, Any], dict[str, Any]]:
    """Translate UI filter selections into (where_clause, bind_params, meta).

    The generated predicates target the ``b`` alias of the base CTE. ``meta``
    records which filters/policies were actually applied so API responses can
    explain the data scope to the UI.
    """
    builder = QueryBuilder()
    # Normalize every selection: strip, drop empties, dedupe, sort for stable SQL.
    normalized_wc_groups = sorted({_normalize_text(v) for v in (workcenter_groups or []) if _normalize_text(v)})
    normalized_packages = sorted({_normalize_text(v) for v in (packages or []) if _normalize_text(v)})
    normalized_reasons = sorted({_normalize_text(v) for v in (reasons or []) if _normalize_text(v)})
    # The "material scrap" reason option is virtual: it maps to SCRAP_OBJECTTYPE,
    # not to LOSSREASONNAME, so it is filtered out of the reason-name list.
    material_reason_selected = MATERIAL_REASON_OPTION in normalized_reasons
    reason_name_filters = [value for value in normalized_reasons if value != MATERIAL_REASON_OPTION]
    normalized_categories = sorted({_normalize_text(v) for v in (categories or []) if _normalize_text(v)})
    if normalized_wc_groups:
        # Local import: avoids a module-level import cycle with filter_cache.
        from mes_dashboard.services.filter_cache import get_specs_for_groups
        specs_in_groups = get_specs_for_groups(normalized_wc_groups)
        if specs_in_groups:
            # Specs in cache are uppercase; use UPPER() for case-insensitive match
            builder.add_in_condition("UPPER(b.SPECNAME)", specs_in_groups)
        else:
            # Fallback: cache not ready or no specs found for these groups
            builder.add_in_condition("b.WORKCENTER_GROUP", normalized_wc_groups)
    if normalized_packages:
        builder.add_in_condition("b.PRODUCTLINENAME", normalized_packages)
    if reason_name_filters:
        builder.add_in_condition("b.LOSSREASONNAME", reason_name_filters)
    if material_reason_selected:
        builder.add_condition("UPPER(NVL(TRIM(b.SCRAP_OBJECTTYPE), '-')) = 'MATERIAL'")
    material_exclusion_applied = False
    # Exclude material scrap rows unless the user explicitly asked for them.
    if exclude_material_scrap and not material_reason_selected:
        builder.add_condition("UPPER(NVL(TRIM(b.SCRAP_OBJECTTYPE), '-')) <> 'MATERIAL'")
        material_exclusion_applied = True
    pb_diode_exclusion_applied = False
    # Skip the PB_ exclusion when the user explicitly selected a PB_ package.
    if exclude_pb_diode and not any(p.startswith("PB_") for p in normalized_packages):
        builder.add_condition("b.PRODUCTLINENAME NOT LIKE 'PB\\_%' ESCAPE '\\'")
        pb_diode_exclusion_applied = True
    if normalized_categories:
        builder.add_in_condition("b.REJECTCATEGORYNAME", normalized_categories)
    exclusions_applied = False
    excluded_reason_codes = []
    reason_name_prefix_policy_applied = False
    if not include_excluded_scrap:
        excluded_reason_codes = sorted(get_excluded_reasons())
        reason_name_prefix_policy_applied = True
        if excluded_reason_codes:
            exclusions_applied = True
            # Support exclusion matching by both normalized reason code and full reason text.
            builder.add_not_in_condition(
                "UPPER(NVL(TRIM(b.LOSSREASON_CODE), '-'))",
                excluded_reason_codes,
            )
            builder.add_not_in_condition(
                "UPPER(NVL(TRIM(b.LOSSREASONNAME), '-'))",
                excluded_reason_codes,
            )
        # Exclude reason labels that are not "NNN_*", and always exclude XXX_/ZZZ_ prefixes.
        builder.add_condition(
            "REGEXP_LIKE(UPPER(NVL(TRIM(b.LOSSREASONNAME), '')), '^[0-9]{3}_')"
        )
        builder.add_condition(
            "NOT REGEXP_LIKE(UPPER(NVL(TRIM(b.LOSSREASONNAME), '')), '^(XXX|ZZZ)_')"
        )
        exclusions_applied = True
    where_clause, params = builder.build_where_only()
    # Meta mirrors what was actually applied for transparency in API responses.
    meta = {
        "include_excluded_scrap": bool(include_excluded_scrap),
        "exclusion_applied": exclusions_applied,
        "reason_name_prefix_policy_applied": reason_name_prefix_policy_applied,
        "exclude_material_scrap": bool(exclude_material_scrap),
        "material_exclusion_applied": material_exclusion_applied,
        "excluded_reason_count": len(excluded_reason_codes),
        "workcenter_group_count": len(normalized_wc_groups),
        "package_filter_count": len(normalized_packages),
        "reason_filter_count": len(reason_name_filters),
        "material_reason_selected": material_reason_selected,
        "exclude_pb_diode": bool(exclude_pb_diode),
        "pb_diode_exclusion_applied": pb_diode_exclusion_applied,
    }
    return where_clause, params, meta
# Default date-window predicate substituted into the base query (``r`` alias).
# The end bound is exclusive ("< end_date + 1") so the final day is fully included.
_DEFAULT_BASE_WHERE = (
    "r.TXNDATE >= TO_DATE(:start_date, 'YYYY-MM-DD')"
    " AND r.TXNDATE < TO_DATE(:end_date, 'YYYY-MM-DD') + 1"
)
def _prepare_sql(
    name: str,
    *,
    where_clause: str = "",
    bucket_expr: str = "",
    metric_column: str = "",
    base_variant: str = "",
    base_where: str = "",
    dimension_column: str = "",
) -> str:
    """Load a SQL template and substitute every ``{{ ... }}`` placeholder.

    Replacements are applied in a fixed order so placeholders introduced by
    an earlier substitution (e.g. inside the base query) still get resolved.
    """
    replacements = {
        "{{ BASE_QUERY }}": _base_query_sql(base_variant),
        "{{ BASE_WITH_CTE }}": _base_with_cte_sql("base", base_variant),
        "{{ BASE_WHERE }}": base_where or _DEFAULT_BASE_WHERE,
        "{{ WHERE_CLAUSE }}": where_clause or "",
        "{{ BUCKET_EXPR }}": bucket_expr or "TRUNC(b.TXN_DAY)",
        "{{ METRIC_COLUMN }}": metric_column or "b.REJECT_TOTAL_QTY",
        "{{ DIMENSION_COLUMN }}": dimension_column or "b.LOSSREASONNAME",
    }
    sql = _load_sql(name)
    for placeholder, replacement in replacements.items():
        sql = sql.replace(placeholder, replacement)
    return sql
def _common_params(start_date: str, end_date: str, extra: Optional[dict[str, Any]] = None) -> dict[str, Any]:
params = {"start_date": start_date, "end_date": end_date}
if extra:
params.update(extra)
return params
def _list_to_csv(
rows: Iterable[dict[str, Any]],
headers: list[str],
) -> Generator[str, None, None]:
# BOM for UTF-8 so Excel opens the CSV with correct encoding
yield "\ufeff"
buffer = io.StringIO()
writer = csv.DictWriter(buffer, fieldnames=headers)
writer.writeheader()
yield buffer.getvalue()
buffer.seek(0)
buffer.truncate(0)
for row in rows:
writer.writerow(row)
yield buffer.getvalue()
buffer.seek(0)
buffer.truncate(0)
def _extract_distinct_text_values(df: pd.DataFrame, column: str) -> list[str]:
if df is None or df.empty or column not in df.columns:
return []
return sorted({
_normalize_text(value)
for value in df[column].dropna()
if _normalize_text(value)
})
def _extract_workcenter_group_options(df: pd.DataFrame) -> list[dict[str, Any]]:
    """Workcenter-group options ordered by their minimum sequence number, then name."""
    if df is None or df.empty or "WORKCENTER_GROUP" not in df.columns:
        return []
    best_sequence: dict[str, int] = {}
    for _, row in df.iterrows():
        name = _normalize_text(row.get("WORKCENTER_GROUP"))
        if not name:
            continue
        sequence = _as_int(row.get("WORKCENTERSEQUENCE_GROUP"))
        # Keep the smallest sequence seen for each group name.
        current = best_sequence.get(name)
        best_sequence[name] = sequence if current is None else min(current, sequence)
    ordered = sorted(best_sequence, key=lambda item: (best_sequence[item], item))
    return [{"name": name, "sequence": best_sequence[name]} for name in ordered]
def _has_material_scrap(df: pd.DataFrame) -> bool:
if df is None or df.empty or "SCRAP_OBJECTTYPE" not in df.columns:
return False
return (
df["SCRAP_OBJECTTYPE"]
.map(lambda value: _normalize_text(value).upper())
.eq("MATERIAL")
.any()
)
def get_filter_options(
    *,
    start_date: str,
    end_date: str,
    workcenter_groups: Optional[list[str]] = None,
    packages: Optional[list[str]] = None,
    reasons: Optional[list[str]] = None,
    categories: Optional[list[str]] = None,
    include_excluded_scrap: bool = False,
    exclude_material_scrap: bool = True,
    exclude_pb_diode: bool = True,
) -> dict[str, Any]:
    """Return filter options for the current draft context with one SQL round-trip."""
    _validate_range(start_date, end_date)
    where_clause, params, meta = _build_where_clause(
        workcenter_groups=workcenter_groups,
        packages=packages,
        reasons=reasons,
        categories=categories,
        include_excluded_scrap=include_excluded_scrap,
        exclude_material_scrap=exclude_material_scrap,
        exclude_pb_diode=exclude_pb_diode,
    )
    df = read_sql_df(
        _prepare_sql("filter_options", where_clause=where_clause),
        _common_params(start_date, end_date, params),
    )
    if df is None:
        df = pd.DataFrame()
    reason_options = _extract_distinct_text_values(df, "REASON")
    # Surface the virtual "material scrap" reason when such rows exist in range.
    if _has_material_scrap(df) and MATERIAL_REASON_OPTION not in reason_options:
        reason_options.append(MATERIAL_REASON_OPTION)
    return {
        "workcenter_groups": _extract_workcenter_group_options(df),
        "packages": _extract_distinct_text_values(df, "PACKAGE"),
        "reasons": reason_options,
        "meta": meta,
    }
def query_summary(
    *,
    start_date: str,
    end_date: str,
    workcenter_groups: Optional[list[str]] = None,
    packages: Optional[list[str]] = None,
    reasons: Optional[list[str]] = None,
    categories: Optional[list[str]] = None,
    include_excluded_scrap: bool = False,
    exclude_material_scrap: bool = True,
    exclude_pb_diode: bool = True,
) -> dict[str, Any]:
    """One-row KPI summary for the current filter context."""
    _validate_range(start_date, end_date)
    where_clause, params, meta = _build_where_clause(
        workcenter_groups=workcenter_groups,
        packages=packages,
        reasons=reasons,
        categories=categories,
        include_excluded_scrap=include_excluded_scrap,
        exclude_material_scrap=exclude_material_scrap,
        exclude_pb_diode=exclude_pb_diode,
    )
    sql = _prepare_sql("summary", where_clause=where_clause)
    df = read_sql_df(sql, _common_params(start_date, end_date, params))
    # The summary query yields a single row; fall back to an empty mapping.
    if df is None or df.empty:
        row = {}
    else:
        row = df.iloc[0]
    return {
        "MOVEIN_QTY": _as_int(row.get("MOVEIN_QTY")),
        "REJECT_TOTAL_QTY": _as_int(row.get("REJECT_TOTAL_QTY")),
        "DEFECT_QTY": _as_int(row.get("DEFECT_QTY")),
        "REJECT_RATE_PCT": round(_as_float(row.get("REJECT_RATE_PCT")), 4),
        "DEFECT_RATE_PCT": round(_as_float(row.get("DEFECT_RATE_PCT")), 4),
        "REJECT_SHARE_PCT": round(_as_float(row.get("REJECT_SHARE_PCT")), 4),
        "AFFECTED_LOT_COUNT": _as_int(row.get("AFFECTED_LOT_COUNT")),
        "AFFECTED_WORKORDER_COUNT": _as_int(row.get("AFFECTED_WORKORDER_COUNT")),
        "meta": meta,
    }
def query_trend(
    *,
    start_date: str,
    end_date: str,
    granularity: str = "day",
    workcenter_groups: Optional[list[str]] = None,
    packages: Optional[list[str]] = None,
    reasons: Optional[list[str]] = None,
    categories: Optional[list[str]] = None,
    include_excluded_scrap: bool = False,
    exclude_material_scrap: bool = True,
    exclude_pb_diode: bool = True,
) -> dict[str, Any]:
    """Time-bucketed reject/defect trend for the current filter context.

    Raises:
        ValueError: on an invalid date range or granularity.
    """
    _validate_range(start_date, end_date)
    normalized_granularity = _normalize_text(granularity).lower() or "day"
    if normalized_granularity not in VALID_GRANULARITY:
        raise ValueError("Invalid granularity. Use day, week, or month")
    where_clause, params, meta = _build_where_clause(
        workcenter_groups=workcenter_groups,
        packages=packages,
        reasons=reasons,
        categories=categories,
        include_excluded_scrap=include_excluded_scrap,
        exclude_material_scrap=exclude_material_scrap,
        exclude_pb_diode=exclude_pb_diode,
    )
    sql = _prepare_sql(
        "trend",
        where_clause=where_clause,
        bucket_expr=_date_bucket_expr(normalized_granularity),
    )
    df = read_sql_df(sql, _common_params(start_date, end_date, params))
    source_rows = [] if df is None or df.empty else [row for _, row in df.iterrows()]
    items = [
        {
            "bucket_date": _to_date_str(row.get("BUCKET_DATE")),
            "MOVEIN_QTY": _as_int(row.get("MOVEIN_QTY")),
            "REJECT_TOTAL_QTY": _as_int(row.get("REJECT_TOTAL_QTY")),
            "DEFECT_QTY": _as_int(row.get("DEFECT_QTY")),
            "REJECT_RATE_PCT": round(_as_float(row.get("REJECT_RATE_PCT")), 4),
            "DEFECT_RATE_PCT": round(_as_float(row.get("DEFECT_RATE_PCT")), 4),
        }
        for row in source_rows
    ]
    return {
        "items": items,
        "granularity": normalized_granularity,
        "meta": meta,
    }
def query_reason_pareto(
    *,
    start_date: str,
    end_date: str,
    metric_mode: str = "reject_total",
    pareto_scope: str = "top80",
    workcenter_groups: Optional[list[str]] = None,
    packages: Optional[list[str]] = None,
    reasons: Optional[list[str]] = None,
    categories: Optional[list[str]] = None,
    include_excluded_scrap: bool = False,
    exclude_material_scrap: bool = True,
    exclude_pb_diode: bool = True,
) -> dict[str, Any]:
    """Pareto of scrap reasons ranked by the chosen metric.

    Args:
        metric_mode: "reject_total" or "defect" — selects the SQL metric column.
        pareto_scope: "top80" keeps items with cumulative share <= 80%;
            "all" returns every row.

    Returns:
        Dict with "items", the echoed mode/scope, and query "meta"
        (including item counts before/after the top-80 cut).

    Raises:
        ValueError: on invalid date range, metric_mode, or pareto_scope.
    """
    _validate_range(start_date, end_date)
    normalized_metric = _normalize_text(metric_mode).lower() or "reject_total"
    if normalized_metric not in VALID_METRIC_MODE:
        raise ValueError("Invalid metric_mode. Use reject_total or defect")
    normalized_scope = _normalize_text(pareto_scope).lower() or "top80"
    if normalized_scope not in {"top80", "all"}:
        raise ValueError("Invalid pareto_scope. Use top80 or all")
    where_clause, params, meta = _build_where_clause(
        workcenter_groups=workcenter_groups,
        packages=packages,
        reasons=reasons,
        categories=categories,
        include_excluded_scrap=include_excluded_scrap,
        exclude_material_scrap=exclude_material_scrap,
        exclude_pb_diode=exclude_pb_diode,
    )
    sql = _prepare_sql(
        "reason_pareto",
        where_clause=where_clause,
        metric_column=_metric_column(normalized_metric),
    )
    df = read_sql_df(sql, _common_params(start_date, end_date, params))
    all_items = []
    if df is not None and not df.empty:
        # PCT/CUM_PCT are computed by the SQL template; rows arrive pre-ranked.
        for _, row in df.iterrows():
            all_items.append(
                {
                    "reason": _normalize_text(row.get("REASON")) or "(未填寫)",
                    "metric_value": _as_float(row.get("METRIC_VALUE")),
                    "MOVEIN_QTY": _as_int(row.get("MOVEIN_QTY")),
                    "REJECT_TOTAL_QTY": _as_int(row.get("REJECT_TOTAL_QTY")),
                    "DEFECT_QTY": _as_int(row.get("DEFECT_QTY")),
                    "count": _as_int(row.get("AFFECTED_LOT_COUNT")),
                    "pct": round(_as_float(row.get("PCT")), 4),
                    "cumPct": round(_as_float(row.get("CUM_PCT")), 4),
                }
            )
    items = list(all_items)
    if normalized_scope == "top80" and items:
        top_items = [item for item in items if _as_float(item.get("cumPct")) <= 80.0]
        # Keep strict top-80% behavior, but still return one row when first item already exceeds 80%.
        if not top_items:
            top_items = [items[0]]
        items = top_items
    return {
        "items": items,
        "metric_mode": normalized_metric,
        "pareto_scope": normalized_scope,
        "meta": {
            **meta,
            "total_items_after_filter": len(all_items),
            "displayed_items": len(items),
        },
    }
# Allowed dimension → SQL column mapping for dimension_pareto.
# Keys are the public API dimension names; values are columns on the base CTE
# alias ``b``. Acts as the whitelist that blocks arbitrary column injection.
_DIMENSION_COLUMN_MAP = {
    "reason": "b.LOSSREASONNAME",
    "package": "b.PRODUCTLINENAME",
    "type": "b.PJ_TYPE",
    "workflow": "b.WORKFLOWNAME",
    "workcenter": "b.WORKCENTER_GROUP",
    "equipment": "b.PRIMARY_EQUIPMENTNAME",
}
def query_dimension_pareto(
    *,
    start_date: str,
    end_date: str,
    dimension: str = "reason",
    metric_mode: str = "reject_total",
    pareto_scope: str = "top80",
    workcenter_groups: Optional[list[str]] = None,
    packages: Optional[list[str]] = None,
    reasons: Optional[list[str]] = None,
    categories: Optional[list[str]] = None,
    include_excluded_scrap: bool = False,
    exclude_material_scrap: bool = True,
    exclude_pb_diode: bool = True,
) -> dict[str, Any]:
    """Pareto chart grouped by an arbitrary dimension (reason, package, type, workcenter, equipment).

    Args:
        dimension: one of the keys in _DIMENSION_COLUMN_MAP.
        metric_mode: "reject_total" or "defect".
        pareto_scope: "top80" or "all".

    Returns:
        Dict with "items" (grouped values keyed under "reason" for frontend
        compatibility), the echoed dimension/mode/scope, and query "meta".

    Raises:
        ValueError: on invalid date range, dimension, metric_mode, or pareto_scope.
    """
    _validate_range(start_date, end_date)
    normalized_dim = _normalize_text(dimension).lower() or "reason"
    # Whitelist lookup also protects against SQL injection via the dimension name.
    if normalized_dim not in _DIMENSION_COLUMN_MAP:
        raise ValueError(f"Invalid dimension '{dimension}'. Use: {', '.join(_DIMENSION_COLUMN_MAP)}")
    # For reason dimension, delegate to existing optimized function
    if normalized_dim == "reason":
        return query_reason_pareto(
            start_date=start_date, end_date=end_date,
            metric_mode=metric_mode, pareto_scope=pareto_scope,
            workcenter_groups=workcenter_groups, packages=packages,
            reasons=reasons, categories=categories,
            include_excluded_scrap=include_excluded_scrap,
            exclude_material_scrap=exclude_material_scrap,
            exclude_pb_diode=exclude_pb_diode,
        )
    normalized_metric = _normalize_text(metric_mode).lower() or "reject_total"
    if normalized_metric not in VALID_METRIC_MODE:
        raise ValueError("Invalid metric_mode. Use reject_total or defect")
    normalized_scope = _normalize_text(pareto_scope).lower() or "top80"
    if normalized_scope not in {"top80", "all"}:
        raise ValueError("Invalid pareto_scope. Use top80 or all")
    dim_col = _DIMENSION_COLUMN_MAP[normalized_dim]
    where_clause, params, meta = _build_where_clause(
        workcenter_groups=workcenter_groups,
        packages=packages,
        reasons=reasons,
        categories=categories,
        include_excluded_scrap=include_excluded_scrap,
        exclude_material_scrap=exclude_material_scrap,
        exclude_pb_diode=exclude_pb_diode,
    )
    sql = _prepare_sql(
        "dimension_pareto",
        where_clause=where_clause,
        metric_column=_metric_column(normalized_metric),
        dimension_column=dim_col,
    )
    df = read_sql_df(sql, _common_params(start_date, end_date, params))
    all_items = []
    if df is not None and not df.empty:
        for _, row in df.iterrows():
            # Grouped value is emitted under "reason" so the frontend chart
            # component can render any dimension without changes.
            all_items.append({
                "reason": _normalize_text(row.get("DIMENSION_VALUE")) or "(未知)",
                "metric_value": _as_float(row.get("METRIC_VALUE")),
                "MOVEIN_QTY": _as_int(row.get("MOVEIN_QTY")),
                "REJECT_TOTAL_QTY": _as_int(row.get("REJECT_TOTAL_QTY")),
                "DEFECT_QTY": _as_int(row.get("DEFECT_QTY")),
                "count": _as_int(row.get("AFFECTED_LOT_COUNT")),
                "pct": round(_as_float(row.get("PCT")), 4),
                "cumPct": round(_as_float(row.get("CUM_PCT")), 4),
            })
    items = list(all_items)
    if normalized_scope == "top80" and items:
        top_items = [item for item in items if _as_float(item.get("cumPct")) <= 80.0]
        # Always return at least the largest item, even if it alone exceeds 80%.
        if not top_items:
            top_items = [items[0]]
        items = top_items
    return {
        "items": items,
        "dimension": normalized_dim,
        "metric_mode": normalized_metric,
        "pareto_scope": normalized_scope,
        "meta": {
            **meta,
            "total_items_after_filter": len(all_items),
            "displayed_items": len(items),
        },
    }
def _apply_metric_filter(where_clause: str, metric_filter: str) -> str:
"""Append metric-type filter (reject / defect) to an existing WHERE clause."""
if metric_filter == "reject":
cond = "b.REJECT_TOTAL_QTY > 0"
elif metric_filter == "defect":
cond = "b.DEFECT_QTY > 0"
else:
return where_clause
if where_clause.strip():
return f"{where_clause} AND {cond}"
return f"WHERE {cond}"
def query_list(
    *,
    start_date: str,
    end_date: str,
    page: int = 1,
    per_page: int = 50,
    workcenter_groups: Optional[list[str]] = None,
    packages: Optional[list[str]] = None,
    reasons: Optional[list[str]] = None,
    categories: Optional[list[str]] = None,
    include_excluded_scrap: bool = False,
    exclude_material_scrap: bool = True,
    exclude_pb_diode: bool = True,
    metric_filter: str = "all",
) -> dict[str, Any]:
    """Paginated per-lot reject rows for the detail table view.

    Args:
        page: 1-based page index (clamped to >= 1).
        per_page: rows per page (clamped to 1..200).
        metric_filter: "reject", "defect", or "all" (see _apply_metric_filter).

    Returns:
        Dict with "items", "pagination" (page/perPage/total/totalPages), "meta".

    Raises:
        ValueError: when the date range is invalid.
    """
    _validate_range(start_date, end_date)
    # Clamp paging inputs to sane bounds before computing the OFFSET bind.
    page = max(int(page or 1), 1)
    per_page = min(max(int(per_page or 50), 1), 200)
    offset = (page - 1) * per_page
    where_clause, params, meta = _build_where_clause(
        workcenter_groups=workcenter_groups,
        packages=packages,
        reasons=reasons,
        categories=categories,
        include_excluded_scrap=include_excluded_scrap,
        exclude_material_scrap=exclude_material_scrap,
        exclude_pb_diode=exclude_pb_diode,
    )
    where_clause = _apply_metric_filter(where_clause, metric_filter)
    # "lot" variant: per-lot transaction rows rather than daily aggregates.
    sql = _prepare_sql("list", where_clause=where_clause, base_variant="lot")
    query_params = _common_params(
        start_date,
        end_date,
        {
            **params,
            "offset": offset,
            "limit": per_page,
        },
    )
    df = read_sql_df(sql, query_params)
    items = []
    total = 0
    if df is not None and not df.empty:
        # TOTAL_COUNT is read from the first row only — presumably a window
        # total repeated on every row; verify against list.sql if changed.
        total = _as_int(df.iloc[0].get("TOTAL_COUNT"))
        for _, row in df.iterrows():
            items.append(
                {
                    "TXN_TIME": _to_datetime_str(row.get("TXN_TIME")),
                    "TXN_DAY": _to_date_str(row.get("TXN_DAY")),
                    "TXN_MONTH": _normalize_text(row.get("TXN_MONTH")),
                    "WORKCENTER_GROUP": _normalize_text(row.get("WORKCENTER_GROUP")),
                    "WORKCENTERNAME": _normalize_text(row.get("WORKCENTERNAME")),
                    "SPECNAME": _normalize_text(row.get("SPECNAME")),
                    "WORKFLOWNAME": _normalize_text(row.get("WORKFLOWNAME")),
                    "EQUIPMENTNAME": _normalize_text(row.get("EQUIPMENTNAME")),
                    "PRODUCTLINENAME": _normalize_text(row.get("PRODUCTLINENAME")),
                    "PJ_TYPE": _normalize_text(row.get("PJ_TYPE")),
                    "CONTAINERNAME": _normalize_text(row.get("CONTAINERNAME")),
                    "PJ_FUNCTION": _normalize_text(row.get("PJ_FUNCTION")),
                    "PRODUCTNAME": _normalize_text(row.get("PRODUCTNAME")),
                    "LOSSREASONNAME": _normalize_text(row.get("LOSSREASONNAME")),
                    "LOSSREASON_CODE": _normalize_text(row.get("LOSSREASON_CODE")),
                    "REJECTCOMMENT": _normalize_text(row.get("REJECTCOMMENT")),
                    "MOVEIN_QTY": _as_int(row.get("MOVEIN_QTY")),
                    "REJECT_QTY": _as_int(row.get("REJECT_QTY")),
                    "STANDBY_QTY": _as_int(row.get("STANDBY_QTY")),
                    "QTYTOPROCESS_QTY": _as_int(row.get("QTYTOPROCESS_QTY")),
                    "INPROCESS_QTY": _as_int(row.get("INPROCESS_QTY")),
                    "PROCESSED_QTY": _as_int(row.get("PROCESSED_QTY")),
                    "REJECT_TOTAL_QTY": _as_int(row.get("REJECT_TOTAL_QTY")),
                    "DEFECT_QTY": _as_int(row.get("DEFECT_QTY")),
                    "REJECT_RATE_PCT": round(_as_float(row.get("REJECT_RATE_PCT")), 4),
                    "DEFECT_RATE_PCT": round(_as_float(row.get("DEFECT_RATE_PCT")), 4),
                    "REJECT_SHARE_PCT": round(_as_float(row.get("REJECT_SHARE_PCT")), 4),
                    "AFFECTED_WORKORDER_COUNT": _as_int(row.get("AFFECTED_WORKORDER_COUNT")),
                }
            )
    # Ceiling division; an empty result still reports one (empty) page.
    total_pages = max((total + per_page - 1) // per_page, 1) if total else 1
    return {
        "items": items,
        "pagination": {
            "page": page,
            "perPage": per_page,
            "total": total,
            "totalPages": total_pages,
        },
        "meta": meta,
    }
def export_csv(
    *,
    start_date: str,
    end_date: str,
    workcenter_groups: Optional[list[str]] = None,
    packages: Optional[list[str]] = None,
    reasons: Optional[list[str]] = None,
    categories: Optional[list[str]] = None,
    include_excluded_scrap: bool = False,
    exclude_material_scrap: bool = True,
    exclude_pb_diode: bool = True,
    metric_filter: str = "all",
) -> Generator[str, None, None]:
    """Stream the full (non-paginated) reject export as CSV text chunks.

    Column labels mix English and Chinese business terms; the ``headers``
    list below must stay in sync with the row-dict keys built here.

    Raises:
        ValueError: when the date range is invalid.
    """
    _validate_range(start_date, end_date)
    where_clause, params, _meta = _build_where_clause(
        workcenter_groups=workcenter_groups,
        packages=packages,
        reasons=reasons,
        categories=categories,
        include_excluded_scrap=include_excluded_scrap,
        exclude_material_scrap=exclude_material_scrap,
        exclude_pb_diode=exclude_pb_diode,
    )
    where_clause = _apply_metric_filter(where_clause, metric_filter)
    # Per-lot variant so each CSV row corresponds to one lot transaction.
    sql = _prepare_sql("export", where_clause=where_clause, base_variant="lot")
    df = read_sql_df(sql, _common_params(start_date, end_date, params))
    rows = []
    if df is not None and not df.empty:
        for _, row in df.iterrows():
            rows.append(
                {
                    "LOT": _normalize_text(row.get("CONTAINERNAME")),
                    "WORKCENTER": _normalize_text(row.get("WORKCENTERNAME")),
                    "WORKCENTER_GROUP": _normalize_text(row.get("WORKCENTER_GROUP")),
                    "Package": _normalize_text(row.get("PRODUCTLINENAME")),
                    "FUNCTION": _normalize_text(row.get("PJ_FUNCTION")),
                    "TYPE": _normalize_text(row.get("PJ_TYPE")),
                    "WORKFLOW": _normalize_text(row.get("WORKFLOWNAME")),
                    "PRODUCT": _normalize_text(row.get("PRODUCTNAME")),
                    "原因": _normalize_text(row.get("LOSSREASONNAME")),
                    "EQUIPMENT": _normalize_text(row.get("EQUIPMENTNAME")),
                    "COMMENT": _normalize_text(row.get("REJECTCOMMENT")),
                    "SPEC": _normalize_text(row.get("SPECNAME")),
                    "REJECT_QTY": _as_int(row.get("REJECT_QTY")),
                    "STANDBY_QTY": _as_int(row.get("STANDBY_QTY")),
                    "QTYTOPROCESS_QTY": _as_int(row.get("QTYTOPROCESS_QTY")),
                    "INPROCESS_QTY": _as_int(row.get("INPROCESS_QTY")),
                    "PROCESSED_QTY": _as_int(row.get("PROCESSED_QTY")),
                    "扣帳報廢量": _as_int(row.get("REJECT_TOTAL_QTY")),
                    "不扣帳報廢量": _as_int(row.get("DEFECT_QTY")),
                    "MOVEIN_QTY": _as_int(row.get("MOVEIN_QTY")),
                    "報廢時間": _to_datetime_str(row.get("TXN_TIME")),
                    "日期": _to_date_str(row.get("TXN_DAY")),
                }
            )
    # Header order defines CSV column order; keys must match the dicts above.
    headers = [
        "LOT",
        "WORKCENTER",
        "WORKCENTER_GROUP",
        "Package",
        "FUNCTION",
        "TYPE",
        "WORKFLOW",
        "PRODUCT",
        "原因",
        "EQUIPMENT",
        "COMMENT",
        "SPEC",
        "REJECT_QTY",
        "STANDBY_QTY",
        "QTYTOPROCESS_QTY",
        "INPROCESS_QTY",
        "PROCESSED_QTY",
        "扣帳報廢量",
        "不扣帳報廢量",
        "MOVEIN_QTY",
        "報廢時間",
        "日期",
    ]
    return _list_to_csv(rows, headers=headers)
def _derive_summary(df: pd.DataFrame) -> dict[str, Any]:
    """Collapse analytics rows into one KPI summary dict (zeros when empty)."""
    if df is None or df.empty:
        return {
            "MOVEIN_QTY": 0,
            "REJECT_TOTAL_QTY": 0,
            "DEFECT_QTY": 0,
            "REJECT_RATE_PCT": 0,
            "DEFECT_RATE_PCT": 0,
            "REJECT_SHARE_PCT": 0,
            "AFFECTED_LOT_COUNT": 0,
            "AFFECTED_WORKORDER_COUNT": 0,
        }
    totals = {
        column: _as_int(df[column].sum())
        for column in (
            "MOVEIN_QTY",
            "REJECT_TOTAL_QTY",
            "DEFECT_QTY",
            "AFFECTED_LOT_COUNT",
            "AFFECTED_WORKORDER_COUNT",
        )
    }
    movein = totals["MOVEIN_QTY"]
    reject_total = totals["REJECT_TOTAL_QTY"]
    defect = totals["DEFECT_QTY"]
    total_scrap = reject_total + defect
    return {
        "MOVEIN_QTY": movein,
        "REJECT_TOTAL_QTY": reject_total,
        "DEFECT_QTY": defect,
        "REJECT_RATE_PCT": round((reject_total / movein * 100) if movein else 0, 4),
        "DEFECT_RATE_PCT": round((defect / movein * 100) if movein else 0, 4),
        "REJECT_SHARE_PCT": round((reject_total / total_scrap * 100) if total_scrap else 0, 4),
        "AFFECTED_LOT_COUNT": totals["AFFECTED_LOT_COUNT"],
        "AFFECTED_WORKORDER_COUNT": totals["AFFECTED_WORKORDER_COUNT"],
    }
def _derive_trend(df: pd.DataFrame) -> list[dict[str, Any]]:
    """Group analytics rows by BUCKET_DATE into daily trend items."""
    if df is None or df.empty:
        return []
    daily = (
        df.groupby("BUCKET_DATE", sort=True)
        .agg(
            MOVEIN_QTY=("MOVEIN_QTY", "sum"),
            REJECT_TOTAL_QTY=("REJECT_TOTAL_QTY", "sum"),
            DEFECT_QTY=("DEFECT_QTY", "sum"),
        )
        .reset_index()
    )
    items = []
    for _, bucket in daily.iterrows():
        movein = _as_int(bucket["MOVEIN_QTY"])
        reject_total = _as_int(bucket["REJECT_TOTAL_QTY"])
        defect = _as_int(bucket["DEFECT_QTY"])
        # Rates are recomputed from the bucket totals, guarding div-by-zero.
        items.append({
            "bucket_date": _to_date_str(bucket["BUCKET_DATE"]),
            "MOVEIN_QTY": movein,
            "REJECT_TOTAL_QTY": reject_total,
            "DEFECT_QTY": defect,
            "REJECT_RATE_PCT": round((reject_total / movein * 100) if movein else 0, 4),
            "DEFECT_RATE_PCT": round((defect / movein * 100) if movein else 0, 4),
        })
    return items
def _derive_pareto(df: pd.DataFrame, metric_mode: str = "reject_total") -> list[dict[str, Any]]:
    """Group analytics rows by REASON into pareto items with PCT/CUM_PCT."""
    if df is None or df.empty:
        return []
    metric_col = "DEFECT_QTY" if metric_mode != "reject_total" else "REJECT_TOTAL_QTY"
    grouped = (
        df.groupby("REASON", sort=False)
        .agg(
            MOVEIN_QTY=("MOVEIN_QTY", "sum"),
            REJECT_TOTAL_QTY=("REJECT_TOTAL_QTY", "sum"),
            DEFECT_QTY=("DEFECT_QTY", "sum"),
            AFFECTED_LOT_COUNT=("AFFECTED_LOT_COUNT", "sum"),
        )
        .reset_index()
        .sort_values(metric_col, ascending=False)
        .reset_index(drop=True)
    )
    total_metric = _as_float(grouped[metric_col].sum())
    items = []
    cumulative = 0.0
    for _, entry in grouped.iterrows():
        metric_value = _as_float(entry[metric_col])
        share = round((metric_value / total_metric * 100) if total_metric else 0, 4)
        cumulative += share
        items.append({
            "reason": _normalize_text(entry["REASON"]) or "(未填寫)",
            "metric_value": metric_value,
            "MOVEIN_QTY": _as_int(entry["MOVEIN_QTY"]),
            "REJECT_TOTAL_QTY": _as_int(entry["REJECT_TOTAL_QTY"]),
            "DEFECT_QTY": _as_int(entry["DEFECT_QTY"]),
            "count": _as_int(entry["AFFECTED_LOT_COUNT"]),
            "pct": share,
            "cumPct": round(cumulative, 4),
        })
    return items
def _derive_raw_items(df: pd.DataFrame) -> list[dict[str, Any]]:
    """Per-(date, reason) rows so the client can re-derive views locally."""
    if df is None or df.empty:
        return []
    return [
        {
            "bucket_date": _to_date_str(row["BUCKET_DATE"]),
            "reason": _normalize_text(row["REASON"]) or "(未填寫)",
            "MOVEIN_QTY": _as_int(row["MOVEIN_QTY"]),
            "REJECT_TOTAL_QTY": _as_int(row["REJECT_TOTAL_QTY"]),
            "DEFECT_QTY": _as_int(row["DEFECT_QTY"]),
            "AFFECTED_LOT_COUNT": _as_int(row["AFFECTED_LOT_COUNT"]),
            "AFFECTED_WORKORDER_COUNT": _as_int(row["AFFECTED_WORKORDER_COUNT"]),
        }
        for _, row in df.iterrows()
    ]
def query_analytics(
    *,
    start_date: str,
    end_date: str,
    metric_mode: str = "reject_total",
    workcenter_groups: Optional[list[str]] = None,
    packages: Optional[list[str]] = None,
    reasons: Optional[list[str]] = None,
    categories: Optional[list[str]] = None,
    include_excluded_scrap: bool = False,
    exclude_material_scrap: bool = True,
    exclude_pb_diode: bool = True,
) -> dict[str, Any]:
    """Single DB query → summary + trend + pareto (replaces 3 separate queries).

    The per-(date, reason) analytics rows are fetched once, then aggregated in
    pandas into the summary, a daily trend, and a reason pareto; the raw rows
    are also returned so the client can re-derive views without new queries.

    Raises:
        ValueError: on an invalid date range or metric_mode.
    """
    _validate_range(start_date, end_date)
    normalized_metric = _normalize_text(metric_mode).lower() or "reject_total"
    if normalized_metric not in VALID_METRIC_MODE:
        raise ValueError("Invalid metric_mode. Use reject_total or defect")
    where_clause, params, meta = _build_where_clause(
        workcenter_groups=workcenter_groups,
        packages=packages,
        reasons=reasons,
        categories=categories,
        include_excluded_scrap=include_excluded_scrap,
        exclude_material_scrap=exclude_material_scrap,
        exclude_pb_diode=exclude_pb_diode,
    )
    sql = _prepare_sql("analytics", where_clause=where_clause)
    df = read_sql_df(sql, _common_params(start_date, end_date, params))
    return {
        "summary": _derive_summary(df),
        "trend": {
            "items": _derive_trend(df),
            # The analytics query buckets by day; other granularities go
            # through query_trend instead.
            "granularity": "day",
        },
        "pareto": {
            "items": _derive_pareto(df, normalized_metric),
            "metric_mode": normalized_metric,
        },
        "raw_items": _derive_raw_items(df),
        "meta": meta,
    }