Files
SalesPipeline/backend/app/routers/lab.py
2026-01-27 19:08:46 +08:00

636 lines
21 KiB
Python

from typing import List, Optional, Tuple, Dict, Set
from datetime import datetime, timedelta
from fastapi import APIRouter, Depends, Query
from sqlalchemy.orm import Session
from sqlalchemy import func
from pydantic import BaseModel
from app.models import get_db
from app.models.sample import SampleRecord
from app.models.order import OrderRecord
from app.models.match import MatchResult, MatchStatus, TargetType
from app.services.fuzzy_matcher import normalize_pn_for_matching, normalize_customer_name
# All endpoints in this module are mounted under the /lab prefix.
router = APIRouter(prefix="/lab", tags=["Lab"])
# --- Pydantic Models ---
class LabKPI(BaseModel):
    """Headline KPI figures returned by GET /lab/kpi."""
    converted_count: int          # (customer, PN) sample groups with a matching later order
    avg_velocity: float           # mean days from earliest sample to first qualifying order
    conversion_rate: float        # converted groups / all groups, as a percentage
    orphan_count: int             # unconverted groups whose earliest sample is >90 days old
    no_dit_count: int             # high-qty samples lacking an accepted/auto-matched MatchResult
    high_qty_no_order_count: int  # high-qty samples with no in-memory order match after sampling
class ConversionRecord(BaseModel):
    """One sample-to-order conversion row (GET /lab/conversions)."""
    customer: str
    pn: str
    sample_date: str      # "YYYY-MM-DD"
    sample_qty: int
    order_date: str       # date of the first matched order on/after the sample date
    order_qty: int        # qty summed over matched orders sharing that first date
    total_order_qty: int  # qty summed over all matched orders on/after the sample date
    days_to_convert: int  # order_date - sample_date, in days
class ScatterPoint(BaseModel):
    """Per-(customer, PN) group point for GET /lab/scatter: sample qty vs matched order qty."""
    customer: str
    pn: str
    sample_qty: int
    order_qty: int
class OrphanSample(BaseModel):
    """A sample sent more than 90 days ago with no order on/after its date (GET /lab/orphans)."""
    customer: str
    pn: str
    days_since_sent: int
    order_no: Optional[str] = None
    date: Optional[str] = None  # "YYYY-MM-DD" sample date
    sample_qty: int = 0         # aggregated over rows sharing customer/PN/order_no/date
class NoDitSample(BaseModel):
    """High-quantity sample (qty >= 1000) with no accepted/auto-matched MatchResult."""
    sample_id: str
    customer: str
    pn: str
    order_no: Optional[str]
    date: Optional[str]  # "YYYY-MM-DD", or "" when the raw date is unparsable
    qty: int
class HighQtyNoOrderSample(BaseModel):
    """High-quantity sample (qty >= 1000) with no matching order on/after its date."""
    sample_id: str
    customer: str
    pn: str
    order_no: Optional[str]
    date: Optional[str]
    qty: int
    days_since_sent: int  # age of the sample relative to "now"
# --- Helper Functions ---
def parse_date_fast(date_val) -> Optional[datetime]:
    """Best-effort parse of a raw date value into a naive datetime.

    Accepts datetime instances (returned unchanged) and strings in ISO
    timestamp form ("...T...", optional "Z" suffix / fractional seconds)
    or the date formats %Y-%m-%d, %Y/%m/%d, %Y.%m.%d, %d-%m-%Y, %Y%m%d.
    Returns None for empty or unparsable values.
    """
    if not date_val:
        return None
    if isinstance(date_val, datetime):
        return date_val
    if isinstance(date_val, str):
        s = date_val.strip()
        # Strip fractional seconds only from timestamp-like strings.
        # BUG FIX: the previous unconditional split('.') truncated dotted
        # dates such as "2024.01.05" to "2024", which made the %Y.%m.%d
        # branch below unreachable.
        if "T" in s or ":" in s:
            s = s.split('.')[0]
        if "T" in s:
            try:
                return datetime.fromisoformat(s.replace("Z", "+00:00"))
            except ValueError:
                pass
        # Common case: leading "YYYY-MM-DD" (ignores any trailing text).
        try:
            return datetime.strptime(s[:10], "%Y-%m-%d")
        except ValueError:
            pass
        for fmt in ["%Y/%m/%d", "%Y.%m.%d", "%d-%m-%Y", "%Y%m%d"]:
            try:
                return datetime.strptime(s, fmt)
            except ValueError:
                continue
    return None
def normalize_id(val: object) -> str:
    """Normalize a raw customer/order ID into a canonical comparison key.

    Handles common spreadsheet-import artifacts: a leading apostrophe
    (Excel text marker) and a trailing ".0" (numeric cell rendered as a
    float). Returns "" for None, otherwise the cleaned upper-cased string.

    Fix: the parameter was annotated with the builtin ``any`` (a function,
    not a type); ``object`` expresses "any value" correctly.
    """
    if val is None:
        return ""
    # str() first so non-string IDs (e.g. the float 123.0) are handled uniformly.
    s = str(val).strip()
    s = s.lstrip("'")
    if s.endswith(".0"):
        s = s[:-2]
    return s.upper()
def fetch_orders_light(db: Session, start_date: Optional[str] = None) -> List[dict]:
    """Load order rows as plain dicts enriched with pre-normalized match keys.

    Only the columns needed for matching are selected. Customer name, PN
    and customer ID normalization is memoized per raw value so repeated
    values are normalized only once.
    """
    q = db.query(
        OrderRecord.customer,
        OrderRecord.pn,
        OrderRecord.date,
        OrderRecord.created_at,
        OrderRecord.qty,
        OrderRecord.order_no,
        OrderRecord.cust_id
    )
    if start_date:
        q = q.filter(OrderRecord.date >= start_date)
    rows = q.all()
    # Memo caches: raw value -> normalized form.
    memo_cust = {}
    memo_pn = {}
    memo_id = {}
    processed = []
    for r in rows:
        # Fall back to created_at (made naive) when the business date is
        # unparsable; datetime.max keeps fully undated rows sortable but
        # effectively "never matches on/after" comparisons meaningfully.
        d = parse_date_fast(r.date) or (r.created_at.replace(tzinfo=None) if r.created_at else datetime.max)
        c_raw = r.customer or ""
        pn_raw = r.pn or ""
        cust_id_raw = r.cust_id
        if c_raw not in memo_cust: memo_cust[c_raw] = normalize_customer_name(c_raw)
        if pn_raw not in memo_pn: memo_pn[pn_raw] = normalize_pn_for_matching(pn_raw)
        if cust_id_raw not in memo_id: memo_id[cust_id_raw] = normalize_id(cust_id_raw)
        processed.append({
            "customer": c_raw,
            "pn": pn_raw,
            "date": d,
            "qty": r.qty or 0,
            "order_no": r.order_no,
            "cust_id": cust_id_raw,
            "norm_cust_name": memo_cust[c_raw],
            "clean_pn": memo_pn[pn_raw],
            "clean_cust_id": memo_id[cust_id_raw]
        })
    return processed
def fetch_samples_light(db: Session, start_date: Optional[str] = None, end_date: Optional[str] = None) -> List[dict]:
    """Load sample rows as plain dicts enriched with pre-normalized match keys.

    Unlike fetch_orders_light there is no created_at fallback: a sample
    whose date cannot be parsed gets date=None (callers skip such rows).
    """
    q = db.query(
        SampleRecord.id,
        SampleRecord.customer,
        SampleRecord.pn,
        SampleRecord.date,
        SampleRecord.qty,
        SampleRecord.order_no,
        SampleRecord.cust_id
    )
    if start_date:
        q = q.filter(SampleRecord.date >= start_date)
    if end_date:
        q = q.filter(SampleRecord.date <= end_date)
    rows = q.all()
    # Memo caches: raw value -> normalized form.
    memo_cust = {}
    memo_pn = {}
    processed = []
    for r in rows:
        d = parse_date_fast(r.date)
        c_raw = r.customer or ""
        pn_raw = r.pn or ""
        if c_raw not in memo_cust: memo_cust[c_raw] = normalize_customer_name(c_raw)
        if pn_raw not in memo_pn: memo_pn[pn_raw] = normalize_pn_for_matching(pn_raw)
        processed.append({
            "id": r.id,
            "customer": c_raw,
            "pn": pn_raw,
            "date": d,
            "qty": r.qty or 0,
            "order_no": r.order_no,
            "cust_id": r.cust_id,
            "norm_cust_name": memo_cust[c_raw],
            "clean_pn": memo_pn[pn_raw],
        })
    return processed
def build_order_lookups(orders: List[dict]) -> Tuple[dict, dict, dict]:
    """Index order dicts for O(1) candidate lookup during sample matching.

    Returns three indexes:
      by_id:             (clean_cust_id, clean_pn) -> [orders], rows with a non-empty ID only
      by_name:           (norm_cust_name, clean_pn) -> [orders]
      by_cust_name_only: norm_cust_name -> [orders], used by the fuzzy PN fallback

    Idiom cleanup: the manual "if key not in d: d[key] = []" dance is
    replaced with dict.setdefault.
    """
    by_id: Dict[tuple, list] = {}
    by_name: Dict[tuple, list] = {}
    by_cust_name_only: Dict[str, list] = {}
    for o in orders:
        if o["clean_cust_id"]:
            by_id.setdefault((o["clean_cust_id"], o["clean_pn"]), []).append(o)
        by_name.setdefault((o["norm_cust_name"], o["clean_pn"]), []).append(o)
        by_cust_name_only.setdefault(o["norm_cust_name"], []).append(o)
    return by_id, by_name, by_cust_name_only
def find_matches_in_memory(sample: dict, order_lookups: Tuple[dict, dict, dict]) -> List[dict]:
    """Find candidate orders for one sample using three matching tiers.

    Tier 1: exact (normalized customer ID, PN).
    Tier 2: exact (normalized customer name, PN).
    Tier 3: fuzzy fallback — customer-name containment either way
            ("Corp" vs "Corp Inc") combined with PN prefix containment.
    Each tier runs only if the previous one produced no candidates.
    Results are de-duplicated on (order_no, date, qty).

    Bug fix: the original computed the tier-2 key but never consulted
    by_name, so exact-name matches were only reachable via the fuzzy
    scan; the lookup now mirrors the logic used in get_lab_kpi.
    """
    (by_id, by_name, by_cust_name) = order_lookups
    candidates = []
    # Tier 1: customer ID + PN (strongest signal).
    if sample.get("cust_id"):
        cid = normalize_id(sample["cust_id"])
        if cid:
            k = (cid, sample["clean_pn"])
            if k in by_id:
                candidates.extend(by_id[k])
    # Tier 2: exact normalized name + PN.
    if not candidates:
        k_name = (sample["norm_cust_name"], sample["clean_pn"])
        if k_name in by_name:
            candidates.extend(by_name[k_name])
    # Tier 3: fuzzy name match (fallback). Scanning keys for mutual
    # containment handles Sample="Corp Inc" vs Order="Corp" and vice versa.
    if not candidates:
        s_name = sample["norm_cust_name"]
        target_keys = []
        if s_name in by_cust_name:
            target_keys.append(s_name)
        # Always scan so "ABC" is found even when "ABC Inc" has no orders.
        for k in by_cust_name.keys():
            if len(k) < 2: continue
            if k == s_name: continue
            if k in s_name or s_name in k:
                target_keys.append(k)
        checked_count = 0
        for k in target_keys:
            partial_candidates = by_cust_name[k]
            # Safety valve: stop once too many rows have been examined.
            if checked_count > 5000:
                break
            spn = sample["clean_pn"]
            if spn:
                for o in partial_candidates:
                    opn = o["clean_pn"]
                    if opn and (spn.startswith(opn) or opn.startswith(spn)):
                        candidates.append(o)
            checked_count += len(partial_candidates)
    # De-duplicate while preserving first-seen order.
    unique_map = {}
    for c in candidates:
        sig = (c["order_no"], c["date"], c["qty"])
        if sig not in unique_map:
            unique_map[sig] = c
    return list(unique_map.values())
def fetch_no_dit_samples(db: Session, start_date: Optional[str] = None, end_date: Optional[str] = None) -> List[NoDitSample]:
    """High-qty samples (qty >= 1000) lacking an accepted/auto-matched MatchResult.

    Returns rows sorted by quantity, largest first.
    NOTE(review): customer/pn are passed through unchanged; the response
    model declares them as str, so a NULL column would fail validation —
    confirm the schema guarantees non-null values here.
    """
    q = db.query(SampleRecord).filter(SampleRecord.qty >= 1000)
    if start_date: q = q.filter(SampleRecord.date >= start_date)
    if end_date: q = q.filter(SampleRecord.date <= end_date)
    samples = q.all()
    if not samples: return []
    s_ids = [s.id for s in samples]
    # IDs of samples that already have a confirmed (accepted or auto) match.
    matched_ids = db.query(MatchResult.target_id).filter(
        MatchResult.target_id.in_(s_ids),
        MatchResult.target_type == TargetType.SAMPLE,
        MatchResult.status.in_([MatchStatus.accepted, MatchStatus.auto_matched])
    ).all()
    matched_set = set(m[0] for m in matched_ids)
    results = []
    for s in samples:
        if s.id not in matched_set:
            d = parse_date_fast(s.date)
            results.append(NoDitSample(
                sample_id=str(s.id),
                customer=s.customer,
                pn=s.pn,
                order_no=s.order_no,
                date=d.strftime("%Y-%m-%d") if d else "",
                qty=s.qty or 0
            ))
    return sorted(results, key=lambda x: x.qty, reverse=True)
def fetch_high_qty_no_order_samples(db: Session, start_date: Optional[str] = None, end_date: Optional[str] = None) -> List[HighQtyNoOrderSample]:
    """High-qty samples (qty >= 1000) with no matching order on/after the sample date.

    Matching uses the in-memory tiered matcher (find_matches_in_memory);
    samples whose date cannot be parsed are skipped. Sorted by quantity,
    descending. Note: normalization here is not memoized, unlike
    fetch_samples_light (the qty filter keeps the row count small).
    """
    q = db.query(
        SampleRecord.id, SampleRecord.customer, SampleRecord.pn,
        SampleRecord.date, SampleRecord.qty, SampleRecord.order_no, SampleRecord.cust_id
    ).filter(SampleRecord.qty >= 1000)
    if start_date: q = q.filter(SampleRecord.date >= start_date)
    if end_date: q = q.filter(SampleRecord.date <= end_date)
    raw_samples = q.all()
    samples = []
    for r in raw_samples:
        d = parse_date_fast(r.date)
        samples.append({
            "id": r.id, "customer": r.customer, "pn": r.pn, "date": d,
            "qty": r.qty or 0, "order_no": r.order_no, "cust_id": r.cust_id,
            "norm_cust_name": normalize_customer_name(r.customer),
            "clean_pn": normalize_pn_for_matching(r.pn),
            "clean_cust_id": normalize_id(r.cust_id)
        })
    orders = fetch_orders_light(db, start_date=start_date)
    lookups = build_order_lookups(orders)
    results = []
    now = datetime.now()
    for s in samples:
        if not s["date"]: continue  # undated samples cannot be aged or compared
        matches = find_matches_in_memory(s, lookups)
        # Only orders placed on/after the sample date count.
        valid = [o for o in matches if o["date"] >= s["date"]]
        if not valid:
            results.append(HighQtyNoOrderSample(
                sample_id=str(s["id"]),
                customer=s["customer"],
                pn=s["pn"],
                order_no=s["order_no"],
                date=s["date"].strftime("%Y-%m-%d"),
                qty=s["qty"],
                days_since_sent=(now - s["date"]).days
            ))
    return sorted(results, key=lambda x: x.qty, reverse=True)
# --- Routes ---
@router.get("/conversions", response_model=List[ConversionRecord])
def get_conversions(
    start_date: Optional[str] = Query(None),
    end_date: Optional[str] = Query(None),
    db: Session = Depends(get_db)
):
    """List converted samples: for each sample, its first order on/after the sample date.

    order_qty sums all matched orders sharing that first date;
    total_order_qty sums every matched order on/after the sample date.
    Result is sorted by sample date, newest first.
    """
    if not start_date:
        start_date = "2000-01-01"  # effectively "no lower bound"
    samples = fetch_samples_light(db, start_date, end_date)
    orders = fetch_orders_light(db, start_date=start_date)
    lookups = build_order_lookups(orders)
    conversions = []
    for s in samples:
        s_date = s["date"]
        if not s_date: continue  # unparsable sample date — cannot compute velocity
        matches = find_matches_in_memory(s, lookups)
        # Only orders placed on/after the sample date count as conversions.
        valid_orders = [o for o in matches if o["date"] >= s_date]
        if valid_orders:
            valid_orders.sort(key=lambda x: x["date"])
            first_order = valid_orders[0]
            first_date = first_order["date"]
            first_date_qty = sum(o["qty"] for o in valid_orders if o["date"] == first_date)
            total_qty = sum(o["qty"] for o in valid_orders)
            conversions.append(ConversionRecord(
                customer=s["customer"],
                pn=s["pn"],
                sample_date=s_date.strftime("%Y-%m-%d"),
                sample_qty=s["qty"],
                order_date=first_date.strftime("%Y-%m-%d"),
                order_qty=first_date_qty,
                total_order_qty=total_qty,
                days_to_convert=(first_date - s_date).days
            ))
    return sorted(conversions, key=lambda x: x.sample_date, reverse=True)
@router.get("/kpi", response_model=LabKPI)
def get_lab_kpi(
    start_date: Optional[str] = Query(None),
    end_date: Optional[str] = Query(None),
    db: Session = Depends(get_db)
):
    """Compute the lab dashboard KPIs over (customer, PN) sample groups.

    A group is "converted" when any matched order falls on/after its
    earliest sample date, and "orphaned" when unconverted with an
    earliest sample older than 90 days. Velocity is the day count from
    earliest sample to the earliest qualifying order.
    """
    if not start_date:
        start_date = "2000-01-01"  # effectively "no lower bound"
    samples = fetch_samples_light(db, start_date, end_date)
    orders = fetch_orders_light(db, start_date=start_date)
    lookups = build_order_lookups(orders)
    # Group samples by normalized (customer, PN); collect dates and customer
    # IDs per group, and flag high-quantity samples (qty >= 1000) as we go.
    unique_groups = {}
    high_qty_samples = []
    for s in samples:
        key = (s["norm_cust_name"], s["clean_pn"])
        if key not in unique_groups:
            unique_groups[key] = {
                "dates": [],
                "cust_ids": set(),
                "raw_pns": set(),
                "samples": []
            }
        if s["date"]: unique_groups[key]["dates"].append(s["date"])
        if s["cust_id"]: unique_groups[key]["cust_ids"].add(normalize_id(s["cust_id"]))
        unique_groups[key]["raw_pns"].add(s["clean_pn"])
        unique_groups[key]["samples"].append(s)
        if s["qty"] >= 1000:
            high_qty_samples.append(s)
    converted_count = 0
    orphan_count = 0
    velocities = []
    now = datetime.now()
    threshold90 = now - timedelta(days=90)
    # "No DIT": high-qty samples with no accepted/auto-matched MatchResult row.
    high_qty_ids = [s["id"] for s in high_qty_samples]
    no_dit_count = 0
    if high_qty_ids:
        matched_ids = db.query(MatchResult.target_id).filter(
            MatchResult.target_id.in_(high_qty_ids),
            MatchResult.target_type == TargetType.SAMPLE,
            MatchResult.status.in_([MatchStatus.accepted, MatchStatus.auto_matched])
        ).all()
        matched_set = set(m[0] for m in matched_ids)
        no_dit_count = sum(1 for sid in high_qty_ids if sid not in matched_set)
    for key, data in unique_groups.items():
        earliest_sample = min(data["dates"]) if data["dates"] else None
        is_converted = False
        min_order_date = None
        candidates = []
        # NOTE(review): this unpack is loop-invariant and could be hoisted.
        (by_id, by_name, by_cust_name_only) = lookups
        # Tier 1: any of the group's customer IDs + the group's PN.
        for cid in data["cust_ids"]:
            k = (cid, key[1])
            if k in by_id: candidates.extend(by_id[k])
        # Tier 2: exact normalized name + PN.
        if not candidates and key in by_name:
            candidates.extend(by_name[key])
        if not candidates:
            # Fuzzy Name Match (Containment)
            found_keys = []
            if key[0] in by_cust_name_only:
                found_keys.append(key[0])
            for k in by_cust_name_only.keys():
                if len(k) < 2: continue
                if k == key[0]: continue
                if k in key[0] or key[0] in k:
                    found_keys.append(k)
            checked = 0
            for k in found_keys:
                partial = by_cust_name_only[k]
                if checked > 5000: break  # safety valve on scan size
                spn = key[1]
                for o in partial:
                    opn = o["clean_pn"]
                    if opn and (spn.startswith(opn) or opn.startswith(spn)):
                        candidates.append(o)
                checked += len(partial)
        # Converted = at least one matched order on/after the earliest sample.
        if candidates and earliest_sample:
            valid_dates = [o["date"] for o in candidates if o["date"] >= earliest_sample]
            if valid_dates:
                is_converted = True
                min_order_date = min(valid_dates)
        if is_converted:
            converted_count += 1
            if earliest_sample and min_order_date:
                diff = (min_order_date - earliest_sample).days
                if diff >= 0: velocities.append(diff)
        else:
            # Orphan = unconverted and sampled more than 90 days ago.
            if earliest_sample and earliest_sample < threshold90:
                orphan_count += 1
    # High-qty samples with no matching order on/after their own sample date.
    high_qty_no_order_count = 0
    for s in high_qty_samples:
        s_date = s["date"]
        if not s_date: continue
        matches = find_matches_in_memory(s, lookups)
        valid = [o for o in matches if o["date"] >= s_date]
        if not valid:
            high_qty_no_order_count += 1
    avg_v = sum(velocities) / len(velocities) if velocities else 0
    c_rate = (converted_count / len(unique_groups) * 100) if unique_groups else 0
    return LabKPI(
        converted_count=converted_count,
        avg_velocity=round(avg_v, 1),
        conversion_rate=round(c_rate, 1),
        orphan_count=orphan_count,
        no_dit_count=no_dit_count,
        high_qty_no_order_count=high_qty_no_order_count
    )
@router.get("/scatter", response_model=List[ScatterPoint])
def get_scatter(
    start_date: Optional[str] = Query(None),
    end_date: Optional[str] = Query(None),
    db: Session = Depends(get_db)
):
    """Per-(customer, PN) group totals: sample qty vs matched order qty.

    Orders are matched by exact (name, PN) first, otherwise by the fuzzy
    name-containment fallback; only orders on/after the group's earliest
    sample date contribute to order_qty.
    """
    samples = fetch_samples_light(db, start_date, end_date)
    orders = fetch_orders_light(db, start_date=start_date)
    lookups = build_order_lookups(orders)
    # Aggregate samples into (norm_cust_name, clean_pn) groups, remembering
    # the first-seen display strings and the earliest sample date.
    unique_groups = {}
    for s in samples:
        key = (s["norm_cust_name"], s["clean_pn"])
        if key not in unique_groups:
            unique_groups[key] = {
                "display_cust": s["customer"],
                "display_pn": s["pn"],
                "sample_qty": 0,
                "order_qty": 0,
                "min_date": s["date"]
            }
        unique_groups[key]["sample_qty"] += s["qty"]
        if s["date"] and (not unique_groups[key]["min_date"] or s["date"] < unique_groups[key]["min_date"]):
            unique_groups[key]["min_date"] = s["date"]
    for key, data in unique_groups.items():
        min_date = data["min_date"]
        candidates = []
        (by_id, by_name, by_cust_name_only) = lookups
        if key in by_name:
            candidates.extend(by_name[key])
        # NOTE(review): candidates is always empty here, so this elif is
        # effectively a plain else — the fuzzy scan runs whenever the
        # exact (name, PN) lookup missed.
        elif not candidates:
            # Fuzzy Name Match
            found_keys = []
            if key[0] in by_cust_name_only:
                found_keys.append(key[0])
            for k in by_cust_name_only.keys():
                if len(k) < 2: continue
                if k == key[0]: continue
                if k in key[0] or key[0] in k:
                    found_keys.append(k)
            checked = 0
            for k in found_keys:
                partial = by_cust_name_only[k]
                if checked > 5000: break  # safety valve on scan size
                spn = key[1]
                for o in partial:
                    opn = o["clean_pn"]
                    if opn and (spn.startswith(opn) or opn.startswith(spn)):
                        candidates.append(o)
                checked += len(partial)
        # Sum qty over de-duplicated orders placed on/after the earliest sample.
        seen_orders = set()
        matched_qty = 0
        for o in candidates:
            sig = (o["order_no"], o["qty"], o["date"])
            if sig in seen_orders: continue
            seen_orders.add(sig)
            if min_date and o["date"] < min_date: continue
            matched_qty += o["qty"]
        data["order_qty"] = matched_qty
    return [
        ScatterPoint(
            customer=v["display_cust"],
            pn=v["display_pn"],
            sample_qty=v["sample_qty"],
            order_qty=v["order_qty"]
        ) for v in unique_groups.values()
    ]
@router.get("/orphans", response_model=List[OrphanSample])
def get_orphans(
    start_date: Optional[str] = Query(None),
    end_date: Optional[str] = Query(None),
    db: Session = Depends(get_db)
):
    """Samples sent more than 90 days ago with no matching order on/after their date.

    Rows sharing (normalized customer, PN, order_no, date) are aggregated
    into one entry with summed sample_qty. Sorted by age, oldest first.
    """
    samples = fetch_samples_light(db, start_date, end_date)
    orders = fetch_orders_light(db, start_date=start_date)
    lookups = build_order_lookups(orders)
    threshold_date = datetime.now() - timedelta(days=90)
    orphan_groups = {}
    for s in samples:
        s_date = s["date"]
        if not s_date or s_date >= threshold_date: continue  # undated or too recent
        matches = find_matches_in_memory(s, lookups)
        valid = [o for o in matches if o["date"] >= s_date]
        if not valid:
            # Aggregate rows that describe the same shipment.
            key = (s["norm_cust_name"], s["clean_pn"], s["order_no"] or "", s_date)
            if key not in orphan_groups:
                orphan_groups[key] = {
                    "customer": s["customer"],
                    "pn": s["pn"],
                    "order_no": s["order_no"],
                    "date": s_date.strftime("%Y-%m-%d"),
                    "sample_qty": 0,
                    "days_since_sent": (datetime.now() - s_date).days
                }
            orphan_groups[key]["sample_qty"] += s["qty"]
    return sorted(
        [OrphanSample(**v) for v in orphan_groups.values()],
        key=lambda x: x.days_since_sent,
        reverse=True
    )
@router.get("/no_dit_samples", response_model=List[NoDitSample])
def get_no_dit_samples(
    start_date: Optional[str] = Query(None),
    end_date: Optional[str] = Query(None),
    db: Session = Depends(get_db)
):
    """High-qty samples without an accepted/auto-matched MatchResult (delegates to fetch_no_dit_samples)."""
    return fetch_no_dit_samples(db, start_date, end_date)
@router.get("/high_qty_no_order_samples", response_model=List[HighQtyNoOrderSample])
def get_high_qty_no_order(
    start_date: Optional[str] = Query(None),
    end_date: Optional[str] = Query(None),
    db: Session = Depends(get_db)
):
    """High-qty samples with no subsequent order (delegates to fetch_high_qty_no_order_samples)."""
    return fetch_high_qty_no_order_samples(db, start_date, end_date)