"""Lab analytics router: sample-to-order conversion tracking.

Correlates sample shipments against subsequent orders using a three-tier
matching strategy (customer-ID, exact normalized name, fuzzy name
containment) and exposes KPI / conversion / scatter / orphan endpoints.
"""

from typing import Any, List, Optional, Tuple, Dict, Set
from datetime import datetime, timedelta
from fastapi import APIRouter, Depends, Query
from sqlalchemy.orm import Session
from sqlalchemy import func
from pydantic import BaseModel

from app.models import get_db
from app.models.sample import SampleRecord
from app.models.order import OrderRecord
from app.models.match import MatchResult, MatchStatus, TargetType
from app.services.fuzzy_matcher import normalize_pn_for_matching, normalize_customer_name

router = APIRouter(prefix="/lab", tags=["Lab"])


# --- Pydantic Models ---

class LabKPI(BaseModel):
    converted_count: int
    avg_velocity: float        # mean days from first sample to first order
    conversion_rate: float     # percent of (customer, PN) groups converted
    orphan_count: int          # groups >90 days old with no matching order
    no_dit_count: int          # high-qty samples lacking an accepted MatchResult
    high_qty_no_order_count: int


class ConversionRecord(BaseModel):
    customer: str
    pn: str
    sample_date: str
    sample_qty: int
    order_date: str
    order_qty: int
    total_order_qty: int
    days_to_convert: int


class ScatterPoint(BaseModel):
    customer: str
    pn: str
    sample_qty: int
    order_qty: int


class OrphanSample(BaseModel):
    customer: str
    pn: str
    days_since_sent: int
    order_no: Optional[str] = None
    date: Optional[str] = None
    sample_qty: int = 0


class NoDitSample(BaseModel):
    sample_id: str
    customer: str
    pn: str
    order_no: Optional[str]
    date: Optional[str]
    qty: int


class HighQtyNoOrderSample(BaseModel):
    sample_id: str
    customer: str
    pn: str
    order_no: Optional[str]
    date: Optional[str]
    qty: int
    days_since_sent: int


# --- Helper Functions ---

def parse_date_fast(date_val) -> Optional[datetime]:
    """Parse a heterogeneous date value into a naive datetime.

    Accepts datetime instances (returned as-is) and strings in ISO-8601 or
    several fallback formats. Returns None when unparseable or falsy.
    """
    if not date_val:
        return None
    if isinstance(date_val, datetime):
        return date_val
    if isinstance(date_val, str):
        s = date_val.strip()
        # BUG FIX: the fractional-second strip used to be an unconditional
        # split('.'), which truncated dotted dates like "2023.01.15" to
        # "2023" and made the "%Y.%m.%d" format below unreachable. Only
        # strip when a time component is present; still drop a bare Excel
        # float artifact like "20230115.0".
        if "T" in s or ":" in s:
            s = s.split(".", 1)[0]
        elif s.endswith(".0"):
            s = s[:-2]
        if "T" in s:
            try:
                return datetime.fromisoformat(s.replace("Z", "+00:00"))
            except ValueError:
                pass
        try:
            # Fast path: leading "YYYY-MM-DD" (covers "YYYY-MM-DD hh:mm:ss").
            return datetime.strptime(s[:10], "%Y-%m-%d")
        except ValueError:
            pass
        for fmt in ["%Y/%m/%d", "%Y.%m.%d", "%d-%m-%Y", "%Y%m%d"]:
            try:
                return datetime.strptime(s, fmt)
            except ValueError:
                continue
    return None


def normalize_id(val: Any) -> str:
    """Normalize a customer ID to an uppercase string.

    Strips whitespace, a leading Excel text-marker apostrophe, and a
    trailing ".0" float artifact. None becomes "".
    """
    # BUG FIX: annotation was the builtin `any` function, not typing.Any.
    if val is None:
        return ""
    s = str(val).strip()
    s = s.lstrip("'")
    if s.endswith(".0"):
        s = s[:-2]
    return s.upper()


def fetch_orders_light(db: Session, start_date: Optional[str] = None) -> List[dict]:
    """Load order rows as lightweight dicts with memoized normalized keys.

    Normalization of customer name / PN / cust_id is memoized per raw value
    since many rows repeat the same customer and part number.
    """
    q = db.query(
        OrderRecord.customer, OrderRecord.pn, OrderRecord.date,
        OrderRecord.created_at, OrderRecord.qty, OrderRecord.order_no,
        OrderRecord.cust_id,
    )
    if start_date:
        q = q.filter(OrderRecord.date >= start_date)
    rows = q.all()
    memo_cust: Dict[str, str] = {}
    memo_pn: Dict[str, str] = {}
    memo_id: Dict[Any, str] = {}
    processed = []
    for r in rows:
        # Fall back to creation time; datetime.max keeps undated orders from
        # ever counting as "on or after" a real sample date incorrectly early.
        d = parse_date_fast(r.date) or (
            r.created_at.replace(tzinfo=None) if r.created_at else datetime.max
        )
        c_raw = r.customer or ""
        pn_raw = r.pn or ""
        cust_id_raw = r.cust_id
        if c_raw not in memo_cust:
            memo_cust[c_raw] = normalize_customer_name(c_raw)
        if pn_raw not in memo_pn:
            memo_pn[pn_raw] = normalize_pn_for_matching(pn_raw)
        if cust_id_raw not in memo_id:
            memo_id[cust_id_raw] = normalize_id(cust_id_raw)
        processed.append({
            "customer": c_raw,
            "pn": pn_raw,
            "date": d,
            "qty": r.qty or 0,
            "order_no": r.order_no,
            "cust_id": cust_id_raw,
            "norm_cust_name": memo_cust[c_raw],
            "clean_pn": memo_pn[pn_raw],
            "clean_cust_id": memo_id[cust_id_raw],
        })
    return processed


def fetch_samples_light(
    db: Session,
    start_date: Optional[str] = None,
    end_date: Optional[str] = None,
) -> List[dict]:
    """Load sample rows as lightweight dicts with memoized normalized keys."""
    q = db.query(
        SampleRecord.id, SampleRecord.customer, SampleRecord.pn,
        SampleRecord.date, SampleRecord.qty, SampleRecord.order_no,
        SampleRecord.cust_id,
    )
    if start_date:
        q = q.filter(SampleRecord.date >= start_date)
    if end_date:
        q = q.filter(SampleRecord.date <= end_date)
    rows = q.all()
    memo_cust: Dict[str, str] = {}
    memo_pn: Dict[str, str] = {}
    processed = []
    for r in rows:
        d = parse_date_fast(r.date)
        c_raw = r.customer or ""
        pn_raw = r.pn or ""
        if c_raw not in memo_cust:
            memo_cust[c_raw] = normalize_customer_name(c_raw)
        if pn_raw not in memo_pn:
            memo_pn[pn_raw] = normalize_pn_for_matching(pn_raw)
        processed.append({
            "id": r.id,
            "customer": c_raw,
            "pn": pn_raw,
            "date": d,
            "qty": r.qty or 0,
            "order_no": r.order_no,
            "cust_id": r.cust_id,
            "norm_cust_name": memo_cust[c_raw],
            "clean_pn": memo_pn[pn_raw],
        })
    return processed


def build_order_lookups(orders: List[dict]) -> Tuple[dict, dict, dict]:
    """Index orders three ways for matching.

    Returns (by_id, by_name, by_cust_name_only):
      - by_id:             (clean_cust_id, clean_pn) -> [orders]
      - by_name:           (norm_cust_name, clean_pn) -> [orders]
      - by_cust_name_only: norm_cust_name -> [orders]
    """
    by_id: Dict[tuple, list] = {}
    by_name: Dict[tuple, list] = {}
    by_cust_name_only: Dict[str, list] = {}
    for o in orders:
        if o["clean_cust_id"]:
            by_id.setdefault((o["clean_cust_id"], o["clean_pn"]), []).append(o)
        by_name.setdefault((o["norm_cust_name"], o["clean_pn"]), []).append(o)
        by_cust_name_only.setdefault(o["norm_cust_name"], []).append(o)
    return by_id, by_name, by_cust_name_only


def _collect_fuzzy_pn_candidates(
    norm_name: str,
    clean_pn: str,
    by_cust_name: dict,
    limit: int = 5000,
) -> List[dict]:
    """Fuzzy fallback: customer-name containment + PN prefix matching.

    Scans customer-name keys for mutual containment (handles Sample="Corp
    Inc" vs Order="Corp" and vice versa), then accepts orders whose clean PN
    is a prefix of the sample PN or vice versa. `limit` is a safety break on
    the number of candidate orders scanned. Extracted here because the same
    logic was previously duplicated in three call sites.
    """
    candidates: List[dict] = []
    target_keys = []
    if norm_name in by_cust_name:
        target_keys.append(norm_name)
    # Always scan other keys too: "ABC" should find "ABC Inc" even when an
    # exact "ABC" bucket also exists.
    for k in by_cust_name:
        if len(k) < 2 or k == norm_name:
            continue
        if k in norm_name or norm_name in k:
            target_keys.append(k)
    checked = 0
    for k in target_keys:
        partial = by_cust_name[k]
        if checked > limit:
            break
        if clean_pn:
            for o in partial:
                opn = o["clean_pn"]
                if opn and (clean_pn.startswith(opn) or opn.startswith(clean_pn)):
                    candidates.append(o)
        checked += len(partial)
    return candidates


def find_matches_in_memory(
    sample: dict,
    order_lookups: Tuple[dict, dict, dict],
) -> List[dict]:
    """Find candidate orders for a sample, deduplicated.

    Tier 1: exact (cust_id, PN). Tier 2: exact (normalized name, PN).
    Tier 3: fuzzy name containment with PN prefix match. Each tier runs
    only when the previous found nothing.
    """
    by_id, by_name, by_cust_name = order_lookups
    candidates: List[dict] = []
    # 1. Customer-ID match (strongest signal).
    if sample.get("cust_id"):
        cid = normalize_id(sample["cust_id"])
        if cid:
            k = (cid, sample["clean_pn"])
            if k in by_id:
                candidates.extend(by_id[k])
    # 2. Exact normalized name + PN match.
    # BUG FIX: this key was previously computed but never looked up, so the
    # exact-name tier was silently skipped (inconsistent with get_lab_kpi,
    # which does perform this lookup).
    if not candidates:
        k_name = (sample["norm_cust_name"], sample["clean_pn"])
        if k_name in by_name:
            candidates.extend(by_name[k_name])
    # 3. Fuzzy name match (fallback).
    if not candidates:
        candidates = _collect_fuzzy_pn_candidates(
            sample["norm_cust_name"], sample["clean_pn"], by_cust_name
        )
    # Deduplicate by an (order_no, date, qty) signature.
    unique_map: Dict[tuple, dict] = {}
    for c in candidates:
        sig = (c["order_no"], c["date"], c["qty"])
        if sig not in unique_map:
            unique_map[sig] = c
    return list(unique_map.values())


def fetch_no_dit_samples(
    db: Session,
    start_date: Optional[str] = None,
    end_date: Optional[str] = None,
) -> List[NoDitSample]:
    """High-qty (>=1000) samples with no accepted/auto-matched MatchResult."""
    q = db.query(SampleRecord).filter(SampleRecord.qty >= 1000)
    if start_date:
        q = q.filter(SampleRecord.date >= start_date)
    if end_date:
        q = q.filter(SampleRecord.date <= end_date)
    samples = q.all()
    if not samples:
        return []
    s_ids = [s.id for s in samples]
    matched_ids = db.query(MatchResult.target_id).filter(
        MatchResult.target_id.in_(s_ids),
        MatchResult.target_type == TargetType.SAMPLE,
        MatchResult.status.in_([MatchStatus.accepted, MatchStatus.auto_matched]),
    ).all()
    matched_set = {m[0] for m in matched_ids}
    results = []
    for s in samples:
        if s.id in matched_set:
            continue
        d = parse_date_fast(s.date)
        results.append(NoDitSample(
            sample_id=str(s.id),
            # Guard against NULL columns: these pydantic fields require str.
            customer=s.customer or "",
            pn=s.pn or "",
            order_no=s.order_no,
            date=d.strftime("%Y-%m-%d") if d else "",
            qty=s.qty or 0,
        ))
    return sorted(results, key=lambda x: x.qty, reverse=True)


def fetch_high_qty_no_order_samples(
    db: Session,
    start_date: Optional[str] = None,
    end_date: Optional[str] = None,
) -> List[HighQtyNoOrderSample]:
    """High-qty (>=1000) samples with no order on or after the sample date."""
    q = db.query(
        SampleRecord.id, SampleRecord.customer, SampleRecord.pn,
        SampleRecord.date, SampleRecord.qty, SampleRecord.order_no,
        SampleRecord.cust_id,
    ).filter(SampleRecord.qty >= 1000)
    if start_date:
        q = q.filter(SampleRecord.date >= start_date)
    if end_date:
        q = q.filter(SampleRecord.date <= end_date)
    raw_samples = q.all()
    samples = []
    for r in raw_samples:
        d = parse_date_fast(r.date)
        samples.append({
            "id": r.id,
            "customer": r.customer,
            "pn": r.pn,
            "date": d,
            "qty": r.qty or 0,
            "order_no": r.order_no,
            "cust_id": r.cust_id,
            "norm_cust_name": normalize_customer_name(r.customer),
            "clean_pn": normalize_pn_for_matching(r.pn),
            "clean_cust_id": normalize_id(r.cust_id),
        })
    orders = fetch_orders_light(db, start_date=start_date)
    lookups = build_order_lookups(orders)
    results = []
    now = datetime.now()
    for s in samples:
        if not s["date"]:
            continue
        matches = find_matches_in_memory(s, lookups)
        valid = [o for o in matches if o["date"] >= s["date"]]
        if not valid:
            results.append(HighQtyNoOrderSample(
                sample_id=str(s["id"]),
                customer=s["customer"],
                pn=s["pn"],
                order_no=s["order_no"],
                date=s["date"].strftime("%Y-%m-%d"),
                qty=s["qty"],
                days_since_sent=(now - s["date"]).days,
            ))
    return sorted(results, key=lambda x: x.qty, reverse=True)


# --- Routes ---

@router.get("/conversions", response_model=List[ConversionRecord])
def get_conversions(
    start_date: Optional[str] = Query(None),
    end_date: Optional[str] = Query(None),
    db: Session = Depends(get_db),
):
    """Per-sample conversion records: first order date/qty and total qty."""
    if not start_date:
        start_date = "2000-01-01"
    samples = fetch_samples_light(db, start_date, end_date)
    orders = fetch_orders_light(db, start_date=start_date)
    lookups = build_order_lookups(orders)
    conversions = []
    for s in samples:
        s_date = s["date"]
        if not s_date:
            continue
        matches = find_matches_in_memory(s, lookups)
        valid_orders = [o for o in matches if o["date"] >= s_date]
        if not valid_orders:
            continue
        valid_orders.sort(key=lambda x: x["date"])
        first_order = valid_orders[0]
        first_date = first_order["date"]
        # All orders placed on the first conversion day count as first-order qty.
        first_date_qty = sum(o["qty"] for o in valid_orders if o["date"] == first_date)
        total_qty = sum(o["qty"] for o in valid_orders)
        conversions.append(ConversionRecord(
            customer=s["customer"],
            pn=s["pn"],
            sample_date=s_date.strftime("%Y-%m-%d"),
            sample_qty=s["qty"],
            order_date=first_date.strftime("%Y-%m-%d"),
            order_qty=first_date_qty,
            total_order_qty=total_qty,
            days_to_convert=(first_date - s_date).days,
        ))
    return sorted(conversions, key=lambda x: x.sample_date, reverse=True)


@router.get("/kpi", response_model=LabKPI)
def get_lab_kpi(
    start_date: Optional[str] = Query(None),
    end_date: Optional[str] = Query(None),
    db: Session = Depends(get_db),
):
    """Aggregate lab KPIs over unique (customer, PN) sample groups."""
    if not start_date:
        start_date = "2000-01-01"
    samples = fetch_samples_light(db, start_date, end_date)
    orders = fetch_orders_light(db, start_date=start_date)
    lookups = build_order_lookups(orders)

    # Group samples by (normalized customer, clean PN).
    unique_groups: Dict[tuple, dict] = {}
    high_qty_samples = []
    for s in samples:
        key = (s["norm_cust_name"], s["clean_pn"])
        if key not in unique_groups:
            unique_groups[key] = {"dates": [], "cust_ids": set()}
        if s["date"]:
            unique_groups[key]["dates"].append(s["date"])
        if s["cust_id"]:
            unique_groups[key]["cust_ids"].add(normalize_id(s["cust_id"]))
        if s["qty"] >= 1000:
            high_qty_samples.append(s)

    converted_count = 0
    orphan_count = 0
    velocities = []
    now = datetime.now()
    threshold90 = now - timedelta(days=90)

    # "No DIT": high-qty samples without an accepted/auto match record.
    high_qty_ids = [s["id"] for s in high_qty_samples]
    no_dit_count = 0
    if high_qty_ids:
        matched_ids = db.query(MatchResult.target_id).filter(
            MatchResult.target_id.in_(high_qty_ids),
            MatchResult.target_type == TargetType.SAMPLE,
            MatchResult.status.in_([MatchStatus.accepted, MatchStatus.auto_matched]),
        ).all()
        matched_set = {m[0] for m in matched_ids}
        no_dit_count = sum(1 for sid in high_qty_ids if sid not in matched_set)

    by_id, by_name, by_cust_name_only = lookups
    for key, data in unique_groups.items():
        earliest_sample = min(data["dates"]) if data["dates"] else None
        is_converted = False
        min_order_date = None
        candidates: List[dict] = []
        # Tier 1: any of the group's customer IDs + PN.
        for cid in data["cust_ids"]:
            k = (cid, key[1])
            if k in by_id:
                candidates.extend(by_id[k])
        # Tier 2: exact normalized name + PN.
        if not candidates and key in by_name:
            candidates.extend(by_name[key])
        # Tier 3: fuzzy name containment.
        if not candidates:
            candidates = _collect_fuzzy_pn_candidates(key[0], key[1], by_cust_name_only)
        if candidates and earliest_sample:
            valid_dates = [o["date"] for o in candidates if o["date"] >= earliest_sample]
            if valid_dates:
                is_converted = True
                min_order_date = min(valid_dates)
        if is_converted:
            converted_count += 1
            if earliest_sample and min_order_date:
                diff = (min_order_date - earliest_sample).days
                if diff >= 0:
                    velocities.append(diff)
        elif earliest_sample and earliest_sample < threshold90:
            orphan_count += 1

    high_qty_no_order_count = 0
    for s in high_qty_samples:
        s_date = s["date"]
        if not s_date:
            continue
        matches = find_matches_in_memory(s, lookups)
        if not any(o["date"] >= s_date for o in matches):
            high_qty_no_order_count += 1

    avg_v = sum(velocities) / len(velocities) if velocities else 0
    c_rate = (converted_count / len(unique_groups) * 100) if unique_groups else 0
    return LabKPI(
        converted_count=converted_count,
        avg_velocity=round(avg_v, 1),
        conversion_rate=round(c_rate, 1),
        orphan_count=orphan_count,
        no_dit_count=no_dit_count,
        high_qty_no_order_count=high_qty_no_order_count,
    )


@router.get("/scatter", response_model=List[ScatterPoint])
def get_scatter(
    start_date: Optional[str] = Query(None),
    end_date: Optional[str] = Query(None),
    db: Session = Depends(get_db),
):
    """Scatter points of total sample qty vs matched order qty per group."""
    samples = fetch_samples_light(db, start_date, end_date)
    orders = fetch_orders_light(db, start_date=start_date)
    lookups = build_order_lookups(orders)
    unique_groups: Dict[tuple, dict] = {}
    for s in samples:
        key = (s["norm_cust_name"], s["clean_pn"])
        if key not in unique_groups:
            unique_groups[key] = {
                "display_cust": s["customer"],
                "display_pn": s["pn"],
                "sample_qty": 0,
                "order_qty": 0,
                "min_date": s["date"],
            }
        unique_groups[key]["sample_qty"] += s["qty"]
        if s["date"] and (
            not unique_groups[key]["min_date"]
            or s["date"] < unique_groups[key]["min_date"]
        ):
            unique_groups[key]["min_date"] = s["date"]

    by_id, by_name, by_cust_name_only = lookups
    for key, data in unique_groups.items():
        min_date = data["min_date"]
        if key in by_name:
            candidates = list(by_name[key])
        else:
            candidates = _collect_fuzzy_pn_candidates(key[0], key[1], by_cust_name_only)
        seen_orders = set()
        matched_qty = 0
        for o in candidates:
            sig = (o["order_no"], o["qty"], o["date"])
            if sig in seen_orders:
                continue
            seen_orders.add(sig)
            # Only count orders placed on or after the earliest sample.
            if min_date and o["date"] < min_date:
                continue
            matched_qty += o["qty"]
        data["order_qty"] = matched_qty

    return [
        ScatterPoint(
            customer=v["display_cust"],
            pn=v["display_pn"],
            sample_qty=v["sample_qty"],
            order_qty=v["order_qty"],
        )
        for v in unique_groups.values()
    ]


@router.get("/orphans", response_model=List[OrphanSample])
def get_orphans(
    start_date: Optional[str] = Query(None),
    end_date: Optional[str] = Query(None),
    db: Session = Depends(get_db),
):
    """Samples older than 90 days with no matching subsequent order."""
    samples = fetch_samples_light(db, start_date, end_date)
    orders = fetch_orders_light(db, start_date=start_date)
    lookups = build_order_lookups(orders)
    threshold_date = datetime.now() - timedelta(days=90)
    orphan_groups: Dict[tuple, dict] = {}
    for s in samples:
        s_date = s["date"]
        # Only undated-excluded samples older than the 90-day threshold qualify.
        if not s_date or s_date >= threshold_date:
            continue
        matches = find_matches_in_memory(s, lookups)
        valid = [o for o in matches if o["date"] >= s_date]
        if valid:
            continue
        key = (s["norm_cust_name"], s["clean_pn"], s["order_no"] or "", s_date)
        if key not in orphan_groups:
            orphan_groups[key] = {
                "customer": s["customer"],
                "pn": s["pn"],
                "order_no": s["order_no"],
                "date": s_date.strftime("%Y-%m-%d"),
                "sample_qty": 0,
                "days_since_sent": (datetime.now() - s_date).days,
            }
        orphan_groups[key]["sample_qty"] += s["qty"]
    return sorted(
        [OrphanSample(**v) for v in orphan_groups.values()],
        key=lambda x: x.days_since_sent,
        reverse=True,
    )


@router.get("/no_dit_samples", response_model=List[NoDitSample])
def get_no_dit_samples(
    start_date: Optional[str] = Query(None),
    end_date: Optional[str] = Query(None),
    db: Session = Depends(get_db),
):
    """High-qty samples with no accepted match record."""
    return fetch_no_dit_samples(db, start_date, end_date)


@router.get("/high_qty_no_order_samples", response_model=List[HighQtyNoOrderSample])
def get_high_qty_no_order(
    start_date: Optional[str] = Query(None),
    end_date: Optional[str] = Query(None),
    db: Session = Depends(get_db),
):
    """High-qty samples with no order placed on or after the sample date."""
    return fetch_high_qty_no_order_samples(db, start_date, end_date)