Files
SalesPipeline/backend/app/routers/lab.py
2026-01-27 19:08:46 +08:00

636 lines
21 KiB
Python

from typing import List, Optional, Tuple, Dict, Set
from datetime import datetime, timedelta
from fastapi import APIRouter, Depends, Query
from sqlalchemy.orm import Session
from sqlalchemy import func
from pydantic import BaseModel
from app.models import get_db
from app.models.sample import SampleRecord
from app.models.order import OrderRecord
from app.models.match import MatchResult, MatchStatus, TargetType
from app.services.fuzzy_matcher import normalize_pn_for_matching, normalize_customer_name
# All endpoints in this module are mounted under the /lab prefix.
router = APIRouter(prefix="/lab", tags=["Lab"])
# --- Pydantic Models ---
class LabKPI(BaseModel):
    """Headline KPI figures returned by GET /lab/kpi."""
    converted_count: int          # (customer, PN) sample groups with a matching later order
    avg_velocity: float           # mean days from earliest sample to first qualifying order
    conversion_rate: float        # converted groups / all groups, as a percentage
    orphan_count: int             # unconverted groups whose earliest sample is >90 days old
    no_dit_count: int             # high-qty samples lacking an accepted/auto-matched MatchResult
    high_qty_no_order_count: int  # high-qty samples with no in-memory order match after sampling
class ConversionRecord(BaseModel):
    """One sample-to-order conversion row (GET /lab/conversions)."""
    customer: str
    pn: str
    sample_date: str      # "YYYY-MM-DD"
    sample_qty: int
    order_date: str       # date of the first matched order on/after the sample date
    order_qty: int        # qty summed over matched orders sharing that first date
    total_order_qty: int  # qty summed over all matched orders on/after the sample date
    days_to_convert: int  # order_date - sample_date, in days
class ScatterPoint(BaseModel):
    """Per-(customer, PN) group point for GET /lab/scatter: sample qty vs matched order qty."""
    customer: str
    pn: str
    sample_qty: int
    order_qty: int
class OrphanSample(BaseModel):
    """A sample sent more than 90 days ago with no order on/after its date (GET /lab/orphans)."""
    customer: str
    pn: str
    days_since_sent: int
    order_no: Optional[str] = None
    date: Optional[str] = None  # "YYYY-MM-DD" sample date
    sample_qty: int = 0         # aggregated over rows sharing customer/PN/order_no/date
class NoDitSample(BaseModel):
    """High-quantity sample (qty >= 1000) with no accepted/auto-matched MatchResult."""
    sample_id: str
    customer: str
    pn: str
    order_no: Optional[str]
    date: Optional[str]  # "YYYY-MM-DD", or "" when the raw date is unparsable
    qty: int
class HighQtyNoOrderSample(BaseModel):
    """High-quantity sample (qty >= 1000) with no matching order on/after its date."""
    sample_id: str
    customer: str
    pn: str
    order_no: Optional[str]
    date: Optional[str]
    qty: int
    days_since_sent: int  # age of the sample relative to "now"
# --- Helper Functions ---
def parse_date_fast(date_val) -> Optional[datetime]:
    """Best-effort parse of a raw date value into a naive datetime.

    Accepts datetime instances (returned unchanged) and strings in ISO
    timestamp form ("...T...", optional "Z" suffix / fractional seconds)
    or the date formats %Y-%m-%d, %Y/%m/%d, %Y.%m.%d, %d-%m-%Y, %Y%m%d.
    Returns None for empty or unparsable values.
    """
    if not date_val:
        return None
    if isinstance(date_val, datetime):
        return date_val
    if isinstance(date_val, str):
        s = date_val.strip()
        # Strip fractional seconds only from timestamp-like strings.
        # BUG FIX: the previous unconditional split('.') truncated dotted
        # dates such as "2024.01.05" to "2024", which made the %Y.%m.%d
        # branch below unreachable.
        if "T" in s or ":" in s:
            s = s.split('.')[0]
        if "T" in s:
            try:
                return datetime.fromisoformat(s.replace("Z", "+00:00"))
            except ValueError:
                pass
        # Common case: leading "YYYY-MM-DD" (ignores any trailing text).
        try:
            return datetime.strptime(s[:10], "%Y-%m-%d")
        except ValueError:
            pass
        for fmt in ["%Y/%m/%d", "%Y.%m.%d", "%d-%m-%Y", "%Y%m%d"]:
            try:
                return datetime.strptime(s, fmt)
            except ValueError:
                continue
    return None
def normalize_id(val: object) -> str:
    """Normalize a raw customer/order ID into a canonical comparison key.

    Handles common spreadsheet-import artifacts: a leading apostrophe
    (Excel text marker) and a trailing ".0" (numeric cell rendered as a
    float). Returns "" for None, otherwise the cleaned upper-cased string.

    Fix: the parameter was annotated with the builtin ``any`` (a function,
    not a type); ``object`` expresses "any value" correctly.
    """
    if val is None:
        return ""
    # str() first so non-string IDs (e.g. the float 123.0) are handled uniformly.
    s = str(val).strip()
    s = s.lstrip("'")
    if s.endswith(".0"):
        s = s[:-2]
    return s.upper()
def fetch_orders_light(db: Session, start_date: Optional[str] = None) -> List[dict]:
    """Load order rows as plain dicts enriched with pre-normalized match keys.

    Only the columns needed for matching are selected. Customer name, PN
    and customer ID normalization is memoized per raw value so repeated
    values are normalized only once.
    """
    q = db.query(
        OrderRecord.customer,
        OrderRecord.pn,
        OrderRecord.date,
        OrderRecord.created_at,
        OrderRecord.qty,
        OrderRecord.order_no,
        OrderRecord.cust_id
    )
    if start_date:
        q = q.filter(OrderRecord.date >= start_date)
    rows = q.all()
    # Memo caches: raw value -> normalized form.
    memo_cust = {}
    memo_pn = {}
    memo_id = {}
    processed = []
    for r in rows:
        # Fall back to created_at (made naive) when the business date is
        # unparsable; datetime.max keeps fully undated rows sortable but
        # effectively "never matches on/after" comparisons meaningfully.
        d = parse_date_fast(r.date) or (r.created_at.replace(tzinfo=None) if r.created_at else datetime.max)
        c_raw = r.customer or ""
        pn_raw = r.pn or ""
        cust_id_raw = r.cust_id
        if c_raw not in memo_cust: memo_cust[c_raw] = normalize_customer_name(c_raw)
        if pn_raw not in memo_pn: memo_pn[pn_raw] = normalize_pn_for_matching(pn_raw)
        if cust_id_raw not in memo_id: memo_id[cust_id_raw] = normalize_id(cust_id_raw)
        processed.append({
            "customer": c_raw,
            "pn": pn_raw,
            "date": d,
            "qty": r.qty or 0,
            "order_no": r.order_no,
            "cust_id": cust_id_raw,
            "norm_cust_name": memo_cust[c_raw],
            "clean_pn": memo_pn[pn_raw],
            "clean_cust_id": memo_id[cust_id_raw]
        })
    return processed
def fetch_samples_light(db: Session, start_date: Optional[str] = None, end_date: Optional[str] = None) -> List[dict]:
    """Load sample rows as plain dicts enriched with pre-normalized match keys.

    Unlike fetch_orders_light there is no created_at fallback: a sample
    whose date cannot be parsed gets date=None (callers skip such rows).
    """
    q = db.query(
        SampleRecord.id,
        SampleRecord.customer,
        SampleRecord.pn,
        SampleRecord.date,
        SampleRecord.qty,
        SampleRecord.order_no,
        SampleRecord.cust_id
    )
    if start_date:
        q = q.filter(SampleRecord.date >= start_date)
    if end_date:
        q = q.filter(SampleRecord.date <= end_date)
    rows = q.all()
    # Memo caches: raw value -> normalized form.
    memo_cust = {}
    memo_pn = {}
    processed = []
    for r in rows:
        d = parse_date_fast(r.date)
        c_raw = r.customer or ""
        pn_raw = r.pn or ""
        if c_raw not in memo_cust: memo_cust[c_raw] = normalize_customer_name(c_raw)
        if pn_raw not in memo_pn: memo_pn[pn_raw] = normalize_pn_for_matching(pn_raw)
        processed.append({
            "id": r.id,
            "customer": c_raw,
            "pn": pn_raw,
            "date": d,
            "qty": r.qty or 0,
            "order_no": r.order_no,
            "cust_id": r.cust_id,
            "norm_cust_name": memo_cust[c_raw],
            "clean_pn": memo_pn[pn_raw],
        })
    return processed
def build_order_lookups(orders: List[dict]) -> Tuple[dict, dict, dict]:
    """Index order dicts for O(1) candidate lookup during sample matching.

    Returns three indexes:
      by_id:             (clean_cust_id, clean_pn) -> [orders], rows with a non-empty ID only
      by_name:           (norm_cust_name, clean_pn) -> [orders]
      by_cust_name_only: norm_cust_name -> [orders], used by the fuzzy PN fallback

    Idiom cleanup: the manual "if key not in d: d[key] = []" dance is
    replaced with dict.setdefault.
    """
    by_id: Dict[tuple, list] = {}
    by_name: Dict[tuple, list] = {}
    by_cust_name_only: Dict[str, list] = {}
    for o in orders:
        if o["clean_cust_id"]:
            by_id.setdefault((o["clean_cust_id"], o["clean_pn"]), []).append(o)
        by_name.setdefault((o["norm_cust_name"], o["clean_pn"]), []).append(o)
        by_cust_name_only.setdefault(o["norm_cust_name"], []).append(o)
    return by_id, by_name, by_cust_name_only
def find_matches_in_memory(sample: dict, order_lookups: Tuple[dict, dict, dict]) -> List[dict]:
    """Find candidate orders for one sample using three matching tiers.

    Tier 1: exact (normalized customer ID, PN).
    Tier 2: exact (normalized customer name, PN).
    Tier 3: fuzzy fallback — customer-name containment either way
            ("Corp" vs "Corp Inc") combined with PN prefix containment.
    Each tier runs only if the previous one produced no candidates.
    Results are de-duplicated on (order_no, date, qty).

    Bug fix: the original computed the tier-2 key but never consulted
    by_name, so exact-name matches were only reachable via the fuzzy
    scan; the lookup now mirrors the logic used in get_lab_kpi.
    """
    (by_id, by_name, by_cust_name) = order_lookups
    candidates = []
    # Tier 1: customer ID + PN (strongest signal).
    if sample.get("cust_id"):
        cid = normalize_id(sample["cust_id"])
        if cid:
            k = (cid, sample["clean_pn"])
            if k in by_id:
                candidates.extend(by_id[k])
    # Tier 2: exact normalized name + PN.
    if not candidates:
        k_name = (sample["norm_cust_name"], sample["clean_pn"])
        if k_name in by_name:
            candidates.extend(by_name[k_name])
    # Tier 3: fuzzy name match (fallback). Scanning keys for mutual
    # containment handles Sample="Corp Inc" vs Order="Corp" and vice versa.
    if not candidates:
        s_name = sample["norm_cust_name"]
        target_keys = []
        if s_name in by_cust_name:
            target_keys.append(s_name)
        # Always scan so "ABC" is found even when "ABC Inc" has no orders.
        for k in by_cust_name.keys():
            if len(k) < 2: continue
            if k == s_name: continue
            if k in s_name or s_name in k:
                target_keys.append(k)
        checked_count = 0
        for k in target_keys:
            partial_candidates = by_cust_name[k]
            # Safety valve: stop once too many rows have been examined.
            if checked_count > 5000:
                break
            spn = sample["clean_pn"]
            if spn:
                for o in partial_candidates:
                    opn = o["clean_pn"]
                    if opn and (spn.startswith(opn) or opn.startswith(spn)):
                        candidates.append(o)
            checked_count += len(partial_candidates)
    # De-duplicate while preserving first-seen order.
    unique_map = {}
    for c in candidates:
        sig = (c["order_no"], c["date"], c["qty"])
        if sig not in unique_map:
            unique_map[sig] = c
    return list(unique_map.values())
def fetch_no_dit_samples(db: Session, start_date: Optional[str] = None, end_date: Optional[str] = None) -> List[NoDitSample]:
    """High-qty samples (qty >= 1000) lacking an accepted/auto-matched MatchResult.

    Returns rows sorted by quantity, largest first.
    NOTE(review): customer/pn are passed through unchanged; the response
    model declares them as str, so a NULL column would fail validation —
    confirm the schema guarantees non-null values here.
    """
    q = db.query(SampleRecord).filter(SampleRecord.qty >= 1000)
    if start_date: q = q.filter(SampleRecord.date >= start_date)
    if end_date: q = q.filter(SampleRecord.date <= end_date)
    samples = q.all()
    if not samples: return []
    s_ids = [s.id for s in samples]
    # IDs of samples that already have a confirmed (accepted or auto) match.
    matched_ids = db.query(MatchResult.target_id).filter(
        MatchResult.target_id.in_(s_ids),
        MatchResult.target_type == TargetType.SAMPLE,
        MatchResult.status.in_([MatchStatus.accepted, MatchStatus.auto_matched])
    ).all()
    matched_set = set(m[0] for m in matched_ids)
    results = []
    for s in samples:
        if s.id not in matched_set:
            d = parse_date_fast(s.date)
            results.append(NoDitSample(
                sample_id=str(s.id),
                customer=s.customer,
                pn=s.pn,
                order_no=s.order_no,
                date=d.strftime("%Y-%m-%d") if d else "",
                qty=s.qty or 0
            ))
    return sorted(results, key=lambda x: x.qty, reverse=True)
def fetch_high_qty_no_order_samples(db: Session, start_date: Optional[str] = None, end_date: Optional[str] = None) -> List[HighQtyNoOrderSample]:
    """High-qty samples (qty >= 1000) with no matching order on/after the sample date.

    Matching uses the in-memory tiered matcher (find_matches_in_memory);
    samples whose date cannot be parsed are skipped. Sorted by quantity,
    descending. Note: normalization here is not memoized, unlike
    fetch_samples_light (the qty filter keeps the row count small).
    """
    q = db.query(
        SampleRecord.id, SampleRecord.customer, SampleRecord.pn,
        SampleRecord.date, SampleRecord.qty, SampleRecord.order_no, SampleRecord.cust_id
    ).filter(SampleRecord.qty >= 1000)
    if start_date: q = q.filter(SampleRecord.date >= start_date)
    if end_date: q = q.filter(SampleRecord.date <= end_date)
    raw_samples = q.all()
    samples = []
    for r in raw_samples:
        d = parse_date_fast(r.date)
        samples.append({
            "id": r.id, "customer": r.customer, "pn": r.pn, "date": d,
            "qty": r.qty or 0, "order_no": r.order_no, "cust_id": r.cust_id,
            "norm_cust_name": normalize_customer_name(r.customer),
            "clean_pn": normalize_pn_for_matching(r.pn),
            "clean_cust_id": normalize_id(r.cust_id)
        })
    orders = fetch_orders_light(db, start_date=start_date)
    lookups = build_order_lookups(orders)
    results = []
    now = datetime.now()
    for s in samples:
        if not s["date"]: continue  # undated samples cannot be aged or compared
        matches = find_matches_in_memory(s, lookups)
        # Only orders placed on/after the sample date count.
        valid = [o for o in matches if o["date"] >= s["date"]]
        if not valid:
            results.append(HighQtyNoOrderSample(
                sample_id=str(s["id"]),
                customer=s["customer"],
                pn=s["pn"],
                order_no=s["order_no"],
                date=s["date"].strftime("%Y-%m-%d"),
                qty=s["qty"],
                days_since_sent=(now - s["date"]).days
            ))
    return sorted(results, key=lambda x: x.qty, reverse=True)
# --- Routes ---
@router.get("/conversions", response_model=List[ConversionRecord])
def get_conversions(
    start_date: Optional[str] = Query(None),
    end_date: Optional[str] = Query(None),
    db: Session = Depends(get_db)
):
    """List converted samples: for each sample, its first order on/after the sample date.

    order_qty sums all matched orders sharing that first date;
    total_order_qty sums every matched order on/after the sample date.
    Result is sorted by sample date, newest first.
    """
    if not start_date:
        start_date = "2000-01-01"  # effectively "no lower bound"
    samples = fetch_samples_light(db, start_date, end_date)
    orders = fetch_orders_light(db, start_date=start_date)
    lookups = build_order_lookups(orders)
    conversions = []
    for s in samples:
        s_date = s["date"]
        if not s_date: continue  # unparsable sample date — cannot compute velocity
        matches = find_matches_in_memory(s, lookups)
        # Only orders placed on/after the sample date count as conversions.
        valid_orders = [o for o in matches if o["date"] >= s_date]
        if valid_orders:
            valid_orders.sort(key=lambda x: x["date"])
            first_order = valid_orders[0]
            first_date = first_order["date"]
            first_date_qty = sum(o["qty"] for o in valid_orders if o["date"] == first_date)
            total_qty = sum(o["qty"] for o in valid_orders)
            conversions.append(ConversionRecord(
                customer=s["customer"],
                pn=s["pn"],
                sample_date=s_date.strftime("%Y-%m-%d"),
                sample_qty=s["qty"],
                order_date=first_date.strftime("%Y-%m-%d"),
                order_qty=first_date_qty,
                total_order_qty=total_qty,
                days_to_convert=(first_date - s_date).days
            ))
    return sorted(conversions, key=lambda x: x.sample_date, reverse=True)
@router.get("/kpi", response_model=LabKPI)
def get_lab_kpi(
    start_date: Optional[str] = Query(None),
    end_date: Optional[str] = Query(None),
    db: Session = Depends(get_db)
):
    """Compute the lab dashboard KPIs over (customer, PN) sample groups.

    A group is "converted" when any matched order falls on/after its
    earliest sample date, and "orphaned" when unconverted with an
    earliest sample older than 90 days. Velocity is the day count from
    earliest sample to the earliest qualifying order.
    """
    if not start_date:
        start_date = "2000-01-01"  # effectively "no lower bound"
    samples = fetch_samples_light(db, start_date, end_date)
    orders = fetch_orders_light(db, start_date=start_date)
    lookups = build_order_lookups(orders)
    # Group samples by normalized (customer, PN); collect dates and customer
    # IDs per group, and flag high-quantity samples (qty >= 1000) as we go.
    unique_groups = {}
    high_qty_samples = []
    for s in samples:
        key = (s["norm_cust_name"], s["clean_pn"])
        if key not in unique_groups:
            unique_groups[key] = {
                "dates": [],
                "cust_ids": set(),
                "raw_pns": set(),
                "samples": []
            }
        if s["date"]: unique_groups[key]["dates"].append(s["date"])
        if s["cust_id"]: unique_groups[key]["cust_ids"].add(normalize_id(s["cust_id"]))
        unique_groups[key]["raw_pns"].add(s["clean_pn"])
        unique_groups[key]["samples"].append(s)
        if s["qty"] >= 1000:
            high_qty_samples.append(s)
    converted_count = 0
    orphan_count = 0
    velocities = []
    now = datetime.now()
    threshold90 = now - timedelta(days=90)
    # "No DIT": high-qty samples with no accepted/auto-matched MatchResult row.
    high_qty_ids = [s["id"] for s in high_qty_samples]
    no_dit_count = 0
    if high_qty_ids:
        matched_ids = db.query(MatchResult.target_id).filter(
            MatchResult.target_id.in_(high_qty_ids),
            MatchResult.target_type == TargetType.SAMPLE,
            MatchResult.status.in_([MatchStatus.accepted, MatchStatus.auto_matched])
        ).all()
        matched_set = set(m[0] for m in matched_ids)
        no_dit_count = sum(1 for sid in high_qty_ids if sid not in matched_set)
    for key, data in unique_groups.items():
        earliest_sample = min(data["dates"]) if data["dates"] else None
        is_converted = False
        min_order_date = None
        candidates = []
        # NOTE(review): this unpack is loop-invariant and could be hoisted.
        (by_id, by_name, by_cust_name_only) = lookups
        # Tier 1: any of the group's customer IDs + the group's PN.
        for cid in data["cust_ids"]:
            k = (cid, key[1])
            if k in by_id: candidates.extend(by_id[k])
        # Tier 2: exact normalized name + PN.
        if not candidates and key in by_name:
            candidates.extend(by_name[key])
        if not candidates:
            # Fuzzy Name Match (Containment)
            found_keys = []
            if key[0] in by_cust_name_only:
                found_keys.append(key[0])
            for k in by_cust_name_only.keys():
                if len(k) < 2: continue
                if k == key[0]: continue
                if k in key[0] or key[0] in k:
                    found_keys.append(k)
            checked = 0
            for k in found_keys:
                partial = by_cust_name_only[k]
                if checked > 5000: break  # safety valve on scan size
                spn = key[1]
                for o in partial:
                    opn = o["clean_pn"]
                    if opn and (spn.startswith(opn) or opn.startswith(spn)):
                        candidates.append(o)
                checked += len(partial)
        # Converted = at least one matched order on/after the earliest sample.
        if candidates and earliest_sample:
            valid_dates = [o["date"] for o in candidates if o["date"] >= earliest_sample]
            if valid_dates:
                is_converted = True
                min_order_date = min(valid_dates)
        if is_converted:
            converted_count += 1
            if earliest_sample and min_order_date:
                diff = (min_order_date - earliest_sample).days
                if diff >= 0: velocities.append(diff)
        else:
            # Orphan = unconverted and sampled more than 90 days ago.
            if earliest_sample and earliest_sample < threshold90:
                orphan_count += 1
    # High-qty samples with no matching order on/after their own sample date.
    high_qty_no_order_count = 0
    for s in high_qty_samples:
        s_date = s["date"]
        if not s_date: continue
        matches = find_matches_in_memory(s, lookups)
        valid = [o for o in matches if o["date"] >= s_date]
        if not valid:
            high_qty_no_order_count += 1
    avg_v = sum(velocities) / len(velocities) if velocities else 0
    c_rate = (converted_count / len(unique_groups) * 100) if unique_groups else 0
    return LabKPI(
        converted_count=converted_count,
        avg_velocity=round(avg_v, 1),
        conversion_rate=round(c_rate, 1),
        orphan_count=orphan_count,
        no_dit_count=no_dit_count,
        high_qty_no_order_count=high_qty_no_order_count
    )
@router.get("/scatter", response_model=List[ScatterPoint])
def get_scatter(
    start_date: Optional[str] = Query(None),
    end_date: Optional[str] = Query(None),
    db: Session = Depends(get_db)
):
    """Per-(customer, PN) group totals: sample qty vs matched order qty.

    Orders are matched by exact (name, PN) first, otherwise by the fuzzy
    name-containment fallback; only orders on/after the group's earliest
    sample date contribute to order_qty.
    """
    samples = fetch_samples_light(db, start_date, end_date)
    orders = fetch_orders_light(db, start_date=start_date)
    lookups = build_order_lookups(orders)
    # Aggregate samples into (norm_cust_name, clean_pn) groups, remembering
    # the first-seen display strings and the earliest sample date.
    unique_groups = {}
    for s in samples:
        key = (s["norm_cust_name"], s["clean_pn"])
        if key not in unique_groups:
            unique_groups[key] = {
                "display_cust": s["customer"],
                "display_pn": s["pn"],
                "sample_qty": 0,
                "order_qty": 0,
                "min_date": s["date"]
            }
        unique_groups[key]["sample_qty"] += s["qty"]
        if s["date"] and (not unique_groups[key]["min_date"] or s["date"] < unique_groups[key]["min_date"]):
            unique_groups[key]["min_date"] = s["date"]
    for key, data in unique_groups.items():
        min_date = data["min_date"]
        candidates = []
        (by_id, by_name, by_cust_name_only) = lookups
        if key in by_name:
            candidates.extend(by_name[key])
        # NOTE(review): candidates is always empty here, so this elif is
        # effectively a plain else — the fuzzy scan runs whenever the
        # exact (name, PN) lookup missed.
        elif not candidates:
            # Fuzzy Name Match
            found_keys = []
            if key[0] in by_cust_name_only:
                found_keys.append(key[0])
            for k in by_cust_name_only.keys():
                if len(k) < 2: continue
                if k == key[0]: continue
                if k in key[0] or key[0] in k:
                    found_keys.append(k)
            checked = 0
            for k in found_keys:
                partial = by_cust_name_only[k]
                if checked > 5000: break  # safety valve on scan size
                spn = key[1]
                for o in partial:
                    opn = o["clean_pn"]
                    if opn and (spn.startswith(opn) or opn.startswith(spn)):
                        candidates.append(o)
                checked += len(partial)
        # Sum qty over de-duplicated orders placed on/after the earliest sample.
        seen_orders = set()
        matched_qty = 0
        for o in candidates:
            sig = (o["order_no"], o["qty"], o["date"])
            if sig in seen_orders: continue
            seen_orders.add(sig)
            if min_date and o["date"] < min_date: continue
            matched_qty += o["qty"]
        data["order_qty"] = matched_qty
    return [
        ScatterPoint(
            customer=v["display_cust"],
            pn=v["display_pn"],
            sample_qty=v["sample_qty"],
            order_qty=v["order_qty"]
        ) for v in unique_groups.values()
    ]
@router.get("/orphans", response_model=List[OrphanSample])
def get_orphans(
    start_date: Optional[str] = Query(None),
    end_date: Optional[str] = Query(None),
    db: Session = Depends(get_db)
):
    """Samples sent more than 90 days ago with no matching order on/after their date.

    Rows sharing (normalized customer, PN, order_no, date) are aggregated
    into one entry with summed sample_qty. Sorted by age, oldest first.
    """
    samples = fetch_samples_light(db, start_date, end_date)
    orders = fetch_orders_light(db, start_date=start_date)
    lookups = build_order_lookups(orders)
    threshold_date = datetime.now() - timedelta(days=90)
    orphan_groups = {}
    for s in samples:
        s_date = s["date"]
        if not s_date or s_date >= threshold_date: continue  # undated or too recent
        matches = find_matches_in_memory(s, lookups)
        valid = [o for o in matches if o["date"] >= s_date]
        if not valid:
            # Aggregate rows that describe the same shipment.
            key = (s["norm_cust_name"], s["clean_pn"], s["order_no"] or "", s_date)
            if key not in orphan_groups:
                orphan_groups[key] = {
                    "customer": s["customer"],
                    "pn": s["pn"],
                    "order_no": s["order_no"],
                    "date": s_date.strftime("%Y-%m-%d"),
                    "sample_qty": 0,
                    "days_since_sent": (datetime.now() - s_date).days
                }
            orphan_groups[key]["sample_qty"] += s["qty"]
    return sorted(
        [OrphanSample(**v) for v in orphan_groups.values()],
        key=lambda x: x.days_since_sent,
        reverse=True
    )
@router.get("/no_dit_samples", response_model=List[NoDitSample])
def get_no_dit_samples(
    start_date: Optional[str] = Query(None),
    end_date: Optional[str] = Query(None),
    db: Session = Depends(get_db)
):
    """High-qty samples without an accepted/auto-matched MatchResult (delegates to fetch_no_dit_samples)."""
    return fetch_no_dit_samples(db, start_date, end_date)
@router.get("/high_qty_no_order_samples", response_model=List[HighQtyNoOrderSample])
def get_high_qty_no_order(
    start_date: Optional[str] = Query(None),
    end_date: Optional[str] = Query(None),
    db: Session = Depends(get_db)
):
    """High-qty samples with no subsequent order (delegates to fetch_high_qty_no_order_samples)."""
    return fetch_high_qty_no_order_samples(db, start_date, end_date)