"""Lab analytics endpoints: sample-to-order conversion tracking.

Matches sample shipment records against order records to compute a
conversion list, KPI aggregates, scatter-plot data, and "orphan"
samples (samples sent long ago that never produced an order).

Matching strategy, used consistently by every endpoint:
  1. primary key  : (customer ERP id, normalized part number)
  2. fallback key : (normalized customer name, normalized part number)
"""
from typing import List, Optional
from datetime import datetime, timedelta

from fastapi import APIRouter, Depends, Query
from sqlalchemy.orm import Session
from sqlalchemy import func, and_
from pydantic import BaseModel

from app.models import get_db
from app.models.sample import SampleRecord
from app.models.order import OrderRecord
from app.services.fuzzy_matcher import normalize_pn_for_matching, normalize_customer_name

router = APIRouter(prefix="/lab", tags=["Lab"])


class LabKPI(BaseModel):
    converted_count: int    # number of sample groups that converted to orders
    avg_velocity: float     # average days from first sample to first order
    conversion_rate: float  # converted groups / total groups (%)
    orphan_count: int       # sample groups with no order after 90+ days


class ConversionRecord(BaseModel):
    customer: str
    pn: str
    sample_date: str
    sample_qty: int
    order_date: str
    order_qty: int
    days_to_convert: int


class ScatterPoint(BaseModel):
    customer: str
    pn: str
    sample_qty: int
    order_qty: int


class OrphanSample(BaseModel):
    customer: str
    pn: str
    days_since_sent: int
    order_no: str
    date: str


def parse_date(date_str: str) -> Optional[datetime]:
    """Best-effort parse of a date string; returns None when unparseable.

    Accepts YYYYMMDD, YYYY-MM-DD, YYYY/MM/DD and DD-Mon-YY.  Any time
    component after the first space is ignored.
    """
    if not date_str:
        return None
    val = str(date_str).strip()
    # Compact ERP-style dates: 8 digits, no separators.
    if len(val) == 8 and val.isdigit():
        try:
            return datetime.strptime(val, "%Y%m%d")
        except ValueError:
            pass
    # Only the portion before the first space is parsed (time-of-day dropped).
    date_part = str(date_str).split(' ')[0]
    for fmt in ("%Y-%m-%d", "%Y/%m/%d", "%d-%b-%y"):
        try:
            return datetime.strptime(date_part, fmt)
        except ValueError:
            continue
    return None


def _clean_keys(record):
    """Normalize a sample/order row into its matching keys.

    Returns (normalized_pn, normalized_customer_name, cleaned_cust_id),
    where cleaned_cust_id is "" when the row has no ERP customer id.
    """
    clean_pn = normalize_pn_for_matching(record.pn)
    norm_cust_name = normalize_customer_name(record.customer)
    clean_cust_id = record.cust_id.strip().upper() if record.cust_id else ""
    return clean_pn, norm_cust_name, clean_cust_id


def build_order_lookups(orders):
    """Index orders two ways for sample matching.

    Returns (by_id, by_name) where:
      by_id  : (cust_id, pn)    -> [{date, qty, order_no}, ...]  (only when cust_id present)
      by_name: (cust_name, pn)  -> [{date, qty, order_no}, ...]  (always populated)

    The order date falls back to created_at, then datetime.max, so that
    sorting/min() never sees None.
    """
    order_lookup_by_id = {}
    order_lookup_by_name = {}
    for o in orders:
        clean_pn, norm_cust_name, clean_cust_id = _clean_keys(o)
        o_date = parse_date(o.date) or (o.created_at.replace(tzinfo=None) if o.created_at else datetime.max)
        data = {"date": o_date, "qty": o.qty or 0, "order_no": o.order_no}
        if clean_cust_id:
            order_lookup_by_id.setdefault((clean_cust_id, clean_pn), []).append(data)
        order_lookup_by_name.setdefault((norm_cust_name, clean_pn), []).append(data)
    return order_lookup_by_id, order_lookup_by_name


def _match_orders(by_id, by_name, clean_pn, norm_cust_name, cust_ids):
    """Collect orders for a sample: try every ERP id first, fall back to name.

    Returns a new list (never a lookup-owned list), so callers may sort it.
    """
    matched = []
    for cid in cust_ids:
        matched.extend(by_id.get((cid, clean_pn), []))
    if not matched:
        matched.extend(by_name.get((norm_cust_name, clean_pn), []))
    return matched


@router.get("/conversions", response_model=List[ConversionRecord])
def get_conversions(db: Session = Depends(get_db)):
    """List every sample record that converted into at least one order."""
    samples = db.query(SampleRecord).all()
    orders = db.query(OrderRecord).all()
    order_lookup_by_id, order_lookup_by_name = build_order_lookups(orders)

    conversions = []
    for s in samples:
        clean_pn, norm_cust_name, clean_cust_id = _clean_keys(s)
        s_date = parse_date(s.date)
        matched_orders = _match_orders(
            order_lookup_by_id, order_lookup_by_name,
            clean_pn, norm_cust_name,
            [clean_cust_id] if clean_cust_id else [],
        )
        if not (matched_orders and s_date):
            continue
        matched_orders.sort(key=lambda x: x["date"])
        first_order = matched_orders[0]
        # Total qty is summed across ALL matched orders, but the order date
        # and conversion velocity are measured against the FIRST order.
        total_order_qty = sum(o["qty"] for o in matched_orders)
        days_diff = (first_order["date"] - s_date).days
        # NOTE(review): days_diff can be negative when an order predates the
        # sample; currently kept in the list — confirm whether to filter.
        conversions.append(ConversionRecord(
            customer=s.customer,
            pn=s.pn,
            sample_date=s.date,
            sample_qty=s.qty or 0,
            order_date=first_order["date"].strftime("%Y-%m-%d"),
            order_qty=total_order_qty,
            days_to_convert=days_diff,
        ))
    # Most recent samples first (string sort; assumes sortable date strings).
    return sorted(conversions, key=lambda x: x.sample_date, reverse=True)


@router.get("/kpi", response_model=LabKPI)
def get_lab_kpi(
    start_date: Optional[str] = Query(None),
    end_date: Optional[str] = Query(None),
    db: Session = Depends(get_db)
):
    """Aggregate conversion KPIs over an optional date window.

    A "sample group" (unique trial) is keyed by (normalized customer name,
    normalized PN); the conversion rate is computed over groups rather than
    individual sample rows to avoid double counting repeat shipments.
    """
    samples_query = db.query(SampleRecord)
    orders_query = db.query(OrderRecord)
    if start_date:
        samples_query = samples_query.filter(SampleRecord.date >= start_date)
        orders_query = orders_query.filter(OrderRecord.date >= start_date)
    if end_date:
        samples_query = samples_query.filter(SampleRecord.date <= end_date)
        orders_query = orders_query.filter(OrderRecord.date <= end_date)
    samples = samples_query.all()
    orders = orders_query.all()

    order_lookup_by_id, order_lookup_by_name = build_order_lookups(orders)

    # Group samples into unique trials: (name, pn) -> sample dates + known ERP ids.
    unique_sample_groups = {}
    for s in samples:
        clean_pn, norm_cust_name, clean_cust_id = _clean_keys(s)
        key = (norm_cust_name, clean_pn)
        if key not in unique_sample_groups:
            unique_sample_groups[key] = {"dates": [], "cust_ids": set()}
        s_date = parse_date(s.date)
        if s_date:
            unique_sample_groups[key]["dates"].append(s_date)
        if clean_cust_id:
            unique_sample_groups[key]["cust_ids"].add(clean_cust_id)

    total_samples_count = len(unique_sample_groups)  # total "projects"
    converted_count = 0
    orphan_count = 0
    velocities = []
    now = datetime.now()

    for (norm_cust_name, clean_pn), data in unique_sample_groups.items():
        matched = _match_orders(
            order_lookup_by_id, order_lookup_by_name,
            clean_pn, norm_cust_name, data["cust_ids"],
        )
        earliest_sample = min(data["dates"]) if data["dates"] else None
        if matched:
            converted_count += 1
            first_order = min(o["date"] for o in matched)
            if earliest_sample:
                diff = (first_order - earliest_sample).days
                # Orders predating the sample are excluded from velocity.
                if diff >= 0:
                    velocities.append(diff)
        elif earliest_sample and (now - earliest_sample).days > 90:
            # NOTE(review): strict `.days > 90` here vs `date < now - 90d`
            # in /orphans — the two counts can differ on the boundary day;
            # confirm which semantics is intended.
            orphan_count += 1

    avg_velocity = sum(velocities) / len(velocities) if velocities else 0
    conversion_rate = (converted_count / total_samples_count * 100) if total_samples_count > 0 else 0

    return LabKPI(
        converted_count=converted_count,
        avg_velocity=round(avg_velocity, 1),
        conversion_rate=round(conversion_rate, 1),
        orphan_count=orphan_count
    )


@router.get("/scatter", response_model=List[ScatterPoint])
def get_scatter_data(
    start_date: Optional[str] = Query(None),
    end_date: Optional[str] = Query(None),
    db: Session = Depends(get_db)
):
    """Sample qty vs. order qty per (customer, PN) group for a scatter plot.

    Points are anchored on samples: groups are keyed by normalized customer
    name + PN, and order qty comes from the name-based lookup only (the key
    carries no ERP id, and name matching is the safer aggregate for a
    visual plot).  Orders are intentionally NOT date-filtered so that
    orders arriving after the sample window still count.
    """
    samples_query = db.query(SampleRecord)
    if start_date:
        samples_query = samples_query.filter(SampleRecord.date >= start_date)
    if end_date:
        samples_query = samples_query.filter(SampleRecord.date <= end_date)
    samples = samples_query.all()
    orders = db.query(OrderRecord).all()

    _, order_lookup_by_name = build_order_lookups(orders)

    # (norm name, pn) -> display info (from the first sample seen) + totals.
    unique_groups = {}
    for s in samples:
        clean_pn, norm_cust_name, _unused = _clean_keys(s)
        key = (norm_cust_name, clean_pn)
        if key not in unique_groups:
            unique_groups[key] = {
                "display_cust": s.customer,
                "display_pn": s.pn,
                "sample_qty": 0,
                "order_qty": 0,
            }
        unique_groups[key]["sample_qty"] += (s.qty or 0)

    # Fill in the aggregated order quantity for each group.
    for (norm_cust_name, clean_pn), data in unique_groups.items():
        matched = order_lookup_by_name.get((norm_cust_name, clean_pn))
        if matched:
            data["order_qty"] = sum(o["qty"] for o in matched)

    # Groups with orders but no samples are deliberately omitted: the ROI
    # analysis is anchored on samples that were actually sent.
    return [
        ScatterPoint(
            customer=v["display_cust"],
            pn=v["display_pn"],
            sample_qty=v["sample_qty"],
            order_qty=v["order_qty"]
        )
        for v in unique_groups.values()
    ]


@router.get("/orphans", response_model=List[OrphanSample])
def get_orphans(db: Session = Depends(get_db)):
    """List samples sent more than 90 days ago with no matching order."""
    now = datetime.now()
    threshold_date = now - timedelta(days=90)

    samples = db.query(SampleRecord).all()
    orders = db.query(OrderRecord).all()
    order_lookup_by_id, order_lookup_by_name = build_order_lookups(orders)

    orphans = []
    for s in samples:
        clean_pn, norm_cust_name, clean_cust_id = _clean_keys(s)
        s_date = parse_date(s.date)

        matched = bool(clean_cust_id) and (clean_cust_id, clean_pn) in order_lookup_by_id
        if not matched:
            matched = (norm_cust_name, clean_pn) in order_lookup_by_name
        if matched:
            continue

        if s_date and s_date < threshold_date:
            orphans.append(OrphanSample(
                customer=s.customer,
                pn=s.pn,
                days_since_sent=(now - s_date).days,
                order_no=s.order_no,
                date=s.date
            ))
    return sorted(orphans, key=lambda x: x.days_since_sent, reverse=True)