Files
SalesPipeline/backend/test_fuzzy_lookup.py
2026-01-27 19:08:46 +08:00

62 lines
1.9 KiB
Python

from app.routers.lab import fetch_orders_light, fetch_samples_light
from app.models import get_db
import time
# Take the first item produced by get_db() and use it as the handle that is
# passed to both fetch helpers below.
db = next(get_db())

print("Fetching data...")
orders = fetch_orders_light(db, start_date="2025-01-01")
samples = fetch_samples_light(db, start_date="2025-01-01")

# Index the orders by their 'norm_cust_name' value so the lookup further down
# can retrieve every order of a customer with a single dict access.
unique_order_custs = {}
for order in orders:
    unique_order_custs.setdefault(order['norm_cust_name'], []).append(order)

# Summary of the data set the matching loop will run over.
print(f"Total Orders: {len(orders)}")
print(f"Unique Order Customers: {len(unique_order_custs)}")
print(f"Total Samples: {len(samples)}")
# Benchmark of the sample -> order lookup.
#
# For every sample:
#   1. collect candidate orders by customer name (exact key match first,
#      substring containment as a fallback),
#   2. keep the candidates whose part number matches and whose date is on or
#      after the sample date,
#   3. count the sample as matched if at least one candidate survives.
# The elapsed time and the number of matched samples are printed at the end.

# Names shorter than this are not used for substring matching, on either
# side of the comparison.
MIN_FUZZY_NAME_LEN = 2

# perf_counter() is the clock intended for measuring short intervals; unlike
# time.time() it is not affected by wall-clock adjustments.
# Note: the timed region includes the per-match print() calls below.
start_time = time.perf_counter()
matches_found = 0

for s in samples:
    s_name = s['norm_cust_name']
    s_pn = s['clean_pn']

    # --- Step 1: candidate orders by customer name ---
    candidates = []
    if s_name in unique_order_custs:
        # Exact customer-name hit: no scan needed.
        candidates.extend(unique_order_custs[s_name])
    elif s_name and len(s_name) >= MIN_FUZZY_NAME_LEN:
        # Fallback: substring containment in either direction.
        # The length guard has to cover the sample name too: an empty string
        # is a substring of every string, so without it a blank sample name
        # would pull in the orders of every customer.
        for o_name, cust_orders in unique_order_custs.items():
            # Skip missing or too-short order names (a None key would
            # otherwise raise TypeError on len()/`in`).
            if not o_name or len(o_name) < MIN_FUZZY_NAME_LEN:
                continue
            if o_name in s_name or s_name in o_name:
                candidates.extend(cust_orders)

    # --- Step 2: filter candidates by part number and date ---
    s_date = s['date'] or None
    final_matches = []
    for c in candidates:
        # PN matches when both are equal, or when the sample PN starts with
        # a non-empty order PN.
        # NOTE(review): the equality branch also accepts two missing/empty
        # PNs -- confirm that is intended.
        pn_ok = c['clean_pn'] == s_pn or (
            s_pn and c['clean_pn'] and s_pn.startswith(c['clean_pn'])
        )
        # Both dates must be present and the order must not precede the
        # sample.
        if pn_ok and s_date and c['date'] and c['date'] >= s_date:
            final_matches.append(c)

    # --- Step 3: report the first surviving order for this sample ---
    if final_matches:
        matches_found += 1
        print(f"Match: {s['customer']} -> {final_matches[0]['customer']}")
        print(f"Dates: Sample {s_date} <= Order {final_matches[0]['date']}")

end_time = time.perf_counter()
print(f"Loop Match Time: {end_time - start_time:.4f}s")
print(f"Matches Found: {matches_found}")