This commit is contained in:
2026-01-16 18:16:33 +08:00
parent 9f3c96ce73
commit e53c3c838c
26 changed files with 1473 additions and 386 deletions

View File

@@ -14,15 +14,22 @@ from datetime import timedelta
# Company-name suffixes that are stripped when normalising customer names.
# Each spelling is listed exactly once; variants with and without the
# trailing dot (e.g. 'Co., Ltd.' / 'Co., Ltd') are separate entries.
COMPANY_SUFFIXES = [
    '股份有限公司', '有限公司', '公司',
    '株式会社', '株式會社',
    'Co., Ltd.', 'Co.,Ltd.', 'Co. Ltd.', 'Co.Ltd.', 'Co., Ltd', 'Co.,Ltd',
    'Corporation', 'Corp.', 'Corp',
    'Inc.', 'Inc',
    'Limited', 'Ltd.', 'Ltd', 'L.T.D.',
    'LLC', 'L.L.C.',
]
def sanitize_pn(pn: str) -> str:
    """Return *pn* upper-cased with disallowed symbols removed.

    Only ASCII letters, digits, ``-`` and ``_`` are kept, so
    ``"pmsm-808-ll"`` becomes ``"PMSM-808-LL"``.

    Args:
        pn: Raw part number. Non-string values are converted with ``str()``.

    Returns:
        The cleaned, upper-cased part number, or ``""`` when *pn* is falsy
        (``None``, empty string, ...).
    """
    if not pn:
        return ""
    # Keep '-' and '_'; drop every other non-alphanumeric character.
    return re.sub(r'[^a-zA-Z0-9\-_]', '', str(pn)).upper()
def normalize_pn_for_matching(pn: str) -> str:
    """Return the comparison key for a part number.

    Every character that is not an ASCII letter or digit is removed and the
    result is upper-cased, so ``"PMSM-808-LL"`` and ``"pmsm_808 ll"`` both
    yield ``"PMSM808LL"``.

    Args:
        pn: Raw part number. Non-string values are converted with ``str()``.

    Returns:
        The alphanumeric-only, upper-cased key, or ``""`` when *pn* is falsy
        (``None``, empty string, ...).
    """
    if not pn:
        return ""
    return re.sub(r'[^a-zA-Z0-9]', '', str(pn)).upper()
@@ -34,10 +41,23 @@ def normalize_customer_name(name: str) -> str:
# Trim surrounding whitespace (upper-casing is applied to the final result)
normalized = name.strip()
# 移除公司後綴
for suffix in COMPANY_SUFFIXES:
normalized = re.sub(re.escape(suffix), '', normalized, flags=re.IGNORECASE)
# Pre-clean: Remove common punctuation/separators to make suffix matching easier
# But be careful not to merge words incorrectly.
# 移除公司後綴 - iterate multiple times or use regex for robust matching
# Sort suffixes by length descending to match longest first
sorted_suffixes = sorted(COMPANY_SUFFIXES, key=len, reverse=True)
for suffix in sorted_suffixes:
# Use word boundary or simple end of string check
# Escape suffix for regex
pattern = re.compile(re.escape(suffix) + r'$', re.IGNORECASE)
normalized = pattern.sub('', normalized).strip()
# Also try matching with preceding comma/space
pattern_strict = re.compile(r'[,.\s]+' + re.escape(suffix) + r'$', re.IGNORECASE)
normalized = pattern_strict.sub('', normalized).strip()
# 移除括號及其內容
normalized = re.sub(r'\([^)]*\)', '', normalized)
@@ -45,9 +65,20 @@ def normalize_customer_name(name: str) -> str:
# 全形轉半形
normalized = normalized.replace(' ', ' ')
# Remove a dangling trailing "Co", "Co." or "Co.," (preceded by a comma, dot or
# space) that may remain if the suffix list did not catch it
normalized = re.sub(r'[,.\s]+Co[.,]*$', '', normalized, flags=re.IGNORECASE)
# 移除多餘空白
normalized = re.sub(r'\s+', ' ', normalized).strip()
# Interior punctuation is deliberately left in place because fuzzy matching
# may rely on it. A stricter, letters-only key (e.g. for key-based matching
# in Lab) was considered and rejected to stay close to the previous behavior.
# Only trailing dots, commas and spaces are stripped here.
normalized = normalized.strip("., ")
return normalized.upper()
@@ -103,7 +134,7 @@ class FuzzyMatcher:
# 1. 取得所有 DIT 記錄
dit_records = self.db.query(DitRecord).all()
# 2. 取得所有樣品和訂單記錄並按 PN 分組
# 2. 取得所有樣品和訂單記錄並按 PN (比對專用正規化) 分組
sample_records = self.db.query(SampleRecord).all()
order_records = self.db.query(OrderRecord).all()
@@ -111,9 +142,10 @@ class FuzzyMatcher:
samples_by_oppy = {}
for s in sample_records:
if s.pn:
if s.pn not in samples_by_pn:
samples_by_pn[s.pn] = []
samples_by_pn[s.pn].append(s)
norm_pn = normalize_pn_for_matching(s.pn)
if norm_pn not in samples_by_pn:
samples_by_pn[norm_pn] = []
samples_by_pn[norm_pn].append(s)
if s.oppy_no:
if s.oppy_no not in samples_by_oppy:
samples_by_oppy[s.oppy_no] = []
@@ -121,9 +153,11 @@ class FuzzyMatcher:
orders_by_pn = {}
for o in order_records:
if o.pn not in orders_by_pn:
orders_by_pn[o.pn] = []
orders_by_pn[o.pn].append(o)
if o.pn:
norm_pn = normalize_pn_for_matching(o.pn)
if norm_pn not in orders_by_pn:
orders_by_pn[norm_pn] = []
orders_by_pn[norm_pn].append(o)
# 3. 清除舊的比對結果
self.db.query(ReviewLog).delete()
@@ -136,13 +170,16 @@ class FuzzyMatcher:
for dit in dit_records:
dit_date = pd.to_datetime(dit.date, errors='coerce')
# 取得 DIT PN 的比對用正規化版本
dit_norm_pn = normalize_pn_for_matching(dit.pn)
# --- 比對樣品 (DIT -> Sample) ---
# 收集所有可能的樣品 (Priority 1: Oppy ID, Priority 2/3: PN)
potential_samples = []
if dit.op_id:
potential_samples.extend(samples_by_oppy.get(dit.op_id, []))
if dit.pn:
potential_samples.extend(samples_by_pn.get(dit.pn, []))
if dit_norm_pn:
potential_samples.extend(samples_by_pn.get(dit_norm_pn, []))
# 去重
seen_sample_ids = set()
@@ -172,8 +209,8 @@ class FuzzyMatcher:
score = 100.0
reason = "Golden Key Match"
# Priority 2 & 3 則限制在相同 PN
elif dit.pn == sample.pn:
# Priority 2 & 3 則限制在相同 PN (Ignored symbols)
elif dit_norm_pn == normalize_pn_for_matching(sample.pn):
# Priority 2: 客戶代碼比對 (Silver Key)
if dit.erp_account and sample.cust_id and dit.erp_account == sample.cust_id:
match_priority = 2
@@ -209,44 +246,45 @@ class FuzzyMatcher:
# --- 比對訂單 (DIT -> Order) ---
# 訂單比對通常基於 PN
for order in orders_by_pn.get(dit.pn, []):
match_priority = 0
match_source = ""
score = 0.0
reason = ""
if dit_norm_pn:
for order in orders_by_pn.get(dit_norm_pn, []):
match_priority = 0
match_source = ""
score = 0.0
reason = ""
# Priority 2: 客戶代碼比對 (Silver Key)
if dit.erp_account and order.cust_id and dit.erp_account == order.cust_id:
match_priority = 2
match_source = f"Matched via ERP Account: {dit.erp_account}"
score = 99.0
reason = "Silver Key Match"
# Priority 3: 名稱模糊比對 (Fallback)
else:
score, reason = calculate_similarity(dit.customer, order.customer)
if score >= MATCH_THRESHOLD_REVIEW:
match_priority = 3
match_source = f"Matched via Name Similarity ({reason})"
if match_priority > 0:
status = MatchStatus.auto_matched if score >= MATCH_THRESHOLD_AUTO else MatchStatus.pending
match = MatchResult(
dit_id=dit.id,
target_type=TargetType.ORDER,
target_id=order.id,
score=score,
match_priority=match_priority,
match_source=match_source,
reason=reason,
status=status
)
self.db.add(match)
match_count += 1
if status == MatchStatus.auto_matched:
auto_matched += 1
# Priority 2: 客戶代碼比對 (Silver Key)
if dit.erp_account and order.cust_id and dit.erp_account == order.cust_id:
match_priority = 2
match_source = f"Matched via ERP Account: {dit.erp_account}"
score = 99.0
reason = "Silver Key Match"
# Priority 3: 名稱模糊比對 (Fallback)
else:
pending_review += 1
score, reason = calculate_similarity(dit.customer, order.customer)
if score >= MATCH_THRESHOLD_REVIEW:
match_priority = 3
match_source = f"Matched via Name Similarity ({reason})"
if match_priority > 0:
status = MatchStatus.auto_matched if score >= MATCH_THRESHOLD_AUTO else MatchStatus.pending
match = MatchResult(
dit_id=dit.id,
target_type=TargetType.ORDER,
target_id=order.id,
score=score,
match_priority=match_priority,
match_source=match_source,
reason=reason,
status=status
)
self.db.add(match)
match_count += 1
if status == MatchStatus.auto_matched:
auto_matched += 1
else:
pending_review += 1
self.db.commit()