first commit

2026-01-09 19:14:41 +08:00
commit 9f3c96ce73
67 changed files with 9636 additions and 0 deletions
--- a/backend/app/services/fuzzy_matcher.py
+++ b/backend/app/services/fuzzy_matcher.py
@@ -0,0 +1,277 @@
+import re
+from typing import List, Tuple, Optional
+from rapidfuzz import fuzz, process
+from sqlalchemy.orm import Session
+from app.config import MATCH_THRESHOLD_AUTO, MATCH_THRESHOLD_REVIEW
+from app.models.dit import DitRecord
+from app.models.sample import SampleRecord
+from app.models.order import OrderRecord
+from app.models.match import MatchResult, MatchStatus, TargetType, ReviewLog
+import pandas as pd
+from datetime import timedelta
+
+# Corporate suffixes stripped from customer names during normalization.
+# Longer variants are listed before the shorter strings they contain
+# (e.g. 'Co., Ltd.' before 'Ltd.'), so iterating in order removes the most
+# specific form first. calculate_similarity() inspects the first three
+# entries, so keep the Chinese suffixes at the head of the list.
+COMPANY_SUFFIXES = [
+    # Chinese
+    '股份有限公司',
+    '有限公司',
+    '公司',
+    # Japanese (shinjitai / traditional characters)
+    '株式会社',
+    '株式會社',
+    # "Co., Ltd." spelling variants
+    'Co., Ltd.',
+    'Co.,Ltd.',
+    'Co. Ltd.',
+    'Co.Ltd.',
+    # Corporation
+    'Corporation',
+    'Corp.',
+    'Corp',
+    # Incorporated
+    'Inc.',
+    'Inc',
+    # Limited
+    'Limited',
+    'Ltd.',
+    'Ltd',
+    # Limited liability company
+    'LLC',
+    'L.L.C.',
+]
+
+def sanitize_pn(pn: str) -> str:
+    """Return the part number reduced to upper-case ASCII letters and digits.
+
+    Every character outside ``[a-zA-Z0-9]`` is dropped and the remainder is
+    upper-cased, e.g. ``PMSM-808-LL`` -> ``PMSM808LL``. A falsy input
+    (``None``, ``""``) yields an empty string; other values are passed
+    through ``str()`` first.
+    """
+    if pn:
+        # Collect the alphanumeric runs instead of deleting everything else.
+        kept_runs = re.findall(r'[a-zA-Z0-9]+', str(pn))
+        return ''.join(kept_runs).upper()
+    return ""
+
+def normalize_customer_name(name: str) -> str:
+    """Normalize a customer name for comparison.
+
+    Steps, in order: trim, remove every entry of ``COMPANY_SUFFIXES``
+    (case-insensitively), remove parenthesised text (ASCII and full-width
+    parentheses), turn ideographic spaces into ASCII spaces, collapse runs
+    of whitespace, and upper-case the result.
+
+    Suffixes whose first/last character is an ASCII letter or digit are only
+    removed when that edge is not glued to another ASCII letter or digit, so
+    ``Inc`` is stripped from ``Acme Inc`` but not from ``Princeton``, and
+    ``Corp`` is not cut out of ``Corporate``. CJK suffixes such as ``公司``
+    are still removed wherever they appear.
+
+    Args:
+        name: Raw customer name; a falsy value yields ``""``.
+
+    Returns:
+        The normalized, upper-cased name (possibly empty).
+    """
+    if not name:
+        return ""
+
+    normalized = name.strip()
+
+    # Remove corporate suffixes.
+    for suffix in COMPANY_SUFFIXES:
+        pattern = re.escape(suffix)
+        # Guard alphanumeric edges so a suffix never matches inside a word.
+        if re.match(r'[A-Za-z0-9]', suffix):
+            pattern = r'(?<![A-Za-z0-9])' + pattern
+        if re.search(r'[A-Za-z0-9]\Z', suffix):
+            pattern = pattern + r'(?![A-Za-z0-9])'
+        normalized = re.sub(pattern, '', normalized, flags=re.IGNORECASE)
+
+    # Remove parentheses together with their content (ASCII, then full-width).
+    normalized = re.sub(r'\([^)]*\)', '', normalized)
+    normalized = re.sub(r'（[^）]*）', '', normalized)
+
+    # Only the ideographic (full-width) space is converted to half-width.
+    normalized = normalized.replace('　', ' ')
+
+    # Collapse redundant whitespace.
+    normalized = re.sub(r'\s+', ' ', normalized).strip()
+
+    return normalized.upper()
+
+def calculate_similarity(name1: str, name2: str) -> Tuple[float, str]:
+    """Score how alike two customer names are and say why.
+
+    Both names are normalized with ``normalize_customer_name`` first.
+
+    Returns:
+        ``(score, reason)``. ``(0.0, "Empty name")`` when either normalized
+        name is empty, ``(100.0, "Exact Match")`` when they are identical,
+        otherwise the highest of four rapidfuzz scores together with a label
+        for the scorer that produced it (earlier scorers win ties). The
+        label becomes ``"Corporate Suffix Mismatch"`` when the score is at
+        least 80 and exactly one of the raw names contains one of the first
+        three ``COMPANY_SUFFIXES`` entries.
+    """
+    left = normalize_customer_name(name1)
+    right = normalize_customer_name(name2)
+
+    if not (left and right):
+        return 0.0, "Empty name"
+    if left == right:
+        return 100.0, "Exact Match"
+
+    # Listed in tie-breaking order: max() keeps the first maximal entry.
+    scored = [
+        (fuzz.ratio(left, right), "Character Similarity"),
+        (fuzz.partial_ratio(left, right), "Partial Match"),
+        (fuzz.token_sort_ratio(left, right), "Token Order Match"),
+        (fuzz.token_set_ratio(left, right), "Token Set Match"),
+    ]
+    best_score, reason = max(scored, key=lambda pair: pair[0])
+
+    # A strong match where only one raw name carries a common suffix is
+    # reported as a suffix difference rather than by scorer name.
+    one_sided_suffix = any(
+        (suffix in name1) != (suffix in name2)
+        for suffix in COMPANY_SUFFIXES[:3]
+    )
+    if best_score >= 80 and one_sided_suffix:
+        reason = "Corporate Suffix Mismatch"
+
+    return best_score, reason
+
+class FuzzyMatcher:
+    """Match DIT records to sample and order records and store the results.
+
+    Matching is a waterfall: an exact opportunity-ID match (samples only),
+    then an exact ERP-account match on the same part number, then fuzzy
+    customer-name similarity on the same part number.
+    """
+
+    # Samples dated more than this many days before the DIT date are skipped.
+    SAMPLE_WINDOW_DAYS = 30
+
+    def __init__(self, db: Session):
+        """Keep the SQLAlchemy session used for all queries and writes."""
+        self.db = db
+
+    def run_matching(self) -> dict:
+        """Rebuild every match result using waterfall matching.
+
+        All existing ``ReviewLog`` and ``MatchResult`` rows are deleted and
+        replaced. Deletes and inserts are committed together; if anything
+        fails the session is rolled back and the exception is re-raised.
+
+        Returns:
+            A dict with the keys ``match_count``, ``auto_matched`` and
+            ``pending_review``.
+        """
+        # 1. Load every DIT, sample and order record.
+        dit_records = self.db.query(DitRecord).all()
+        sample_records = self.db.query(SampleRecord).all()
+        order_records = self.db.query(OrderRecord).all()
+
+        # 2. Index the targets. Records with an empty key are left out, so a
+        #    DIT without a part number can never pair with a target that
+        #    also lacks one.
+        samples_by_pn = self._group_by(sample_records, 'pn')
+        samples_by_oppy = self._group_by(sample_records, 'oppy_no')
+        orders_by_pn = self._group_by(order_records, 'pn')
+
+        counts = {'match_count': 0, 'auto_matched': 0, 'pending_review': 0}
+        window = timedelta(days=self.SAMPLE_WINDOW_DAYS)
+
+        try:
+            # 3. Drop the previous results.
+            self.db.query(ReviewLog).delete()
+            self.db.query(MatchResult).delete()
+
+            for dit in dit_records:
+                dit_date = pd.to_datetime(dit.date, errors='coerce')
+
+                # --- DIT -> Sample ---
+                candidates = self._candidate_samples(
+                    dit, samples_by_oppy, samples_by_pn
+                )
+                for sample in candidates:
+                    sample_date = pd.to_datetime(sample.date, errors='coerce')
+
+                    # Time window: only the lower bound is enforced, and only
+                    # when both dates parse.
+                    # NOTE(review): the original comment also mentioned an
+                    # upper bound of "today" - confirm whether it is needed.
+                    if (pd.notna(dit_date) and pd.notna(sample_date)
+                            and sample_date < dit_date - window):
+                        continue
+
+                    # Priority 1: exact opportunity ID (golden key).
+                    if (dit.op_id and sample.oppy_no
+                            and dit.op_id == sample.oppy_no):
+                        verdict = (
+                            1,
+                            f"Matched via Opportunity ID: {dit.op_id}",
+                            100.0,
+                            "Golden Key Match",
+                        )
+                    # Priorities 2 and 3 require the same part number.
+                    elif dit.pn == sample.pn:
+                        verdict = self._match_by_account_or_name(dit, sample)
+                    else:
+                        verdict = None
+
+                    if verdict is not None:
+                        self._record_match(
+                            dit, TargetType.SAMPLE, sample, verdict, counts
+                        )
+
+                # --- DIT -> Order (keyed on part number only) ---
+                if dit.pn:
+                    for order in orders_by_pn.get(dit.pn, []):
+                        verdict = self._match_by_account_or_name(dit, order)
+                        if verdict is not None:
+                            self._record_match(
+                                dit, TargetType.ORDER, order, verdict, counts
+                            )
+
+            self.db.commit()
+        except Exception:
+            # Do not leave the session holding half-applied deletes/inserts.
+            self.db.rollback()
+            raise
+
+        return counts
+
+    @staticmethod
+    def _group_by(records, attr: str) -> dict:
+        """Group records by a truthy attribute value, keeping input order."""
+        groups = {}
+        for record in records:
+            key = getattr(record, attr)
+            if key:
+                groups.setdefault(key, []).append(record)
+        return groups
+
+    @staticmethod
+    def _candidate_samples(dit, samples_by_oppy: dict, samples_by_pn: dict) -> list:
+        """Return samples sharing the DIT's opportunity ID or part number, de-duplicated by id."""
+        pool = []
+        if dit.op_id:
+            pool.extend(samples_by_oppy.get(dit.op_id, []))
+        if dit.pn:
+            pool.extend(samples_by_pn.get(dit.pn, []))
+
+        seen_ids = set()
+        unique = []
+        for sample in pool:
+            if sample.id not in seen_ids:
+                seen_ids.add(sample.id)
+                unique.append(sample)
+        return unique
+
+    @staticmethod
+    def _match_by_account_or_name(dit, target):
+        """Return ``(priority, source, score, reason)`` for an ERP-account or name match, else ``None``."""
+        # Priority 2: exact customer code (silver key).
+        if dit.erp_account and target.cust_id and dit.erp_account == target.cust_id:
+            return (
+                2,
+                f"Matched via ERP Account: {dit.erp_account}",
+                99.0,
+                "Silver Key Match",
+            )
+
+        # Priority 3: fuzzy customer-name fallback.
+        score, reason = calculate_similarity(dit.customer, target.customer)
+        if score >= MATCH_THRESHOLD_REVIEW:
+            return 3, f"Matched via Name Similarity ({reason})", score, reason
+        return None
+
+    def _record_match(self, dit, target_type, target, verdict, counts: dict) -> None:
+        """Add one MatchResult to the session and update the running counters."""
+        priority, source, score, reason = verdict
+        if score >= MATCH_THRESHOLD_AUTO:
+            status = MatchStatus.auto_matched
+        else:
+            status = MatchStatus.pending
+
+        self.db.add(MatchResult(
+            dit_id=dit.id,
+            target_type=target_type,
+            target_id=target.id,
+            score=score,
+            match_priority=priority,
+            match_source=source,
+            reason=reason,
+            status=status,
+        ))
+
+        counts['match_count'] += 1
+        if status == MatchStatus.auto_matched:
+            counts['auto_matched'] += 1
+        else:
+            counts['pending_review'] += 1
+
+    def get_pending_reviews(self) -> List[MatchResult]:
+        """Return every match result whose status is still pending."""
+        return self.db.query(MatchResult).filter(
+            MatchResult.status == MatchStatus.pending
+        ).all()
+
+    def review_match(self, match_id: int, action: str) -> Optional[MatchResult]:
+        """Apply a reviewer decision to one match result.
+
+        Args:
+            match_id: Primary key of the match result.
+            action: ``'accept'`` or ``'reject'``. Any other value leaves the
+                status untouched.
+
+        Returns:
+            The match result, or ``None`` when no row has that id.
+        """
+        match = self.db.query(MatchResult).filter(MatchResult.id == match_id).first()
+        if not match:
+            return None
+
+        if action == 'accept':
+            match.status = MatchStatus.accepted
+        elif action == 'reject':
+            match.status = MatchStatus.rejected
+
+        self.db.commit()
+        return match