20160116

2026-01-16 18:16:33 +08:00
parent 9f3c96ce73
commit e53c3c838c
26 changed files with 1473 additions and 386 deletions
--- a/backend/app/services/excel_parser.py
+++ b/backend/app/services/excel_parser.py
@@ -16,6 +16,8 @@ def clean_value(val):
    if isinstance(val, float):
        if math.isnan(val) or math.isinf(val):
            return None
+    if isinstance(val, str):
+        val = val.lstrip("'")  # Remove leading apostrophe often added by Excel
    return val


@@ -31,7 +33,8 @@ def clean_records(records: List[Dict]) -> List[Dict]:
 # 欄位名稱對應表
 COLUMN_MAPPING = {
    'dit': {
-        'op_id': ['opportunity name', 'opportunity no', 'opportunity', 'op編號', 'op 編號', 'op_id', 'opid', '案件編號', '案號', 'opportunity id'],
+        'op_id': ['opportunity no', 'opportunity', 'op編號', 'op 編號', 'op_id', 'opid', '案件編號', '案號', 'opportunity id'],
+        'op_name': ['opportunity name', '專案名稱', '案件名稱'],
        'erp_account': ['erp account', 'account no', 'erp account no', '客戶代碼', '客戶編號', 'erp_account'],
        'customer': ['account name', 'branding customer', '客戶', '客戶名稱', 'customer', 'customer name', '公司名稱'],
        'pn': ['product name', '料號', 'part number', 'pn', 'part no', 'part_number', '產品料號', 'stage/part'],
@@ -47,17 +50,18 @@ COLUMN_MAPPING = {
        'customer': ['客戶名稱', '客戶簡稱', '客戶', 'customer', 'customer name'],
        'pn': ['item', 'type', '料號', 'part number', 'pn', 'part no', '產品料號', '索樣數量'],
        'qty': ['索樣數量pcs', '索樣數量 k', '數量', 'qty', 'quantity', '申請數量'],
-        'date': ['需求日', '日期', 'date', '申請日期']
+        'date': ['出貨日', '需求日', '日期', 'date', '申請日期']
    },
    'order': {
        'order_id': ['項次', '訂單編號', 'order_id', 'order id'],
        'order_no': ['訂單單號', '訂單號', 'order_no', 'order no', '銷貨單號'],
-        'cust_id': ['客戶編號', '客戶代碼', '客戶代號', 'cust_id', 'cust id'],
+        'cust_id': ['客戶編號', '客戶代碼', '客戶代號', 'cust_id', 'cust id', 'erp code', 'erp_code', 'erpcode', 'erp'],
        'customer': ['客戶', '客戶名稱', 'customer', 'customer name'],
-        'pn': ['type', '內部料號', '料號', 'part number', 'pn', 'part no', '產品料號'],
+        'pn': ['內部料號', '料號', 'part number', 'pn', 'part no', '產品料號', 'type'],
        'qty': ['訂單量', '數量', 'qty', 'quantity', '訂購數量', '出貨數量'],
        'status': ['狀態', 'status', '訂單狀態'],
-        'amount': ['原幣金額(含稅)', '台幣金額(未稅)', '金額', 'amount', 'total', '訂單金額']
+        'amount': ['原幣金額(含稅)', '台幣金額(未稅)', '金額', 'amount', 'total', '訂單金額'],
+        'date': ['訂單日期', '日期', 'date', 'order date', 'order_date']
    }
 }

@@ -101,10 +105,12 @@ class ExcelParser:
                for idx, col in enumerate(df_columns):
                    if variant_lower in col or col in variant_lower:
                        mapping[df.columns[idx]] = standard_name
+                        print(f"[DEBUG] Mapped '{df.columns[idx]}' to '{standard_name}' (matched '{variant}')")
                        break
                if standard_name in mapping.values():
                    break
-
+        
+        print(f"[DEBUG] Final Mapping for {file_type}: {mapping}")
        return mapping

    def parse_file(self, file_path: Path, file_type: str) -> Tuple[str, Dict[str, Any]]:
--- a/backend/app/services/fuzzy_matcher.py
+++ b/backend/app/services/fuzzy_matcher.py
@@ -14,15 +14,22 @@ from datetime import timedelta
 COMPANY_SUFFIXES = [
    '股份有限公司', '有限公司', '公司',
    '株式会社', '株式會社',
-    'Co., Ltd.', 'Co.,Ltd.', 'Co. Ltd.', 'Co.Ltd.',
+    'Co., Ltd.', 'Co.,Ltd.', 'Co. Ltd.', 'Co.Ltd.', 'Co., Ltd', 'Co.,Ltd',
    'Corporation', 'Corp.', 'Corp',
    'Inc.', 'Inc',
-    'Limited', 'Ltd.', 'Ltd',
+    'Limited', 'Ltd.', 'Ltd', 'L.T.D.',
    'LLC', 'L.L.C.',
 ]

 def sanitize_pn(pn: str) -> str:
-    """去除非字母數字字元並轉大寫 (PMSM-808-LL -> PMSM808LL)"""
+    """去除非字母數字字元並轉大寫 (允許 - 與 _)"""
+    if not pn:
+        return ""
+    # 保留 - 和 _，移除其他特殊符號
+    return re.sub(r'[^a-zA-Z0-9\-_]', '', str(pn)).upper()
+
+def normalize_pn_for_matching(pn: str) -> str:
+    """比對專用的正規化 (移除所有符號，只留英數)"""
    if not pn:
        return ""
    return re.sub(r'[^a-zA-Z0-9]', '', str(pn)).upper()
@@ -34,10 +41,23 @@ def normalize_customer_name(name: str) -> str:

    # 轉換為大寫
    normalized = name.strip()
-
-    # 移除公司後綴
-    for suffix in COMPANY_SUFFIXES:
-        normalized = re.sub(re.escape(suffix), '', normalized, flags=re.IGNORECASE)
+    
+    # NOTE: punctuation/separators are NOT pre-cleaned here (doing so could merge
+    # words incorrectly); suffix matching below runs on the trimmed name as-is.
+    
+    # Strip company suffixes in a single pass, each matched by an end-anchored regex.
+    # Sort suffixes longest-first so that e.g. 'Co., Ltd.' is tried before 'Ltd.'.
+    sorted_suffixes = sorted(COMPANY_SUFFIXES, key=len, reverse=True)
+    
+    for suffix in sorted_suffixes:
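+        # Escape the suffix and anchor it at end-of-string (case-insensitive).
+        # NOTE(review): no word boundary is used, so a name that merely ends in the suffix letters (e.g. "Zinc" vs 'Inc') is trimmed too.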
+        pattern = re.compile(re.escape(suffix) + r'$', re.IGNORECASE)
+        normalized = pattern.sub('', normalized).strip()
+        
+        # Also try matching with preceding comma/space
+        pattern_strict = re.compile(r'[,.\s]+' + re.escape(suffix) + r'$', re.IGNORECASE)
+        normalized = pattern_strict.sub('', normalized).strip()

    # 移除括號及其內容
    normalized = re.sub(r'\([^)]*\)', '', normalized)
@@ -45,9 +65,20 @@ def normalize_customer_name(name: str) -> str:

    # 全形轉半形
    normalized = normalized.replace('　', ' ')
+    
+    # Remove a trailing "Co", "Co." or "Co.," together with the separator(s) before it,
+    # in case the suffix list did not catch it.
+    normalized = re.sub(r'[,.\s]+Co[.,]*$', '', normalized, flags=re.IGNORECASE)

    # 移除多餘空白
    normalized = re.sub(r'\s+', ' ', normalized).strip()
+    
+    # Interior punctuation is deliberately kept: fuzzy matching may rely on it.
+    # Key-based matching (Lab) might prefer a strictly alphabetic key, but for now
+    # the output stays close to the previous behavior, only cleaner.
+    
+    # Final strip of '.', ',' and spaces from both ends of the name
+    normalized = normalized.strip("., ")

    return normalized.upper()

@@ -103,7 +134,7 @@ class FuzzyMatcher:
        # 1. 取得所有 DIT 記錄
        dit_records = self.db.query(DitRecord).all()

-        # 2. 取得所有樣品和訂單記錄並按 PN 分組
+        # 2. 取得所有樣品和訂單記錄並按 PN (比對專用正規化) 分組
        sample_records = self.db.query(SampleRecord).all()
        order_records = self.db.query(OrderRecord).all()

@@ -111,9 +142,10 @@ class FuzzyMatcher:
        samples_by_oppy = {}
        for s in sample_records:
            if s.pn:
-                if s.pn not in samples_by_pn:
-                    samples_by_pn[s.pn] = []
-                samples_by_pn[s.pn].append(s)
+                norm_pn = normalize_pn_for_matching(s.pn)
+                if norm_pn not in samples_by_pn:
+                    samples_by_pn[norm_pn] = []
+                samples_by_pn[norm_pn].append(s)
            if s.oppy_no:
                if s.oppy_no not in samples_by_oppy:
                    samples_by_oppy[s.oppy_no] = []
@@ -121,9 +153,11 @@ class FuzzyMatcher:

        orders_by_pn = {}
        for o in order_records:
-            if o.pn not in orders_by_pn:
-                orders_by_pn[o.pn] = []
-            orders_by_pn[o.pn].append(o)
+            if o.pn:
+                norm_pn = normalize_pn_for_matching(o.pn)
+                if norm_pn not in orders_by_pn:
+                    orders_by_pn[norm_pn] = []
+                orders_by_pn[norm_pn].append(o)

        # 3. 清除舊的比對結果
        self.db.query(ReviewLog).delete()
@@ -136,13 +170,16 @@ class FuzzyMatcher:
        for dit in dit_records:
            dit_date = pd.to_datetime(dit.date, errors='coerce')
            
+            # 取得 DIT PN 的比對用正規化版本
+            dit_norm_pn = normalize_pn_for_matching(dit.pn)
+
            # --- 比對樣品 (DIT -> Sample) ---
            # 收集所有可能的樣品 (Priority 1: Oppy ID, Priority 2/3: PN)
            potential_samples = []
            if dit.op_id:
                potential_samples.extend(samples_by_oppy.get(dit.op_id, []))
-            if dit.pn:
-                potential_samples.extend(samples_by_pn.get(dit.pn, []))
+            if dit_norm_pn:
+                potential_samples.extend(samples_by_pn.get(dit_norm_pn, []))
            
            # 去重
            seen_sample_ids = set()
@@ -172,8 +209,8 @@ class FuzzyMatcher:
                    score = 100.0
                    reason = "Golden Key Match"
                
-                # Priority 2 & 3 則限制在相同 PN
-                elif dit.pn == sample.pn:
+                # Priority 2 & 3 apply only when the PNs are equal after symbol-stripping normalization
+                elif dit_norm_pn == normalize_pn_for_matching(sample.pn):
                    # Priority 2: 客戶代碼比對 (Silver Key)
                    if dit.erp_account and sample.cust_id and dit.erp_account == sample.cust_id:
                        match_priority = 2
@@ -209,44 +246,45 @@ class FuzzyMatcher:

            # --- 比對訂單 (DIT -> Order) ---
            # 訂單比對通常基於 PN
-            for order in orders_by_pn.get(dit.pn, []):
-                match_priority = 0
-                match_source = ""
-                score = 0.0
-                reason = ""
+            if dit_norm_pn:
+                for order in orders_by_pn.get(dit_norm_pn, []):
+                    match_priority = 0
+                    match_source = ""
+                    score = 0.0
+                    reason = ""

-                # Priority 2: 客戶代碼比對 (Silver Key)
-                if dit.erp_account and order.cust_id and dit.erp_account == order.cust_id:
-                    match_priority = 2
-                    match_source = f"Matched via ERP Account: {dit.erp_account}"
-                    score = 99.0
-                    reason = "Silver Key Match"
-                
-                # Priority 3: 名稱模糊比對 (Fallback)
-                else:
-                    score, reason = calculate_similarity(dit.customer, order.customer)
-                    if score >= MATCH_THRESHOLD_REVIEW:
-                        match_priority = 3
-                        match_source = f"Matched via Name Similarity ({reason})"
-
-                if match_priority > 0:
-                    status = MatchStatus.auto_matched if score >= MATCH_THRESHOLD_AUTO else MatchStatus.pending
-                    match = MatchResult(
-                        dit_id=dit.id,
-                        target_type=TargetType.ORDER,
-                        target_id=order.id,
-                        score=score,
-                        match_priority=match_priority,
-                        match_source=match_source,
-                        reason=reason,
-                        status=status
-                    )
-                    self.db.add(match)
-                    match_count += 1
-                    if status == MatchStatus.auto_matched:
-                        auto_matched += 1
+                    # Priority 2: 客戶代碼比對 (Silver Key)
+                    if dit.erp_account and order.cust_id and dit.erp_account == order.cust_id:
+                        match_priority = 2
+                        match_source = f"Matched via ERP Account: {dit.erp_account}"
+                        score = 99.0
+                        reason = "Silver Key Match"
+                    
+                    # Priority 3: 名稱模糊比對 (Fallback)
                    else:
-                        pending_review += 1
+                        score, reason = calculate_similarity(dit.customer, order.customer)
+                        if score >= MATCH_THRESHOLD_REVIEW:
+                            match_priority = 3
+                            match_source = f"Matched via Name Similarity ({reason})"
+
+                    if match_priority > 0:
+                        status = MatchStatus.auto_matched if score >= MATCH_THRESHOLD_AUTO else MatchStatus.pending
+                        match = MatchResult(
+                            dit_id=dit.id,
+                            target_type=TargetType.ORDER,
+                            target_id=order.id,
+                            score=score,
+                            match_priority=match_priority,
+                            match_source=match_source,
+                            reason=reason,
+                            status=status
+                        )
+                        self.db.add(match)
+                        match_count += 1
+                        if status == MatchStatus.auto_matched:
+                            auto_matched += 1
+                        else:
+                            pending_review += 1

        self.db.commit()

--- a/backend/app/services/report_generator.py
+++ b/backend/app/services/report_generator.py
@@ -72,41 +72,60 @@ class ReportGenerator:
        return result

    def generate_excel(self) -> io.BytesIO:
-        """產生 Excel 報表"""
+        """產生 Excel 報表 (包含三個分頁：DIT歸因明細, 成功送樣, 取得訂單)"""
        wb = Workbook()
-        ws = wb.active
-        ws.title = "DIT Attribution Report"
-
-        # 標題樣式
+        
+        # 取得所有資料
+        all_data = self.get_attribution_data()
+        
+        # 定義樣式
        header_font = Font(bold=True, color="FFFFFF")
        header_fill = PatternFill(start_color="4F46E5", end_color="4F46E5", fill_type="solid")
        header_alignment = Alignment(horizontal="center", vertical="center")
-
-        # 表頭
+        
        headers = ['OP編號', '客戶名稱', '料號', 'EAU', '階段', '樣品單號', '訂單單號', '訂單狀態', '訂單金額']
-        for col, header in enumerate(headers, 1):
-            cell = ws.cell(row=1, column=col, value=header)
-            cell.font = header_font
-            cell.fill = header_fill
-            cell.alignment = header_alignment
-
-        # 資料
-        data = self.get_attribution_data()
-        for row_idx, row_data in enumerate(data, 2):
-            ws.cell(row=row_idx, column=1, value=row_data['op_id'])
-            ws.cell(row=row_idx, column=2, value=row_data['customer'])
-            ws.cell(row=row_idx, column=3, value=row_data['pn'])
-            ws.cell(row=row_idx, column=4, value=row_data['eau'])
-            ws.cell(row=row_idx, column=5, value=row_data['stage'])
-            ws.cell(row=row_idx, column=6, value=row_data['sample_order'] or '-')
-            ws.cell(row=row_idx, column=7, value=row_data['order_no'] or '-')
-            ws.cell(row=row_idx, column=8, value=row_data['order_status'] or '-')
-            ws.cell(row=row_idx, column=9, value=row_data['order_amount'] or 0)
-
-        # 調整欄寬
        column_widths = [15, 30, 20, 12, 15, 15, 15, 12, 12]
-        for col, width in enumerate(column_widths, 1):
-            ws.column_dimensions[chr(64 + col)].width = width
+
+        def create_sheet(sheet_name, data_rows):
+            if sheet_name == "DIT歸因明細":
+                 ws = wb.active
+                 ws.title = sheet_name
+            else:
+                 ws = wb.create_sheet(title=sheet_name)
+
+            # 表頭
+            for col, header in enumerate(headers, 1):
+                cell = ws.cell(row=1, column=col, value=header)
+                cell.font = header_font
+                cell.fill = header_fill
+                cell.alignment = header_alignment
+
+            # 資料
+            for row_idx, row_data in enumerate(data_rows, 2):
+                ws.cell(row=row_idx, column=1, value=row_data['op_id'])
+                ws.cell(row=row_idx, column=2, value=row_data['customer'])
+                ws.cell(row=row_idx, column=3, value=row_data['pn'])
+                ws.cell(row=row_idx, column=4, value=row_data['eau'])
+                ws.cell(row=row_idx, column=5, value=row_data['stage'])
+                ws.cell(row=row_idx, column=6, value=row_data['sample_order'] or '-')
+                ws.cell(row=row_idx, column=7, value=row_data['order_no'] or '-')
+                ws.cell(row=row_idx, column=8, value=row_data['order_status'] or '-')
+                ws.cell(row=row_idx, column=9, value=row_data['order_amount'] or 0)
+
+            # 調整欄寬
+            for col, width in enumerate(column_widths, 1):
+                ws.column_dimensions[chr(64 + col)].width = width
+
+        # 1. DIT歸因明細 (全部)
+        create_sheet("DIT歸因明細", all_data)
+
+        # 2. 成功送樣 (有樣品單號)
+        success_samples = [row for row in all_data if row['sample_order']]
+        create_sheet("成功送樣", success_samples)
+
+        # 3. 取得訂單 (有訂單單號)
+        orders_received = [row for row in all_data if row['order_no']]
+        create_sheet("取得訂單", orders_received)

        # 儲存到 BytesIO
        output = io.BytesIO()