first commit

This commit is contained in:
2026-01-09 19:14:41 +08:00
commit 9f3c96ce73
67 changed files with 9636 additions and 0 deletions

View File

@@ -0,0 +1 @@
# Services package

View File

@@ -0,0 +1,175 @@
import re
import uuid
import math
from pathlib import Path
from typing import List, Dict, Any, Optional, Tuple
import pandas as pd
import chardet
from openpyxl import load_workbook
from app.config import MAX_HEADER_SCAN_ROWS, UPLOAD_DIR
def clean_value(val):
"""清理單一值,將 NaN/Inf 轉換為 None 以便 JSON 序列化"""
if val is None:
return None
if isinstance(val, float):
if math.isnan(val) or math.isinf(val):
return None
return val
def clean_dict(d: Dict) -> Dict:
"""清理字典中的所有 NaN/Inf 值"""
return {k: clean_value(v) for k, v in d.items()}
def clean_records(records: List[Dict]) -> List[Dict]:
"""清理記錄列表中的所有 NaN/Inf 值"""
return [clean_dict(r) for r in records]
# 欄位名稱對應表
COLUMN_MAPPING = {
'dit': {
'op_id': ['opportunity name', 'opportunity no', 'opportunity', 'op編號', 'op 編號', 'op_id', 'opid', '案件編號', '案號', 'opportunity id'],
'erp_account': ['erp account', 'account no', 'erp account no', '客戶代碼', '客戶編號', 'erp_account'],
'customer': ['account name', 'branding customer', '客戶', '客戶名稱', 'customer', 'customer name', '公司名稱'],
'pn': ['product name', '料號', 'part number', 'pn', 'part no', 'part_number', '產品料號', 'stage/part'],
'eau': ['eau quantity', 'eau quantity (pcs)', 'eau', '年預估量', 'annual usage', '預估用量'],
'stage': ['stage', 'oppty product stage', '階段', 'status', '狀態', '專案階段'],
'date': ['created date', '日期', 'date', '建立日期', 'create date']
},
'sample': {
'sample_id': ['樣品訂單號碼', 'item', '樣品編號', 'sample_id', 'sample id', '編號'],
'order_no': ['樣品訂單號碼', '單號', 'order_no', 'order no', '樣品單號', '申請單號'],
'oppy_no': ['oppy no', 'oppy_no', '案號', '案件編號', 'opportunity no'],
'cust_id': ['cust id', 'cust_id', '客戶編號', '客戶代碼', '客戶代號'],
'customer': ['客戶名稱', '客戶簡稱', '客戶', 'customer', 'customer name'],
'pn': ['item', 'type', '料號', 'part number', 'pn', 'part no', '產品料號', '索樣數量'],
'qty': ['索樣數量pcs', '索樣數量 k', '數量', 'qty', 'quantity', '申請數量'],
'date': ['需求日', '日期', 'date', '申請日期']
},
'order': {
'order_id': ['項次', '訂單編號', 'order_id', 'order id'],
'order_no': ['訂單單號', '訂單號', 'order_no', 'order no', '銷貨單號'],
'cust_id': ['客戶編號', '客戶代碼', '客戶代號', 'cust_id', 'cust id'],
'customer': ['客戶', '客戶名稱', 'customer', 'customer name'],
'pn': ['type', '內部料號', '料號', 'part number', 'pn', 'part no', '產品料號'],
'qty': ['訂單量', '數量', 'qty', 'quantity', '訂購數量', '出貨數量'],
'status': ['狀態', 'status', '訂單狀態'],
'amount': ['原幣金額(含稅)', '台幣金額(未稅)', '金額', 'amount', 'total', '訂單金額']
}
}
class ExcelParser:
def __init__(self):
self.parsed_files: Dict[str, Dict] = {}
def detect_encoding(self, file_path: Path) -> str:
"""偵測檔案編碼"""
with open(file_path, 'rb') as f:
result = chardet.detect(f.read(10000))
return result.get('encoding', 'utf-8')
def find_header_row(self, df: pd.DataFrame, file_type: str) -> int:
"""自動偵測表頭位置"""
expected_columns = set()
for variants in COLUMN_MAPPING[file_type].values():
expected_columns.update([v.lower() for v in variants])
for idx in range(min(MAX_HEADER_SCAN_ROWS, len(df))):
row = df.iloc[idx]
row_values = [str(v).lower().strip() for v in row.values if pd.notna(v)]
# 檢查是否有匹配的欄位名稱
matches = sum(1 for v in row_values if any(exp in v for exp in expected_columns))
if matches >= 2: # 至少匹配 2 個欄位
return idx
return 0 # 預設第一行為表頭
def map_columns(self, df: pd.DataFrame, file_type: str) -> Dict[str, str]:
"""將 DataFrame 欄位對應到標準欄位名稱"""
mapping = {}
column_map = COLUMN_MAPPING[file_type]
df_columns = [str(c).lower().strip() for c in df.columns]
for standard_name, variants in column_map.items():
for variant in variants:
variant_lower = variant.lower()
for idx, col in enumerate(df_columns):
if variant_lower in col or col in variant_lower:
mapping[df.columns[idx]] = standard_name
break
if standard_name in mapping.values():
break
return mapping
def parse_file(self, file_path: Path, file_type: str) -> Tuple[str, Dict[str, Any]]:
"""解析 Excel/CSV 檔案"""
file_id = str(uuid.uuid4())
# 讀取檔案
if file_path.suffix.lower() == '.csv':
encoding = self.detect_encoding(file_path)
df = pd.read_csv(file_path, encoding=encoding, header=None)
else:
df = pd.read_excel(file_path, header=None)
# 找到表頭
header_row = self.find_header_row(df, file_type)
# 重新讀取,以正確的表頭
if file_path.suffix.lower() == '.csv':
df = pd.read_csv(file_path, encoding=encoding, header=header_row)
else:
df = pd.read_excel(file_path, header=header_row)
# 欄位對應
column_mapping = self.map_columns(df, file_type)
df = df.rename(columns=column_mapping)
# 只保留需要的欄位
required_columns = list(COLUMN_MAPPING[file_type].keys())
available_columns = [c for c in required_columns if c in df.columns]
df = df[available_columns]
# 清理資料
df = df.dropna(how='all')
# 產生預覽資料(清理 NaN 值以便 JSON 序列化)
preview = clean_records(df.head(10).to_dict(orient='records'))
# 儲存解析結果
parsed_data = {
'file_id': file_id,
'file_type': file_type,
'filename': file_path.name,
'header_row': header_row,
'row_count': len(df),
'columns': list(df.columns),
'preview': preview,
'dataframe': df
}
self.parsed_files[file_id] = parsed_data
return file_id, {k: v for k, v in parsed_data.items() if k != 'dataframe'}
def get_parsed_data(self, file_id: str) -> Optional[pd.DataFrame]:
"""取得解析後的 DataFrame"""
if file_id in self.parsed_files:
return self.parsed_files[file_id].get('dataframe')
return None
def get_file_info(self, file_id: str) -> Optional[Dict]:
"""取得檔案資訊"""
if file_id in self.parsed_files:
data = self.parsed_files[file_id]
return {k: v for k, v in data.items() if k != 'dataframe'}
return None
# 全域實例
excel_parser = ExcelParser()

View File

@@ -0,0 +1,277 @@
import re
from typing import List, Tuple, Optional
from rapidfuzz import fuzz, process
from sqlalchemy.orm import Session
from app.config import MATCH_THRESHOLD_AUTO, MATCH_THRESHOLD_REVIEW
from app.models.dit import DitRecord
from app.models.sample import SampleRecord
from app.models.order import OrderRecord
from app.models.match import MatchResult, MatchStatus, TargetType, ReviewLog
import pandas as pd
from datetime import timedelta
# 公司後綴清單(用於正規化)
COMPANY_SUFFIXES = [
'股份有限公司', '有限公司', '公司',
'株式会社', '株式會社',
'Co., Ltd.', 'Co.,Ltd.', 'Co. Ltd.', 'Co.Ltd.',
'Corporation', 'Corp.', 'Corp',
'Inc.', 'Inc',
'Limited', 'Ltd.', 'Ltd',
'LLC', 'L.L.C.',
]
def sanitize_pn(pn: str) -> str:
"""去除非字母數字字元並轉大寫 (PMSM-808-LL -> PMSM808LL)"""
if not pn:
return ""
return re.sub(r'[^a-zA-Z0-9]', '', str(pn)).upper()
def normalize_customer_name(name: str) -> str:
"""正規化客戶名稱 (轉大寫)"""
if not name:
return ""
# 轉換為大寫
normalized = name.strip()
# 移除公司後綴
for suffix in COMPANY_SUFFIXES:
normalized = re.sub(re.escape(suffix), '', normalized, flags=re.IGNORECASE)
# 移除括號及其內容
normalized = re.sub(r'\([^)]*\)', '', normalized)
normalized = re.sub(r'[^]*', '', normalized)
# 全形轉半形
normalized = normalized.replace(' ', ' ')
# 移除多餘空白
normalized = re.sub(r'\s+', ' ', normalized).strip()
return normalized.upper()
def calculate_similarity(name1: str, name2: str) -> Tuple[float, str]:
"""計算兩個名稱的相似度"""
# 正規化
norm1 = normalize_customer_name(name1)
norm2 = normalize_customer_name(name2)
if not norm1 or not norm2:
return 0.0, "Empty name"
# 完全匹配
if norm1 == norm2:
return 100.0, "Exact Match"
# 使用多種比對方法
ratio = fuzz.ratio(norm1, norm2)
partial_ratio = fuzz.partial_ratio(norm1, norm2)
token_sort_ratio = fuzz.token_sort_ratio(norm1, norm2)
token_set_ratio = fuzz.token_set_ratio(norm1, norm2)
# 取最高分
best_score = max(ratio, partial_ratio, token_sort_ratio, token_set_ratio)
# 決定原因
if ratio == best_score:
reason = "Character Similarity"
elif partial_ratio == best_score:
reason = "Partial Match"
elif token_sort_ratio == best_score:
reason = "Token Order Match"
else:
reason = "Token Set Match"
# 檢查是否為後綴差異
if best_score >= 80:
for suffix in COMPANY_SUFFIXES[:3]: # 只檢查常見後綴
if (suffix in name1 and suffix not in name2) or \
(suffix not in name1 and suffix in name2):
reason = "Corporate Suffix Mismatch"
break
return best_score, reason
class FuzzyMatcher:
def __init__(self, db: Session):
self.db = db
def run_matching(self) -> dict:
"""執行瀑布式模糊比對 (Waterfall Matching)"""
# 1. 取得所有 DIT 記錄
dit_records = self.db.query(DitRecord).all()
# 2. 取得所有樣品和訂單記錄並按 PN 分組
sample_records = self.db.query(SampleRecord).all()
order_records = self.db.query(OrderRecord).all()
samples_by_pn = {}
samples_by_oppy = {}
for s in sample_records:
if s.pn:
if s.pn not in samples_by_pn:
samples_by_pn[s.pn] = []
samples_by_pn[s.pn].append(s)
if s.oppy_no:
if s.oppy_no not in samples_by_oppy:
samples_by_oppy[s.oppy_no] = []
samples_by_oppy[s.oppy_no].append(s)
orders_by_pn = {}
for o in order_records:
if o.pn not in orders_by_pn:
orders_by_pn[o.pn] = []
orders_by_pn[o.pn].append(o)
# 3. 清除舊的比對結果
self.db.query(ReviewLog).delete()
self.db.query(MatchResult).delete()
match_count = 0
auto_matched = 0
pending_review = 0
for dit in dit_records:
dit_date = pd.to_datetime(dit.date, errors='coerce')
# --- 比對樣品 (DIT -> Sample) ---
# 收集所有可能的樣品 (Priority 1: Oppy ID, Priority 2/3: PN)
potential_samples = []
if dit.op_id:
potential_samples.extend(samples_by_oppy.get(dit.op_id, []))
if dit.pn:
potential_samples.extend(samples_by_pn.get(dit.pn, []))
# 去重
seen_sample_ids = set()
unique_potential_samples = []
for s in potential_samples:
if s.id not in seen_sample_ids:
seen_sample_ids.add(s.id)
unique_potential_samples.append(s)
for sample in unique_potential_samples:
sample_date = pd.to_datetime(sample.date, errors='coerce')
# 時間窗檢查: Sample Date 必須在 DIT Date 的 前 30 天 至 今日 之間
if pd.notna(dit_date) and pd.notna(sample_date):
if sample_date < (dit_date - timedelta(days=30)):
continue
match_priority = 0
match_source = ""
score = 0.0
reason = ""
# Priority 1: 案號精準比對 (Golden Key)
if dit.op_id and sample.oppy_no and dit.op_id == sample.oppy_no:
match_priority = 1
match_source = f"Matched via Opportunity ID: {dit.op_id}"
score = 100.0
reason = "Golden Key Match"
# Priority 2 & 3 則限制在相同 PN
elif dit.pn == sample.pn:
# Priority 2: 客戶代碼比對 (Silver Key)
if dit.erp_account and sample.cust_id and dit.erp_account == sample.cust_id:
match_priority = 2
match_source = f"Matched via ERP Account: {dit.erp_account}"
score = 99.0
reason = "Silver Key Match"
# Priority 3: 名稱模糊比對 (Fallback)
else:
score, reason = calculate_similarity(dit.customer, sample.customer)
if score >= MATCH_THRESHOLD_REVIEW:
match_priority = 3
match_source = f"Matched via Name Similarity ({reason})"
if match_priority > 0:
status = MatchStatus.auto_matched if score >= MATCH_THRESHOLD_AUTO else MatchStatus.pending
match = MatchResult(
dit_id=dit.id,
target_type=TargetType.SAMPLE,
target_id=sample.id,
score=score,
match_priority=match_priority,
match_source=match_source,
reason=reason,
status=status
)
self.db.add(match)
match_count += 1
if status == MatchStatus.auto_matched:
auto_matched += 1
else:
pending_review += 1
# --- 比對訂單 (DIT -> Order) ---
# 訂單比對通常基於 PN
for order in orders_by_pn.get(dit.pn, []):
match_priority = 0
match_source = ""
score = 0.0
reason = ""
# Priority 2: 客戶代碼比對 (Silver Key)
if dit.erp_account and order.cust_id and dit.erp_account == order.cust_id:
match_priority = 2
match_source = f"Matched via ERP Account: {dit.erp_account}"
score = 99.0
reason = "Silver Key Match"
# Priority 3: 名稱模糊比對 (Fallback)
else:
score, reason = calculate_similarity(dit.customer, order.customer)
if score >= MATCH_THRESHOLD_REVIEW:
match_priority = 3
match_source = f"Matched via Name Similarity ({reason})"
if match_priority > 0:
status = MatchStatus.auto_matched if score >= MATCH_THRESHOLD_AUTO else MatchStatus.pending
match = MatchResult(
dit_id=dit.id,
target_type=TargetType.ORDER,
target_id=order.id,
score=score,
match_priority=match_priority,
match_source=match_source,
reason=reason,
status=status
)
self.db.add(match)
match_count += 1
if status == MatchStatus.auto_matched:
auto_matched += 1
else:
pending_review += 1
self.db.commit()
return {
'match_count': match_count,
'auto_matched': auto_matched,
'pending_review': pending_review
}
def get_pending_reviews(self) -> List[MatchResult]:
"""取得待審核的比對結果"""
return self.db.query(MatchResult).filter(
MatchResult.status == MatchStatus.pending
).all()
def review_match(self, match_id: int, action: str) -> Optional[MatchResult]:
"""審核比對結果"""
match = self.db.query(MatchResult).filter(MatchResult.id == match_id).first()
if not match:
return None
if action == 'accept':
match.status = MatchStatus.accepted
elif action == 'reject':
match.status = MatchStatus.rejected
self.db.commit()
return match

View File

@@ -0,0 +1,171 @@
import io
from typing import List, Dict, Any
from datetime import datetime
from openpyxl import Workbook
from openpyxl.styles import Font, Alignment, PatternFill, Border, Side
from reportlab.lib import colors
from reportlab.lib.pagesizes import A4, landscape
from reportlab.platypus import SimpleDocTemplate, Table, TableStyle, Paragraph, Spacer
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont
from sqlalchemy.orm import Session
from app.models.dit import DitRecord
from app.models.sample import SampleRecord
from app.models.order import OrderRecord
from app.models.match import MatchResult, MatchStatus
class ReportGenerator:
def __init__(self, db: Session):
self.db = db
def get_attribution_data(self) -> List[Dict[str, Any]]:
"""取得歸因明細資料"""
dit_records = self.db.query(DitRecord).all()
result = []
for dit in dit_records:
row = {
'op_id': dit.op_id,
'customer': dit.customer,
'pn': dit.pn,
'eau': dit.eau,
'stage': dit.stage,
'sample_order': None,
'order_no': None,
'order_status': None,
'order_amount': None
}
# 找到已接受的樣品匹配
sample_match = self.db.query(MatchResult).filter(
MatchResult.dit_id == dit.id,
MatchResult.target_type == 'SAMPLE',
MatchResult.status.in_([MatchStatus.accepted, MatchStatus.auto_matched])
).first()
if sample_match:
sample = self.db.query(SampleRecord).filter(
SampleRecord.id == sample_match.target_id
).first()
if sample:
row['sample_order'] = sample.order_no
# 找到已接受的訂單匹配
order_match = self.db.query(MatchResult).filter(
MatchResult.dit_id == dit.id,
MatchResult.target_type == 'ORDER',
MatchResult.status.in_([MatchStatus.accepted, MatchStatus.auto_matched])
).first()
if order_match:
order = self.db.query(OrderRecord).filter(
OrderRecord.id == order_match.target_id
).first()
if order:
row['order_no'] = order.order_no
row['order_status'] = order.status.value if order.status else None
row['order_amount'] = order.amount
result.append(row)
return result
def generate_excel(self) -> io.BytesIO:
"""產生 Excel 報表"""
wb = Workbook()
ws = wb.active
ws.title = "DIT Attribution Report"
# 標題樣式
header_font = Font(bold=True, color="FFFFFF")
header_fill = PatternFill(start_color="4F46E5", end_color="4F46E5", fill_type="solid")
header_alignment = Alignment(horizontal="center", vertical="center")
# 表頭
headers = ['OP編號', '客戶名稱', '料號', 'EAU', '階段', '樣品單號', '訂單單號', '訂單狀態', '訂單金額']
for col, header in enumerate(headers, 1):
cell = ws.cell(row=1, column=col, value=header)
cell.font = header_font
cell.fill = header_fill
cell.alignment = header_alignment
# 資料
data = self.get_attribution_data()
for row_idx, row_data in enumerate(data, 2):
ws.cell(row=row_idx, column=1, value=row_data['op_id'])
ws.cell(row=row_idx, column=2, value=row_data['customer'])
ws.cell(row=row_idx, column=3, value=row_data['pn'])
ws.cell(row=row_idx, column=4, value=row_data['eau'])
ws.cell(row=row_idx, column=5, value=row_data['stage'])
ws.cell(row=row_idx, column=6, value=row_data['sample_order'] or '-')
ws.cell(row=row_idx, column=7, value=row_data['order_no'] or '-')
ws.cell(row=row_idx, column=8, value=row_data['order_status'] or '-')
ws.cell(row=row_idx, column=9, value=row_data['order_amount'] or 0)
# 調整欄寬
column_widths = [15, 30, 20, 12, 15, 15, 15, 12, 12]
for col, width in enumerate(column_widths, 1):
ws.column_dimensions[chr(64 + col)].width = width
# 儲存到 BytesIO
output = io.BytesIO()
wb.save(output)
output.seek(0)
return output
def generate_pdf(self) -> io.BytesIO:
"""產生 PDF 報表"""
output = io.BytesIO()
doc = SimpleDocTemplate(output, pagesize=landscape(A4))
elements = []
styles = getSampleStyleSheet()
# 標題
title = Paragraph("DIT Attribution Report", styles['Title'])
elements.append(title)
elements.append(Spacer(1, 20))
# 日期
date_text = Paragraph(f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M')}", styles['Normal'])
elements.append(date_text)
elements.append(Spacer(1, 20))
# 表格資料
data = self.get_attribution_data()
table_data = [['OP No.', 'Customer', 'P/N', 'EAU', 'Stage', 'Sample', 'Order', 'Status', 'Amount']]
for row in data:
table_data.append([
row['op_id'],
row['customer'][:20] + '...' if len(row['customer']) > 20 else row['customer'],
row['pn'],
str(row['eau']),
row['stage'] or '-',
row['sample_order'] or '-',
row['order_no'] or '-',
row['order_status'] or '-',
f"${row['order_amount']:,.0f}" if row['order_amount'] else '-'
])
# 建立表格
table = Table(table_data)
table.setStyle(TableStyle([
('BACKGROUND', (0, 0), (-1, 0), colors.HexColor('#4F46E5')),
('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
('ALIGN', (0, 0), (-1, -1), 'CENTER'),
('FONTSIZE', (0, 0), (-1, 0), 10),
('FONTSIZE', (0, 1), (-1, -1), 8),
('BOTTOMPADDING', (0, 0), (-1, 0), 12),
('BACKGROUND', (0, 1), (-1, -1), colors.beige),
('GRID', (0, 0), (-1, -1), 1, colors.black),
('ROWBACKGROUNDS', (0, 1), (-1, -1), [colors.white, colors.HexColor('#F8FAFC')]),
]))
elements.append(table)
doc.build(elements)
output.seek(0)
return output