first commit

2026-01-09 19:14:41 +08:00
commit 9f3c96ce73
67 changed files with 9636 additions and 0 deletions

backend/app/__init__.py Normal file

@@ -0,0 +1 @@
# SalesPipeline Backend

backend/app/config.py Normal file

@@ -0,0 +1,58 @@
import os
from pathlib import Path
from dotenv import load_dotenv
# Load environment variables
load_dotenv()
# Project paths
BASE_DIR = Path(__file__).resolve().parent.parent.parent
DATA_DIR = BASE_DIR / "data"
UPLOAD_DIR = DATA_DIR / "uploads"
STATIC_DIR = BASE_DIR / "backend" / "static"
# Ensure the directories exist
DATA_DIR.mkdir(exist_ok=True)
UPLOAD_DIR.mkdir(exist_ok=True)
STATIC_DIR.mkdir(exist_ok=True)
# MySQL database settings
DB_HOST = os.getenv("DB_HOST", "localhost")
DB_PORT = os.getenv("DB_PORT", "3306")
DB_USER = os.getenv("DB_USER", "root")
DB_PASSWORD = os.getenv("DB_PASSWORD", "")
DB_DATABASE = os.getenv("DB_DATABASE", "sales_pipeline")
# MySQL connection string
DATABASE_URL = f"mysql+pymysql://{DB_USER}:{DB_PASSWORD}@{DB_HOST}:{DB_PORT}/{DB_DATABASE}?charset=utf8mb4"
# Table name prefix
TABLE_PREFIX = os.getenv("TABLE_PREFIX", "PJ_SOA_")
# JWT settings
SECRET_KEY = os.getenv("SECRET_KEY", "your-secret-key-change-in-production-12345678")
ALGORITHM = os.getenv("ALGORITHM", "HS256")
ACCESS_TOKEN_EXPIRE_MINUTES = int(os.getenv("ACCESS_TOKEN_EXPIRE_MINUTES", "1440"))
# Fuzzy-match thresholds
MATCH_THRESHOLD_AUTO = int(os.getenv("MATCH_THRESHOLD_AUTO", "95"))
MATCH_THRESHOLD_REVIEW = int(os.getenv("MATCH_THRESHOLD_REVIEW", "80"))
# Excel parsing settings
MAX_HEADER_SCAN_ROWS = int(os.getenv("MAX_HEADER_SCAN_ROWS", "20"))
# Application settings
APP_HOST = os.getenv("APP_HOST", "0.0.0.0")
APP_PORT = int(os.getenv("APP_PORT", "8000"))
WORKERS = int(os.getenv("WORKERS", "1"))
DEBUG = os.getenv("DEBUG", "False").lower() == "true"
# CORS settings
CORS_ORIGINS = [
origin.strip()
for origin in os.getenv(
"CORS_ORIGINS",
"http://localhost:3000,http://127.0.0.1:3000,http://localhost:5173,http://127.0.0.1:5173"
).split(",")
if origin.strip()
]
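
For reference, a minimal .env sketch covering the variables read above (ADMIN_EMAIL and ADMIN_PASSWORD are consumed by init_admin.py and models/__init__.py); every value is a placeholder, not something shipped in this commit:

# .env (placeholders only)
DB_HOST=localhost
DB_PORT=3306
DB_USER=root
DB_PASSWORD=change-me
DB_DATABASE=sales_pipeline
TABLE_PREFIX=PJ_SOA_
SECRET_KEY=replace-with-a-long-random-string
ALGORITHM=HS256
ACCESS_TOKEN_EXPIRE_MINUTES=1440
MATCH_THRESHOLD_AUTO=95
MATCH_THRESHOLD_REVIEW=80
MAX_HEADER_SCAN_ROWS=20
APP_HOST=0.0.0.0
APP_PORT=8000
WORKERS=1
DEBUG=True
CORS_ORIGINS=http://localhost:5173,http://localhost:3000
ADMIN_EMAIL=admin@example.com
ADMIN_PASSWORD=admin123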

backend/app/init_admin.py Normal file

@@ -0,0 +1,54 @@
"""
Script that initializes the default administrator account.
"""
from sqlalchemy.orm import Session
from app.models import engine, Base
from app.models.user import User, UserRole
from app.utils.security import get_password_hash
import os
def create_admin_user(db: Session):
"""建立預設管理員帳號"""
admin_email = os.getenv("ADMIN_EMAIL", "admin@example.com")
admin_password = os.getenv("ADMIN_PASSWORD", "admin123")
# Check whether the account already exists
existing = db.query(User).filter(User.email == admin_email).first()
if existing:
print(f"Admin user already exists: {admin_email}")
return existing
# Create the admin
admin = User(
email=admin_email,
password_hash=get_password_hash(admin_password),
display_name="Administrator",
language="zh-TW",
role=UserRole.admin
)
db.add(admin)
db.commit()
db.refresh(admin)
print(f"Admin user created: {admin_email}")
return admin
def init_database():
"""初始化資料庫並建立預設帳號"""
from sqlalchemy.orm import sessionmaker
# Create all tables
Base.metadata.create_all(bind=engine)
print("Database tables created.")
# Create a session
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
db = SessionLocal()
try:
create_admin_user(db)
finally:
db.close()
if __name__ == "__main__":
init_database()

backend/app/main.py Normal file

@@ -0,0 +1,71 @@
from fastapi import FastAPI, HTTPException, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.staticfiles import StaticFiles
from fastapi.responses import FileResponse
from app.models import init_db
from app.routers import auth, etl, match, dashboard, report, lab
from app.config import STATIC_DIR, DEBUG, CORS_ORIGINS, APP_HOST, APP_PORT
# Initialize the database
init_db()
app = FastAPI(
title="SalesPipeline API",
description="銷售管線管理系統 API",
version="1.0.0",
docs_url="/api/docs" if DEBUG else None,
redoc_url="/api/redoc" if DEBUG else None,
)
# CORS settings (needed in development mode)
if DEBUG and CORS_ORIGINS:
app.add_middleware(
CORSMiddleware,
allow_origins=CORS_ORIGINS,
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
# Register the API routers
app.include_router(auth.router, prefix="/api")
app.include_router(etl.router, prefix="/api")
app.include_router(match.router, prefix="/api")
app.include_router(dashboard.router, prefix="/api")
app.include_router(report.router, prefix="/api")
app.include_router(lab.router, prefix="/api")
@app.get("/api/health")
def health_check():
return {"status": "healthy", "version": "1.0.0"}
# Static file serving (the built frontend assets)
static_path = STATIC_DIR
if static_path.exists():
assets_dir = static_path / "assets"
if assets_dir.exists():
app.mount("/assets", StaticFiles(directory=assets_dir), name="assets")
# SPA routing: every non-API route falls back to index.html
@app.get("/{full_path:path}")
async def serve_spa(request: Request, full_path: str):
if full_path.startswith("api/"):
return {"error": "Not Found"}, 404
static_file = static_path / full_path
if static_file.exists() and static_file.is_file():
return FileResponse(static_file)
index_file = static_path / "index.html"
if index_file.exists():
return FileResponse(index_file)
return {
"message": "SalesPipeline API is running",
"docs": "/api/docs" if DEBUG else "Disabled in production",
"health": "/api/health"
}
if __name__ == "__main__":
import uvicorn
uvicorn.run(app, host=APP_HOST, port=APP_PORT)
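
A smoke-test sketch for the app above, assuming FastAPI's TestClient (httpx) is installed and a MySQL instance matching DATABASE_URL is reachable, since importing app.main runs init_db(); it is not part of the commit:

# test_health.py (sketch)
from fastapi.testclient import TestClient
from app.main import app

client = TestClient(app)
resp = client.get("/api/health")
assert resp.status_code == 200
assert resp.json() == {"status": "healthy", "version": "1.0.0"}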

backend/app/models/__init__.py Normal file

@@ -0,0 +1,64 @@
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker, declarative_base
from app.config import DATABASE_URL
import os
# MySQL engine configuration
engine = create_engine(
DATABASE_URL,
pool_size=10,
max_overflow=20,
pool_pre_ping=True,
pool_recycle=3600,
echo=False
)
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
Base = declarative_base()
def get_db():
db = SessionLocal()
try:
yield db
finally:
db.close()
# Import models to register them
from app.models.user import User, UserRole
from app.models.dit import DitRecord
from app.models.sample import SampleRecord
from app.models.order import OrderRecord
from app.models.match import MatchResult, ReviewLog
def init_db():
"""初始化資料庫並建立預設管理員"""
from app.utils.security import get_password_hash
# Create all tables
Base.metadata.create_all(bind=engine)
# Create the default admin account
db = SessionLocal()
try:
admin_email = os.getenv("ADMIN_EMAIL", "admin@example.com")
admin_password = os.getenv("ADMIN_PASSWORD", "admin123")
existing = db.query(User).filter(User.email == admin_email).first()
if not existing:
admin = User(
email=admin_email,
password_hash=get_password_hash(admin_password),
display_name="Administrator",
language="zh-TW",
role=UserRole.admin
)
db.add(admin)
db.commit()
print(f"[Init] Admin user created: {admin_email}")
else:
print(f"[Init] Admin user exists: {admin_email}")
except Exception as e:
print(f"[Init] Error creating admin: {e}")
db.rollback()
finally:
db.close()

backend/app/models/dit.py Normal file

@@ -0,0 +1,22 @@
from sqlalchemy import Column, Integer, String, DateTime, Float, UniqueConstraint
from sqlalchemy.sql import func
from app.models import Base
from app.config import TABLE_PREFIX
class DitRecord(Base):
__tablename__ = f"{TABLE_PREFIX}DIT_Records"
__table_args__ = (
UniqueConstraint('op_id', 'pn', name='uix_dit_op_pn'),
)
id = Column(Integer, primary_key=True, index=True)
op_id = Column(String(255), index=True, nullable=False)  # not unique: the same op_id can have multiple pn values
erp_account = Column(String(100), index=True)  # Excel column AQ
customer = Column(String(255), nullable=False, index=True)
customer_normalized = Column(String(255), index=True)
pn = Column(String(100), nullable=False, index=True)
eau = Column(Integer, default=0)
stage = Column(String(50))
date = Column(String(20))
created_at = Column(DateTime(timezone=True), server_default=func.now())
updated_at = Column(DateTime(timezone=True), onupdate=func.now())

backend/app/models/match.py Normal file

@@ -0,0 +1,49 @@
from sqlalchemy import Column, Integer, String, DateTime, Float, Enum, ForeignKey
from sqlalchemy.orm import relationship
from sqlalchemy.sql import func
from app.models import Base
from app.config import TABLE_PREFIX
import enum
class TargetType(str, enum.Enum):
SAMPLE = "SAMPLE"
ORDER = "ORDER"
class MatchStatus(str, enum.Enum):
pending = "pending"
accepted = "accepted"
rejected = "rejected"
auto_matched = "auto_matched"
class ReviewAction(str, enum.Enum):
accept = "accept"
reject = "reject"
class MatchResult(Base):
__tablename__ = f"{TABLE_PREFIX}Match_Results"
id = Column(Integer, primary_key=True, index=True)
dit_id = Column(Integer, ForeignKey(f"{TABLE_PREFIX}DIT_Records.id"), nullable=False)
target_type = Column(Enum(TargetType), nullable=False)
target_id = Column(Integer, nullable=False)
score = Column(Float, nullable=False)
match_priority = Column(Integer, default=3) # 1: Oppy ID, 2: Account, 3: Name
match_source = Column(String(255)) # e.g., "Matched via Opportunity ID: OP12345"
reason = Column(String(255))
status = Column(Enum(MatchStatus), default=MatchStatus.pending)
created_at = Column(DateTime(timezone=True), server_default=func.now())
updated_at = Column(DateTime(timezone=True), onupdate=func.now())
dit = relationship("DitRecord", backref="matches")
class ReviewLog(Base):
__tablename__ = f"{TABLE_PREFIX}Review_Logs"
id = Column(Integer, primary_key=True, index=True)
match_id = Column(Integer, ForeignKey(f"{TABLE_PREFIX}Match_Results.id"), nullable=False)
user_id = Column(Integer, ForeignKey(f"{TABLE_PREFIX}users.id"), nullable=False)
action = Column(Enum(ReviewAction), nullable=False)
timestamp = Column(DateTime(timezone=True), server_default=func.now())
match_result = relationship("MatchResult", backref="review_logs")
user = relationship("User", backref="review_logs")

backend/app/models/order.py Normal file

@@ -0,0 +1,20 @@
from sqlalchemy import Column, Integer, String, DateTime, Float
from sqlalchemy.sql import func
from app.models import Base
from app.config import TABLE_PREFIX
class OrderRecord(Base):
__tablename__ = f"{TABLE_PREFIX}Order_Records"
id = Column(Integer, primary_key=True, index=True)
order_id = Column(String(50), index=True, nullable=False)  # not unique: an order can have multiple line items
order_no = Column(String(50), index=True)
cust_id = Column(String(100), index=True)
customer = Column(String(255), nullable=False, index=True)
customer_normalized = Column(String(255), index=True)
pn = Column(String(100), nullable=False, index=True)
qty = Column(Integer, default=0)
status = Column(String(50), default='Backlog')  # String rather than Enum to support Chinese status values
amount = Column(Float, default=0.0)
created_at = Column(DateTime(timezone=True), server_default=func.now())
updated_at = Column(DateTime(timezone=True), onupdate=func.now())

backend/app/models/sample.py Normal file

@@ -0,0 +1,20 @@
from sqlalchemy import Column, Integer, String, DateTime
from sqlalchemy.sql import func
from app.models import Base
from app.config import TABLE_PREFIX
class SampleRecord(Base):
__tablename__ = f"{TABLE_PREFIX}Sample_Records"
id = Column(Integer, primary_key=True, index=True)
sample_id = Column(String(50), unique=True, index=True, nullable=False)
order_no = Column(String(50), index=True)
oppy_no = Column(String(100), index=True)  # Excel column AU
cust_id = Column(String(100), index=True)  # Excel column G
customer = Column(String(255), nullable=False, index=True)
customer_normalized = Column(String(255), index=True)
pn = Column(String(100), nullable=False, index=True)
qty = Column(Integer, default=0)
date = Column(String(20))
created_at = Column(DateTime(timezone=True), server_default=func.now())
updated_at = Column(DateTime(timezone=True), onupdate=func.now())

backend/app/models/user.py Normal file

@@ -0,0 +1,23 @@
from sqlalchemy import Column, Integer, String, DateTime, Enum
from sqlalchemy.sql import func
from app.models import Base
from app.config import TABLE_PREFIX
import enum
class UserRole(str, enum.Enum):
admin = "admin"
user = "user"
class User(Base):
__tablename__ = f"{TABLE_PREFIX}users"
id = Column(Integer, primary_key=True, index=True)
email = Column(String(200), unique=True, index=True, nullable=False)
ad_username = Column(String(100), nullable=True) # Added to satisfy DB constraint
department = Column(String(100), nullable=True) # Added to satisfy DB constraint
password_hash = Column("local_password", String(255), nullable=True)
display_name = Column(String(100), nullable=True)
# language = Column(String(10), default="zh-TW") # Not in DB
role = Column(String(20), default="user") # Simplified from Enum
created_at = Column(DateTime(timezone=True), server_default=func.now())
updated_at = Column(DateTime(timezone=True), onupdate=func.now())

backend/app/routers/__init__.py Normal file

@@ -0,0 +1 @@
# Routers package

backend/app/routers/auth.py Normal file

@@ -0,0 +1,84 @@
from fastapi import APIRouter, Depends, HTTPException, status
from fastapi.security import OAuth2PasswordRequestForm
from sqlalchemy.orm import Session
from pydantic import BaseModel, EmailStr
from app.models import get_db
from app.models.user import User, UserRole
from app.utils.security import (
get_password_hash, verify_password,
create_access_token, get_current_user
)
router = APIRouter(prefix="/auth", tags=["Authentication"])
class UserCreate(BaseModel):
email: EmailStr
password: str
class UserResponse(BaseModel):
id: int
email: str
role: str
class Config:
from_attributes = True
class TokenResponse(BaseModel):
access_token: str
token_type: str
user: UserResponse
def get_role_value(role) -> str:
"""取得 role 的字串值,相容 Enum 和字串"""
if hasattr(role, 'value'):
return role.value
return str(role) if role else 'user'
@router.post("/register", response_model=UserResponse)
def register(user_data: UserCreate, db: Session = Depends(get_db)):
"""註冊新使用者"""
existing_user = db.query(User).filter(User.email == user_data.email).first()
if existing_user:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail="Email already registered"
)
user = User(
email=user_data.email,
password_hash=get_password_hash(user_data.password),
role=UserRole.user
)
db.add(user)
db.commit()
db.refresh(user)
return UserResponse(id=user.id, email=user.email, role=get_role_value(user.role))
@router.post("/login", response_model=TokenResponse)
def login(form_data: OAuth2PasswordRequestForm = Depends(), db: Session = Depends(get_db)):
"""登入取得 JWT Token"""
user = db.query(User).filter(User.email == form_data.username).first()
if not user or not verify_password(form_data.password, user.password_hash):
raise HTTPException(
status_code=status.HTTP_401_UNAUTHORIZED,
detail="Incorrect email or password",
headers={"WWW-Authenticate": "Bearer"},
)
access_token = create_access_token(data={"sub": str(user.id)})
return TokenResponse(
access_token=access_token,
token_type="bearer",
user=UserResponse(id=user.id, email=user.email, role=get_role_value(user.role))
)
@router.get("/me", response_model=UserResponse)
def get_me(current_user: User = Depends(get_current_user)):
"""取得當前使用者資訊"""
return UserResponse(
id=current_user.id,
email=current_user.email,
role=get_role_value(current_user.role)
)
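
A usage sketch for these endpoints with the requests library; the base URL and credentials are placeholders, and the server is assumed to be running locally:

import requests

BASE = "http://localhost:8000/api"

# Register with a JSON body, then log in via the OAuth2 password form (username/password fields)
requests.post(f"{BASE}/auth/register", json={"email": "user@example.com", "password": "secret123"})
login = requests.post(f"{BASE}/auth/login", data={"username": "user@example.com", "password": "secret123"})
token = login.json()["access_token"]

# Call /auth/me with the Bearer token
me = requests.get(f"{BASE}/auth/me", headers={"Authorization": f"Bearer {token}"})
print(me.json())  # {"id": ..., "email": "user@example.com", "role": "user"}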

backend/app/routers/dashboard.py Normal file

@@ -0,0 +1,225 @@
from typing import List
from fastapi import APIRouter, Depends
from sqlalchemy.orm import Session
from sqlalchemy import func
from pydantic import BaseModel
from app.models import get_db
from app.models.dit import DitRecord
from app.models.sample import SampleRecord
from app.models.order import OrderRecord
from app.models.match import MatchResult, MatchStatus, TargetType
router = APIRouter(prefix="/dashboard", tags=["Dashboard"])
class KPIResponse(BaseModel):
total_dit: int
sample_rate: float  # sample conversion rate
hit_rate: float  # order hit rate
fulfillment_rate: float  # EAU fulfillment rate
orphan_sample_rate: float  # orphan (unconverted) sample rate
total_revenue: float
class FunnelItem(BaseModel):
name: str
value: int
fill: str
class AttributionDit(BaseModel):
op_id: str
customer: str
pn: str
eau: int
stage: str
date: str
class AttributionSample(BaseModel):
order_no: str
date: str
class AttributionOrder(BaseModel):
order_no: str
status: str
qty: int
amount: float
class AttributionRow(BaseModel):
dit: AttributionDit
sample: AttributionSample | None
order: AttributionOrder | None
match_source: str | None
attributed_qty: int
fulfillment_rate: float
def get_lifo_attribution(db: Session):
"""執行 LIFO 業績分配邏輯"""
# 1. 取得所有 DIT按日期由新到舊排序 (LIFO)
dits = db.query(DitRecord).order_by(DitRecord.date.desc()).all()
# 2. Get all matched and accepted orders
matched_orders = db.query(MatchResult, OrderRecord).join(
OrderRecord, MatchResult.target_id == OrderRecord.id
).filter(
MatchResult.target_type == TargetType.ORDER,
MatchResult.status.in_([MatchStatus.accepted, MatchStatus.auto_matched])
).all()
# 3. Build the revenue pool, grouped by (customer, part number)
order_pools = {}
for match, order in matched_orders:
key = (order.customer_normalized, order.pn)
if key not in order_pools:
order_pools[key] = 0
order_pools[key] += (order.qty or 0)
# 4. Allocate
attribution_map = {}  # dit_id -> {"qty", "eau"}
for dit in dits:
key = (dit.customer_normalized, dit.pn)
eau = dit.eau or 0
allocated = 0
if key in order_pools and order_pools[key] > 0:
allocated = min(eau, order_pools[key])
order_pools[key] -= allocated
attribution_map[dit.id] = {
"qty": allocated,
"eau": eau
}
return attribution_map
@router.get("/kpi", response_model=KPIResponse)
def get_kpi(db: Session = Depends(get_db)):
"""取得 KPI 統計 (符合規格書 v1.0)"""
total_dit = db.query(DitRecord).count()
if total_dit == 0:
return KPIResponse(total_dit=0, sample_rate=0, hit_rate=0, fulfillment_rate=0, orphan_sample_rate=0, total_revenue=0)
# 1. Sample Rate (sample conversion): (DITs matched to a sample) / (total DITs)
dits_with_sample = db.query(func.count(func.distinct(MatchResult.dit_id))).filter(
MatchResult.target_type == TargetType.SAMPLE,
MatchResult.status.in_([MatchStatus.accepted, MatchStatus.auto_matched])
).scalar() or 0
sample_rate = (dits_with_sample / total_dit * 100)
# 2. Hit Rate (order hit): (DITs matched to an order) / (total DITs)
dits_with_order = db.query(func.count(func.distinct(MatchResult.dit_id))).filter(
MatchResult.target_type == TargetType.ORDER,
MatchResult.status.in_([MatchStatus.accepted, MatchStatus.auto_matched])
).scalar() or 0
hit_rate = (dits_with_order / total_dit * 100)
# 3. Fulfillment Rate (EAU attainment): (total attributed order qty) / (estimated DIT EAU)
attribution_map = get_lifo_attribution(db)
total_attributed_qty = sum(item['qty'] for item in attribution_map.values())
total_eau = sum(item['eau'] for item in attribution_map.values())
fulfillment_rate = (total_attributed_qty / total_eau * 100) if total_eau > 0 else 0
# 4. Orphan Sample Rate: (samples not matched to any DIT) / (total samples)
total_samples = db.query(SampleRecord).count()
matched_sample_ids = db.query(func.distinct(MatchResult.target_id)).filter(
MatchResult.target_type == TargetType.SAMPLE
).all()
matched_sample_count = len(matched_sample_ids)
orphan_sample_rate = ((total_samples - matched_sample_count) / total_samples * 100) if total_samples > 0 else 0
# 5. Total revenue
total_revenue = db.query(func.sum(OrderRecord.amount)).join(
MatchResult, MatchResult.target_id == OrderRecord.id
).filter(
MatchResult.target_type == TargetType.ORDER,
MatchResult.status.in_([MatchStatus.accepted, MatchStatus.auto_matched])
).scalar() or 0
return KPIResponse(
total_dit=total_dit,
sample_rate=round(sample_rate, 1),
hit_rate=round(hit_rate, 1),
fulfillment_rate=round(fulfillment_rate, 1),
orphan_sample_rate=round(orphan_sample_rate, 1),
total_revenue=total_revenue
)
@router.get("/funnel", response_model=List[FunnelItem])
def get_funnel(db: Session = Depends(get_db)):
"""取得漏斗數據"""
total_dit = db.query(DitRecord).count()
dits_with_sample = db.query(func.count(func.distinct(MatchResult.dit_id))).filter(
MatchResult.target_type == TargetType.SAMPLE,
MatchResult.status.in_([MatchStatus.accepted, MatchStatus.auto_matched])
).scalar() or 0
dits_with_order = db.query(func.count(func.distinct(MatchResult.dit_id))).filter(
MatchResult.target_type == TargetType.ORDER,
MatchResult.status.in_([MatchStatus.accepted, MatchStatus.auto_matched])
).scalar() or 0
return [
FunnelItem(name='DIT 案件', value=total_dit, fill='#6366f1'),
FunnelItem(name='成功送樣', value=dits_with_sample, fill='#8b5cf6'),
FunnelItem(name='取得訂單', value=dits_with_order, fill='#10b981'),
]
@router.get("/attribution", response_model=List[AttributionRow])
def get_attribution(db: Session = Depends(get_db)):
"""取得歸因明細 (含 LIFO 分配與追溯資訊)"""
dit_records = db.query(DitRecord).order_by(DitRecord.date.desc()).all()
attribution_map = get_lifo_attribution(db)
result = []
for dit in dit_records:
# Find the sample match (take the one with the highest score)
sample_match = db.query(MatchResult).filter(
MatchResult.dit_id == dit.id,
MatchResult.target_type == TargetType.SAMPLE,
MatchResult.status.in_([MatchStatus.accepted, MatchStatus.auto_matched])
).order_by(MatchResult.score.desc()).first()
sample_info = None
if sample_match:
sample = db.query(SampleRecord).filter(SampleRecord.id == sample_match.target_id).first()
if sample:
sample_info = AttributionSample(order_no=sample.order_no, date=sample.date or '')
# Find the order match (take the one with the highest score)
order_match = db.query(MatchResult).filter(
MatchResult.dit_id == dit.id,
MatchResult.target_type == TargetType.ORDER,
MatchResult.status.in_([MatchStatus.accepted, MatchStatus.auto_matched])
).order_by(MatchResult.score.desc()).first()
order_info = None
match_source = None
if order_match:
order = db.query(OrderRecord).filter(OrderRecord.id == order_match.target_id).first()
if order:
order_info = AttributionOrder(
order_no=order.order_no,
status=order.status or 'Unknown',
qty=order.qty or 0,
amount=order.amount or 0
)
match_source = order_match.match_source
attr_data = attribution_map.get(dit.id, {"qty": 0, "eau": dit.eau or 0})
fulfillment = (attr_data['qty'] / attr_data['eau'] * 100) if attr_data['eau'] > 0 else 0
result.append(AttributionRow(
dit=AttributionDit(
op_id=dit.op_id,
customer=dit.customer,
pn=dit.pn,
eau=dit.eau,
stage=dit.stage or '',
date=dit.date or ''
),
sample=sample_info,
order=order_info,
match_source=match_source,
attributed_qty=attr_data['qty'],
fulfillment_rate=round(fulfillment, 1)
))
return result
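
To illustrate the LIFO allocation in get_lifo_attribution, a standalone sketch with made-up numbers (not application code): two DITs share one (customer, pn) pool, and the newer DIT is served first.

# Toy data: DITs sorted newest-first; the pool holds total matched order qty per key
dits = [
    {"id": 2, "key": ("ACME", "PMSM808LL"), "eau": 500},  # newer
    {"id": 1, "key": ("ACME", "PMSM808LL"), "eau": 800},  # older
]
order_pools = {("ACME", "PMSM808LL"): 600}

attribution = {}
for dit in dits:  # newest first (LIFO)
    pool = order_pools.get(dit["key"], 0)
    allocated = min(dit["eau"], pool)
    order_pools[dit["key"]] = pool - allocated
    attribution[dit["id"]] = {"qty": allocated, "eau": dit["eau"]}

print(attribution)  # {2: {'qty': 500, 'eau': 500}, 1: {'qty': 100, 'eau': 800}}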

backend/app/routers/etl.py Normal file

@@ -0,0 +1,246 @@
import shutil
from pathlib import Path
from typing import List
import pandas as pd
from fastapi import APIRouter, Depends, HTTPException, UploadFile, File, Form
from sqlalchemy.orm import Session
from pydantic import BaseModel
from app.models import get_db
from app.models.dit import DitRecord
from app.models.sample import SampleRecord
from app.models.order import OrderRecord
from app.models.match import MatchResult, TargetType, ReviewLog
from app.config import UPLOAD_DIR
from app.services.excel_parser import excel_parser
from app.services.fuzzy_matcher import normalize_customer_name, sanitize_pn
router = APIRouter(prefix="/etl", tags=["ETL"])
class ParsedFileResponse(BaseModel):
file_id: str
file_type: str
filename: str
header_row: int
row_count: int
preview: List[dict]
class ImportRequest(BaseModel):
file_id: str
class ImportResponse(BaseModel):
imported_count: int
@router.post("/upload", response_model=ParsedFileResponse)
async def upload_file(
file: UploadFile = File(...),
file_type: str = Form(...),
db: Session = Depends(get_db)
):
"""上傳並解析 Excel 檔案"""
if file_type not in ['dit', 'sample', 'order']:
raise HTTPException(status_code=400, detail="Invalid file type")
# Save the file
file_path = UPLOAD_DIR / file.filename
with open(file_path, "wb") as buffer:
shutil.copyfileobj(file.file, buffer)
try:
# Parse the file
file_id, file_info = excel_parser.parse_file(file_path, file_type)
return ParsedFileResponse(
file_id=file_id,
file_type=file_info['file_type'],
filename=file_info['filename'],
header_row=file_info['header_row'],
row_count=file_info['row_count'],
preview=file_info['preview']
)
except Exception as e:
raise HTTPException(status_code=400, detail=f"Failed to parse file: {str(e)}")
@router.get("/preview/{file_id}", response_model=ParsedFileResponse)
def get_preview(file_id: str):
"""取得檔案預覽"""
file_info = excel_parser.get_file_info(file_id)
if not file_info:
raise HTTPException(status_code=404, detail="File not found")
return ParsedFileResponse(
file_id=file_info['file_id'],
file_type=file_info['file_type'],
filename=file_info['filename'],
header_row=file_info['header_row'],
row_count=file_info['row_count'],
preview=file_info['preview']
)
def clean_value(val, default=''):
"""清理欄位值,處理 nan 和空值"""
if val is None or (isinstance(val, float) and pd.isna(val)):
return default
str_val = str(val).strip()
if str_val.lower() in ('nan', 'none', 'null', ''):
return default
return str_val
@router.post("/import", response_model=ImportResponse)
def import_data(request: ImportRequest, db: Session = Depends(get_db)):
"""匯入資料到資料庫"""
import traceback
try:
file_info = excel_parser.get_file_info(request.file_id)
if not file_info:
print(f"[ETL Import] Error: File not found for file_id={request.file_id}")
raise HTTPException(status_code=404, detail="File not found")
df = excel_parser.get_parsed_data(request.file_id)
if df is None:
print(f"[ETL Import] Error: Parsed data not found for file_id={request.file_id}")
raise HTTPException(status_code=404, detail="Parsed data not found")
print(f"[ETL Import] Starting import: file_type={file_info['file_type']}, rows={len(df)}")
file_type = file_info['file_type']
imported_count = 0
seen_ids = set()  # track IDs already processed, to skip duplicates within the file
# Clear old data of this type to avoid duplicate-key conflicts
try:
if file_type == 'dit':
print("[ETL Import] Clearing old DIT records and dependent matches/logs...")
# First clear DIT-related review logs and match results
db.query(ReviewLog).delete()
db.query(MatchResult).delete()
db.query(DitRecord).delete()
elif file_type == 'sample':
print("[ETL Import] Clearing old Sample records and dependent matches/logs...")
# First clear Sample-related match results (and their review logs).
# This is trickier because ReviewLog is linked through MatchResult,
# but since the whole category is being cleared, wiping all ReviewLogs and the matching MatchResults is safest.
db.query(ReviewLog).delete()
db.query(MatchResult).filter(MatchResult.target_type == TargetType.SAMPLE).delete()
db.query(SampleRecord).delete()
elif file_type == 'order':
print("[ETL Import] Clearing old Order records and dependent matches/logs...")
db.query(ReviewLog).delete()
db.query(MatchResult).filter(MatchResult.target_type == TargetType.ORDER).delete()
db.query(OrderRecord).delete()
db.flush()  # flush instead of commit, to stay within the same transaction
print("[ETL Import] Old data cleared successfully.")
except Exception as e:
db.rollback()
print(f"[ETL Import] Error clearing old data: {traceback.format_exc()}")
raise HTTPException(status_code=500, detail=f"Failed to clear old data: {str(e)}")
for idx, row in df.iterrows():
try:
if file_type == 'dit':
op_id = clean_value(row.get('op_id'), '')
erp_account = clean_value(row.get('erp_account'), '')
customer = clean_value(row.get('customer'))
pn = clean_value(row.get('pn'))
# Skip invalid rows and duplicate op_id + pn combinations
unique_key = f"{op_id}|{pn}"
if not op_id or unique_key in seen_ids:
continue
seen_ids.add(unique_key)
record = DitRecord(
op_id=op_id,
erp_account=erp_account,
customer=customer,
customer_normalized=normalize_customer_name(customer),
pn=sanitize_pn(pn),
eau=int(row.get('eau', 0)) if row.get('eau') and not pd.isna(row.get('eau')) else 0,
stage=clean_value(row.get('stage')),
date=clean_value(row.get('date'))
)
elif file_type == 'sample':
sample_id = clean_value(row.get('sample_id'), f'S{idx}')
oppy_no = clean_value(row.get('oppy_no'), '')
cust_id = clean_value(row.get('cust_id'), '')
customer = clean_value(row.get('customer'))
pn = clean_value(row.get('pn'))
# Skip duplicate sample_id values
if sample_id in seen_ids:
continue
seen_ids.add(sample_id)
record = SampleRecord(
sample_id=sample_id,
order_no=clean_value(row.get('order_no')),
oppy_no=oppy_no,
cust_id=cust_id,
customer=customer,
customer_normalized=normalize_customer_name(customer),
pn=sanitize_pn(pn),
qty=int(row.get('qty', 0)) if row.get('qty') and not pd.isna(row.get('qty')) else 0,
date=clean_value(row.get('date'))
)
elif file_type == 'order':
order_id = clean_value(row.get('order_id'), f'O{idx}')
cust_id = clean_value(row.get('cust_id'), '')
customer = clean_value(row.get('customer'))
pn = clean_value(row.get('pn'))
# Skip duplicate order_id values
if order_id in seen_ids:
continue
seen_ids.add(order_id)
record = OrderRecord(
order_id=order_id,
order_no=clean_value(row.get('order_no')),
cust_id=cust_id,
customer=customer,
customer_normalized=normalize_customer_name(customer),
pn=sanitize_pn(pn),
qty=int(row.get('qty', 0)) if row.get('qty') and not pd.isna(row.get('qty')) else 0,
status=clean_value(row.get('status'), 'Backlog'),
amount=float(row.get('amount', 0)) if row.get('amount') and not pd.isna(row.get('amount')) else 0
)
else:
continue
db.add(record)
imported_count += 1
if imported_count % 500 == 0:
print(f"[ETL Import] Processed {imported_count} rows...")
except Exception as e:
print(f"[ETL Import] Error importing row {idx}: {e}")
continue
try:
print(f"[ETL Import] Committing {imported_count} records...")
db.commit()
print(f"[ETL Import] Import successful: {imported_count} records.")
except Exception as e:
db.rollback()
print(f"[ETL Import] Commit Error: {traceback.format_exc()}")
raise HTTPException(status_code=500, detail=f"Failed to commit data: {str(e)}")
return ImportResponse(imported_count=imported_count)
except HTTPException:
raise
except Exception as e:
print(f"[ETL Import] Unhandled Exception: {traceback.format_exc()}")
raise HTTPException(status_code=500, detail=f"Internal Server Error: {str(e)}")
@router.get("/data/{data_type}")
def get_data(data_type: str, db: Session = Depends(get_db)):
"""取得已匯入的資料"""
if data_type == 'dit':
records = db.query(DitRecord).all()
elif data_type == 'sample':
records = db.query(SampleRecord).all()
elif data_type == 'order':
records = db.query(OrderRecord).all()
else:
raise HTTPException(status_code=400, detail="Invalid data type")
return [
{
**{c.name: getattr(record, c.name) for c in record.__table__.columns}
}
for record in records
]
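
A sketch of the upload → import flow driven with requests; the base URL, filename, and file type are placeholders and the server is assumed to be running:

import requests

BASE = "http://localhost:8000/api"

# 1. Upload and parse an order workbook (multipart file plus a file_type form field)
with open("orders.xlsx", "rb") as f:
    parsed = requests.post(f"{BASE}/etl/upload",
                           files={"file": ("orders.xlsx", f)},
                           data={"file_type": "order"}).json()
print(parsed["header_row"], parsed["row_count"])

# 2. Import the parsed rows (existing rows of that type are cleared first)
imported = requests.post(f"{BASE}/etl/import", json={"file_id": parsed["file_id"]}).json()
print(imported["imported_count"])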

backend/app/routers/lab.py Normal file

@@ -0,0 +1,181 @@
from typing import List, Optional
from datetime import datetime, timedelta
from fastapi import APIRouter, Depends, Query
from sqlalchemy.orm import Session
from sqlalchemy import func, and_
from pydantic import BaseModel
from app.models import get_db
from app.models.sample import SampleRecord
from app.models.order import OrderRecord
router = APIRouter(prefix="/lab", tags=["Lab"])
class LabKPI(BaseModel):
avg_velocity: float  # average conversion time (days)
conversion_rate: float  # conversion rate (%)
orphan_count: int  # total orphan samples
class ScatterPoint(BaseModel):
customer: str
pn: str
sample_qty: int
order_qty: int
class OrphanSample(BaseModel):
customer: str
pn: str
days_since_sent: int
order_no: str
date: str
def parse_date(date_str: str) -> Optional[datetime]:
try:
return datetime.strptime(date_str, "%Y-%m-%d")
except (ValueError, TypeError):
return None
@router.get("/kpi", response_model=LabKPI)
def get_lab_kpi(
start_date: Optional[str] = Query(None),
end_date: Optional[str] = Query(None),
db: Session = Depends(get_db)
):
# 1. Get all samples and orders
samples_query = db.query(SampleRecord)
orders_query = db.query(OrderRecord)
if start_date:
samples_query = samples_query.filter(SampleRecord.date >= start_date)
orders_query = orders_query.filter(OrderRecord.created_at >= start_date)  # should orders filter on created_at or a date column? OrderRecord only has created_at as a DateTime
if end_date:
samples_query = samples_query.filter(SampleRecord.date <= end_date)
# Note: OrderRecord only has created_at
samples = samples_query.all()
orders = orders_query.all()
# Build groups keyed by (ERP code + PN)
# The ERP code corresponds to cust_id
sample_groups = {}
for s in samples:
key = (s.cust_id, s.pn)
if key not in sample_groups:
sample_groups[key] = []
sample_groups[key].append(s)
order_groups = {}
for o in orders:
key = (o.cust_id, o.pn)
if key not in order_groups:
order_groups[key] = []
order_groups[key].append(o)
# Compute velocity and conversion rate
velocities = []
converted_samples_count = 0
total_samples_count = len(samples)
for key, group_samples in sample_groups.items():
if key in order_groups:
# Conversion succeeded
converted_samples_count += len(group_samples)
# Velocity: first order date - earliest sample date (drop dates that fail to parse)
earliest_sample_date = min([d for d in (parse_date(s.date) for s in group_samples if s.date) if d] or [datetime.max])
first_order_date = min([o.created_at for o in order_groups[key] if o.created_at] or [datetime.max])
if earliest_sample_date != datetime.max and first_order_date != datetime.max:
diff = (first_order_date - earliest_sample_date).days
if diff >= 0:
velocities.append(diff)
avg_velocity = sum(velocities) / len(velocities) if velocities else 0
conversion_rate = (converted_samples_count / total_samples_count * 100) if total_samples_count > 0 else 0
# Orphan samples: sent more than 90 days ago with no order
now = datetime.now()
orphan_count = 0
for key, group_samples in sample_groups.items():
if key not in order_groups:
for s in group_samples:
s_date = parse_date(s.date)
if s_date and (now - s_date).days > 90:
orphan_count += 1
return LabKPI(
avg_velocity=round(avg_velocity, 1),
conversion_rate=round(conversion_rate, 1),
orphan_count=orphan_count
)
@router.get("/scatter", response_model=List[ScatterPoint])
def get_scatter_data(
start_date: Optional[str] = Query(None),
end_date: Optional[str] = Query(None),
db: Session = Depends(get_db)
):
samples_query = db.query(SampleRecord)
orders_query = db.query(OrderRecord)
if start_date:
samples_query = samples_query.filter(SampleRecord.date >= start_date)
if end_date:
samples_query = samples_query.filter(SampleRecord.date <= end_date)
samples = samples_query.all()
orders = orders_query.all()
# Aggregate the data
data_map = {}  # (cust_id, pn) -> {sample_qty, order_qty, customer}
for s in samples:
key = (s.cust_id, s.pn)
if key not in data_map:
data_map[key] = {"sample_qty": 0, "order_qty": 0, "customer": s.customer}
data_map[key]["sample_qty"] += (s.qty or 0)
for o in orders:
key = (o.cust_id, o.pn)
if key in data_map:
data_map[key]["order_qty"] += (o.qty or 0)
# Keys with orders but no samples are not shown in the ROI analysis (they would sit on the Y axis at X=0).
# Per the requirements, the analysis relates sample shipments to received orders, so sampled items are the baseline.
return [
ScatterPoint(
customer=v["customer"],
pn=key[1],
sample_qty=v["sample_qty"],
order_qty=v["order_qty"]
)
for key, v in data_map.items()
]
@router.get("/orphans", response_model=List[OrphanSample])
def get_orphans(db: Session = Depends(get_db)):
now = datetime.now()
threshold_date = now - timedelta(days=90)
# Fetch all samples
samples = db.query(SampleRecord).all()
# Collect the (cust_id, pn) keys that have orders
orders_keys = set(db.query(OrderRecord.cust_id, OrderRecord.pn).distinct().all())
orphans = []
for s in samples:
key = (s.cust_id, s.pn)
s_date = parse_date(s.date)
if key not in orders_keys:
if s_date and s_date < threshold_date:
orphans.append(OrphanSample(
customer=s.customer,
pn=s.pn,
days_since_sent=(now - s_date).days,
order_no=s.order_no,
date=s.date
))
return sorted(orphans, key=lambda x: x.days_since_sent, reverse=True)
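
A toy illustration of the velocity calculation above: samples and orders are grouped by (cust_id, pn), and velocity is the first order date minus the earliest sample date (the dates are made up):

from datetime import datetime

sample_dates = [datetime(2025, 9, 1), datetime(2025, 9, 20)]   # one (cust_id, pn) group
first_order_date = datetime(2025, 11, 10)

velocity_days = (first_order_date - min(sample_dates)).days    # 70 days
converted_samples = len(sample_dates)                          # both samples in the group count as converted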

backend/app/routers/match.py Normal file

@@ -0,0 +1,171 @@
from typing import List, Optional
from fastapi import APIRouter, Depends, HTTPException
from sqlalchemy.orm import Session
from pydantic import BaseModel
from app.models import get_db
from app.models.dit import DitRecord
from app.models.sample import SampleRecord
from app.models.order import OrderRecord
from app.models.match import MatchResult, MatchStatus, TargetType
from app.services.fuzzy_matcher import FuzzyMatcher
router = APIRouter(prefix="/match", tags=["Matching"])
class MatchRunResponse(BaseModel):
match_count: int
auto_matched: int
pending_review: int
class DitInfo(BaseModel):
id: int
op_id: str
customer: str
pn: str
eau: int
stage: Optional[str]
class Config:
from_attributes = True
class TargetInfo(BaseModel):
id: int
customer: str
pn: str
order_no: Optional[str]
qty: Optional[int]
class MatchResultResponse(BaseModel):
id: int
dit_id: int
target_type: str
target_id: int
score: float
reason: str
status: str
dit: Optional[DitInfo]
target: Optional[TargetInfo]
class Config:
from_attributes = True
class ReviewRequest(BaseModel):
action: str # 'accept' or 'reject'
@router.post("/run", response_model=MatchRunResponse)
def run_matching(db: Session = Depends(get_db)):
"""執行模糊比對"""
matcher = FuzzyMatcher(db)
result = matcher.run_matching()
return MatchRunResponse(**result)
@router.get("/results", response_model=List[MatchResultResponse])
def get_results(db: Session = Depends(get_db)):
"""取得所有比對結果"""
matches = db.query(MatchResult).all()
results = []
for match in matches:
# Get the DIT info
dit = db.query(DitRecord).filter(DitRecord.id == match.dit_id).first()
dit_info = DitInfo(
id=dit.id,
op_id=dit.op_id,
customer=dit.customer,
pn=dit.pn,
eau=dit.eau,
stage=dit.stage
) if dit else None
# Get the target info
target_info = None
if match.target_type == TargetType.SAMPLE:
sample = db.query(SampleRecord).filter(SampleRecord.id == match.target_id).first()
if sample:
target_info = TargetInfo(
id=sample.id,
customer=sample.customer,
pn=sample.pn,
order_no=sample.order_no,
qty=sample.qty
)
elif match.target_type == TargetType.ORDER:
order = db.query(OrderRecord).filter(OrderRecord.id == match.target_id).first()
if order:
target_info = TargetInfo(
id=order.id,
customer=order.customer,
pn=order.pn,
order_no=order.order_no,
qty=order.qty
)
results.append(MatchResultResponse(
id=match.id,
dit_id=match.dit_id,
target_type=match.target_type.value,
target_id=match.target_id,
score=match.score,
reason=match.reason,
status=match.status.value,
dit=dit_info,
target=target_info
))
return results
@router.put("/{match_id}/review", response_model=MatchResultResponse)
def review_match(match_id: int, request: ReviewRequest, db: Session = Depends(get_db)):
"""審核比對結果"""
if request.action not in ['accept', 'reject']:
raise HTTPException(status_code=400, detail="Invalid action")
matcher = FuzzyMatcher(db)
match = matcher.review_match(match_id, request.action)
if not match:
raise HTTPException(status_code=404, detail="Match not found")
# Get the related info
dit = db.query(DitRecord).filter(DitRecord.id == match.dit_id).first()
dit_info = DitInfo(
id=dit.id,
op_id=dit.op_id,
customer=dit.customer,
pn=dit.pn,
eau=dit.eau,
stage=dit.stage
) if dit else None
target_info = None
if match.target_type == TargetType.SAMPLE:
sample = db.query(SampleRecord).filter(SampleRecord.id == match.target_id).first()
if sample:
target_info = TargetInfo(
id=sample.id,
customer=sample.customer,
pn=sample.pn,
order_no=sample.order_no,
qty=sample.qty
)
elif match.target_type == TargetType.ORDER:
order = db.query(OrderRecord).filter(OrderRecord.id == match.target_id).first()
if order:
target_info = TargetInfo(
id=order.id,
customer=order.customer,
pn=order.pn,
order_no=order.order_no,
qty=order.qty
)
return MatchResultResponse(
id=match.id,
dit_id=match.dit_id,
target_type=match.target_type.value,
target_id=match.target_id,
score=match.score,
reason=match.reason,
status=match.status.value,
dit=dit_info,
target=target_info
)
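
A sketch of driving the matching endpoints with requests (the base URL is a placeholder and the server is assumed to be running):

import requests

BASE = "http://localhost:8000/api"

# Run waterfall matching, list the results, then accept the first pending one
run = requests.post(f"{BASE}/match/run").json()
print(run)  # {'match_count': ..., 'auto_matched': ..., 'pending_review': ...}

results = requests.get(f"{BASE}/match/results").json()
pending = [r for r in results if r["status"] == "pending"]
if pending:
    reviewed = requests.put(f"{BASE}/match/{pending[0]['id']}/review",
                            json={"action": "accept"}).json()
    print(reviewed["status"])  # "accepted"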

backend/app/routers/report.py Normal file

@@ -0,0 +1,32 @@
from fastapi import APIRouter, Depends, HTTPException
from fastapi.responses import StreamingResponse
from sqlalchemy.orm import Session
from app.models import get_db
from app.services.report_generator import ReportGenerator
router = APIRouter(prefix="/report", tags=["Report"])
@router.get("/export")
def export_report(format: str = "xlsx", db: Session = Depends(get_db)):
"""匯出報表"""
if format not in ['xlsx', 'pdf']:
raise HTTPException(status_code=400, detail="Invalid format. Use 'xlsx' or 'pdf'")
generator = ReportGenerator(db)
if format == 'xlsx':
output = generator.generate_excel()
media_type = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
filename = "dit_attribution_report.xlsx"
else:
output = generator.generate_pdf()
media_type = "application/pdf"
filename = "dit_attribution_report.pdf"
return StreamingResponse(
output,
media_type=media_type,
headers={
"Content-Disposition": f"attachment; filename={filename}"
}
)
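
A download sketch for the export endpoint; format may be "xlsx" or "pdf", and the base URL is a placeholder:

import requests

resp = requests.get("http://localhost:8000/api/report/export", params={"format": "xlsx"})
with open("dit_attribution_report.xlsx", "wb") as f:
    f.write(resp.content)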

backend/app/services/__init__.py Normal file

@@ -0,0 +1 @@
# Services package

backend/app/services/excel_parser.py Normal file

@@ -0,0 +1,175 @@
import re
import uuid
import math
from pathlib import Path
from typing import List, Dict, Any, Optional, Tuple
import pandas as pd
import chardet
from openpyxl import load_workbook
from app.config import MAX_HEADER_SCAN_ROWS, UPLOAD_DIR
def clean_value(val):
"""清理單一值,將 NaN/Inf 轉換為 None 以便 JSON 序列化"""
if val is None:
return None
if isinstance(val, float):
if math.isnan(val) or math.isinf(val):
return None
return val
def clean_dict(d: Dict) -> Dict:
"""清理字典中的所有 NaN/Inf 值"""
return {k: clean_value(v) for k, v in d.items()}
def clean_records(records: List[Dict]) -> List[Dict]:
"""清理記錄列表中的所有 NaN/Inf 值"""
return [clean_dict(r) for r in records]
# Column-name mapping table
COLUMN_MAPPING = {
'dit': {
'op_id': ['opportunity name', 'opportunity no', 'opportunity', 'op編號', 'op 編號', 'op_id', 'opid', '案件編號', '案號', 'opportunity id'],
'erp_account': ['erp account', 'account no', 'erp account no', '客戶代碼', '客戶編號', 'erp_account'],
'customer': ['account name', 'branding customer', '客戶', '客戶名稱', 'customer', 'customer name', '公司名稱'],
'pn': ['product name', '料號', 'part number', 'pn', 'part no', 'part_number', '產品料號', 'stage/part'],
'eau': ['eau quantity', 'eau quantity (pcs)', 'eau', '年預估量', 'annual usage', '預估用量'],
'stage': ['stage', 'oppty product stage', '階段', 'status', '狀態', '專案階段'],
'date': ['created date', '日期', 'date', '建立日期', 'create date']
},
'sample': {
'sample_id': ['樣品訂單號碼', 'item', '樣品編號', 'sample_id', 'sample id', '編號'],
'order_no': ['樣品訂單號碼', '單號', 'order_no', 'order no', '樣品單號', '申請單號'],
'oppy_no': ['oppy no', 'oppy_no', '案號', '案件編號', 'opportunity no'],
'cust_id': ['cust id', 'cust_id', '客戶編號', '客戶代碼', '客戶代號'],
'customer': ['客戶名稱', '客戶簡稱', '客戶', 'customer', 'customer name'],
'pn': ['item', 'type', '料號', 'part number', 'pn', 'part no', '產品料號', '索樣數量'],
'qty': ['索樣數量pcs', '索樣數量 k', '數量', 'qty', 'quantity', '申請數量'],
'date': ['需求日', '日期', 'date', '申請日期']
},
'order': {
'order_id': ['項次', '訂單編號', 'order_id', 'order id'],
'order_no': ['訂單單號', '訂單號', 'order_no', 'order no', '銷貨單號'],
'cust_id': ['客戶編號', '客戶代碼', '客戶代號', 'cust_id', 'cust id'],
'customer': ['客戶', '客戶名稱', 'customer', 'customer name'],
'pn': ['type', '內部料號', '料號', 'part number', 'pn', 'part no', '產品料號'],
'qty': ['訂單量', '數量', 'qty', 'quantity', '訂購數量', '出貨數量'],
'status': ['狀態', 'status', '訂單狀態'],
'amount': ['原幣金額(含稅)', '台幣金額(未稅)', '金額', 'amount', 'total', '訂單金額']
}
}
class ExcelParser:
def __init__(self):
self.parsed_files: Dict[str, Dict] = {}
def detect_encoding(self, file_path: Path) -> str:
"""偵測檔案編碼"""
with open(file_path, 'rb') as f:
result = chardet.detect(f.read(10000))
return result.get('encoding', 'utf-8')
def find_header_row(self, df: pd.DataFrame, file_type: str) -> int:
"""自動偵測表頭位置"""
expected_columns = set()
for variants in COLUMN_MAPPING[file_type].values():
expected_columns.update([v.lower() for v in variants])
for idx in range(min(MAX_HEADER_SCAN_ROWS, len(df))):
row = df.iloc[idx]
row_values = [str(v).lower().strip() for v in row.values if pd.notna(v)]
# Check how many expected column names appear in this row
matches = sum(1 for v in row_values if any(exp in v for exp in expected_columns))
if matches >= 2:  # at least 2 matching columns
return idx
return 0  # default to the first row as the header
def map_columns(self, df: pd.DataFrame, file_type: str) -> Dict[str, str]:
"""將 DataFrame 欄位對應到標準欄位名稱"""
mapping = {}
column_map = COLUMN_MAPPING[file_type]
df_columns = [str(c).lower().strip() for c in df.columns]
for standard_name, variants in column_map.items():
for variant in variants:
variant_lower = variant.lower()
for idx, col in enumerate(df_columns):
if variant_lower in col or col in variant_lower:
mapping[df.columns[idx]] = standard_name
break
if standard_name in mapping.values():
break
return mapping
def parse_file(self, file_path: Path, file_type: str) -> Tuple[str, Dict[str, Any]]:
"""解析 Excel/CSV 檔案"""
file_id = str(uuid.uuid4())
# Read the file
if file_path.suffix.lower() == '.csv':
encoding = self.detect_encoding(file_path)
df = pd.read_csv(file_path, encoding=encoding, header=None)
else:
df = pd.read_excel(file_path, header=None)
# Find the header row
header_row = self.find_header_row(df, file_type)
# Re-read with the correct header row
if file_path.suffix.lower() == '.csv':
df = pd.read_csv(file_path, encoding=encoding, header=header_row)
else:
df = pd.read_excel(file_path, header=header_row)
# Map the columns
column_mapping = self.map_columns(df, file_type)
df = df.rename(columns=column_mapping)
# Keep only the required columns
required_columns = list(COLUMN_MAPPING[file_type].keys())
available_columns = [c for c in required_columns if c in df.columns]
df = df[available_columns]
# Clean the data
df = df.dropna(how='all')
# Build the preview data (NaN values cleaned for JSON serialization)
preview = clean_records(df.head(10).to_dict(orient='records'))
# Store the parsed result
parsed_data = {
'file_id': file_id,
'file_type': file_type,
'filename': file_path.name,
'header_row': header_row,
'row_count': len(df),
'columns': list(df.columns),
'preview': preview,
'dataframe': df
}
self.parsed_files[file_id] = parsed_data
return file_id, {k: v for k, v in parsed_data.items() if k != 'dataframe'}
def get_parsed_data(self, file_id: str) -> Optional[pd.DataFrame]:
"""取得解析後的 DataFrame"""
if file_id in self.parsed_files:
return self.parsed_files[file_id].get('dataframe')
return None
def get_file_info(self, file_id: str) -> Optional[Dict]:
"""取得檔案資訊"""
if file_id in self.parsed_files:
data = self.parsed_files[file_id]
return {k: v for k, v in data.items() if k != 'dataframe'}
return None
# Global instance
excel_parser = ExcelParser()
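
A direct usage sketch of the parser outside the API; the workbook path is a placeholder:

from pathlib import Path
from app.services.excel_parser import excel_parser

file_id, info = excel_parser.parse_file(Path("data/uploads/orders.xlsx"), "order")
print(info["header_row"], info["columns"])   # detected header row and the mapped standard columns

df = excel_parser.get_parsed_data(file_id)   # pandas DataFrame with standardized column names
print(df.head())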

backend/app/services/fuzzy_matcher.py Normal file

@@ -0,0 +1,277 @@
import re
from typing import List, Tuple, Optional
from rapidfuzz import fuzz, process
from sqlalchemy.orm import Session
from app.config import MATCH_THRESHOLD_AUTO, MATCH_THRESHOLD_REVIEW
from app.models.dit import DitRecord
from app.models.sample import SampleRecord
from app.models.order import OrderRecord
from app.models.match import MatchResult, MatchStatus, TargetType, ReviewLog
import pandas as pd
from datetime import timedelta
# Company suffixes (used for normalization)
COMPANY_SUFFIXES = [
'股份有限公司', '有限公司', '公司',
'株式会社', '株式會社',
'Co., Ltd.', 'Co.,Ltd.', 'Co. Ltd.', 'Co.Ltd.',
'Corporation', 'Corp.', 'Corp',
'Inc.', 'Inc',
'Limited', 'Ltd.', 'Ltd',
'LLC', 'L.L.C.',
]
def sanitize_pn(pn: str) -> str:
"""去除非字母數字字元並轉大寫 (PMSM-808-LL -> PMSM808LL)"""
if not pn:
return ""
return re.sub(r'[^a-zA-Z0-9]', '', str(pn)).upper()
def normalize_customer_name(name: str) -> str:
"""正規化客戶名稱 (轉大寫)"""
if not name:
return ""
# Trim whitespace (uppercasing happens at the end)
normalized = name.strip()
# Remove company suffixes
for suffix in COMPANY_SUFFIXES:
normalized = re.sub(re.escape(suffix), '', normalized, flags=re.IGNORECASE)
# Remove parentheses and their contents (half-width and full-width)
normalized = re.sub(r'\([^)]*\)', '', normalized)
normalized = re.sub(r'（[^）]*）', '', normalized)
# Convert full-width spaces to half-width
normalized = normalized.replace('\u3000', ' ')
# Collapse extra whitespace
normalized = re.sub(r'\s+', ' ', normalized).strip()
return normalized.upper()
def calculate_similarity(name1: str, name2: str) -> Tuple[float, str]:
"""計算兩個名稱的相似度"""
# 正規化
norm1 = normalize_customer_name(name1)
norm2 = normalize_customer_name(name2)
if not norm1 or not norm2:
return 0.0, "Empty name"
# Exact match
if norm1 == norm2:
return 100.0, "Exact Match"
# Use several comparison methods
ratio = fuzz.ratio(norm1, norm2)
partial_ratio = fuzz.partial_ratio(norm1, norm2)
token_sort_ratio = fuzz.token_sort_ratio(norm1, norm2)
token_set_ratio = fuzz.token_set_ratio(norm1, norm2)
# Take the best score
best_score = max(ratio, partial_ratio, token_sort_ratio, token_set_ratio)
# Determine the reason
if ratio == best_score:
reason = "Character Similarity"
elif partial_ratio == best_score:
reason = "Partial Match"
elif token_sort_ratio == best_score:
reason = "Token Order Match"
else:
reason = "Token Set Match"
# Check whether the difference is only a corporate suffix
if best_score >= 80:
for suffix in COMPANY_SUFFIXES[:3]:  # check only the common suffixes
if (suffix in name1 and suffix not in name2) or \
(suffix not in name1 and suffix in name2):
reason = "Corporate Suffix Mismatch"
break
return best_score, reason
class FuzzyMatcher:
def __init__(self, db: Session):
self.db = db
def run_matching(self) -> dict:
"""執行瀑布式模糊比對 (Waterfall Matching)"""
# 1. 取得所有 DIT 記錄
dit_records = self.db.query(DitRecord).all()
# 2. Get all sample and order records and group them by PN
sample_records = self.db.query(SampleRecord).all()
order_records = self.db.query(OrderRecord).all()
samples_by_pn = {}
samples_by_oppy = {}
for s in sample_records:
if s.pn:
if s.pn not in samples_by_pn:
samples_by_pn[s.pn] = []
samples_by_pn[s.pn].append(s)
if s.oppy_no:
if s.oppy_no not in samples_by_oppy:
samples_by_oppy[s.oppy_no] = []
samples_by_oppy[s.oppy_no].append(s)
orders_by_pn = {}
for o in order_records:
if o.pn not in orders_by_pn:
orders_by_pn[o.pn] = []
orders_by_pn[o.pn].append(o)
# 3. Clear old match results
self.db.query(ReviewLog).delete()
self.db.query(MatchResult).delete()
match_count = 0
auto_matched = 0
pending_review = 0
for dit in dit_records:
dit_date = pd.to_datetime(dit.date, errors='coerce')
# --- Match samples (DIT -> Sample) ---
# Collect all candidate samples (Priority 1: Oppy ID, Priority 2/3: PN)
potential_samples = []
if dit.op_id:
potential_samples.extend(samples_by_oppy.get(dit.op_id, []))
if dit.pn:
potential_samples.extend(samples_by_pn.get(dit.pn, []))
# Deduplicate
seen_sample_ids = set()
unique_potential_samples = []
for s in potential_samples:
if s.id not in seen_sample_ids:
seen_sample_ids.add(s.id)
unique_potential_samples.append(s)
for sample in unique_potential_samples:
sample_date = pd.to_datetime(sample.date, errors='coerce')
# Time-window check: the sample date must fall between 30 days before the DIT date and today
if pd.notna(dit_date) and pd.notna(sample_date):
if sample_date < (dit_date - timedelta(days=30)):
continue
match_priority = 0
match_source = ""
score = 0.0
reason = ""
# Priority 1: exact opportunity-ID match (Golden Key)
if dit.op_id and sample.oppy_no and dit.op_id == sample.oppy_no:
match_priority = 1
match_source = f"Matched via Opportunity ID: {dit.op_id}"
score = 100.0
reason = "Golden Key Match"
# Priority 2 & 3 則限制在相同 PN
elif dit.pn == sample.pn:
# Priority 2: customer-code match (Silver Key)
if dit.erp_account and sample.cust_id and dit.erp_account == sample.cust_id:
match_priority = 2
match_source = f"Matched via ERP Account: {dit.erp_account}"
score = 99.0
reason = "Silver Key Match"
# Priority 3: fuzzy name match (fallback)
else:
score, reason = calculate_similarity(dit.customer, sample.customer)
if score >= MATCH_THRESHOLD_REVIEW:
match_priority = 3
match_source = f"Matched via Name Similarity ({reason})"
if match_priority > 0:
status = MatchStatus.auto_matched if score >= MATCH_THRESHOLD_AUTO else MatchStatus.pending
match = MatchResult(
dit_id=dit.id,
target_type=TargetType.SAMPLE,
target_id=sample.id,
score=score,
match_priority=match_priority,
match_source=match_source,
reason=reason,
status=status
)
self.db.add(match)
match_count += 1
if status == MatchStatus.auto_matched:
auto_matched += 1
else:
pending_review += 1
# --- Match orders (DIT -> Order) ---
# Order matching is based on the PN
for order in orders_by_pn.get(dit.pn, []):
match_priority = 0
match_source = ""
score = 0.0
reason = ""
# Priority 2: customer-code match (Silver Key)
if dit.erp_account and order.cust_id and dit.erp_account == order.cust_id:
match_priority = 2
match_source = f"Matched via ERP Account: {dit.erp_account}"
score = 99.0
reason = "Silver Key Match"
# Priority 3: fuzzy name match (fallback)
else:
score, reason = calculate_similarity(dit.customer, order.customer)
if score >= MATCH_THRESHOLD_REVIEW:
match_priority = 3
match_source = f"Matched via Name Similarity ({reason})"
if match_priority > 0:
status = MatchStatus.auto_matched if score >= MATCH_THRESHOLD_AUTO else MatchStatus.pending
match = MatchResult(
dit_id=dit.id,
target_type=TargetType.ORDER,
target_id=order.id,
score=score,
match_priority=match_priority,
match_source=match_source,
reason=reason,
status=status
)
self.db.add(match)
match_count += 1
if status == MatchStatus.auto_matched:
auto_matched += 1
else:
pending_review += 1
self.db.commit()
return {
'match_count': match_count,
'auto_matched': auto_matched,
'pending_review': pending_review
}
def get_pending_reviews(self) -> List[MatchResult]:
"""取得待審核的比對結果"""
return self.db.query(MatchResult).filter(
MatchResult.status == MatchStatus.pending
).all()
def review_match(self, match_id: int, action: str) -> Optional[MatchResult]:
"""審核比對結果"""
match = self.db.query(MatchResult).filter(MatchResult.id == match_id).first()
if not match:
return None
if action == 'accept':
match.status = MatchStatus.accepted
elif action == 'reject':
match.status = MatchStatus.rejected
self.db.commit()
return match
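
A quick look at the normalization helpers above; the company names are made up:

from app.services.fuzzy_matcher import sanitize_pn, normalize_customer_name, calculate_similarity

print(sanitize_pn("PMSM-808-LL"))                    # "PMSM808LL"
print(normalize_customer_name("範例科技股份有限公司"))    # suffix stripped -> "範例科技"
score, reason = calculate_similarity("Example Co., Ltd.", "EXAMPLE CORPORATION")
print(score, reason)                                 # likely (100.0, "Exact Match"): both normalize to "EXAMPLE"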

backend/app/services/report_generator.py Normal file

@@ -0,0 +1,171 @@
import io
from typing import List, Dict, Any
from datetime import datetime
from openpyxl import Workbook
from openpyxl.styles import Font, Alignment, PatternFill, Border, Side
from reportlab.lib import colors
from reportlab.lib.pagesizes import A4, landscape
from reportlab.platypus import SimpleDocTemplate, Table, TableStyle, Paragraph, Spacer
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont
from sqlalchemy.orm import Session
from app.models.dit import DitRecord
from app.models.sample import SampleRecord
from app.models.order import OrderRecord
from app.models.match import MatchResult, MatchStatus
class ReportGenerator:
def __init__(self, db: Session):
self.db = db
def get_attribution_data(self) -> List[Dict[str, Any]]:
"""取得歸因明細資料"""
dit_records = self.db.query(DitRecord).all()
result = []
for dit in dit_records:
row = {
'op_id': dit.op_id,
'customer': dit.customer,
'pn': dit.pn,
'eau': dit.eau,
'stage': dit.stage,
'sample_order': None,
'order_no': None,
'order_status': None,
'order_amount': None
}
# Find the accepted sample match
sample_match = self.db.query(MatchResult).filter(
MatchResult.dit_id == dit.id,
MatchResult.target_type == 'SAMPLE',
MatchResult.status.in_([MatchStatus.accepted, MatchStatus.auto_matched])
).first()
if sample_match:
sample = self.db.query(SampleRecord).filter(
SampleRecord.id == sample_match.target_id
).first()
if sample:
row['sample_order'] = sample.order_no
# Find the accepted order match
order_match = self.db.query(MatchResult).filter(
MatchResult.dit_id == dit.id,
MatchResult.target_type == 'ORDER',
MatchResult.status.in_([MatchStatus.accepted, MatchStatus.auto_matched])
).first()
if order_match:
order = self.db.query(OrderRecord).filter(
OrderRecord.id == order_match.target_id
).first()
if order:
row['order_no'] = order.order_no
row['order_status'] = order.status if order.status else None  # status is a plain String column, not an Enum
row['order_amount'] = order.amount
result.append(row)
return result
def generate_excel(self) -> io.BytesIO:
"""產生 Excel 報表"""
wb = Workbook()
ws = wb.active
ws.title = "DIT Attribution Report"
# Header styles
header_font = Font(bold=True, color="FFFFFF")
header_fill = PatternFill(start_color="4F46E5", end_color="4F46E5", fill_type="solid")
header_alignment = Alignment(horizontal="center", vertical="center")
# Header row
headers = ['OP編號', '客戶名稱', '料號', 'EAU', '階段', '樣品單號', '訂單單號', '訂單狀態', '訂單金額']
for col, header in enumerate(headers, 1):
cell = ws.cell(row=1, column=col, value=header)
cell.font = header_font
cell.fill = header_fill
cell.alignment = header_alignment
# Data rows
data = self.get_attribution_data()
for row_idx, row_data in enumerate(data, 2):
ws.cell(row=row_idx, column=1, value=row_data['op_id'])
ws.cell(row=row_idx, column=2, value=row_data['customer'])
ws.cell(row=row_idx, column=3, value=row_data['pn'])
ws.cell(row=row_idx, column=4, value=row_data['eau'])
ws.cell(row=row_idx, column=5, value=row_data['stage'])
ws.cell(row=row_idx, column=6, value=row_data['sample_order'] or '-')
ws.cell(row=row_idx, column=7, value=row_data['order_no'] or '-')
ws.cell(row=row_idx, column=8, value=row_data['order_status'] or '-')
ws.cell(row=row_idx, column=9, value=row_data['order_amount'] or 0)
# Adjust column widths
column_widths = [15, 30, 20, 12, 15, 15, 15, 12, 12]
for col, width in enumerate(column_widths, 1):
ws.column_dimensions[chr(64 + col)].width = width
# Save to BytesIO
output = io.BytesIO()
wb.save(output)
output.seek(0)
return output
def generate_pdf(self) -> io.BytesIO:
"""產生 PDF 報表"""
output = io.BytesIO()
doc = SimpleDocTemplate(output, pagesize=landscape(A4))
elements = []
styles = getSampleStyleSheet()
# Title
title = Paragraph("DIT Attribution Report", styles['Title'])
elements.append(title)
elements.append(Spacer(1, 20))
# Date
date_text = Paragraph(f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M')}", styles['Normal'])
elements.append(date_text)
elements.append(Spacer(1, 20))
# Table data
data = self.get_attribution_data()
table_data = [['OP No.', 'Customer', 'P/N', 'EAU', 'Stage', 'Sample', 'Order', 'Status', 'Amount']]
for row in data:
table_data.append([
row['op_id'],
row['customer'][:20] + '...' if len(row['customer']) > 20 else row['customer'],
row['pn'],
str(row['eau']),
row['stage'] or '-',
row['sample_order'] or '-',
row['order_no'] or '-',
row['order_status'] or '-',
f"${row['order_amount']:,.0f}" if row['order_amount'] else '-'
])
# Build the table
table = Table(table_data)
table.setStyle(TableStyle([
('BACKGROUND', (0, 0), (-1, 0), colors.HexColor('#4F46E5')),
('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
('ALIGN', (0, 0), (-1, -1), 'CENTER'),
('FONTSIZE', (0, 0), (-1, 0), 10),
('FONTSIZE', (0, 1), (-1, -1), 8),
('BOTTOMPADDING', (0, 0), (-1, 0), 12),
('BACKGROUND', (0, 1), (-1, -1), colors.beige),
('GRID', (0, 0), (-1, -1), 1, colors.black),
('ROWBACKGROUNDS', (0, 1), (-1, -1), [colors.white, colors.HexColor('#F8FAFC')]),
]))
elements.append(table)
doc.build(elements)
output.seek(0)
return output

backend/app/utils/__init__.py Normal file

@@ -0,0 +1 @@
# Utils package

backend/app/utils/security.py Normal file

@@ -0,0 +1,64 @@
from datetime import datetime, timedelta
from typing import Optional
from jose import JWTError, jwt
from passlib.context import CryptContext
from fastapi import Depends, HTTPException, status
from fastapi.security import OAuth2PasswordBearer
from sqlalchemy.orm import Session
from app.config import SECRET_KEY, ALGORITHM, ACCESS_TOKEN_EXPIRE_MINUTES
from app.models import get_db
from app.models.user import User
pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")
oauth2_scheme = OAuth2PasswordBearer(tokenUrl="/api/auth/login")
def verify_password(plain_password: str, hashed_password: str) -> bool:
return pwd_context.verify(plain_password, hashed_password)
def get_password_hash(password: str) -> str:
return pwd_context.hash(password)
def create_access_token(data: dict, expires_delta: Optional[timedelta] = None) -> str:
to_encode = data.copy()
if expires_delta:
expire = datetime.utcnow() + expires_delta
else:
expire = datetime.utcnow() + timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES)
to_encode.update({"exp": expire})
encoded_jwt = jwt.encode(to_encode, SECRET_KEY, algorithm=ALGORITHM)
return encoded_jwt
def decode_token(token: str) -> Optional[dict]:
try:
payload = jwt.decode(token, SECRET_KEY, algorithms=[ALGORITHM])
return payload
except JWTError:
return None
async def get_current_user(
token: str = Depends(oauth2_scheme),
db: Session = Depends(get_db)
) -> User:
credentials_exception = HTTPException(
status_code=status.HTTP_401_UNAUTHORIZED,
detail="Could not validate credentials",
headers={"WWW-Authenticate": "Bearer"},
)
payload = decode_token(token)
if payload is None:
raise credentials_exception
user_id_raw = payload.get("sub")
if user_id_raw is None:
raise credentials_exception
try:
user_id = int(user_id_raw)
except (ValueError, TypeError):
raise credentials_exception
user = db.query(User).filter(User.id == user_id).first()
if user is None:
raise credentials_exception
return user
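
A usage sketch for these helpers; the password and subject are placeholders:

from app.utils.security import get_password_hash, verify_password, create_access_token, decode_token

hashed = get_password_hash("s3cret")
assert verify_password("s3cret", hashed)

token = create_access_token({"sub": "1"})
payload = decode_token(token)
print(payload["sub"], payload["exp"])   # "1" plus the expiry timestamp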