first commit

This commit is contained in:
2026-01-09 19:14:41 +08:00
commit 9f3c96ce73
67 changed files with 9636 additions and 0 deletions

24
backend/.env.example Normal file
View File

@@ -0,0 +1,24 @@
# Database Configuration
DB_HOST=localhost
DB_PORT=3306
DB_USER=your_user
DB_PASSWORD=your_password
DB_DATABASE=your_database
# JWT Configuration
SECRET_KEY=your-super-secret-key-change-in-production
ALGORITHM=HS256
ACCESS_TOKEN_EXPIRE_MINUTES=1440
# Application Settings
APP_HOST=0.0.0.0
APP_PORT=8000
WORKERS=1
DEBUG=False
# Default Admin Account (created on first startup)
ADMIN_EMAIL=admin@example.com
ADMIN_PASSWORD=admin123
# CORS Settings (comma separated, for development)
CORS_ORIGINS=http://localhost:3000,http://127.0.0.1:3000,http://localhost:5173,http://127.0.0.1:5173

1
backend/app/__init__.py Normal file
View File

@@ -0,0 +1 @@
# SalesPipeline Backend

58
backend/app/config.py Normal file
View File

@@ -0,0 +1,58 @@
import os
from pathlib import Path
from dotenv import load_dotenv
# Load environment variables from a local .env file, if present.
load_dotenv()
# Project paths: BASE_DIR is the repository root (three levels up from this file).
BASE_DIR = Path(__file__).resolve().parent.parent.parent
DATA_DIR = BASE_DIR / "data"
UPLOAD_DIR = DATA_DIR / "uploads"
STATIC_DIR = BASE_DIR / "backend" / "static"
# Ensure the runtime directories exist (created at import time).
DATA_DIR.mkdir(exist_ok=True)
UPLOAD_DIR.mkdir(exist_ok=True)
STATIC_DIR.mkdir(exist_ok=True)
# MySQL database settings
DB_HOST = os.getenv("DB_HOST", "localhost")
DB_PORT = os.getenv("DB_PORT", "3306")
DB_USER = os.getenv("DB_USER", "root")
DB_PASSWORD = os.getenv("DB_PASSWORD", "")
DB_DATABASE = os.getenv("DB_DATABASE", "sales_pipeline")
# MySQL connection string (PyMySQL driver, utf8mb4 for full Unicode support)
DATABASE_URL = f"mysql+pymysql://{DB_USER}:{DB_PASSWORD}@{DB_HOST}:{DB_PORT}/{DB_DATABASE}?charset=utf8mb4"
# Table-name prefix shared by all models
TABLE_PREFIX = os.getenv("TABLE_PREFIX", "PJ_SOA_")
# JWT settings
SECRET_KEY = os.getenv("SECRET_KEY", "your-secret-key-change-in-production-12345678")
ALGORITHM = os.getenv("ALGORITHM", "HS256")
ACCESS_TOKEN_EXPIRE_MINUTES = int(os.getenv("ACCESS_TOKEN_EXPIRE_MINUTES", "1440"))
# Fuzzy-match score thresholds (consumed by the matching service)
MATCH_THRESHOLD_AUTO = int(os.getenv("MATCH_THRESHOLD_AUTO", "95"))
MATCH_THRESHOLD_REVIEW = int(os.getenv("MATCH_THRESHOLD_REVIEW", "80"))
# Excel parsing: number of leading rows scanned when locating the header row
MAX_HEADER_SCAN_ROWS = int(os.getenv("MAX_HEADER_SCAN_ROWS", "20"))
# Application settings
APP_HOST = os.getenv("APP_HOST", "0.0.0.0")
APP_PORT = int(os.getenv("APP_PORT", "8000"))
WORKERS = int(os.getenv("WORKERS", "1"))
DEBUG = os.getenv("DEBUG", "False").lower() == "true"
# CORS origins (comma-separated env var; blank entries are dropped)
CORS_ORIGINS = [
    origin.strip()
    for origin in os.getenv(
        "CORS_ORIGINS",
        "http://localhost:3000,http://127.0.0.1:3000,http://localhost:5173,http://127.0.0.1:5173"
    ).split(",")
    if origin.strip()
]

54
backend/app/init_admin.py Normal file
View File

@@ -0,0 +1,54 @@
"""
初始化管理員帳號腳本
"""
from sqlalchemy.orm import Session
from app.models import engine, Base
from app.models.user import User, UserRole
from app.utils.security import get_password_hash
import os
def create_admin_user(db: Session):
    """Create the default administrator account if it does not already exist.

    Reads ADMIN_EMAIL / ADMIN_PASSWORD from the environment (with defaults)
    and returns the existing or newly created User row.
    """
    admin_email = os.getenv("ADMIN_EMAIL", "admin@example.com")
    admin_password = os.getenv("ADMIN_PASSWORD", "admin123")
    # Idempotent: do nothing if the account already exists.
    existing = db.query(User).filter(User.email == admin_email).first()
    if existing:
        print(f"Admin user already exists: {admin_email}")
        return existing
    # NOTE: the User model has no `language` column (it was removed to match
    # the DB schema), so it must not be passed to the constructor — doing so
    # raised TypeError at startup.
    admin = User(
        email=admin_email,
        password_hash=get_password_hash(admin_password),
        display_name="Administrator",
        role=UserRole.admin
    )
    db.add(admin)
    db.commit()
    db.refresh(admin)
    print(f"Admin user created: {admin_email}")
    return admin
def init_database():
    """Create all tables and seed the default admin account (CLI entry point)."""
    from sqlalchemy.orm import sessionmaker
    # Create every table registered on Base.
    Base.metadata.create_all(bind=engine)
    print("Database tables created.")
    # Open a short-lived session just for the seeding step.
    SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
    db = SessionLocal()
    try:
        create_admin_user(db)
    finally:
        db.close()
if __name__ == "__main__":
    init_database()

71
backend/app/main.py Normal file
View File

@@ -0,0 +1,71 @@
from fastapi import FastAPI, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.staticfiles import StaticFiles
from fastapi.responses import FileResponse
from app.models import init_db
from app.routers import auth, etl, match, dashboard, report, lab
from app.config import STATIC_DIR, DEBUG, CORS_ORIGINS, APP_HOST, APP_PORT
# Initialize the database (creates tables and seeds the default admin).
init_db()
app = FastAPI(
    title="SalesPipeline API",
    description="銷售管線管理系統 API",
    version="1.0.0",
    # Interactive API docs are only exposed when DEBUG is on.
    docs_url="/api/docs" if DEBUG else None,
    redoc_url="/api/redoc" if DEBUG else None,
)
# CORS middleware (needed in development when the frontend runs on another origin).
if DEBUG and CORS_ORIGINS:
    app.add_middleware(
        CORSMiddleware,
        allow_origins=CORS_ORIGINS,
        allow_credentials=True,
        allow_methods=["*"],
        allow_headers=["*"],
    )
# Register all API routers under the /api prefix.
app.include_router(auth.router, prefix="/api")
app.include_router(etl.router, prefix="/api")
app.include_router(match.router, prefix="/api")
app.include_router(dashboard.router, prefix="/api")
app.include_router(report.router, prefix="/api")
app.include_router(lab.router, prefix="/api")
@app.get("/api/health")
def health_check():
    """Liveness probe: report service status and API version."""
    payload = {"status": "healthy", "version": "1.0.0"}
    return payload
# Static file serving (the built frontend artifacts).
static_path = STATIC_DIR
if static_path.exists():
    assets_dir = static_path / "assets"
    if assets_dir.exists():
        app.mount("/assets", StaticFiles(directory=assets_dir), name="assets")
    # SPA routing: every non-API route falls through to index.html.
    @app.get("/{full_path:path}")
async def serve_spa(request: Request, full_path: str):
if full_path.startswith("api/"):
return {"error": "Not Found"}, 404
static_file = static_path / full_path
if static_file.exists() and static_file.is_file():
return FileResponse(static_file)
index_file = static_path / "index.html"
if index_file.exists():
return FileResponse(index_file)
return {
"message": "SalesPipeline API is running",
"docs": "/api/docs" if DEBUG else "Disabled in production",
"health": "/api/health"
}
if __name__ == "__main__":
    # Run a development server when executed directly.
    import uvicorn
    uvicorn.run(app, host=APP_HOST, port=APP_PORT)

View File

@@ -0,0 +1,64 @@
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker, declarative_base
from app.config import DATABASE_URL
import os
# MySQL engine configuration
engine = create_engine(
    DATABASE_URL,
    pool_size=10,        # base connection-pool size
    max_overflow=20,     # extra connections allowed beyond pool_size
    pool_pre_ping=True,  # validate connections before use (drops stale ones)
    pool_recycle=3600,   # recycle connections hourly to dodge MySQL idle timeouts
    echo=False
)
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
Base = declarative_base()
def get_db():
    """FastAPI dependency: yield a database session and always close it."""
    db = SessionLocal()
    try:
        yield db
    finally:
        db.close()
# Import models to register them
from app.models.user import User, UserRole
from app.models.dit import DitRecord
from app.models.sample import SampleRecord
from app.models.order import OrderRecord
from app.models.match import MatchResult, ReviewLog
def init_db():
    """Create all tables and seed the default administrator account.

    Called once at application startup (see app.main). Seeding errors are
    logged and rolled back so a pre-existing admin or a transient DB issue
    does not abort startup.
    """
    from app.utils.security import get_password_hash
    # Create all tables registered on Base.
    Base.metadata.create_all(bind=engine)
    # Seed the default administrator account.
    db = SessionLocal()
    try:
        admin_email = os.getenv("ADMIN_EMAIL", "admin@example.com")
        admin_password = os.getenv("ADMIN_PASSWORD", "admin123")
        existing = db.query(User).filter(User.email == admin_email).first()
        if not existing:
            # NOTE: the User model has no `language` column (see app.models.user),
            # so it must not be passed to the constructor — doing so raised
            # TypeError and the admin account was never created.
            admin = User(
                email=admin_email,
                password_hash=get_password_hash(admin_password),
                display_name="Administrator",
                role=UserRole.admin
            )
            db.add(admin)
            db.commit()
            print(f"[Init] Admin user created: {admin_email}")
        else:
            print(f"[Init] Admin user exists: {admin_email}")
    except Exception as e:
        print(f"[Init] Error creating admin: {e}")
        db.rollback()
    finally:
        db.close()

22
backend/app/models/dit.py Normal file
View File

@@ -0,0 +1,22 @@
from sqlalchemy import Column, Integer, String, DateTime, Float, UniqueConstraint
from sqlalchemy.sql import func
from app.models import Base
from app.config import TABLE_PREFIX
class DitRecord(Base):
    """Design-in tracking (DIT) opportunity record imported from Excel."""
    __tablename__ = f"{TABLE_PREFIX}DIT_Records"
    __table_args__ = (
        # One row per (opportunity, part number) pair.
        UniqueConstraint('op_id', 'pn', name='uix_dit_op_pn'),
    )
    id = Column(Integer, primary_key=True, index=True)
    op_id = Column(String(255), index=True, nullable=False)  # not unique: one op_id may carry several part numbers
    erp_account = Column(String(100), index=True)  # source spreadsheet column AQ
    customer = Column(String(255), nullable=False, index=True)
    customer_normalized = Column(String(255), index=True)  # normalized name used for fuzzy matching
    pn = Column(String(100), nullable=False, index=True)
    eau = Column(Integer, default=0)  # estimated annual usage
    stage = Column(String(50))
    date = Column(String(20))  # stored as a string, not a DateTime
    created_at = Column(DateTime(timezone=True), server_default=func.now())
    updated_at = Column(DateTime(timezone=True), onupdate=func.now())

View File

@@ -0,0 +1,49 @@
from sqlalchemy import Column, Integer, String, DateTime, Float, Enum, ForeignKey
from sqlalchemy.orm import relationship
from sqlalchemy.sql import func
from app.models import Base
from app.config import TABLE_PREFIX
import enum
class TargetType(str, enum.Enum):
    """Which table a match result points at."""
    SAMPLE = "SAMPLE"
    ORDER = "ORDER"
class MatchStatus(str, enum.Enum):
    """Review state of a match result."""
    pending = "pending"
    accepted = "accepted"
    rejected = "rejected"
    auto_matched = "auto_matched"
class ReviewAction(str, enum.Enum):
    """Decision recorded in a review log entry."""
    accept = "accept"
    reject = "reject"
class MatchResult(Base):
    """Fuzzy-match candidate linking a DIT record to a sample or order row."""
    __tablename__ = f"{TABLE_PREFIX}Match_Results"
    id = Column(Integer, primary_key=True, index=True)
    dit_id = Column(Integer, ForeignKey(f"{TABLE_PREFIX}DIT_Records.id"), nullable=False)
    target_type = Column(Enum(TargetType), nullable=False)
    target_id = Column(Integer, nullable=False)  # id in the sample or order table, per target_type
    score = Column(Float, nullable=False)  # fuzzy-match score
    match_priority = Column(Integer, default=3)  # 1: Oppy ID, 2: Account, 3: Name
    match_source = Column(String(255))  # e.g., "Matched via Opportunity ID: OP12345"
    reason = Column(String(255))
    status = Column(Enum(MatchStatus), default=MatchStatus.pending)
    created_at = Column(DateTime(timezone=True), server_default=func.now())
    updated_at = Column(DateTime(timezone=True), onupdate=func.now())
    dit = relationship("DitRecord", backref="matches")
class ReviewLog(Base):
    """Audit entry recording a user's accept/reject decision on a match."""
    __tablename__ = f"{TABLE_PREFIX}Review_Logs"
    id = Column(Integer, primary_key=True, index=True)
    match_id = Column(Integer, ForeignKey(f"{TABLE_PREFIX}Match_Results.id"), nullable=False)
    user_id = Column(Integer, ForeignKey(f"{TABLE_PREFIX}users.id"), nullable=False)
    action = Column(Enum(ReviewAction), nullable=False)
    timestamp = Column(DateTime(timezone=True), server_default=func.now())
    match_result = relationship("MatchResult", backref="review_logs")
    user = relationship("User", backref="review_logs")

View File

@@ -0,0 +1,20 @@
from sqlalchemy import Column, Integer, String, DateTime, Float
from sqlalchemy.sql import func
from app.models import Base
from app.config import TABLE_PREFIX
class OrderRecord(Base):
    """Sales order line imported from Excel."""
    __tablename__ = f"{TABLE_PREFIX}Order_Records"
    id = Column(Integer, primary_key=True, index=True)
    order_id = Column(String(50), index=True, nullable=False)  # not unique: one order can have multiple line items
    order_no = Column(String(50), index=True)
    cust_id = Column(String(100), index=True)
    customer = Column(String(255), nullable=False, index=True)
    customer_normalized = Column(String(255), index=True)  # normalized name used for fuzzy matching
    pn = Column(String(100), nullable=False, index=True)
    qty = Column(Integer, default=0)
    status = Column(String(50), default='Backlog')  # plain String (not Enum) to allow localized status values
    amount = Column(Float, default=0.0)
    created_at = Column(DateTime(timezone=True), server_default=func.now())
    updated_at = Column(DateTime(timezone=True), onupdate=func.now())

View File

@@ -0,0 +1,20 @@
from sqlalchemy import Column, Integer, String, DateTime
from sqlalchemy.sql import func
from app.models import Base
from app.config import TABLE_PREFIX
class SampleRecord(Base):
    """Sample shipment record imported from Excel."""
    __tablename__ = f"{TABLE_PREFIX}Sample_Records"
    id = Column(Integer, primary_key=True, index=True)
    sample_id = Column(String(50), unique=True, index=True, nullable=False)
    order_no = Column(String(50), index=True)
    oppy_no = Column(String(100), index=True)  # source spreadsheet column AU
    cust_id = Column(String(100), index=True)  # source spreadsheet column G
    customer = Column(String(255), nullable=False, index=True)
    customer_normalized = Column(String(255), index=True)  # normalized name used for fuzzy matching
    pn = Column(String(100), nullable=False, index=True)
    qty = Column(Integer, default=0)
    date = Column(String(20))  # stored as a string, not a DateTime
    created_at = Column(DateTime(timezone=True), server_default=func.now())
    updated_at = Column(DateTime(timezone=True), onupdate=func.now())

View File

@@ -0,0 +1,23 @@
from sqlalchemy import Column, Integer, String, DateTime, Enum
from sqlalchemy.sql import func
from app.models import Base
from app.config import TABLE_PREFIX
import enum
class UserRole(str, enum.Enum):
    """Application roles."""
    admin = "admin"
    user = "user"
class User(Base):
    """Application account mapped onto the pre-existing users table."""
    __tablename__ = f"{TABLE_PREFIX}users"
    id = Column(Integer, primary_key=True, index=True)
    email = Column(String(200), unique=True, index=True, nullable=False)
    ad_username = Column(String(100), nullable=True)  # added to satisfy DB constraint
    department = Column(String(100), nullable=True)  # added to satisfy DB constraint
    password_hash = Column("local_password", String(255), nullable=True)  # DB column is named local_password
    display_name = Column(String(100), nullable=True)
    # language = Column(String(10), default="zh-TW")  # Not in DB
    role = Column(String(20), default="user")  # plain string, simplified from Enum to match the DB
    created_at = Column(DateTime(timezone=True), server_default=func.now())
    updated_at = Column(DateTime(timezone=True), onupdate=func.now())

View File

@@ -0,0 +1 @@
# Routers package

View File

@@ -0,0 +1,84 @@
from fastapi import APIRouter, Depends, HTTPException, status
from fastapi.security import OAuth2PasswordRequestForm
from sqlalchemy.orm import Session
from pydantic import BaseModel, EmailStr
from app.models import get_db
from app.models.user import User, UserRole
from app.utils.security import (
get_password_hash, verify_password,
create_access_token, get_current_user
)
router = APIRouter(prefix="/auth", tags=["Authentication"])
class UserCreate(BaseModel):
    """Payload for registering a new account."""
    email: EmailStr
    password: str
class UserResponse(BaseModel):
    """Public view of a user record."""
    id: int
    email: str
    role: str
    class Config:
        from_attributes = True
class TokenResponse(BaseModel):
    """Login result: bearer token plus the authenticated user."""
    access_token: str
    token_type: str
    user: UserResponse
def get_role_value(role) -> str:
    """Return the role as a plain string, accepting Enum members or raw strings."""
    _missing = object()
    enum_value = getattr(role, 'value', _missing)
    if enum_value is not _missing:
        return enum_value
    return str(role) if role else 'user'
@router.post("/register", response_model=UserResponse)
def register(user_data: UserCreate, db: Session = Depends(get_db)):
    """Register a new account with the default 'user' role; 400 if the email is taken."""
    existing_user = db.query(User).filter(User.email == user_data.email).first()
    if existing_user:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Email already registered"
        )
    user = User(
        email=user_data.email,
        password_hash=get_password_hash(user_data.password),
        role=UserRole.user
    )
    db.add(user)
    db.commit()
    db.refresh(user)
    return UserResponse(id=user.id, email=user.email, role=get_role_value(user.role))
@router.post("/login", response_model=TokenResponse)
def login(form_data: OAuth2PasswordRequestForm = Depends(), db: Session = Depends(get_db)):
    """Authenticate with email/password (OAuth2 form) and issue a JWT bearer token."""
    # The OAuth2 form's "username" field carries the email address.
    user = db.query(User).filter(User.email == form_data.username).first()
    if not user or not verify_password(form_data.password, user.password_hash):
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Incorrect email or password",
            headers={"WWW-Authenticate": "Bearer"},
        )
    # Token subject is the user id (as a string).
    access_token = create_access_token(data={"sub": str(user.id)})
    return TokenResponse(
        access_token=access_token,
        token_type="bearer",
        user=UserResponse(id=user.id, email=user.email, role=get_role_value(user.role))
    )
@router.get("/me", response_model=UserResponse)
def get_me(current_user: User = Depends(get_current_user)):
    """Return the profile of the currently authenticated user."""
    role = get_role_value(current_user.role)
    return UserResponse(id=current_user.id, email=current_user.email, role=role)

View File

@@ -0,0 +1,225 @@
from typing import List
from fastapi import APIRouter, Depends
from sqlalchemy.orm import Session
from sqlalchemy import func
from pydantic import BaseModel
from app.models import get_db
from app.models.dit import DitRecord
from app.models.sample import SampleRecord
from app.models.order import OrderRecord
from app.models.match import MatchResult, MatchStatus, TargetType
router = APIRouter(prefix="/dashboard", tags=["Dashboard"])
class KPIResponse(BaseModel):
    """Top-level dashboard KPIs."""
    total_dit: int
    sample_rate: float  # sample conversion rate (%)
    hit_rate: float  # order hit rate (%)
    fulfillment_rate: float  # EAU fulfillment rate (%)
    orphan_sample_rate: float  # unmatched ("orphan") sample rate (%)
    total_revenue: float
class FunnelItem(BaseModel):
    """One stage of the pipeline funnel chart (fill is the chart color)."""
    name: str
    value: int
    fill: str
class AttributionDit(BaseModel):
    """DIT side of an attribution row."""
    op_id: str
    customer: str
    pn: str
    eau: int
    stage: str
    date: str
class AttributionSample(BaseModel):
    """Matched sample summary."""
    order_no: str
    date: str
class AttributionOrder(BaseModel):
    """Matched order summary."""
    order_no: str
    status: str
    qty: int
    amount: float
class AttributionRow(BaseModel):
    """One attribution-table row: a DIT plus its best sample/order matches."""
    dit: AttributionDit
    sample: AttributionSample | None
    order: AttributionOrder | None
    match_source: str | None
    attributed_qty: int
    fulfillment_rate: float
def get_lifo_attribution(db: Session):
    """Allocate matched order quantity to DIT records, newest DIT first (LIFO).

    Returns a dict mapping dit_id -> {"qty": allocated units, "eau": the DIT's EAU}.
    """
    # 1. All DIT records ordered by date, newest first (LIFO).
    dits = db.query(DitRecord).order_by(DitRecord.date.desc()).all()
    # 2. All orders whose match was accepted or auto-matched.
    matched_orders = db.query(MatchResult, OrderRecord).join(
        OrderRecord, MatchResult.target_id == OrderRecord.id
    ).filter(
        MatchResult.target_type == TargetType.ORDER,
        MatchResult.status.in_([MatchStatus.accepted, MatchStatus.auto_matched])
    ).all()
    # 3. Build the revenue pool, grouped by (normalized customer, part number).
    order_pools = {}
    for match, order in matched_orders:
        key = (order.customer_normalized, order.pn)
        if key not in order_pools:
            order_pools[key] = 0
        order_pools[key] += (order.qty or 0)
    # 4. Drain each pool into the DITs in LIFO order, capped at each DIT's EAU.
    attribution_map = {}  # dit_id -> {qty, eau}
    for dit in dits:
        key = (dit.customer_normalized, dit.pn)
        eau = dit.eau or 0
        allocated = 0
        if key in order_pools and order_pools[key] > 0:
            allocated = min(eau, order_pools[key])
            order_pools[key] -= allocated
        attribution_map[dit.id] = {
            "qty": allocated,
            "eau": eau
        }
    return attribution_map
@router.get("/kpi", response_model=KPIResponse)
def get_kpi(db: Session = Depends(get_db)):
    """Compute the dashboard KPI set (per spec v1.0)."""
    total_dit = db.query(DitRecord).count()
    if total_dit == 0:
        return KPIResponse(total_dit=0, sample_rate=0, hit_rate=0, fulfillment_rate=0, orphan_sample_rate=0, total_revenue=0)
    # 1. Sample rate: DITs with an accepted/auto-matched sample / total DITs.
    dits_with_sample = db.query(func.count(func.distinct(MatchResult.dit_id))).filter(
        MatchResult.target_type == TargetType.SAMPLE,
        MatchResult.status.in_([MatchStatus.accepted, MatchStatus.auto_matched])
    ).scalar() or 0
    sample_rate = (dits_with_sample / total_dit * 100)
    # 2. Hit rate: DITs with an accepted/auto-matched order / total DITs.
    dits_with_order = db.query(func.count(func.distinct(MatchResult.dit_id))).filter(
        MatchResult.target_type == TargetType.ORDER,
        MatchResult.status.in_([MatchStatus.accepted, MatchStatus.auto_matched])
    ).scalar() or 0
    hit_rate = (dits_with_order / total_dit * 100)
    # 3. Fulfillment rate: LIFO-attributed order quantity / total DIT EAU.
    attribution_map = get_lifo_attribution(db)
    total_attributed_qty = sum(item['qty'] for item in attribution_map.values())
    total_eau = sum(item['eau'] for item in attribution_map.values())
    fulfillment_rate = (total_attributed_qty / total_eau * 100) if total_eau > 0 else 0
    # 4. Orphan sample rate: samples never matched to any DIT / total samples.
    total_samples = db.query(SampleRecord).count()
    matched_sample_ids = db.query(func.distinct(MatchResult.target_id)).filter(
        MatchResult.target_type == TargetType.SAMPLE
    ).all()
    matched_sample_count = len(matched_sample_ids)
    orphan_sample_rate = ((total_samples - matched_sample_count) / total_samples * 100) if total_samples > 0 else 0
    # 5. Total revenue over accepted/auto-matched orders.
    total_revenue = db.query(func.sum(OrderRecord.amount)).join(
        MatchResult, MatchResult.target_id == OrderRecord.id
    ).filter(
        MatchResult.target_type == TargetType.ORDER,
        MatchResult.status.in_([MatchStatus.accepted, MatchStatus.auto_matched])
    ).scalar() or 0
    return KPIResponse(
        total_dit=total_dit,
        sample_rate=round(sample_rate, 1),
        hit_rate=round(hit_rate, 1),
        fulfillment_rate=round(fulfillment_rate, 1),
        orphan_sample_rate=round(orphan_sample_rate, 1),
        total_revenue=total_revenue
    )
@router.get("/funnel", response_model=List[FunnelItem])
def get_funnel(db: Session = Depends(get_db)):
    """Return the three funnel stages: total DITs, DITs with samples, DITs with orders."""
    total_dit = db.query(DitRecord).count()
    dits_with_sample = db.query(func.count(func.distinct(MatchResult.dit_id))).filter(
        MatchResult.target_type == TargetType.SAMPLE,
        MatchResult.status.in_([MatchStatus.accepted, MatchStatus.auto_matched])
    ).scalar() or 0
    dits_with_order = db.query(func.count(func.distinct(MatchResult.dit_id))).filter(
        MatchResult.target_type == TargetType.ORDER,
        MatchResult.status.in_([MatchStatus.accepted, MatchStatus.auto_matched])
    ).scalar() or 0
    # Labels and fill colors are consumed verbatim by the frontend chart.
    return [
        FunnelItem(name='DIT 案件', value=total_dit, fill='#6366f1'),
        FunnelItem(name='成功送樣', value=dits_with_sample, fill='#8b5cf6'),
        FunnelItem(name='取得訂單', value=dits_with_order, fill='#10b981'),
    ]
@router.get("/attribution", response_model=List[AttributionRow])
def get_attribution(db: Session = Depends(get_db)):
    """Return per-DIT attribution rows (best sample/order match plus LIFO allocation)."""
    dit_records = db.query(DitRecord).order_by(DitRecord.date.desc()).all()
    attribution_map = get_lifo_attribution(db)
    result = []
    for dit in dit_records:
        # Best sample match (highest score among accepted/auto-matched).
        sample_match = db.query(MatchResult).filter(
            MatchResult.dit_id == dit.id,
            MatchResult.target_type == TargetType.SAMPLE,
            MatchResult.status.in_([MatchStatus.accepted, MatchStatus.auto_matched])
        ).order_by(MatchResult.score.desc()).first()
        sample_info = None
        if sample_match:
            sample = db.query(SampleRecord).filter(SampleRecord.id == sample_match.target_id).first()
            if sample:
                sample_info = AttributionSample(order_no=sample.order_no, date=sample.date or '')
        # Best order match (highest score among accepted/auto-matched).
        order_match = db.query(MatchResult).filter(
            MatchResult.dit_id == dit.id,
            MatchResult.target_type == TargetType.ORDER,
            MatchResult.status.in_([MatchStatus.accepted, MatchStatus.auto_matched])
        ).order_by(MatchResult.score.desc()).first()
        order_info = None
        match_source = None
        if order_match:
            order = db.query(OrderRecord).filter(OrderRecord.id == order_match.target_id).first()
            if order:
                order_info = AttributionOrder(
                    order_no=order.order_no,
                    status=order.status or 'Unknown',
                    qty=order.qty or 0,
                    amount=order.amount or 0
                )
                match_source = order_match.match_source
        # LIFO allocation result for this DIT (defaults to zero allocation).
        attr_data = attribution_map.get(dit.id, {"qty": 0, "eau": dit.eau or 0})
        fulfillment = (attr_data['qty'] / attr_data['eau'] * 100) if attr_data['eau'] > 0 else 0
        result.append(AttributionRow(
            dit=AttributionDit(
                op_id=dit.op_id,
                customer=dit.customer,
                pn=dit.pn,
                eau=dit.eau,
                stage=dit.stage or '',
                date=dit.date or ''
            ),
            sample=sample_info,
            order=order_info,
            match_source=match_source,
            attributed_qty=attr_data['qty'],
            fulfillment_rate=round(fulfillment, 1)
        ))
    return result

246
backend/app/routers/etl.py Normal file
View File

@@ -0,0 +1,246 @@
import shutil
from pathlib import Path
from typing import List
import pandas as pd
from fastapi import APIRouter, Depends, HTTPException, UploadFile, File, Form
from sqlalchemy.orm import Session
from pydantic import BaseModel
from app.models import get_db
from app.models.dit import DitRecord
from app.models.sample import SampleRecord
from app.models.order import OrderRecord
from app.models.match import MatchResult, TargetType, ReviewLog
from app.config import UPLOAD_DIR
from app.services.excel_parser import excel_parser
from app.services.fuzzy_matcher import normalize_customer_name, sanitize_pn
router = APIRouter(prefix="/etl", tags=["ETL"])
class ParsedFileResponse(BaseModel):
    """Parse result and row preview for an uploaded Excel file."""
    file_id: str
    file_type: str
    filename: str
    header_row: int
    row_count: int
    preview: List[dict]
class ImportRequest(BaseModel):
    """Request to import a previously parsed file, identified by file_id."""
    file_id: str
class ImportResponse(BaseModel):
    """Number of rows written to the database."""
    imported_count: int
@router.post("/upload", response_model=ParsedFileResponse)
async def upload_file(
    file: UploadFile = File(...),
    file_type: str = Form(...),
    db: Session = Depends(get_db)
):
    """Upload an Excel file, persist it under UPLOAD_DIR, and parse it.

    file_type must be one of 'dit', 'sample' or 'order'. Returns parse
    metadata plus a row preview; raises 400 on unknown type or parse failure.
    """
    if file_type not in ['dit', 'sample', 'order']:
        raise HTTPException(status_code=400, detail="Invalid file type")
    # Keep only the basename of the client-supplied filename so a crafted
    # name like "../../x.xlsx" cannot escape UPLOAD_DIR (path traversal).
    safe_name = Path(file.filename or "upload.xlsx").name
    file_path = UPLOAD_DIR / safe_name
    with open(file_path, "wb") as buffer:
        shutil.copyfileobj(file.file, buffer)
    try:
        # Parse the stored file and cache the result keyed by file_id.
        file_id, file_info = excel_parser.parse_file(file_path, file_type)
        return ParsedFileResponse(
            file_id=file_id,
            file_type=file_info['file_type'],
            filename=file_info['filename'],
            header_row=file_info['header_row'],
            row_count=file_info['row_count'],
            preview=file_info['preview']
        )
    except Exception as e:
        raise HTTPException(status_code=400, detail=f"Failed to parse file: {str(e)}")
@router.get("/preview/{file_id}", response_model=ParsedFileResponse)
def get_preview(file_id: str):
    """Return the cached parse result/preview for a previously uploaded file."""
    file_info = excel_parser.get_file_info(file_id)
    if not file_info:
        raise HTTPException(status_code=404, detail="File not found")
    return ParsedFileResponse(
        file_id=file_info['file_id'],
        file_type=file_info['file_type'],
        filename=file_info['filename'],
        header_row=file_info['header_row'],
        row_count=file_info['row_count'],
        preview=file_info['preview']
    )
def clean_value(val, default=''):
    """Normalize a raw spreadsheet cell: strip whitespace and map NaN/empty markers to default."""
    if val is None:
        return default
    if isinstance(val, float) and pd.isna(val):
        return default
    text = str(val).strip()
    return default if text.lower() in ('nan', 'none', 'null', '') else text
@router.post("/import", response_model=ImportResponse)
def import_data(request: ImportRequest, db: Session = Depends(get_db)):
    """Import previously parsed file data into the database.

    Clears the existing rows for the file's type (plus dependent match
    results and review logs), then inserts the parsed rows, skipping
    duplicates and bad rows. Returns the number of imported rows.
    """
    import traceback
    try:
        file_info = excel_parser.get_file_info(request.file_id)
        if not file_info:
            print(f"[ETL Import] Error: File not found for file_id={request.file_id}")
            raise HTTPException(status_code=404, detail="File not found")
        df = excel_parser.get_parsed_data(request.file_id)
        if df is None:
            print(f"[ETL Import] Error: Parsed data not found for file_id={request.file_id}")
            raise HTTPException(status_code=404, detail="Parsed data not found")
        print(f"[ETL Import] Starting import: file_type={file_info['file_type']}, rows={len(df)}")
        file_type = file_info['file_type']
        imported_count = 0
        seen_ids = set()  # track processed IDs to skip duplicates within the file
        # Clear the old data of this type first, to avoid duplicate-key conflicts.
        try:
            if file_type == 'dit':
                print("[ETL Import] Clearing old DIT records and dependent matches/logs...")
                # Delete review logs and match results referencing DIT rows first.
                db.query(ReviewLog).delete()
                db.query(MatchResult).delete()
                db.query(DitRecord).delete()
            elif file_type == 'sample':
                print("[ETL Import] Clearing old Sample records and dependent matches/logs...")
                # ReviewLog rows hang off MatchResult; since the whole category is
                # being wiped, clearing all ReviewLogs plus the SAMPLE-targeted
                # match results is the safest order.
                db.query(ReviewLog).delete()
                db.query(MatchResult).filter(MatchResult.target_type == TargetType.SAMPLE).delete()
                db.query(SampleRecord).delete()
            elif file_type == 'order':
                print("[ETL Import] Clearing old Order records and dependent matches/logs...")
                db.query(ReviewLog).delete()
                db.query(MatchResult).filter(MatchResult.target_type == TargetType.ORDER).delete()
                db.query(OrderRecord).delete()
            db.flush()  # flush (not commit) so the clear stays in the same transaction as the insert
            print("[ETL Import] Old data cleared successfully.")
        except Exception as e:
            db.rollback()
            print(f"[ETL Import] Error clearing old data: {traceback.format_exc()}")
            raise HTTPException(status_code=500, detail=f"Failed to clear old data: {str(e)}")
        for idx, row in df.iterrows():
            try:
                if file_type == 'dit':
                    op_id = clean_value(row.get('op_id'), '')
                    erp_account = clean_value(row.get('erp_account'), '')
                    customer = clean_value(row.get('customer'))
                    pn = clean_value(row.get('pn'))
                    # Skip invalid rows and duplicate (op_id, pn) combinations.
                    unique_key = f"{op_id}|{pn}"
                    if not op_id or unique_key in seen_ids:
                        continue
                    seen_ids.add(unique_key)
                    record = DitRecord(
                        op_id=op_id,
                        erp_account=erp_account,
                        customer=customer,
                        customer_normalized=normalize_customer_name(customer),
                        pn=sanitize_pn(pn),
                        eau=int(row.get('eau', 0)) if row.get('eau') and not pd.isna(row.get('eau')) else 0,
                        stage=clean_value(row.get('stage')),
                        date=clean_value(row.get('date'))
                    )
                elif file_type == 'sample':
                    sample_id = clean_value(row.get('sample_id'), f'S{idx}')
                    oppy_no = clean_value(row.get('oppy_no'), '')
                    cust_id = clean_value(row.get('cust_id'), '')
                    customer = clean_value(row.get('customer'))
                    pn = clean_value(row.get('pn'))
                    # Skip duplicate sample_id values.
                    if sample_id in seen_ids:
                        continue
                    seen_ids.add(sample_id)
                    record = SampleRecord(
                        sample_id=sample_id,
                        order_no=clean_value(row.get('order_no')),
                        oppy_no=oppy_no,
                        cust_id=cust_id,
                        customer=customer,
                        customer_normalized=normalize_customer_name(customer),
                        pn=sanitize_pn(pn),
                        qty=int(row.get('qty', 0)) if row.get('qty') and not pd.isna(row.get('qty')) else 0,
                        date=clean_value(row.get('date'))
                    )
                elif file_type == 'order':
                    order_id = clean_value(row.get('order_id'), f'O{idx}')
                    cust_id = clean_value(row.get('cust_id'), '')
                    customer = clean_value(row.get('customer'))
                    pn = clean_value(row.get('pn'))
                    # Skip duplicate order_id values.
                    if order_id in seen_ids:
                        continue
                    seen_ids.add(order_id)
                    record = OrderRecord(
                        order_id=order_id,
                        order_no=clean_value(row.get('order_no')),
                        cust_id=cust_id,
                        customer=customer,
                        customer_normalized=normalize_customer_name(customer),
                        pn=sanitize_pn(pn),
                        qty=int(row.get('qty', 0)) if row.get('qty') and not pd.isna(row.get('qty')) else 0,
                        status=clean_value(row.get('status'), 'Backlog'),
                        amount=float(row.get('amount', 0)) if row.get('amount') and not pd.isna(row.get('amount')) else 0
                    )
                else:
                    continue
                db.add(record)
                imported_count += 1
                if imported_count % 500 == 0:
                    print(f"[ETL Import] Processed {imported_count} rows...")
            except Exception as e:
                # Best-effort: log and skip the bad row, keep importing the rest.
                print(f"[ETL Import] Error importing row {idx}: {e}")
                continue
        try:
            print(f"[ETL Import] Committing {imported_count} records...")
            db.commit()
            print(f"[ETL Import] Import successful: {imported_count} records.")
        except Exception as e:
            db.rollback()
            print(f"[ETL Import] Commit Error: {traceback.format_exc()}")
            raise HTTPException(status_code=500, detail=f"Failed to commit data: {str(e)}")
        return ImportResponse(imported_count=imported_count)
    except HTTPException:
        raise
    except Exception as e:
        print(f"[ETL Import] Unhandled Exception: {traceback.format_exc()}")
        raise HTTPException(status_code=500, detail=f"Internal Server Error: {str(e)}")
@router.get("/data/{data_type}")
def get_data(data_type: str, db: Session = Depends(get_db)):
    """Return every imported row of the given type as a list of plain dicts."""
    model_by_type = {
        'dit': DitRecord,
        'sample': SampleRecord,
        'order': OrderRecord,
    }
    model = model_by_type.get(data_type)
    if model is None:
        raise HTTPException(status_code=400, detail="Invalid data type")
    rows = db.query(model).all()
    return [
        {col.name: getattr(row, col.name) for col in row.__table__.columns}
        for row in rows
    ]

181
backend/app/routers/lab.py Normal file
View File

@@ -0,0 +1,181 @@
from typing import List, Optional
from datetime import datetime, timedelta
from fastapi import APIRouter, Depends, Query
from sqlalchemy.orm import Session
from sqlalchemy import func, and_
from pydantic import BaseModel
from app.models import get_db
from app.models.sample import SampleRecord
from app.models.order import OrderRecord
router = APIRouter(prefix="/lab", tags=["Lab"])
class LabKPI(BaseModel):
    """Lab analytics KPIs."""
    avg_velocity: float  # average sample-to-order conversion time (days)
    conversion_rate: float  # conversion ratio (%)
    orphan_count: int  # total orphan samples
class ScatterPoint(BaseModel):
    """Aggregated sample vs. order quantity for one (customer, pn) pair."""
    customer: str
    pn: str
    sample_qty: int
    order_qty: int
class OrphanSample(BaseModel):
    """A sample sent long ago with no matching order."""
    customer: str
    pn: str
    days_since_sent: int
    order_no: str
    date: str
def parse_date(date_str: str) -> Optional[datetime]:
    """Parse a 'YYYY-MM-DD' string; return None for malformed or None input.

    Catches only ValueError (bad format) and TypeError (non-string input)
    instead of a bare except, which also swallowed KeyboardInterrupt and
    SystemExit.
    """
    try:
        return datetime.strptime(date_str, "%Y-%m-%d")
    except (ValueError, TypeError):
        return None
@router.get("/kpi", response_model=LabKPI)
def get_lab_kpi(
    start_date: Optional[str] = Query(None),
    end_date: Optional[str] = Query(None),
    db: Session = Depends(get_db)
):
    """Compute lab KPIs: average sample-to-order velocity, conversion rate, orphan count."""
    # 1. Load samples and orders, optionally filtered by date range.
    samples_query = db.query(SampleRecord)
    orders_query = db.query(OrderRecord)
    if start_date:
        samples_query = samples_query.filter(SampleRecord.date >= start_date)
        # NOTE(review): OrderRecord has no string `date` column, so created_at
        # (a DateTime) is compared against the date string here — confirm the
        # DB coerces this comparison as intended.
        orders_query = orders_query.filter(OrderRecord.created_at >= start_date)
    if end_date:
        samples_query = samples_query.filter(SampleRecord.date <= end_date)
        # Note: OrderRecord only has created_at; no end-date filter is applied to orders.
    samples = samples_query.all()
    orders = orders_query.all()
    # Group both sides by (ERP code, part number); the ERP code is cust_id.
    sample_groups = {}
    for s in samples:
        key = (s.cust_id, s.pn)
        if key not in sample_groups:
            sample_groups[key] = []
        sample_groups[key].append(s)
    order_groups = {}
    for o in orders:
        key = (o.cust_id, o.pn)
        if key not in order_groups:
            order_groups[key] = []
        order_groups[key].append(o)
    # Velocity and conversion rate.
    velocities = []
    converted_samples_count = 0
    total_samples_count = len(samples)
    for key, group_samples in sample_groups.items():
        if key in order_groups:
            # Converted: this (customer, pn) key has at least one order.
            converted_samples_count += len(group_samples)
            # Velocity = first order date - earliest sample date.
            # NOTE(review): parse_date may return None for a malformed non-empty
            # date, which would make min() raise — confirm upstream dates are ISO.
            earliest_sample_date = min([parse_date(s.date) for s in group_samples if s.date] or [datetime.max])
            first_order_date = min([o.created_at for o in order_groups[key] if o.created_at] or [datetime.max])
            if earliest_sample_date != datetime.max and first_order_date != datetime.max:
                diff = (first_order_date - earliest_sample_date).days
                if diff >= 0:
                    velocities.append(diff)
    avg_velocity = sum(velocities) / len(velocities) if velocities else 0
    conversion_rate = (converted_samples_count / total_samples_count * 100) if total_samples_count > 0 else 0
    # Orphan samples: sent more than 90 days ago with no order for the same key.
    now = datetime.now()
    orphan_count = 0
    for key, group_samples in sample_groups.items():
        if key not in order_groups:
            for s in group_samples:
                s_date = parse_date(s.date)
                if s_date and (now - s_date).days > 90:
                    orphan_count += 1
    return LabKPI(
        avg_velocity=round(avg_velocity, 1),
        conversion_rate=round(conversion_rate, 1),
        orphan_count=orphan_count
    )
@router.get("/scatter", response_model=List[ScatterPoint])
def get_scatter_data(
    start_date: Optional[str] = Query(None),
    end_date: Optional[str] = Query(None),
    db: Session = Depends(get_db)
):
    """Aggregate sample vs. order quantities per (cust_id, pn) pair.

    Only the sample query honours the date filters; orders are read in full
    and only counted for keys that already have samples, because the ROI
    scatter is sample-based by design (pairs with orders but no samples are
    intentionally omitted).
    """
    sample_q = db.query(SampleRecord)
    if start_date:
        sample_q = sample_q.filter(SampleRecord.date >= start_date)
    if end_date:
        sample_q = sample_q.filter(SampleRecord.date <= end_date)

    # (cust_id, pn) -> {"sample_qty", "order_qty", "customer"}
    aggregates = {}
    for rec in sample_q.all():
        bucket = aggregates.setdefault(
            (rec.cust_id, rec.pn),
            {"sample_qty": 0, "order_qty": 0, "customer": rec.customer},
        )
        bucket["sample_qty"] += rec.qty or 0

    for rec in db.query(OrderRecord).all():
        bucket = aggregates.get((rec.cust_id, rec.pn))
        if bucket is not None:
            bucket["order_qty"] += rec.qty or 0

    return [
        ScatterPoint(
            customer=info["customer"],
            pn=pair[1],
            sample_qty=info["sample_qty"],
            order_qty=info["order_qty"],
        )
        for pair, info in aggregates.items()
    ]
@router.get("/orphans", response_model=List[OrphanSample])
def get_orphans(db: Session = Depends(get_db)):
    """List samples sent more than 90 days ago whose (cust_id, pn) pair
    never produced an order, newest-debt first."""
    now = datetime.now()
    cutoff = now - timedelta(days=90)
    # Distinct (cust_id, pn) pairs that have at least one order.
    ordered_keys = set(db.query(OrderRecord.cust_id, OrderRecord.pn).distinct().all())
    stale = []
    for rec in db.query(SampleRecord).all():
        if (rec.cust_id, rec.pn) in ordered_keys:
            continue
        sent_on = parse_date(rec.date)
        # Skip unparseable dates and samples still inside the 90-day window.
        if not sent_on or sent_on >= cutoff:
            continue
        stale.append(OrphanSample(
            customer=rec.customer,
            pn=rec.pn,
            days_since_sent=(now - sent_on).days,
            order_no=rec.order_no,
            date=rec.date
        ))
    stale.sort(key=lambda item: item.days_since_sent, reverse=True)
    return stale

View File

@@ -0,0 +1,171 @@
from typing import List, Optional
from fastapi import APIRouter, Depends, HTTPException
from sqlalchemy.orm import Session
from pydantic import BaseModel
from app.models import get_db
from app.models.dit import DitRecord
from app.models.sample import SampleRecord
from app.models.order import OrderRecord
from app.models.match import MatchResult, MatchStatus, TargetType
from app.services.fuzzy_matcher import FuzzyMatcher
router = APIRouter(prefix="/match", tags=["Matching"])
class MatchRunResponse(BaseModel):
    """Summary counters returned by POST /match/run."""
    match_count: int      # total MatchResult rows created
    auto_matched: int     # rows accepted automatically (score above threshold)
    pending_review: int   # rows queued for human review
class DitInfo(BaseModel):
    """API view of the DIT record attached to a match result."""
    id: int
    op_id: str
    customer: str
    pn: str
    eau: int              # estimated annual usage
    stage: Optional[str]
    class Config:
        # Allow construction directly from ORM rows.
        from_attributes = True
class TargetInfo(BaseModel):
    """API view of a match target (a sample row or an order row).

    NOTE(review): unlike DitInfo this model has no from_attributes Config;
    it is always constructed field-by-field in this module, so that is
    currently harmless.
    """
    id: int
    customer: str
    pn: str
    order_no: Optional[str]
    qty: Optional[int]
class MatchResultResponse(BaseModel):
    """One match result with its related DIT and target records embedded."""
    id: int
    dit_id: int
    target_type: str      # TargetType enum value ("SAMPLE" / "ORDER" etc.)
    target_id: int
    score: float          # similarity score, 0-100
    reason: str           # human-readable match rationale
    status: str           # MatchStatus enum value
    dit: Optional[DitInfo]
    target: Optional[TargetInfo]
    class Config:
        from_attributes = True
class ReviewRequest(BaseModel):
    """Body for PUT /match/{match_id}/review."""
    action: str  # 'accept' or 'reject'; anything else is rejected with HTTP 400
@router.post("/run", response_model=MatchRunResponse)
def run_matching(db: Session = Depends(get_db)):
    """Run the fuzzy-matching pipeline and return aggregate counters.

    Delegates to FuzzyMatcher.run_matching(), which clears previous match
    results and rebuilds them from scratch.
    """
    matcher = FuzzyMatcher(db)
    result = matcher.run_matching()
    return MatchRunResponse(**result)
@router.get("/results", response_model=List[MatchResultResponse])
def get_results(db: Session = Depends(get_db)):
    """Return every match result, enriched with its DIT and target records.

    DIT records are batch-loaded in a single query (the original issued one
    query per match — a classic N+1). Targets are still fetched per match
    because they live in different tables depending on ``target_type``.
    """
    matches = db.query(MatchResult).all()
    # Batch-load all referenced DIT rows in one query (N+1 fix).
    dit_ids = {m.dit_id for m in matches}
    dits_by_id = {}
    if dit_ids:
        for dit in db.query(DitRecord).filter(DitRecord.id.in_(dit_ids)).all():
            dits_by_id[dit.id] = dit
    results = []
    for match in matches:
        results.append(MatchResultResponse(
            id=match.id,
            dit_id=match.dit_id,
            target_type=match.target_type.value,
            target_id=match.target_id,
            score=match.score,
            reason=match.reason,
            status=match.status.value,
            dit=_dit_to_info(dits_by_id.get(match.dit_id)),
            target=_load_target_info(db, match)
        ))
    return results


def _dit_to_info(dit) -> Optional[DitInfo]:
    """Map a DitRecord ORM row to its API schema; None passes through."""
    if not dit:
        return None
    return DitInfo(
        id=dit.id,
        op_id=dit.op_id,
        customer=dit.customer,
        pn=dit.pn,
        eau=dit.eau,
        stage=dit.stage
    )


def _load_target_info(db: Session, match) -> Optional[TargetInfo]:
    """Fetch the sample/order row a match points at and map it to TargetInfo."""
    if match.target_type == TargetType.SAMPLE:
        sample = db.query(SampleRecord).filter(SampleRecord.id == match.target_id).first()
        if sample:
            return TargetInfo(
                id=sample.id,
                customer=sample.customer,
                pn=sample.pn,
                order_no=sample.order_no,
                qty=sample.qty
            )
    elif match.target_type == TargetType.ORDER:
        order = db.query(OrderRecord).filter(OrderRecord.id == match.target_id).first()
        if order:
            return TargetInfo(
                id=order.id,
                customer=order.customer,
                pn=order.pn,
                order_no=order.order_no,
                qty=order.qty
            )
    return None
@router.put("/{match_id}/review", response_model=MatchResultResponse)
def review_match(match_id: int, request: ReviewRequest, db: Session = Depends(get_db)):
    """Accept or reject a pending match, then return the updated record.

    Raises:
        HTTPException 400: action is neither 'accept' nor 'reject'.
        HTTPException 404: no MatchResult with the given id exists.
    """
    if request.action not in ['accept', 'reject']:
        raise HTTPException(status_code=400, detail="Invalid action")
    matcher = FuzzyMatcher(db)
    match = matcher.review_match(match_id, request.action)
    if not match:
        raise HTTPException(status_code=404, detail="Match not found")
    # Load the related DIT / target rows to embed in the response payload.
    dit = db.query(DitRecord).filter(DitRecord.id == match.dit_id).first()
    dit_info = DitInfo(
        id=dit.id,
        op_id=dit.op_id,
        customer=dit.customer,
        pn=dit.pn,
        eau=dit.eau,
        stage=dit.stage
    ) if dit else None
    target_info = None
    if match.target_type == TargetType.SAMPLE:
        sample = db.query(SampleRecord).filter(SampleRecord.id == match.target_id).first()
        if sample:
            target_info = TargetInfo(
                id=sample.id,
                customer=sample.customer,
                pn=sample.pn,
                order_no=sample.order_no,
                qty=sample.qty
            )
    elif match.target_type == TargetType.ORDER:
        order = db.query(OrderRecord).filter(OrderRecord.id == match.target_id).first()
        if order:
            target_info = TargetInfo(
                id=order.id,
                customer=order.customer,
                pn=order.pn,
                order_no=order.order_no,
                qty=order.qty
            )
    return MatchResultResponse(
        id=match.id,
        dit_id=match.dit_id,
        target_type=match.target_type.value,
        target_id=match.target_id,
        score=match.score,
        reason=match.reason,
        status=match.status.value,
        dit=dit_info,
        target=target_info
    )

View File

@@ -0,0 +1,32 @@
from fastapi import APIRouter, Depends, HTTPException
from fastapi.responses import StreamingResponse
from sqlalchemy.orm import Session
from app.models import get_db
from app.services.report_generator import ReportGenerator
router = APIRouter(prefix="/report", tags=["Report"])
@router.get("/export")
def export_report(format: str = "xlsx", db: Session = Depends(get_db)):
    """Export the DIT attribution report as a downloadable file.

    Args:
        format: "xlsx" (Excel workbook) or "pdf"; anything else -> HTTP 400.

    Returns:
        StreamingResponse with a Content-Disposition attachment header.
    """
    if format not in ['xlsx', 'pdf']:
        raise HTTPException(status_code=400, detail="Invalid format. Use 'xlsx' or 'pdf'")
    generator = ReportGenerator(db)
    if format == 'xlsx':
        output = generator.generate_excel()
        media_type = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
        filename = "dit_attribution_report.xlsx"
    else:
        output = generator.generate_pdf()
        media_type = "application/pdf"
        filename = "dit_attribution_report.pdf"
    # BUG FIX: the computed filename was never interpolated into the header,
    # so downloads were served with a broken literal name.
    return StreamingResponse(
        output,
        media_type=media_type,
        headers={
            "Content-Disposition": f"attachment; filename={filename}"
        }
    )

View File

@@ -0,0 +1 @@
# Services package

View File

@@ -0,0 +1,175 @@
import re
import uuid
import math
from pathlib import Path
from typing import List, Dict, Any, Optional, Tuple
import pandas as pd
import chardet
from openpyxl import load_workbook
from app.config import MAX_HEADER_SCAN_ROWS, UPLOAD_DIR
def clean_value(val):
    """Return *val* with non-finite floats (NaN/Inf) mapped to None.

    Keeps single values JSON-serializable; anything that is not a float
    passes through untouched (including None).
    """
    if isinstance(val, float) and not math.isfinite(val):
        return None
    return val


def clean_dict(d: Dict) -> Dict:
    """Return a copy of *d* with every value passed through clean_value."""
    return {key: clean_value(item) for key, item in d.items()}


def clean_records(records: List[Dict]) -> List[Dict]:
    """Return a copy of *records* with every dict passed through clean_dict."""
    return [clean_dict(rec) for rec in records]
# Column-name mapping table: for each file type, maps every standard field
# name to the known header variants (Chinese and English, case-insensitive)
# seen in ERP/CRM exports. Used by ExcelParser.map_columns / find_header_row.
COLUMN_MAPPING = {
    'dit': {
        'op_id': ['opportunity name', 'opportunity no', 'opportunity', 'op編號', 'op 編號', 'op_id', 'opid', '案件編號', '案號', 'opportunity id'],
        'erp_account': ['erp account', 'account no', 'erp account no', '客戶代碼', '客戶編號', 'erp_account'],
        'customer': ['account name', 'branding customer', '客戶', '客戶名稱', 'customer', 'customer name', '公司名稱'],
        'pn': ['product name', '料號', 'part number', 'pn', 'part no', 'part_number', '產品料號', 'stage/part'],
        'eau': ['eau quantity', 'eau quantity (pcs)', 'eau', '年預估量', 'annual usage', '預估用量'],
        'stage': ['stage', 'oppty product stage', '階段', 'status', '狀態', '專案階段'],
        'date': ['created date', '日期', 'date', '建立日期', 'create date']
    },
    'sample': {
        'sample_id': ['樣品訂單號碼', 'item', '樣品編號', 'sample_id', 'sample id', '編號'],
        'order_no': ['樣品訂單號碼', '單號', 'order_no', 'order no', '樣品單號', '申請單號'],
        'oppy_no': ['oppy no', 'oppy_no', '案號', '案件編號', 'opportunity no'],
        'cust_id': ['cust id', 'cust_id', '客戶編號', '客戶代碼', '客戶代號'],
        'customer': ['客戶名稱', '客戶簡稱', '客戶', 'customer', 'customer name'],
        'pn': ['item', 'type', '料號', 'part number', 'pn', 'part no', '產品料號', '索樣數量'],
        'qty': ['索樣數量pcs', '索樣數量 k', '數量', 'qty', 'quantity', '申請數量'],
        'date': ['需求日', '日期', 'date', '申請日期']
    },
    'order': {
        'order_id': ['項次', '訂單編號', 'order_id', 'order id'],
        'order_no': ['訂單單號', '訂單號', 'order_no', 'order no', '銷貨單號'],
        'cust_id': ['客戶編號', '客戶代碼', '客戶代號', 'cust_id', 'cust id'],
        'customer': ['客戶', '客戶名稱', 'customer', 'customer name'],
        'pn': ['type', '內部料號', '料號', 'part number', 'pn', 'part no', '產品料號'],
        'qty': ['訂單量', '數量', 'qty', 'quantity', '訂購數量', '出貨數量'],
        'status': ['狀態', 'status', '訂單狀態'],
        'amount': ['原幣金額(含稅)', '台幣金額(未稅)', '金額', 'amount', 'total', '訂單金額']
    }
}
class ExcelParser:
    """Parse uploaded Excel/CSV exports into normalized DataFrames.

    Handles ERP-style exports whose real header row is buried under several
    metadata rows (dynamic header detection), and maps vendor-specific column
    headings onto the standard field names declared in COLUMN_MAPPING.
    """

    def __init__(self):
        # In-memory cache of parse results keyed by a generated file_id.
        # NOTE(review): entries are never evicted, so this grows for the
        # lifetime of the process — confirm that is acceptable.
        self.parsed_files: Dict[str, Dict] = {}

    def detect_encoding(self, file_path: Path) -> str:
        """Guess a file's text encoding from its first 10 kB via chardet.

        NOTE(review): chardet can report encoding=None for ambiguous input;
        that value is returned as-is here — confirm downstream tolerates it.
        """
        with open(file_path, 'rb') as f:
            result = chardet.detect(f.read(10000))
        return result.get('encoding', 'utf-8')

    def find_header_row(self, df: pd.DataFrame, file_type: str) -> int:
        """Scan the first MAX_HEADER_SCAN_ROWS rows for the real header row.

        A row counts as the header when at least two of its cells contain a
        known column-name variant (substring match, case-insensitive).
        """
        expected_columns = set()
        for variants in COLUMN_MAPPING[file_type].values():
            expected_columns.update([v.lower() for v in variants])
        for idx in range(min(MAX_HEADER_SCAN_ROWS, len(df))):
            row = df.iloc[idx]
            row_values = [str(v).lower().strip() for v in row.values if pd.notna(v)]
            # Count cells that contain any known header variant.
            matches = sum(1 for v in row_values if any(exp in v for exp in expected_columns))
            if matches >= 2:  # require at least 2 matching columns
                return idx
        return 0  # fall back to treating the first row as the header

    def map_columns(self, df: pd.DataFrame, file_type: str) -> Dict[str, str]:
        """Build a rename map from actual DataFrame columns to standard names.

        Matching is bidirectional substring containment, case-insensitive;
        each standard name is bound to at most one source column (first hit
        in variant-priority order wins).
        """
        mapping = {}
        column_map = COLUMN_MAPPING[file_type]
        df_columns = [str(c).lower().strip() for c in df.columns]
        for standard_name, variants in column_map.items():
            for variant in variants:
                variant_lower = variant.lower()
                for idx, col in enumerate(df_columns):
                    if variant_lower in col or col in variant_lower:
                        mapping[df.columns[idx]] = standard_name
                        break
                if standard_name in mapping.values():
                    break
        return mapping

    def parse_file(self, file_path: Path, file_type: str) -> Tuple[str, Dict[str, Any]]:
        """Parse an Excel/CSV file and cache the result.

        Returns (file_id, metadata) where metadata mirrors the cached entry
        minus the DataFrame itself. The file is read twice: once headerless
        to locate the header row, then again with that header applied.
        """
        file_id = str(uuid.uuid4())
        # First pass: read without a header to locate the real header row.
        if file_path.suffix.lower() == '.csv':
            encoding = self.detect_encoding(file_path)
            df = pd.read_csv(file_path, encoding=encoding, header=None)
        else:
            df = pd.read_excel(file_path, header=None)
        # Locate the header row.
        header_row = self.find_header_row(df, file_type)
        # Second pass: re-read with the detected header row.
        if file_path.suffix.lower() == '.csv':
            df = pd.read_csv(file_path, encoding=encoding, header=header_row)
        else:
            df = pd.read_excel(file_path, header=header_row)
        # Rename columns to standard field names.
        column_mapping = self.map_columns(df, file_type)
        df = df.rename(columns=column_mapping)
        # Keep only the standard columns that were actually found.
        required_columns = list(COLUMN_MAPPING[file_type].keys())
        available_columns = [c for c in required_columns if c in df.columns]
        df = df[available_columns]
        # Drop fully-empty rows.
        df = df.dropna(how='all')
        # Build a 10-row preview (NaN/Inf cleaned for JSON serialization).
        preview = clean_records(df.head(10).to_dict(orient='records'))
        # Cache the parse result for later commit/inspection.
        parsed_data = {
            'file_id': file_id,
            'file_type': file_type,
            'filename': file_path.name,
            'header_row': header_row,
            'row_count': len(df),
            'columns': list(df.columns),
            'preview': preview,
            'dataframe': df
        }
        self.parsed_files[file_id] = parsed_data
        return file_id, {k: v for k, v in parsed_data.items() if k != 'dataframe'}

    def get_parsed_data(self, file_id: str) -> Optional[pd.DataFrame]:
        """Return the cached DataFrame for a file_id, or None if unknown."""
        if file_id in self.parsed_files:
            return self.parsed_files[file_id].get('dataframe')
        return None

    def get_file_info(self, file_id: str) -> Optional[Dict]:
        """Return cached file metadata (without the DataFrame), or None."""
        if file_id in self.parsed_files:
            data = self.parsed_files[file_id]
            return {k: v for k, v in data.items() if k != 'dataframe'}
        return None


# Module-level singleton shared by callers that import this service.
excel_parser = ExcelParser()

View File

@@ -0,0 +1,277 @@
import re
from typing import List, Tuple, Optional
from rapidfuzz import fuzz, process
from sqlalchemy.orm import Session
from app.config import MATCH_THRESHOLD_AUTO, MATCH_THRESHOLD_REVIEW
from app.models.dit import DitRecord
from app.models.sample import SampleRecord
from app.models.order import OrderRecord
from app.models.match import MatchResult, MatchStatus, TargetType, ReviewLog
import pandas as pd
from datetime import timedelta
# Legal-entity suffixes stripped during customer-name normalization.
# The first three (CJK) entries are also used by calculate_similarity's
# suffix-mismatch heuristic, so keep them at the front.
COMPANY_SUFFIXES = [
    '股份有限公司', '有限公司', '公司',
    '株式会社', '株式會社',
    'Co., Ltd.', 'Co.,Ltd.', 'Co. Ltd.', 'Co.Ltd.',
    'Corporation', 'Corp.', 'Corp',
    'Inc.', 'Inc',
    'Limited', 'Ltd.', 'Ltd',
    'LLC', 'L.L.C.',
]


def sanitize_pn(pn: str) -> str:
    """Strip non-alphanumeric characters and uppercase (PMSM-808-LL -> PMSM808LL)."""
    if not pn:
        return ""
    return re.sub(r'[^a-zA-Z0-9]', '', str(pn)).upper()


def normalize_customer_name(name: str) -> str:
    """Normalize a customer name for fuzzy comparison.

    Steps: strip legal-entity suffixes (case-insensitive), drop parenthesized
    content (half- and full-width), convert full-width spaces, collapse
    whitespace, uppercase. Empty/None input yields "".
    """
    if not name:
        return ""
    normalized = name.strip()
    # Remove legal-entity suffixes.
    for suffix in COMPANY_SUFFIXES:
        normalized = re.sub(re.escape(suffix), '', normalized, flags=re.IGNORECASE)
    # Remove parenthesized content — both half-width () and full-width （）.
    # BUG FIX: the full-width pattern had been garbled to r'[^]*', which is an
    # unterminated character set and raised re.error on every call.
    normalized = re.sub(r'\([^)]*\)', '', normalized)
    normalized = re.sub(r'（[^）]*）', '', normalized)
    # Full-width space (U+3000) -> half-width space.
    # BUG FIX: both replace() arguments had degraded to an identical ASCII
    # space, making the conversion a no-op.
    normalized = normalized.replace('\u3000', ' ')
    # Collapse runs of whitespace.
    normalized = re.sub(r'\s+', ' ', normalized).strip()
    return normalized.upper()
def calculate_similarity(name1: str, name2: str) -> Tuple[float, str]:
    """Score the similarity of two customer names on a 0-100 scale.

    Returns (score, reason). Names are normalized first; an exact match after
    normalization short-circuits to 100. Otherwise the best of four RapidFuzz
    scorers wins, with ties resolved in this order:
    ratio > partial_ratio > token_sort_ratio > token_set_ratio.
    A score of 80+ may be relabelled "Corporate Suffix Mismatch" when exactly
    one of the RAW names carries one of the three leading CJK suffixes.
    """
    # Normalize (suffix/paren/whitespace stripping, uppercase).
    norm1 = normalize_customer_name(name1)
    norm2 = normalize_customer_name(name2)
    if not norm1 or not norm2:
        return 0.0, "Empty name"
    # Exact match after normalization.
    if norm1 == norm2:
        return 100.0, "Exact Match"
    # Compare with several scorers.
    ratio = fuzz.ratio(norm1, norm2)
    partial_ratio = fuzz.partial_ratio(norm1, norm2)
    token_sort_ratio = fuzz.token_sort_ratio(norm1, norm2)
    token_set_ratio = fuzz.token_set_ratio(norm1, norm2)
    # Keep the best score.
    best_score = max(ratio, partial_ratio, token_sort_ratio, token_set_ratio)
    # Label which scorer produced the best score (comparison order breaks ties).
    if ratio == best_score:
        reason = "Character Similarity"
    elif partial_ratio == best_score:
        reason = "Partial Match"
    elif token_sort_ratio == best_score:
        reason = "Token Order Match"
    else:
        reason = "Token Set Match"
    # Flag likely suffix-only differences using the un-normalized names.
    if best_score >= 80:
        for suffix in COMPANY_SUFFIXES[:3]:  # only the common CJK suffixes
            if (suffix in name1 and suffix not in name2) or \
               (suffix not in name1 and suffix in name2):
                reason = "Corporate Suffix Mismatch"
                break
    return best_score, reason
class FuzzyMatcher:
    """Waterfall matcher linking DIT records to sample and order rows.

    Three priorities, highest first:
      1. Golden Key — exact Opportunity-ID equality (score 100).
      2. Silver Key — ERP-account equality, restricted to the same PN (99).
      3. Fallback   — fuzzy customer-name similarity on the same PN, kept
         only when the score reaches MATCH_THRESHOLD_REVIEW.
    Matches scoring >= MATCH_THRESHOLD_AUTO are auto-accepted; the rest are
    stored as pending for human review.
    """

    def __init__(self, db: Session):
        # Caller-owned SQLAlchemy session; committed but never closed here.
        self.db = db

    def run_matching(self) -> dict:
        """Run one waterfall matching pass and return summary counters.

        Deletes all previous ReviewLog/MatchResult rows first, so every run
        rebuilds the match table from scratch.

        Returns:
            dict with 'match_count', 'auto_matched', 'pending_review'.
        """
        # 1. Load every DIT record.
        dit_records = self.db.query(DitRecord).all()
        # 2. Load samples/orders and index them (samples by PN and by Oppy
        #    No, orders by PN) so per-DIT candidate lookup is O(1).
        sample_records = self.db.query(SampleRecord).all()
        order_records = self.db.query(OrderRecord).all()
        samples_by_pn = {}
        samples_by_oppy = {}
        for s in sample_records:
            if s.pn:
                if s.pn not in samples_by_pn:
                    samples_by_pn[s.pn] = []
                samples_by_pn[s.pn].append(s)
            if s.oppy_no:
                if s.oppy_no not in samples_by_oppy:
                    samples_by_oppy[s.oppy_no] = []
                samples_by_oppy[s.oppy_no].append(s)
        orders_by_pn = {}
        for o in order_records:
            if o.pn not in orders_by_pn:
                orders_by_pn[o.pn] = []
            orders_by_pn[o.pn].append(o)
        # 3. Clear previous results (logs first, then the results they reference).
        self.db.query(ReviewLog).delete()
        self.db.query(MatchResult).delete()
        match_count = 0
        auto_matched = 0
        pending_review = 0
        for dit in dit_records:
            dit_date = pd.to_datetime(dit.date, errors='coerce')
            # --- DIT -> Sample matching ---
            # Candidates come from the Oppy-ID index (priority 1) and the
            # PN index (priorities 2/3).
            potential_samples = []
            if dit.op_id:
                potential_samples.extend(samples_by_oppy.get(dit.op_id, []))
            if dit.pn:
                potential_samples.extend(samples_by_pn.get(dit.pn, []))
            # De-duplicate candidates while preserving order.
            seen_sample_ids = set()
            unique_potential_samples = []
            for s in potential_samples:
                if s.id not in seen_sample_ids:
                    seen_sample_ids.add(s.id)
                    unique_potential_samples.append(s)
            for sample in unique_potential_samples:
                sample_date = pd.to_datetime(sample.date, errors='coerce')
                # Time window: the sample date must not precede the DIT date
                # by more than 30 days (tolerates paperwork filed late).
                # Unparseable dates skip the check rather than the candidate.
                if pd.notna(dit_date) and pd.notna(sample_date):
                    if sample_date < (dit_date - timedelta(days=30)):
                        continue
                match_priority = 0
                match_source = ""
                score = 0.0
                reason = ""
                # Priority 1: exact opportunity-number match (Golden Key).
                if dit.op_id and sample.oppy_no and dit.op_id == sample.oppy_no:
                    match_priority = 1
                    match_source = f"Matched via Opportunity ID: {dit.op_id}"
                    score = 100.0
                    reason = "Golden Key Match"
                # Priorities 2 & 3 additionally require an identical PN.
                elif dit.pn == sample.pn:
                    # Priority 2: ERP customer-code match (Silver Key).
                    if dit.erp_account and sample.cust_id and dit.erp_account == sample.cust_id:
                        match_priority = 2
                        match_source = f"Matched via ERP Account: {dit.erp_account}"
                        score = 99.0
                        reason = "Silver Key Match"
                    # Priority 3: fuzzy name similarity (fallback).
                    else:
                        score, reason = calculate_similarity(dit.customer, sample.customer)
                        if score >= MATCH_THRESHOLD_REVIEW:
                            match_priority = 3
                            match_source = f"Matched via Name Similarity ({reason})"
                if match_priority > 0:
                    status = MatchStatus.auto_matched if score >= MATCH_THRESHOLD_AUTO else MatchStatus.pending
                    match = MatchResult(
                        dit_id=dit.id,
                        target_type=TargetType.SAMPLE,
                        target_id=sample.id,
                        score=score,
                        match_priority=match_priority,
                        match_source=match_source,
                        reason=reason,
                        status=status
                    )
                    self.db.add(match)
                    match_count += 1
                    if status == MatchStatus.auto_matched:
                        auto_matched += 1
                    else:
                        pending_review += 1
            # --- DIT -> Order matching ---
            # Orders usually carry no Oppy ID, so candidates come from the PN
            # index only. NOTE(review): unlike the sample branch, no time-
            # window filter is applied here although the spec defines one for
            # DIT -> Order — confirm this is intentional.
            for order in orders_by_pn.get(dit.pn, []):
                match_priority = 0
                match_source = ""
                score = 0.0
                reason = ""
                # Priority 2: ERP customer-code match (Silver Key).
                if dit.erp_account and order.cust_id and dit.erp_account == order.cust_id:
                    match_priority = 2
                    match_source = f"Matched via ERP Account: {dit.erp_account}"
                    score = 99.0
                    reason = "Silver Key Match"
                # Priority 3: fuzzy name similarity (fallback).
                else:
                    score, reason = calculate_similarity(dit.customer, order.customer)
                    if score >= MATCH_THRESHOLD_REVIEW:
                        match_priority = 3
                        match_source = f"Matched via Name Similarity ({reason})"
                if match_priority > 0:
                    status = MatchStatus.auto_matched if score >= MATCH_THRESHOLD_AUTO else MatchStatus.pending
                    match = MatchResult(
                        dit_id=dit.id,
                        target_type=TargetType.ORDER,
                        target_id=order.id,
                        score=score,
                        match_priority=match_priority,
                        match_source=match_source,
                        reason=reason,
                        status=status
                    )
                    self.db.add(match)
                    match_count += 1
                    if status == MatchStatus.auto_matched:
                        auto_matched += 1
                    else:
                        pending_review += 1
        self.db.commit()
        return {
            'match_count': match_count,
            'auto_matched': auto_matched,
            'pending_review': pending_review
        }

    def get_pending_reviews(self) -> List[MatchResult]:
        """Return all match results still awaiting human review."""
        return self.db.query(MatchResult).filter(
            MatchResult.status == MatchStatus.pending
        ).all()

    def review_match(self, match_id: int, action: str) -> Optional[MatchResult]:
        """Apply an 'accept'/'reject' decision to a match.

        Unknown actions leave the status untouched but still commit.
        Returns the match, or None when the id does not exist.
        """
        match = self.db.query(MatchResult).filter(MatchResult.id == match_id).first()
        if not match:
            return None
        if action == 'accept':
            match.status = MatchStatus.accepted
        elif action == 'reject':
            match.status = MatchStatus.rejected
        self.db.commit()
        return match

View File

@@ -0,0 +1,171 @@
import io
from typing import List, Dict, Any
from datetime import datetime
from openpyxl import Workbook
from openpyxl.styles import Font, Alignment, PatternFill, Border, Side
from reportlab.lib import colors
from reportlab.lib.pagesizes import A4, landscape
from reportlab.platypus import SimpleDocTemplate, Table, TableStyle, Paragraph, Spacer
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont
from sqlalchemy.orm import Session
from app.models.dit import DitRecord
from app.models.sample import SampleRecord
from app.models.order import OrderRecord
from app.models.match import MatchResult, MatchStatus
class ReportGenerator:
    """Build the DIT attribution report as an Excel workbook or a PDF.

    Both formats render the same rows from get_attribution_data(), which
    joins each DIT record with its attributed sample and order through
    accepted/auto-matched MatchResult rows.
    """

    def __init__(self, db: Session):
        # Caller-owned SQLAlchemy session; read-only usage here.
        self.db = db

    def get_attribution_data(self) -> List[Dict[str, Any]]:
        """Return one dict per DIT record with attributed sample/order fields.

        Only the FIRST accepted or auto-matched match of each target type is
        used per DIT; unmatched fields stay None.
        """
        dit_records = self.db.query(DitRecord).all()
        result = []
        for dit in dit_records:
            row = {
                'op_id': dit.op_id,
                'customer': dit.customer,
                'pn': dit.pn,
                'eau': dit.eau,
                'stage': dit.stage,
                'sample_order': None,
                'order_no': None,
                'order_status': None,
                'order_amount': None
            }
            # First accepted/auto-matched SAMPLE match for this DIT.
            # NOTE(review): target_type is filtered with the string 'SAMPLE'
            # here while other modules compare TargetType members — confirm
            # both resolve to the same enum value in SQLAlchemy.
            sample_match = self.db.query(MatchResult).filter(
                MatchResult.dit_id == dit.id,
                MatchResult.target_type == 'SAMPLE',
                MatchResult.status.in_([MatchStatus.accepted, MatchStatus.auto_matched])
            ).first()
            if sample_match:
                sample = self.db.query(SampleRecord).filter(
                    SampleRecord.id == sample_match.target_id
                ).first()
                if sample:
                    row['sample_order'] = sample.order_no
            # First accepted/auto-matched ORDER match for this DIT.
            order_match = self.db.query(MatchResult).filter(
                MatchResult.dit_id == dit.id,
                MatchResult.target_type == 'ORDER',
                MatchResult.status.in_([MatchStatus.accepted, MatchStatus.auto_matched])
            ).first()
            if order_match:
                order = self.db.query(OrderRecord).filter(
                    OrderRecord.id == order_match.target_id
                ).first()
                if order:
                    row['order_no'] = order.order_no
                    row['order_status'] = order.status.value if order.status else None
                    row['order_amount'] = order.amount
            result.append(row)
        return result

    def generate_excel(self) -> io.BytesIO:
        """Render the attribution data as a styled .xlsx workbook in memory."""
        wb = Workbook()
        ws = wb.active
        ws.title = "DIT Attribution Report"
        # Header styling (white bold text on indigo fill).
        header_font = Font(bold=True, color="FFFFFF")
        header_fill = PatternFill(start_color="4F46E5", end_color="4F46E5", fill_type="solid")
        header_alignment = Alignment(horizontal="center", vertical="center")
        # Header row.
        headers = ['OP編號', '客戶名稱', '料號', 'EAU', '階段', '樣品單號', '訂單單號', '訂單狀態', '訂單金額']
        for col, header in enumerate(headers, 1):
            cell = ws.cell(row=1, column=col, value=header)
            cell.font = header_font
            cell.fill = header_fill
            cell.alignment = header_alignment
        # Data rows (missing values rendered as '-' / 0).
        data = self.get_attribution_data()
        for row_idx, row_data in enumerate(data, 2):
            ws.cell(row=row_idx, column=1, value=row_data['op_id'])
            ws.cell(row=row_idx, column=2, value=row_data['customer'])
            ws.cell(row=row_idx, column=3, value=row_data['pn'])
            ws.cell(row=row_idx, column=4, value=row_data['eau'])
            ws.cell(row=row_idx, column=5, value=row_data['stage'])
            ws.cell(row=row_idx, column=6, value=row_data['sample_order'] or '-')
            ws.cell(row=row_idx, column=7, value=row_data['order_no'] or '-')
            ws.cell(row=row_idx, column=8, value=row_data['order_status'] or '-')
            ws.cell(row=row_idx, column=9, value=row_data['order_amount'] or 0)
        # Column widths; chr(64 + col) maps 1 -> 'A', valid for <= 26 columns.
        column_widths = [15, 30, 20, 12, 15, 15, 15, 12, 12]
        for col, width in enumerate(column_widths, 1):
            ws.column_dimensions[chr(64 + col)].width = width
        # Serialize to an in-memory buffer, rewound for streaming.
        output = io.BytesIO()
        wb.save(output)
        output.seek(0)
        return output

    def generate_pdf(self) -> io.BytesIO:
        """Render the attribution data as a landscape-A4 PDF in memory."""
        output = io.BytesIO()
        doc = SimpleDocTemplate(output, pagesize=landscape(A4))
        elements = []
        styles = getSampleStyleSheet()
        # Title.
        title = Paragraph("DIT Attribution Report", styles['Title'])
        elements.append(title)
        elements.append(Spacer(1, 20))
        # Generation timestamp.
        date_text = Paragraph(f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M')}", styles['Normal'])
        elements.append(date_text)
        elements.append(Spacer(1, 20))
        # Table rows; customer names are truncated to keep the layout stable.
        # NOTE(review): assumes row['customer'] is a non-None string —
        # len(None) would raise here; confirm upstream guarantees this.
        data = self.get_attribution_data()
        table_data = [['OP No.', 'Customer', 'P/N', 'EAU', 'Stage', 'Sample', 'Order', 'Status', 'Amount']]
        for row in data:
            table_data.append([
                row['op_id'],
                row['customer'][:20] + '...' if len(row['customer']) > 20 else row['customer'],
                row['pn'],
                str(row['eau']),
                row['stage'] or '-',
                row['sample_order'] or '-',
                row['order_no'] or '-',
                row['order_status'] or '-',
                f"${row['order_amount']:,.0f}" if row['order_amount'] else '-'
            ])
        # Build and style the table.
        table = Table(table_data)
        table.setStyle(TableStyle([
            ('BACKGROUND', (0, 0), (-1, 0), colors.HexColor('#4F46E5')),
            ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
            ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
            ('FONTSIZE', (0, 0), (-1, 0), 10),
            ('FONTSIZE', (0, 1), (-1, -1), 8),
            ('BOTTOMPADDING', (0, 0), (-1, 0), 12),
            ('BACKGROUND', (0, 1), (-1, -1), colors.beige),
            ('GRID', (0, 0), (-1, -1), 1, colors.black),
            ('ROWBACKGROUNDS', (0, 1), (-1, -1), [colors.white, colors.HexColor('#F8FAFC')]),
        ]))
        elements.append(table)
        doc.build(elements)
        output.seek(0)
        return output

View File

@@ -0,0 +1 @@
# Utils package

View File

@@ -0,0 +1,64 @@
from datetime import datetime, timedelta
from typing import Optional
from jose import JWTError, jwt
from passlib.context import CryptContext
from fastapi import Depends, HTTPException, status
from fastapi.security import OAuth2PasswordBearer
from sqlalchemy.orm import Session
from app.config import SECRET_KEY, ALGORITHM, ACCESS_TOKEN_EXPIRE_MINUTES
from app.models import get_db
from app.models.user import User
# Password hashing context (bcrypt, auto-upgrading deprecated hashes) and the
# OAuth2 bearer scheme; tokenUrl points at the login endpoint.
pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")
oauth2_scheme = OAuth2PasswordBearer(tokenUrl="/api/auth/login")
def verify_password(plain_password: str, hashed_password: str) -> bool:
    """Check a plaintext password against its stored bcrypt hash."""
    is_valid = pwd_context.verify(plain_password, hashed_password)
    return is_valid
def get_password_hash(password: str) -> str:
    """Hash a plaintext password with the configured bcrypt context."""
    hashed = pwd_context.hash(password)
    return hashed
def create_access_token(data: dict, expires_delta: Optional[timedelta] = None) -> str:
    """Build a signed JWT carrying *data* plus an ``exp`` expiry claim.

    Falls back to the configured default lifetime when no delta is given.
    The input dict is copied, never mutated.
    """
    lifetime = expires_delta if expires_delta else timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES)
    payload = dict(data)
    payload["exp"] = datetime.utcnow() + lifetime
    return jwt.encode(payload, SECRET_KEY, algorithm=ALGORITHM)
def decode_token(token: str) -> Optional[dict]:
    """Decode and validate a JWT; return its claims, or None when invalid."""
    try:
        return jwt.decode(token, SECRET_KEY, algorithms=[ALGORITHM])
    except JWTError:
        return None
async def get_current_user(
    token: str = Depends(oauth2_scheme),
    db: Session = Depends(get_db)
) -> User:
    """FastAPI dependency: resolve the bearer token to a User row.

    Raises HTTP 401 when the token is missing/invalid, carries no usable
    ``sub`` claim, or the referenced user no longer exists.
    """
    credentials_exception = HTTPException(
        status_code=status.HTTP_401_UNAUTHORIZED,
        detail="Could not validate credentials",
        headers={"WWW-Authenticate": "Bearer"},
    )
    payload = decode_token(token)
    if payload is None:
        raise credentials_exception
    user_id_raw = payload.get("sub")
    if user_id_raw is None:
        raise credentials_exception
    # "sub" may arrive as a string; coerce to int to match the primary key.
    try:
        user_id = int(user_id_raw)
    except (ValueError, TypeError):
        raise credentials_exception
    user = db.query(User).filter(User.id == user_id).first()
    if user is None:
        raise credentials_exception
    return user

34
backend/create_admin.py Normal file
View File

@@ -0,0 +1,34 @@
from app.models import init_db, SessionLocal
from app.models.user import User, UserRole
from app.utils.security import get_password_hash
def create_admin_user():
    """Create the default local admin account if it does not already exist.

    One-off bootstrap script; the hardcoded credentials are intended for
    local development only. Idempotent: reruns are a no-op.
    """
    init_db()
    db = SessionLocal()
    try:
        email = "admin@example.com"
        password = "admin"
        # Skip creation when the account already exists (idempotent rerun).
        user = db.query(User).filter(User.email == email).first()
        if user:
            print(f"User {email} already exists.")
            return
        # Create new admin user.
        new_user = User(
            email=email,
            password_hash=get_password_hash(password),
            role="admin",  # String type now
            display_name="Administrator",
            ad_username="admin_local",
            department="IT"
        )
        db.add(new_user)
        db.commit()
        print(f"Admin user created successfully.\nEmail: {email}\nPassword: {password}")
    finally:
        # BUG FIX: the original leaked the session on the early-return path;
        # close it on every exit.
        db.close()


if __name__ == "__main__":
    create_admin_user()

17
backend/drop_tables.py Normal file
View File

@@ -0,0 +1,17 @@
from sqlalchemy import create_engine, text
from app.config import DATABASE_URL, TABLE_PREFIX
def drop_user_table():
    """Drop the project's Users table (both case variants), if present."""
    engine = create_engine(DATABASE_URL)
    candidates = (f"{TABLE_PREFIX}Users", f"{TABLE_PREFIX}users")
    with engine.connect() as conn:
        # MySQL table-name case sensitivity depends on the host filesystem,
        # so both spellings are dropped defensively.
        for name in candidates:
            conn.execute(text(f"DROP TABLE IF EXISTS {name}"))
        conn.commit()
    print(f"Dropped table {candidates[0]} (and lowercase variant if existed).")


if __name__ == "__main__":
    drop_user_table()

25
backend/inspect_db.py Normal file
View File

@@ -0,0 +1,25 @@
from sqlalchemy import create_engine, inspect
from app.config import DATABASE_URL, TABLE_PREFIX
def inspect_schema():
    """Print the column layout of the project's core tables.

    Diagnostic script; connects with the configured DATABASE_URL and dumps
    each expected table's columns, or a notice when the table is missing.
    """
    engine = create_engine(DATABASE_URL)
    inspector = inspect(engine)
    tables = [
        f"{TABLE_PREFIX}DIT_Records",
        f"{TABLE_PREFIX}Sample_Records",
        f"{TABLE_PREFIX}Order_Records",
        f"{TABLE_PREFIX}Match_Results"
    ]
    # Hoisted out of the loop: the original re-queried the database for the
    # table list once per table.
    existing = inspector.get_table_names()
    print("All tables:", existing)
    for table_name in tables:
        if table_name in existing:
            print(f"\nTable {table_name} exists. Columns:")
            for column in inspector.get_columns(table_name):
                print(f"- {column['name']} ({column['type']})")
        else:
            print(f"\nTable {table_name} does not exist.")


if __name__ == "__main__":
    inspect_schema()

27
backend/read_spec.py Normal file
View File

@@ -0,0 +1,27 @@
import docx
import sys
def read_docx(file_path):
    """Extract paragraphs and tables from a .docx file in document order.

    Tables are flattened to pipe-separated rows; blank paragraphs are
    dropped. Returns one newline-joined string.
    """
    doc = docx.Document(file_path)
    parts = []
    # Walk the raw body so paragraphs and tables keep their original order.
    for element in doc.element.body:
        tag = element.tag
        if tag.endswith('p'):  # paragraph
            paragraph = docx.text.paragraph.Paragraph(element, doc)
            if paragraph.text.strip():
                parts.append(paragraph.text)
        elif tag.endswith('tbl'):  # table
            table = docx.table.Table(element, doc)
            for row in table.rows:
                parts.append(" | ".join(cell.text.strip() for cell in row.cells))
    return '\n'.join(parts)
if __name__ == "__main__":
    # One-off extraction: dump the spec .docx (hardcoded local path) to a
    # UTF-8 text file for easier searching/diffing.
    path = r"c:\Users\USER\Desktop\SampleOrderAssistant\data\業務資料比對與轉換率分析系統 - 邏輯規格書 (v1.0).docx"
    content = read_docx(path)
    with open("spec_content.txt", "w", encoding="utf-8") as f:
        f.write(content)
    print("Content written to spec_content.txt")

16
backend/requirements.txt Normal file
View File

@@ -0,0 +1,16 @@
fastapi>=0.115.0
uvicorn[standard]==0.24.0
sqlalchemy==2.0.23
python-multipart==0.0.6
python-jose[cryptography]==3.3.0
passlib[bcrypt]==1.7.4
openpyxl==3.1.2
pandas==2.1.3
rapidfuzz==3.5.2
reportlab==4.0.7
chardet==5.2.0
opencc-python-reimplemented==0.1.7
pymysql==1.1.2
cryptography==41.0.7
python-dotenv==1.0.0
email-validator==2.3.0

30
backend/run.py Normal file
View File

@@ -0,0 +1,30 @@
#!/usr/bin/env python3
"""
SalesPipeline 應用程式啟動腳本
用於開發與生產環境
"""
import sys
import os
# 確保可以匯入 app 模組
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
import uvicorn
from app.config import APP_HOST, APP_PORT, WORKERS, DEBUG
def main():
    """Launch the FastAPI app via uvicorn using settings from app.config.

    In debug mode reload is enabled and the worker count is forced to 1,
    since uvicorn's reloader is incompatible with multiple workers.
    """
    print(f"Starting SalesPipeline on {APP_HOST}:{APP_PORT}")
    print(f"Workers: {WORKERS}, Debug: {DEBUG}")
    uvicorn.run(
        "app.main:app",
        host=APP_HOST,
        port=APP_PORT,
        workers=WORKERS if not DEBUG else 1,  # reloader requires a single worker
        reload=DEBUG,
        access_log=True,
    )


if __name__ == "__main__":
    main()

55
backend/schema_check.txt Normal file
View File

@@ -0,0 +1,55 @@
All tables: ['dit_records', 'match_results', 'order_records', 'pj_abc', 'PJ_SOA_DIT_Records', 'PJ_SOA_Match_Results', 'PJ_SOA_Order_Records', 'PJ_SOA_orders', 'PJ_SOA_Review_Logs', 'PJ_SOA_Sample_Records', 'PJ_SOA_samples', 'PJ_SOA_users', 'review_logs', 'sample_records', 'users']
Table PJ_SOA_DIT_Records exists. Columns:
- id (INTEGER)
- op_id (VARCHAR(255))
- customer (VARCHAR(255))
- customer_normalized (VARCHAR(255))
- pn (VARCHAR(100))
- eau (INTEGER)
- stage (VARCHAR(50))
- date (VARCHAR(20))
- created_at (DATETIME)
- updated_at (DATETIME)
- erp_account (VARCHAR(100))
Table PJ_SOA_Sample_Records exists. Columns:
- id (INTEGER)
- sample_id (VARCHAR(50))
- order_no (VARCHAR(50))
- customer (VARCHAR(255))
- customer_normalized (VARCHAR(255))
- pn (VARCHAR(100))
- qty (INTEGER)
- date (VARCHAR(20))
- created_at (DATETIME)
- updated_at (DATETIME)
- oppy_no (VARCHAR(100))
- cust_id (VARCHAR(100))
Table PJ_SOA_Order_Records exists. Columns:
- id (INTEGER)
- order_id (VARCHAR(50))
- order_no (VARCHAR(50))
- customer (VARCHAR(255))
- customer_normalized (VARCHAR(255))
- pn (VARCHAR(100))
- qty (INTEGER)
- status (VARCHAR(50))
- amount (FLOAT)
- created_at (DATETIME)
- updated_at (DATETIME)
- cust_id (VARCHAR(100))
Table PJ_SOA_Match_Results exists. Columns:
- id (INTEGER)
- dit_id (INTEGER)
- target_type (ENUM)
- target_id (INTEGER)
- score (FLOAT)
- reason (VARCHAR(255))
- status (ENUM)
- created_at (DATETIME)
- updated_at (DATETIME)
- match_priority (INTEGER)
- match_source (VARCHAR(255))

63
backend/spec_content.txt Normal file
View File

@@ -0,0 +1,63 @@
業務資料比對與轉換率分析系統 - 邏輯規格書 (v1.0)
文件日期2026-01-09
適用範圍:半導體製造業銷售漏斗分析 (Sales Pipeline Analysis)
部署環境On-Premise (地端)
1. 資料源與 ETL 前處理策略 (Data Ingestion)
系統針對三份異質資料來源進行標準化清洗,具備「動態表頭偵測」能力以適應 ERP 匯出的非結構化報表。
1.1 資料來源定義
資料類型 | 檔案特徵 | 關鍵欄位識別 (Key Columns) | 處理邏輯
DIT Report | 含 Metadata (前 ~15 行) | R欄 (Opportunity ID), AQ欄 (ERP Account), Customer, Part No, EAU, Stage | 自動跳過 Metadata定位至 "Stage/Part" 所在行作為表頭。
樣品紀錄 | 含 Metadata (前 ~8 行) | AU欄 (Oppy No), G欄 (Cust ID), Customer, Part No, Qty | 自動跳過 Metadata定位至 "索樣數量" 所在行。
訂單明細 | 標準格式 (第 1 行) | Order No, Customer, Part No, Qty, Status (Backlog/Shipped) | 識別 Big5/CP950 編碼,標準化讀取。
1.2 資料清洗規則 (Sanitization)
Part Number (PN): 去除所有分隔符(如 -、_、空格),統一轉大寫。例PMSM-808-LL ➔ PMSM808LL。
Customer Name: 移除法律實體後綴 (如 "Inc.", "Co., Ltd"),全形轉半形,統一轉大寫。
日期格式: 統一轉換為 YYYY-MM-DD無效日期視為 Null。
2. 核心比對引擎 (Matching Engine) - 瀑布式邏輯
為解決客戶名稱不一致(如別名、子公司)問題,系統採用 三層級瀑布式比對 (Waterfall Matching)。優先級由高至低,一旦上層匹配成功,即鎖定關聯,不再向下尋找。
優先級 1案號精準比對 (Golden Key) 🥇
邏輯:直接透過 CRM/ERP 系統生成的唯一案號進行勾稽。
對應欄位:
DIT Report: R 欄 (Opportunity ID / 案號)
Sample Log: AU 欄 (Oppy No)
信心水準100% (絕對準確)
適用情境:業務在申請樣品時已正確填寫案號。
優先級 2客戶代碼比對 (Silver Key) 🥈
邏輯:若無案號,則比對 ERP 客戶代碼 (Account Number)。
對應欄位:
DIT Report: AQ 欄 (ERP Account No)
Sample Log: G 欄 (客戶編號)
信心水準99% (解決同名異字問題,如 "Liteon" vs "光寶")。
限制:需同時滿足 Account Match AND Normalized Part Number Match。
優先級 3名稱模糊比對 (Fallback Mechanism) 🥉
邏輯:前兩者皆空值時,使用 Levenshtein Distance 演算法計算名稱相似度。
對應欄位Customer Name vs 客戶名稱
信心水準80% ~ 90% (不確定性高)
處理機制:系統標記為 Pending Review強制進入 Human-in-the-Loop (人工審核) 流程,需人工確認後才計入績效。
註:訂單 (Order) 資料通常無 Oppy ID故訂單比對主要依賴 Priority 2 (Account + PN) 與 Priority 3 (Name + PN)。
3. 歸因與時間窗邏輯 (Attribution & Time Window)
定義「何時」發生的送樣與訂單可以算在該 DIT 的績效上。
3.1 時間窗 (Time Window)
DIT → Sample:
Sample Date 必須在 DIT Date 的 前 30 天 (容許先跑流程後補單) 至 今日 之間。
DIT → Order:
Order Date 必須在 DIT Date (或 First Sample Date) 的 前 30 天 之後。
目的:排除 DIT 建立很久之前的舊訂單(那些屬於舊案子維護,非新開發案)。
3.2 多對多歸因法則 (LIFO Logic)
針對「同一客戶、同一料號」有多筆 DIT 的情況,採用 LIFO (Last-In-First-Out) 庫存扣抵法 進行業績分配:
將同料號的 DIT 按建立日期 由新到舊 排序。
將該料號的總訂單量 (Backlog + Shipped) 放入「業績池 (Revenue Pool)」。
優先滿足 最新 的 DIT EAU 額度。
若有剩餘業績,再分配給次新的 DIT依此類推。
目的:確保業績優先反映在最新的開發專案上,避免舊案子無限期佔用新訂單的功勞。
4. 關鍵績效指標定義 (KPI Definitions)
系統最終產出的量化指標,用於衡量業務轉換效率。
指標名稱 | 計算公式 | 業務意涵
送樣轉換率 (Sample Rate) | (有匹配到樣品的 DIT 數) / (總 DIT 數) | 衡量前端 Design-In 開案後,成功推進到送樣階段的能力。
訂單命中率 (Hit Rate) | (有匹配到訂單的 DIT 數) / (總 DIT 數) | 衡量開發案最終轉化為實際營收的成功率 (Binary)。
EAU 達成率 (Fulfillment Rate) | (歸因之訂單總量) / (DIT 預估 EAU) | 衡量客戶預估量 (Forecast) 的準確度與實際拉貨力道。
無效送樣率 (Orphan Sample) | (未匹配到 DIT 的送樣數) / (總送樣數) | 監控是否有「偷跑」或「未立案」即送樣的資源浪費行為。
5. 系統輸出與審核
DIT 歸因明細表:每一列 DIT 清楚標示匹配到的 Sample No 與 Order No。
可追溯性 (Traceability):滑鼠懸停 (Hover) 可顯示匹配邏輯來源 (如 "Matched via Opportunity ID: OP12345")。
人工介入:對於模糊比對的案件,提供 UI 介面供使用者點選 Accept / Reject並將結果回寫至資料庫作為日後自動比對的訓練樣本。

68
backend/test_etl.py Normal file
View File

@@ -0,0 +1,68 @@
import sys
import os
from pathlib import Path
import pandas as pd
# Add backend to path
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from app.models import SessionLocal, init_db
from app.routers.etl import import_data, ImportRequest
from app.services.excel_parser import excel_parser
def test_import():
    """End-to-end smoke test: parse, import, and match all three data sources.

    Feeds the DIT report, sample log, and order detail workbooks through the
    Excel parser and ETL import, then runs the waterfall matcher. Any error
    is printed with its traceback instead of raised, so the DB session is
    always closed cleanly via the ``finally`` block.
    """
    db = SessionLocal()
    try:
        base_dir = Path(__file__).resolve().parent.parent
        upload_dir = base_dir / "data" / "uploads"
        # (workbook filename, parser kind, display label) per data source,
        # in import order: DIT first, then samples, then orders.
        datasets = [
            ("DIT report (by Focus Item)_app_樣本 2.xlsx", "dit", "DIT"),
            ("樣品申請紀錄_樣本_9月.xlsx", "sample", "Sample"),
            ("訂單樣本_20251217_調整.xlsx", "order", "Order"),
        ]
        for filename, kind, label in datasets:
            path = upload_dir / filename
            print(f"Parsing {path}...")
            file_id, info = excel_parser.parse_file(path, kind)
            print(f"File ID: {file_id}, Rows: {info['row_count']}")
            print(f"Importing {label}...")
            result = import_data(ImportRequest(file_id=file_id), db)
            print(f"Imported {result.imported_count} {label} records.")
        # Matching runs only after all three sources are loaded.
        from app.services.fuzzy_matcher import FuzzyMatcher
        print("Running Matching...")
        match_res = FuzzyMatcher(db).run_matching()
        print(f"Matching completed: {match_res}")
    except Exception:
        import traceback
        print("Error during test:")
        print(traceback.format_exc())
    finally:
        db.close()
# Script entry point: run the full ETL + matching smoke test when executed
# directly (e.g. `python test_etl.py`).
if __name__ == "__main__":
    test_import()

53
backend/update_db.py Normal file
View File

@@ -0,0 +1,53 @@
from sqlalchemy import create_engine, text
from app.config import DATABASE_URL, TABLE_PREFIX
def update_schema():
    """Apply additive schema migrations for the waterfall-matching columns.

    Each ``ALTER TABLE`` is attempted independently; a failure (typically a
    "duplicate column" error on an already-migrated database) is reported
    and skipped, so the script is safe to re-run.
    """
    engine = create_engine(DATABASE_URL)
    # (table suffix, column name, column DDL) — actual table names carry
    # TABLE_PREFIX from app.config.
    migrations = [
        ("DIT_Records", "erp_account", "VARCHAR(100)"),
        ("Sample_Records", "oppy_no", "VARCHAR(100)"),
        ("Sample_Records", "cust_id", "VARCHAR(100)"),
        ("Order_Records", "cust_id", "VARCHAR(100)"),
        ("Match_Results", "match_priority", "INTEGER DEFAULT 3"),
        ("Match_Results", "match_source", "VARCHAR(255)"),
    ]
    with engine.connect() as conn:
        print("Updating schema...")
        for table, column, ddl in migrations:
            try:
                conn.execute(text(
                    f"ALTER TABLE {TABLE_PREFIX}{table} ADD COLUMN {column} {ddl}"
                ))
                print(f"Added {column} to {table}")
            except Exception as e:
                print(f"{column} might already exist: {e}")
        conn.commit()
        print("Schema update completed.")
# Script entry point: apply the additive schema migrations when executed
# directly (e.g. `python update_db.py`).
if __name__ == "__main__":
    update_schema()