""" DIT 智能分析模組 解析 DIT CSV 報表,產出行動建議卡片 (Action Cards) """ import pandas as pd import numpy as np from datetime import datetime from typing import List, Dict, Optional, Any class DITAnalyzer: """DIT 報表分析器""" def __init__(self, file_path: Optional[str] = None, dataframe: Optional[pd.DataFrame] = None): """ 初始化分析器 Args: file_path: CSV 檔案路徑 dataframe: 或直接傳入 DataFrame """ self.df: Optional[pd.DataFrame] = None self.processed: bool = False if file_path: self.load_data(file_path) elif dataframe is not None: self.df = dataframe.copy() self._preprocess() def load_data(self, file_path: str) -> 'DITAnalyzer': """ 載入 CSV 資料 Args: file_path: CSV 檔案路徑 Returns: self (支援鏈式呼叫) """ try: self.df = pd.read_csv(file_path, encoding='utf-8') except UnicodeDecodeError: self.df = pd.read_csv(file_path, encoding='cp950') except Exception as e: raise DITAnalyzerError(f"無法載入檔案: {e}") self._preprocess() return self def _preprocess(self) -> None: """執行資料清洗與預處理""" if self.df is None: raise DITAnalyzerError("尚未載入資料") # 1. 欄位清洗:移除欄位名稱前後空白 self.df.columns = self.df.columns.str.strip() # 2. 日期轉換 date_columns = ['Created Date', 'Approved date', 'Close Date'] for col in date_columns: if col in self.df.columns: self.df[col] = pd.to_datetime(self.df[col], errors='coerce') # 3. 數值轉換:Total Price if 'Total Price' in self.df.columns: self.df['Total Price'] = pd.to_numeric( self.df['Total Price'].astype(str).str.replace(',', ''), errors='coerce' ).fillna(0) # 4. 應用領域推導 (Derived_Application) self.df['Derived_Application'] = self._derive_application() # 5. 狀態標記 if 'Stage' in self.df.columns: self.df['Is_Lost'] = self.df['Stage'].str.contains( 'Lost', case=False, na=False ) self.df['Is_Active'] = ~self.df['Is_Lost'] else: self.df['Is_Lost'] = False self.df['Is_Active'] = True self.processed = True def _derive_application(self) -> pd.Series: """ 推導應用領域 優先順序: Application → Application Detail → Opportunity Name → "Unknown" """ def get_app(row): # 檢查 Application if 'Application' in row.index: val = row.get('Application') if pd.notna(val) and str(val).strip(): return str(val).strip() # 檢查 Application Detail if 'Application Detail' in row.index: val = row.get('Application Detail') if pd.notna(val) and str(val).strip(): return str(val).strip() # 檢查 Opportunity Name if 'Opportunity Name' in row.index: val = row.get('Opportunity Name') if pd.notna(val) and str(val).strip(): return str(val).strip() return "Unknown" return self.df.apply(get_app, axis=1) def analyze_resource_allocation( self, top_percent: float = 0.2, low_win_rate: float = 0.1 ) -> List[Dict[str, Any]]: """ Feature 6.2: 高價值資源分配建議 找出「金礦區」— 金額大但勝率低的應用領域 Args: top_percent: 金額排名前 X% (預設 20%) low_win_rate: 勝率門檻 (預設 10%) Returns: Action Cards 列表 """ if not self.processed: raise DITAnalyzerError("資料尚未預處理") # 依 Derived_Application 分組 grouped = self.df.groupby('Derived_Application').agg({ 'Total Price': 'sum', 'Is_Active': 'mean', 'Account Name': lambda x: x.value_counts().head(3).index.tolist() }).reset_index() grouped.columns = ['Application', 'Sum_Total_Price', 'Win_Rate', 'Top_Accounts'] # 排序並取 Top 20% grouped = grouped.sort_values('Sum_Total_Price', ascending=False) top_n = max(1, int(len(grouped) * top_percent)) top_apps = grouped.head(top_n) # 篩選勝率低於門檻的 low_win_apps = top_apps[top_apps['Win_Rate'] < low_win_rate] # 產出 Action Cards action_cards = [] for _, row in low_win_apps.iterrows(): money_formatted = f"${row['Sum_Total_Price']:,.0f}" win_rate_pct = f"{row['Win_Rate'] * 100:.1f}" top_accounts = ', '.join(row['Top_Accounts'][:3]) if row['Top_Accounts'] else '無' action_cards.append({ "type": "resource_allocation", "title": "高潛力市場攻堅提醒", "application": row['Application'], "money": money_formatted, "money_raw": row['Sum_Total_Price'], "win_rate": win_rate_pct, "win_rate_raw": row['Win_Rate'], "top_accounts": row['Top_Accounts'][:3] if row['Top_Accounts'] else [], "suggestion": ( f"{row['Application']} 領域潛在商機巨大 ({money_formatted})," f"但目前勝率偏低 ({win_rate_pct}%)。" f"建議指派資深 FAE 介入該領域的前三大案子 (如 {top_accounts})。" ) }) return action_cards def analyze_stagnant_deals( self, threshold_days: int = 60, reference_date: Optional[datetime] = None ) -> List[Dict[str, Any]]: """ Feature 6.3: 呆滯案件警示 針對技術已承認但商務卡關的案子進行催單 Args: threshold_days: 呆滯天數門檻 (預設 60 天) reference_date: 參考日期 (預設為當前日期) Returns: Action Cards 列表 """ if not self.processed: raise DITAnalyzerError("資料尚未預處理") if reference_date is None: reference_date = datetime.now() # 檢查必要欄位 if 'Approved date' not in self.df.columns: return [] # 篩選條件 mask = ( (self.df['Stage'].str.contains('Negotiation', case=False, na=False)) & (self.df['Approved date'].notna()) ) filtered = self.df[mask].copy() if filtered.empty: return [] # 計算呆滯天數 filtered['Days_Since_Approved'] = ( reference_date - filtered['Approved date'] ).dt.days # 篩選超過門檻的 stagnant = filtered[filtered['Days_Since_Approved'] > threshold_days] # 產出 Action Cards action_cards = [] for _, row in stagnant.iterrows(): days = int(row['Days_Since_Approved']) months = days // 30 account = row.get('Account Name', 'Unknown') project = row.get('Opportunity Name', 'Unknown') approved_date = row['Approved date'].strftime('%Y-%m-%d') if pd.notna(row['Approved date']) else 'N/A' action_cards.append({ "type": "stagnant_deal", "title": "呆滯案件喚醒", "account": account, "project": project, "approved_date": approved_date, "days_pending": days, "months_pending": months, "suggestion": ( f"客戶 {account} 的 {project} 已承認超過 {months} 個月 ({days} 天),仍未轉單。" f"請業務確認是否為「價格」或「庫存」問題。若無下文,應要求客戶給出 Forecast。" ) }) # 依天數排序 (最久的在前) action_cards.sort(key=lambda x: x['days_pending'], reverse=True) return action_cards def generate_report( self, top_percent: float = 0.2, low_win_rate: float = 0.1, threshold_days: int = 60 ) -> Dict[str, Any]: """ 彙整所有分析結果 Args: top_percent: 高價值分析的金額門檻 low_win_rate: 高價值分析的勝率門檻 threshold_days: 呆滯分析的天數門檻 Returns: 完整分析報告 (Dict) """ if not self.processed: raise DITAnalyzerError("資料尚未預處理") allocation_suggestions = self.analyze_resource_allocation(top_percent, low_win_rate) stagnant_alerts = self.analyze_stagnant_deals(threshold_days) # 統計摘要 summary = self._generate_summary() return { "generated_at": datetime.now().isoformat(), "summary": summary, "action_cards": { "resource_allocation": allocation_suggestions, "stagnant_deals": stagnant_alerts }, "total_alerts": len(allocation_suggestions) + len(stagnant_alerts) } def _generate_summary(self) -> Dict[str, Any]: """產生統計摘要""" total_records = len(self.df) total_value = self.df['Total Price'].sum() active_count = self.df['Is_Active'].sum() lost_count = self.df['Is_Lost'].sum() # 各階段統計 stage_stats = {} if 'Stage' in self.df.columns: stage_stats = self.df['Stage'].value_counts().to_dict() # 應用領域 Top 5 app_stats = self.df.groupby('Derived_Application')['Total Price'].sum() top_apps = app_stats.nlargest(5).to_dict() return { "total_records": total_records, "total_value": f"${total_value:,.0f}", "total_value_raw": total_value, "active_count": int(active_count), "lost_count": int(lost_count), "win_rate": f"{(active_count / total_records * 100):.1f}%" if total_records > 0 else "0%", "stage_distribution": stage_stats, "top_applications": {k: f"${v:,.0f}" for k, v in top_apps.items()} } def get_dataframe(self) -> pd.DataFrame: """取得處理後的 DataFrame""" if self.df is None: raise DITAnalyzerError("尚未載入資料") return self.df.copy() class DITAnalyzerError(Exception): """DIT 分析器錯誤""" pass