- 新增 _calculate_lane_conflicts_v2() 分開返回標籤重疊和線穿框分數 - 修改泳道選擇算法,優先選擇無標籤重疊的泳道 - 兩階段搜尋:優先側別無可用泳道則嘗試另一側 - 增強日誌輸出,顯示標籤範圍和詳細衝突分數 🤖 Generated with Claude Code Co-Authored-By: Claude <noreply@anthropic.com>
518 lines
17 KiB
Python
518 lines
17 KiB
Python
"""
|
||
CSV/XLSX 匯入模組
|
||
|
||
本模組負責處理時間軸事件的資料匯入。
|
||
支援 CSV 和 XLSX 格式,包含欄位自動對應與格式容錯功能。
|
||
|
||
Author: AI Agent
|
||
Version: 1.0.0
|
||
DocID: SDD-IMP-001
|
||
Related: TDD-UT-IMP-001
|
||
Rationale: 實現 SDD.md 定義的 POST /import API 功能
|
||
"""
|
||
|
||
import csv
|
||
import re
|
||
from datetime import datetime, timedelta
|
||
from pathlib import Path
|
||
from typing import List, Dict, Any, Optional, Union
|
||
import logging
|
||
|
||
try:
|
||
import pandas as pd
|
||
PANDAS_AVAILABLE = True
|
||
except ImportError:
|
||
PANDAS_AVAILABLE = False
|
||
|
||
from .schemas import Event, ImportResult, EventType
|
||
|
||
# 設定日誌
|
||
logger = logging.getLogger(__name__)
|
||
|
||
|
||
class ImporterError(Exception):
    """Base class for errors raised by the importer."""
|
||
|
||
|
||
class FieldMapper:
    """
    Map arbitrary CSV/XLSX column headers onto the importer's standard fields.

    Matching ignores surrounding whitespace and letter case, and accepts the
    English and Chinese spellings listed in FIELD_MAPPING.
    """

    # Standard field name -> header spellings accepted for that field.
    FIELD_MAPPING = {
        'id': ['id', 'ID', '編號', '序號', 'identifier'],
        'title': ['title', 'Title', '標題', '名稱', 'name', 'event'],
        'start': ['start', 'Start', '開始', '開始時間', 'start_time', 'begin', 'time', 'Time', '時間', 'date', 'Date', '日期'],
        'group': ['group', 'Group', '群組', '分類', 'category', 'phase'],
        'description': ['description', 'Description', '描述', '說明', 'detail', 'note'],
        'color': ['color', 'Color', '顏色', 'colour'],
    }

    @classmethod
    def map_fields(cls, headers: List[str]) -> Dict[str, str]:
        """
        Map the header names of a CSV/XLSX file onto the standard fields.

        Args:
            headers: Original header names, in file order. Non-string labels
                (for example numeric column labels from a spreadsheet) are
                tolerated and simply never match.

        Returns:
            Mapping of {standard field: original header}. The original header
            is returned untouched (case and whitespace preserved) so it can be
            used directly as a key into the parsed rows. When several headers
            match the same standard field, the first one in file order wins.
        """
        if not headers:
            return {}

        # Normalise every header once. str() guards against non-string labels,
        # which would otherwise raise AttributeError on .strip().
        normalized = [(header, str(header).strip().lower()) for header in headers]

        mapping = {}
        for standard_field, variants in cls.FIELD_MAPPING.items():
            accepted = {variant.lower() for variant in variants}
            for original_header, key in normalized:
                if key in accepted:
                    mapping[standard_field] = original_header
                    break

        return mapping

    @classmethod
    def validate_required_fields(cls, mapping: Dict[str, str]) -> List[str]:
        """
        Report which required fields are absent from a header mapping.

        Args:
            mapping: Field mapping as returned by map_fields().

        Returns:
            The required standard fields ('id', 'title', 'start') that are
            missing from the mapping, in that order; empty when all are present.
        """
        required_fields = ['id', 'title', 'start']
        return [field for field in required_fields if field not in mapping]
|
||
|
||
|
||
class DateParser:
    """
    Tolerant date parser.

    Accepts strings in any of DATE_FORMATS, Excel serial day numbers, and,
    when pandas is installed, anything pandas.to_datetime can interpret.
    """

    # strptime formats tried in order; the first one that matches wins.
    DATE_FORMATS = [
        '%Y-%m-%d %H:%M:%S',
        '%Y/%m/%d %H:%M:%S',
        '%Y-%m-%d',
        '%Y/%m/%d',
        '%d-%m-%Y',
        '%d/%m/%Y',
        '%Y年%m月%d日',
        '%Y年%m月%d日 %H:%M:%S',
        '%Y-%m-%dT%H:%M:%S',
        '%Y-%m-%dT%H:%M:%S.%f',
    ]

    @staticmethod
    def _from_pandas(value) -> Optional[datetime]:
        """Turn a pandas parse result into a plain datetime, or None for a missing value."""
        if pd.isna(value):
            return None
        if isinstance(value, pd.Timestamp):
            return value.to_pydatetime()
        return value

    @classmethod
    def parse(cls, date_str: str) -> Optional[datetime]:
        """
        Parse a date value.

        Args:
            date_str: A date string, or an Excel serial day number (int/float).

        Returns:
            A datetime object, or None when the value is empty or cannot be
            parsed. Failures are logged as warnings rather than raised.
        """
        if not date_str or (isinstance(date_str, str) and not date_str.strip()):
            return None

        # Numbers are treated as Excel serial dates, counted in days from
        # 1899-12-30; the fractional part carries the time of day.
        if isinstance(date_str, (int, float)):
            try:
                if PANDAS_AVAILABLE:
                    return cls._from_pandas(
                        pd.to_datetime(date_str, origin='1899-12-30', unit='D')
                    )
                # Standard-library fallback. float() keeps the fractional day
                # so both code paths agree on the time of day.
                return datetime(1899, 12, 30) + timedelta(days=float(date_str))
            except Exception as e:
                logger.warning(f"無法解析 Excel 日期序列號 {date_str}: {str(e)}")
                return None

        date_str = str(date_str).strip()

        # Try each explicit format first.
        for fmt in cls.DATE_FORMATS:
            try:
                return datetime.strptime(date_str, fmt)
            except ValueError:
                continue

        # Fall back to pandas' flexible parser when it is available.
        if PANDAS_AVAILABLE:
            try:
                parsed = cls._from_pandas(pd.to_datetime(date_str))
            except Exception:
                # Not fatal: the failure is reported by the warning below.
                parsed = None
            if parsed is not None:
                return parsed

        logger.warning(f"無法解析日期: {date_str}")
        return None
|
||
|
||
|
||
class ColorValidator:
    """
    Validator for HEX colour codes with a rotating palette of fallbacks.
    """

    # Exactly '#' followed by six hexadecimal digits.
    HEX_PATTERN = re.compile(r'^#[0-9A-Fa-f]{6}$')

    # Fallback palette, selected by index modulo its length.
    DEFAULT_COLORS = [
        '#3B82F6',  # blue
        '#10B981',  # green
        '#F59E0B',  # orange
        '#EF4444',  # red
        '#8B5CF6',  # purple
        '#EC4899',  # pink
        '#14B8A6',  # teal
        '#F97316',  # dark orange
    ]

    @classmethod
    def validate(cls, color: str, index: int = 0) -> str:
        """
        Return a valid HEX colour code for the given input.

        Args:
            color: Colour string; a missing leading '#' is added and the
                value is upper-cased before validation.
            index: Position used to pick a fallback colour from DEFAULT_COLORS.

        Returns:
            The normalised colour when it is a six-digit HEX code, otherwise
            the fallback colour selected by index.
        """
        # Coerce to int because callers may hand over a float (e.g. via pandas).
        slot = 0 if index is None else int(index)
        fallback = cls.DEFAULT_COLORS[slot % len(cls.DEFAULT_COLORS)]

        if not color:
            return fallback

        candidate = str(color).strip().upper()

        # Tolerate codes written without the leading '#'.
        if candidate[:1] != '#':
            candidate = '#' + candidate

        if cls.HEX_PATTERN.match(candidate) is None:
            # Not a six-digit HEX code: report it and use the palette instead.
            logger.warning(f"無效的顏色格式: {candidate},使用預設顏色")
            return fallback

        return candidate
|
||
|
||
|
||
class CSVImporter:
    """
    CSV/XLSX importer.

    Reads a CSV or XLSX file and converts its rows into a list of Event
    objects, reported through an ImportResult.
    """

    def __init__(self):
        self.field_mapper = FieldMapper()
        self.date_parser = DateParser()
        self.color_validator = ColorValidator()

    @staticmethod
    def _failure(message: str) -> ImportResult:
        """Build the ImportResult returned when a file cannot be imported at all."""
        return ImportResult(
            success=False,
            errors=[message],
            total_rows=0,
            imported_count=0
        )

    @staticmethod
    def _safe_str(value: Any) -> str:
        """Convert a cell value to a stripped string, mapping None/NaN to ''."""
        if value is None:
            return ''
        if PANDAS_AVAILABLE and pd.isna(value):
            return ''
        if isinstance(value, float):
            if value != value:
                # NaN when pandas is not available to detect it.
                return ''
            # Whole-number floats (e.g. 12.0) are rendered as '12'.
            # is_integer() is False for inf, so no conversion error is raised.
            if value.is_integer():
                return str(int(value))
        return str(value).strip()

    def _optional_value(self, row: Dict[str, Any], field_mapping: Dict[str, str], field: str) -> str:
        """Return the text of an optional field, or '' when no column is mapped to it."""
        if field not in field_mapping:
            # Never fall back to looking up '' in the row: a column with a
            # blank header would otherwise be mistaken for this field.
            return ''
        return self._safe_str(row.get(field_mapping[field], ''))

    def _map_headers(self, headers: List[str], source: str):
        """Map headers to standard fields; return (mapping, error message or None)."""
        field_mapping = self.field_mapper.map_fields(headers)
        logger.info(f"{source} 欄位映射結果: {field_mapping}")
        logger.info(f"原始欄位: {headers}")

        missing_fields = self.field_mapper.validate_required_fields(field_mapping)
        if missing_fields:
            logger.error(f"缺少必要欄位: {missing_fields}")
            return field_mapping, f"缺少必要欄位: {', '.join(missing_fields)}"
        return field_mapping, None

    def _collect_events(self, rows, field_mapping: Dict[str, str]):
        """Parse every row; return (events, per-row error messages, number of rows seen)."""
        events = []
        errors = []
        total_rows = 0

        # Data starts on line 2 because line 1 holds the headers.
        for row_num, row in enumerate(rows, start=2):
            total_rows += 1
            try:
                logger.debug(f"處理第 {row_num} 行: {row}")
                event = self._parse_row(row, field_mapping, row_num)
                if event:
                    events.append(event)
                    logger.debug(f"成功匯入第 {row_num} 行")
                else:
                    logger.warning(f"第 {row_num} 行返回 None")
            except Exception as e:
                # A bad row is recorded and skipped; it does not abort the import.
                error_msg = f"第 {row_num} 行錯誤: {str(e)}"
                errors.append(error_msg)
                logger.error(error_msg)

        return events, errors, total_rows

    def import_file(self, file_path: Union[str, Path]) -> ImportResult:
        """
        Import a CSV or XLSX file.

        Args:
            file_path: Path of the file to import.

        Returns:
            An ImportResult. It is a failure result when the file does not
            exist or its extension is not .csv, .xlsx or .xls.
        """
        file_path = Path(file_path)

        if not file_path.exists():
            return self._failure(f"檔案不存在: {file_path}")

        # Dispatch on the (case-insensitive) file extension.
        suffix = file_path.suffix.lower()
        if suffix == '.csv':
            return self._import_csv(file_path)
        if suffix in ('.xlsx', '.xls'):
            return self._import_xlsx(file_path)
        return self._failure(f"不支援的檔案格式: {file_path.suffix}")

    def _import_csv(self, file_path: Path) -> ImportResult:
        """
        Import a CSV file.

        Args:
            file_path: Path of the CSV file.

        Returns:
            An ImportResult. Rows that fail to parse are listed in its errors
            while the remaining rows are still imported.
        """
        try:
            # utf-8-sig drops a leading BOM; newline='' lets the csv module
            # handle line endings itself, as quoted multi-line fields require.
            with open(file_path, 'r', encoding='utf-8-sig', newline='') as f:
                reader = csv.DictReader(f)
                headers = reader.fieldnames

                if not headers:
                    return self._failure("CSV 檔案為空")

                field_mapping, problem = self._map_headers(headers, "CSV")
                if problem:
                    return self._failure(problem)

                events, errors, total_rows = self._collect_events(reader, field_mapping)

            return ImportResult(
                success=True,
                events=events,
                errors=errors,
                total_rows=int(total_rows),
                imported_count=int(len(events))
            )

        except Exception as e:
            # Boundary handler: any I/O or decoding problem becomes a failure result.
            logger.error(f"CSV 匯入失敗: {str(e)}")
            return self._failure(f"CSV 匯入失敗: {str(e)}")

    def _import_xlsx(self, file_path: Path) -> ImportResult:
        """
        Import an XLSX file.

        Args:
            file_path: Path of the XLSX file.

        Returns:
            An ImportResult. It is a failure result when pandas is not
            installed or the first worksheet has no data rows.
        """
        if not PANDAS_AVAILABLE:
            return self._failure("需要安裝 pandas 和 openpyxl 以支援 XLSX 匯入")

        try:
            # Only the first worksheet is read.
            df = pd.read_excel(file_path, sheet_name=0)

            if df.empty:
                return self._failure("XLSX 檔案為空")

            records = df.to_dict('records')
            headers = df.columns.tolist()

            field_mapping, problem = self._map_headers(headers, "XLSX")
            if problem:
                return self._failure(problem)

            events, errors, total_rows = self._collect_events(records, field_mapping)

            return ImportResult(
                success=True,
                events=events,
                errors=errors,
                total_rows=int(total_rows),
                imported_count=int(len(events))
            )

        except Exception as e:
            # Boundary handler: any read or parse problem becomes a failure result.
            logger.error(f"XLSX 匯入失敗: {str(e)}")
            return self._failure(f"XLSX 匯入失敗: {str(e)}")

    def _parse_row(self, row: Dict[str, Any], field_mapping: Dict[str, str], row_num: int) -> Optional[Event]:
        """
        Parse a single row of data.

        Args:
            row: Row data keyed by the original header names.
            field_mapping: Mapping of {standard field: original header}.
            row_num: Row number, used to pick a fallback colour.

        Returns:
            The Event built from the row.

        Raises:
            ValueError: If the id, title or time is missing, or the time
                cannot be parsed.
        """
        logger.debug(f" Row keys: {list(row.keys())}")
        logger.debug(f" Field mapping: {field_mapping}")

        # Required fields are always present in the mapping at this point;
        # optional ones may not be mapped to any column.
        event_id = self._safe_str(row.get(field_mapping['id'], ''))
        title = self._safe_str(row.get(field_mapping['title'], ''))
        start_str = self._safe_str(row.get(field_mapping['start'], ''))
        group = self._optional_value(row, field_mapping, 'group') or None
        description = self._optional_value(row, field_mapping, 'description') or None
        color = self._optional_value(row, field_mapping, 'color')

        logger.debug(f" 提取欄位 - ID: '{event_id}', 標題: '{title}', 時間: '{start_str}'")

        if not event_id or not title:
            raise ValueError("缺少 ID 或標題")

        if not start_str:
            raise ValueError("缺少時間欄位")

        # There is a single time column per event.
        start = self.date_parser.parse(start_str)
        if not start:
            raise ValueError(f"無效的時間: {start_str}")

        # Convert a pandas Timestamp into a standard datetime.
        if PANDAS_AVAILABLE and isinstance(start, pd.Timestamp):
            start = start.to_pydatetime()

        color = self.color_validator.validate(color, int(row_num))
        if not color:
            # Defensive: guarantee a string colour even if validation yields nothing.
            color = self.color_validator.DEFAULT_COLORS[0]

        # Every event is a point in time; the end field is no longer used.
        event_type = EventType.POINT
        end = None

        try:
            event = Event(
                id=event_id,
                title=title,
                start=start,
                end=end,
                group=group,
                description=description,
                color=color,
                event_type=event_type
            )
            logger.debug(f"Event 創建成功: id={type(event.id).__name__}, title={type(event.title).__name__}, "
                         f"start={type(event.start).__name__}, end={type(event.end).__name__ if event.end else 'None'}, "
                         f"group={type(event.group).__name__ if event.group else 'None'}, "
                         f"description={type(event.description).__name__ if event.description else 'None'}, "
                         f"color={type(event.color).__name__}")
            return event
        except Exception as e:
            # Log every field with its type to make construction failures
            # diagnosable, then let the caller record the row error.
            logger.error(f"創建 Event 失敗: {str(e)}")
            logger.error(f" id={event_id} ({type(event_id).__name__})")
            logger.error(f" title={title} ({type(title).__name__})")
            logger.error(f" start={start} ({type(start).__name__})")
            logger.error(f" end={end} ({type(end).__name__ if end else 'None'})")
            logger.error(f" group={group} ({type(group).__name__ if group else 'None'})")
            logger.error(f" description={description} ({type(description).__name__ if description else 'None'})")
            logger.error(f" color={color} ({type(color).__name__})")
            raise
|
||
|
||
|
||
# 匯出主要介面
|
||
__all__ = ['CSVImporter', 'ImportResult', 'ImporterError']
|