# hr-position-system/scripts/convert_to_table.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-

# Convert a tab-separated export (excel.md) into a four-column Markdown table
# (excel_table.md).  Blank hierarchy cells in the export are filled in with
# the most recent value seen above them.
from typing import Iterable, List, Optional

INPUT_PATH = 'excel.md'
OUTPUT_PATH = 'excel_table.md'

TABLE_HEADER = '| 事業體 | 處級單位 | 部級單位 | 崗位名稱 |'
TABLE_DIVIDER = '|--------|----------|----------|----------|'

_DIVISION = 'division'      # a "處"-level unit
_DEPARTMENT = 'department'  # a "部"-level unit


def _unit_level(unit_name: str) -> Optional[str]:
    """Classify a unit name from the third column.

    A name ending in "處" is a division.  Otherwise any name containing "部"
    is a department, and any remaining name containing "處" is a division.
    Names containing neither character return ``None``.
    """
    if unit_name.endswith('處'):
        return _DIVISION
    if '部' in unit_name:
        return _DEPARTMENT
    if '處' in unit_name:
        return _DIVISION
    return None


def parse_rows(data_lines: Iterable[str]) -> List[List[str]]:
    """Turn tab-separated data lines into fully populated table rows.

    Each input line is expected to hold, in order: business unit, division,
    unit name, position name.  Empty hierarchy cells inherit the last value
    seen on an earlier line.  Whenever a higher level changes, the levels
    below it are reset so that stale values do not leak into the new branch.

    Args:
        data_lines: Data lines without the header line; trailing newlines
            are allowed.

    Returns:
        One ``[business, division, department, position]`` list per input
        line that carries a position name.  Lines without one only update
        the carried-forward hierarchy.
    """
    business = ''
    division = ''
    department = ''
    rows = []

    for raw_line in data_lines:
        line = raw_line.rstrip('\n\r')
        if not line.strip():
            continue

        cells = [cell.strip() for cell in line.split('\t')]
        # Pad short lines to four cells (a non-positive count adds nothing).
        cells += [''] * (4 - len(cells))

        # Column 1: business unit.  A different business unit starts a new
        # branch, so the previous division and department no longer apply.
        if cells[0] and cells[0] != business:
            business = cells[0]
            division = ''
            department = ''

        # Column 2: division.  A different division invalidates the
        # department carried over from the previous one.
        if cells[1] and cells[1] != division:
            division = cells[1]
            department = ''

        # Column 3: unit name, which may denote a division or a department.
        unit_name = cells[2]
        if unit_name:
            level = _unit_level(unit_name)
            if level == _DIVISION:
                division = unit_name
                department = ''
            elif level == _DEPARTMENT:
                department = unit_name

        # Column 4: position name.  If it is empty, fall back to the
        # right-most non-empty cell beyond the fourth column.
        position_name = cells[3] or next(
            (cell for cell in reversed(cells[4:]) if cell), ''
        )

        # Only lines that carry a position name produce an output row.
        if position_name:
            rows.append([business, division, department, position_name])

    return rows


def render_table(rows: Iterable[List[str]]) -> str:
    """Render rows as a Markdown table, escaping pipe characters in cells.

    Args:
        rows: Lists of cell strings, one list per table row.

    Returns:
        The table text with lines joined by ``\\n`` and no trailing newline.
    """
    table_lines = [TABLE_HEADER, TABLE_DIVIDER]
    for row in rows:
        # A literal "|" would otherwise be read as a column separator.
        escaped = [cell.replace('|', '\\|') for cell in row]
        table_lines.append('| ' + ' | '.join(escaped) + ' |')
    return '\n'.join(table_lines)


def main() -> None:
    """Read INPUT_PATH, convert it, write OUTPUT_PATH and print a summary."""
    with open(INPUT_PATH, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    # The first line is the header of the export and is skipped.
    rows = parse_rows(lines[1:])

    with open(OUTPUT_PATH, 'w', encoding='utf-8') as f:
        f.write(render_table(rows))

    print(f"轉換完成！共生成 {len(rows)} 行數據。")
    print(f"輸出文件：{OUTPUT_PATH}")


if __name__ == '__main__':
    main()