Files
OCR/setup_dev_env.sh
egg d7f7166a2d feat: unify environment scripts with start.sh
- Add unified start.sh script with subcommands (all/backend/frontend)
- Add process management (--stop, --status)
- Remove separate start_backend.sh and start_frontend.sh
- Update setup_dev_env.sh with pre-flight checks and --cpu-only/--skip-db options
- Update .env.example to remove sensitive data and add DIFY translation config
- Add .pid/ to .gitignore for process management

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-12-02 12:48:52 +08:00

423 lines
12 KiB
Bash
Executable File
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/bin/bash
# Tool_OCR development environment setup script for WSL Ubuntu.
#
# Usage:
#   ./setup_dev_env.sh              Full install (GPU is auto-detected)
#   ./setup_dev_env.sh --cpu-only   Force the CPU build
#   ./setup_dev_env.sh --skip-db    Skip database initialization
#   ./setup_dev_env.sh --help       Show help
set -e # Stop at the first failing command
# Color definitions. These are literal backslash sequences (single-quoted),
# so they only render as colors when printed with `echo -e`.
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
RED='\033[0;31m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color
# Configuration variables
# Absolute path of the directory that contains this script.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# Thresholds used by the pre-flight checks below.
MIN_DISK_GB=10
MIN_MEMORY_GB=4
MIN_PYTHON_VERSION="3.10"
# Option flags; switched to true by --cpu-only / --skip-db during argument parsing.
FORCE_CPU=false
SKIP_DB=false
# ===== Help text =====
#######################################
# Print the usage summary for this script.
# Outputs: the help text on stdout, one entry per line.
#######################################
show_help() {
  printf '%s\n' \
    "Tool_OCR 開發環境設置腳本" \
    "" \
    "Usage: ./setup_dev_env.sh [options]" \
    "" \
    "Options:" \
    " --cpu-only 強制使用 CPU 版本的 PaddlePaddle (跳過 GPU 偵測)" \
    " --skip-db 跳過數據庫初始化步驟" \
    " --help 顯示此幫助信息" \
    "" \
    "Examples:" \
    " ./setup_dev_env.sh # 完整安裝" \
    " ./setup_dev_env.sh --cpu-only # 不使用 GPU" \
    ""
}
# Parse command-line options. Each recognised flag flips its global switch;
# --help prints usage and exits 0; anything else is reported and exits 1.
while [ "$#" -ne 0 ]; do
  case "$1" in
    --cpu-only) FORCE_CPU=true ;;
    --skip-db) SKIP_DB=true ;;
    -h | --help)
      show_help
      exit 0
      ;;
    *)
      echo -e "${RED}Unknown option: $1${NC}"
      show_help
      exit 1
      ;;
  esac
  # Consume the flag that was just handled.
  shift
done
# ===== Utility functions =====
#######################################
# Print the blue banner shown at the start of the run.
# Globals: BLUE, NC (read)
# Outputs: a blank line, the three-line banner, and a blank line on stdout.
#######################################
print_header() {
  local rule="================================"
  echo ""
  echo -e "${BLUE}${rule}${NC}"
  echo -e "${BLUE} Tool_OCR 開發環境設置${NC}"
  echo -e "${BLUE}${rule}${NC}"
  echo ""
}
#######################################
# Print a yellow "[step/total] message" progress line preceded by a blank line.
# Globals:   YELLOW, NC (read)
# Arguments: $1 - current step number
#            $2 - total number of steps
#            $3 - message to display
#######################################
print_step() {
  local current=$1 count=$2 label=$3
  printf '\n'
  echo -e "${YELLOW}[${current}/${count}] ${label}${NC}"
}
#######################################
# Print a message in green.
# Globals:   GREEN, NC (read)
# Arguments: $1 - message (backslash escapes are interpreted by echo -e)
#######################################
print_success() {
  local text=$1
  echo -e "${GREEN}${text}${NC}"
}
#######################################
# Print a message in yellow.
# Globals:   YELLOW, NC (read)
# Arguments: $1 - message (backslash escapes are interpreted by echo -e)
#######################################
print_warning() {
  local text=$1
  echo -e "${YELLOW}${text}${NC}"
}
#######################################
# Print a message in red (on stdout, like the other print helpers).
# Globals:   RED, NC (read)
# Arguments: $1 - message (backslash escapes are interpreted by echo -e)
#######################################
print_error() {
  local text=$1
  echo -e "${RED}${text}${NC}"
}
# ===== Pre-flight check functions =====
#######################################
# Abort when the script is running with an effective UID of 0.
# Globals: EUID (read)
# Outputs: an error plus a hint on stdout when running as root.
# Returns: 0 when not root; exits the script with status 1 otherwise.
#######################################
check_not_root() {
  if [ "$EUID" -ne 0 ]; then
    return 0
  fi
  print_error "請不要使用 sudo 運行此腳本"
  echo "腳本會在需要時提示輸入 sudo 密碼"
  exit 1
}
#######################################
# Verify that the filesystem holding SCRIPT_DIR has at least MIN_DISK_GB free.
# Globals: SCRIPT_DIR, MIN_DISK_GB (read)
# Outputs: a success, warning, or error message (with remediation hints).
# Returns: 0 when there is enough space or the amount cannot be determined
#          (the check is then skipped with a warning); exits the script with
#          status 1 when free space is below the minimum.
#######################################
check_disk_space() {
  local available_gb
  # -P forces one line per filesystem, so the data row is always line 2 even
  # when the device name is long. The declaration is kept separate from the
  # assignment so a failing pipeline is not masked by `local`.
  available_gb=$(df -P -BG "$SCRIPT_DIR" 2>/dev/null \
    | awk 'NR==2 {sub(/G$/, "", $4); print $4}') || available_gb=""

  # Without this guard an empty or garbled value makes the numeric test below
  # error out inside `if`, which silently counts as "enough space".
  if ! [[ "$available_gb" =~ ^[0-9]+$ ]]; then
    print_warning "無法檢測磁碟空間,跳過檢查"
    return 0
  fi

  if [ "$available_gb" -lt "$MIN_DISK_GB" ]; then
    print_error "磁碟空間不足: ${available_gb}GB 可用, 需要至少 ${MIN_DISK_GB}GB"
    echo ""
    echo "解決方案:"
    echo " 1. 清理不需要的文件: sudo apt autoremove && sudo apt clean"
    echo " 2. 刪除舊的 Docker 映像: docker system prune -a"
    echo " 3. 清理 pip 緩存: pip cache purge"
    exit 1
  fi
  print_success "磁碟空間: ${available_gb}GB 可用"
}
#######################################
# Report total system memory and warn when it is below MIN_MEMORY_GB.
# Globals: MIN_MEMORY_GB (read)
# Outputs: a success or warning message on stdout.
# Returns: 0 always; low or undeterminable memory is only a warning.
#######################################
check_memory() {
  local total_mb total_gb
  # LC_ALL=C keeps the "Mem:" row label untranslated so the awk match works
  # in any locale. MiB is read instead of `free -g` because -g truncates:
  # a nominal 4 GB machine reports 3 there and would trigger a false warning.
  total_mb=$(LC_ALL=C free -m 2>/dev/null | awk '/^Mem:/{print $2}') || total_mb=""

  # An empty or garbled value would make the numeric test below error out and
  # fall through to a bogus "系統記憶體: GB" success line.
  if ! [[ "$total_mb" =~ ^[0-9]+$ ]]; then
    print_warning "無法檢測系統記憶體,跳過檢查"
    return 0
  fi

  # Round to the nearest GiB.
  total_gb=$(((total_mb + 512) / 1024))

  if [ "$total_gb" -lt "$MIN_MEMORY_GB" ]; then
    print_warning "記憶體較低: ${total_gb}GB (建議 ${MIN_MEMORY_GB}GB+)"
    echo " OCR 處理可能會較慢,建議增加記憶體或使用 swap"
  else
    print_success "系統記憶體: ${total_gb}GB"
  fi
}
#######################################
# Ensure python3 exists and is at least MIN_PYTHON_VERSION (major.minor).
# Globals: MIN_PYTHON_VERSION (read)
# Outputs: the detected version on success; an error with an install hint
#          otherwise.
# Returns: 0 when the version is acceptable; exits the script with status 1
#          when python3 is missing or too old.
#######################################
check_python_version() {
  if ! command -v python3 &> /dev/null; then
    print_error "未找到 Python3"
    echo ""
    echo "解決方案:"
    echo " sudo apt update && sudo apt install python3 python3-pip python3-venv"
    exit 1
  fi

  local detected=$(python3 -c 'import sys; print(f"{sys.version_info.major}.{sys.version_info.minor}")')

  # Split "major.minor" using parameter expansion: the first dot-separated
  # field is the major, the second one the minor.
  local have_major=${detected%%.*}
  local have_rest=${detected#*.}
  local have_minor=${have_rest%%.*}
  local need_major=${MIN_PYTHON_VERSION%%.*}
  local need_rest=${MIN_PYTHON_VERSION#*.}
  local need_minor=${need_rest%%.*}

  # Numeric comparison, so 3.9 correctly sorts below 3.10.
  local too_old=false
  if [ "$have_major" -lt "$need_major" ]; then
    too_old=true
  elif [ "$have_major" -eq "$need_major" ] && [ "$have_minor" -lt "$need_minor" ]; then
    too_old=true
  fi

  if [ "$too_old" = true ]; then
    print_error "Python 版本過低: $detected (需要 $MIN_PYTHON_VERSION+)"
    echo ""
    echo "解決方案:"
    echo " sudo apt update && sudo apt install python3.10 python3.10-venv"
    exit 1
  fi
  print_success "Python 版本: $detected"
}
#######################################
# Run every pre-flight check in order: root, disk, memory, Python version.
# Globals: BLUE, NC (read)
# Outputs: a heading, whatever each check prints, then a completion message.
# Returns: 0 when all checks return; individual checks may exit the script.
#######################################
run_preflight_checks() {
  local check
  echo -e "${BLUE}執行預檢...${NC}"
  for check in check_not_root check_disk_space check_memory check_python_version; do
    "$check"
  done
  echo ""
  print_success "預檢完成"
}
# ===== GPU detection =====
#######################################
# Decide whether to install the GPU or the CPU build of PaddlePaddle.
# Globals: FORCE_CPU, YELLOW, NC (read)
#          USE_GPU, PADDLE_PACKAGE (always written)
#          CUDA_VERSION (written when nvidia-smi is present)
#          CUDA_MAJOR (written when a CUDA version was parsed)
#          PADDLE_INDEX (written only on the GPU paths)
# Outputs: progress messages describing what was detected.
#######################################
detect_gpu() {
  # Start from the CPU configuration; it is replaced only when a usable
  # CUDA 11.x / 12+ installation is found below.
  USE_GPU=false
  PADDLE_PACKAGE="paddlepaddle>=3.2.1"

  if [ "$FORCE_CPU" = true ]; then
    echo -e "${YELLOW} 已指定 --cpu-only跳過 GPU 偵測${NC}"
    return
  fi

  if ! command -v nvidia-smi &> /dev/null; then
    echo -e "${YELLOW} 未偵測到 NVIDIA GPU將使用 CPU 版本${NC}"
    return
  fi

  print_success "偵測到 NVIDIA GPU"
  # Informational only; a failing query must not abort the script.
  nvidia-smi --query-gpu=name,memory.total --format=csv,noheader 2>/dev/null || true

  # The CUDA version is the 9th whitespace-separated field of the
  # "CUDA Version" line in the nvidia-smi output.
  CUDA_VERSION=$(nvidia-smi 2>/dev/null | grep "CUDA Version" | awk '{print $9}')
  if [ -z "$CUDA_VERSION" ]; then
    print_warning "無法獲取 CUDA 版本,將使用 CPU 版本"
    return
  fi

  print_success "CUDA 版本: $CUDA_VERSION"
  CUDA_MAJOR=${CUDA_VERSION%%.*}

  if [ "$CUDA_MAJOR" -ge 12 ]; then
    echo "將安裝 PaddlePaddle GPU 版本 (CUDA 12.x)"
    PADDLE_INDEX="https://www.paddlepaddle.org.cn/packages/stable/cu123/"
    PADDLE_PACKAGE="paddlepaddle-gpu>=3.2.1"
    USE_GPU=true
  elif [ "$CUDA_MAJOR" -eq 11 ]; then
    echo "將安裝 PaddlePaddle GPU 版本 (CUDA 11.x)"
    PADDLE_INDEX="https://www.paddlepaddle.org.cn/packages/stable/cu118/"
    PADDLE_PACKAGE="paddlepaddle-gpu>=3.2.1"
    USE_GPU=true
  else
    print_warning "CUDA 版本不支援 ($CUDA_VERSION),將使用 CPU 版本"
  fi
}
# ===== Database initialization =====
#######################################
# Export the KEY=VALUE pairs of a dotenv-style file into the environment.
# Blank lines, '#' comment lines, lines without '=' and lines whose key is not
# a valid shell identifier are skipped. An optional leading "export " and a
# trailing carriage return (CRLF files) are tolerated. Values wrapped in
# matching single or double quotes are unwrapped; for unquoted values a
# trailing " # comment" is dropped. Values are never evaluated by the shell.
# Arguments: $1 - path of the env file
# Returns:   0
#######################################
_load_env_file() {
  local env_file=$1
  local line key value
  local dq_re='^"([^"]*)"'
  local sq_re="^'([^']*)'"

  while IFS= read -r line || [[ -n "$line" ]]; do
    line=${line%$'\r'}
    # Trim leading whitespace.
    line=${line#"${line%%[![:space:]]*}"}
    if [[ -z "$line" || "$line" == '#'* ]]; then
      continue
    fi
    line=${line#"export "}
    if [[ "$line" != *=* ]]; then
      continue
    fi

    key=${line%%=*}
    value=${line#*=}
    # Trim whitespace around the key/value separator.
    key=${key%"${key##*[![:space:]]}"}
    value=${value#"${value%%[![:space:]]*}"}
    if ! [[ "$key" =~ ^[A-Za-z_][A-Za-z0-9_]*$ ]]; then
      continue
    fi

    if [[ "$value" =~ $dq_re ]]; then
      value=${BASH_REMATCH[1]}
    elif [[ "$value" =~ $sq_re ]]; then
      value=${BASH_REMATCH[1]}
    else
      # Unquoted value: drop an inline comment, then trailing whitespace.
      value=${value%%[[:space:]]\#*}
      value=${value%"${value##*[![:space:]]}"}
    fi

    export "$key=$value"
  done < "$env_file"
  return 0
}

#######################################
# Run the Alembic migrations from the backend directory.
# Globals: SKIP_DB, SCRIPT_DIR (read); the variables defined in .env.local
#          are exported into the environment.
# Outputs: progress, plus manual instructions when the step is skipped or the
#          migration fails.
# Returns: 0; a failed migration is reported as a warning, not an error.
#          On return the working directory is SCRIPT_DIR (except when skipped
#          via SKIP_DB, where it is left untouched).
#######################################
init_database() {
  if [ "$SKIP_DB" = true ]; then
    print_warning "已指定 --skip-db跳過數據庫初始化"
    return
  fi
  echo "初始化數據庫..."
  cd "$SCRIPT_DIR/backend"

  # Without .env.local there is no database configuration to migrate against.
  if [ ! -f "$SCRIPT_DIR/.env.local" ]; then
    print_warning "未找到 .env.local跳過數據庫初始化"
    echo " 請複製 .env.example 到 .env.local 並配置後,手動執行:"
    echo " cd backend && alembic upgrade head"
    cd "$SCRIPT_DIR"
    return
  fi

  # Load the environment line by line. Word-splitting the whole file
  # (export $(grep ... | xargs)) breaks values that contain spaces or quotes
  # and aborts on inline comments.
  _load_env_file "$SCRIPT_DIR/.env.local"

  # Run the migrations; stderr is discarded and a failure is downgraded to a
  # warning because the database may simply not be reachable yet.
  if alembic upgrade head 2>/dev/null; then
    print_success "數據庫遷移完成"
  else
    print_warning "數據庫遷移失敗 (可能數據庫未連接)"
    echo " 請確認 .env.local 中的數據庫配置正確後,手動執行:"
    echo " cd backend && alembic upgrade head"
  fi
  cd "$SCRIPT_DIR"
}
# ===== Main flow =====
print_header

# Pre-flight checks
run_preflight_checks

# Step 1: refresh the apt package index
print_step 1 10 "更新系統套件列表..."
sudo apt update

# Step 2: Python development tooling
print_step 2 10 "安裝 Python 開發工具..."
python_dev_packages=(
  python3-pip
  python3-venv
  python3-dev
  build-essential
  pkg-config
)
sudo apt install -y "${python_dev_packages[@]}"

# Step 3: system-level dependencies
print_step 3 10 "安裝系統層級依賴..."
system_packages=(
  pandoc
  libmagic1
  libmagic-dev
  fonts-noto-cjk
  fonts-noto-cjk-extra
  fonts-liberation
  libpango-1.0-0
  libpangocairo-1.0-0
  libcairo2
  libcairo2-dev
  libgdk-pixbuf2.0-0
  libgdk-pixbuf-2.0-dev
  libffi-dev
  libffi8
  shared-mime-info
  poppler-utils
  libgl1
  libglib2.0-0
  libglib2.0-dev
  libgomp1
  libjpeg-dev
  libpng-dev
  libtiff-dev
  libopencv-dev
  libsqlite3-dev
  libreoffice-core-nogui
  libreoffice-writer-nogui
  libreoffice-impress-nogui
  ca-certificates
  curl
  wget
  libxml2
  libxslt1-dev
  python3-cffi
)
sudo apt install -y "${system_packages[@]}"

# Step 4: Node.js (through nvm)
print_step 4 10 "安裝 Node.js..."
if [ ! -d "$HOME/.nvm" ]; then
  echo "安裝 nvm..."
  curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.0/install.sh | bash
fi
export NVM_DIR="$HOME/.nvm"
# Load nvm into this shell so the `nvm` function is available below.
if [ -s "$NVM_DIR/nvm.sh" ]; then
  . "$NVM_DIR/nvm.sh"
fi
nvm install --lts
nvm use --lts
print_success "Node.js $(node -v) 已安裝"

# Step 5: Python virtual environment (created only when missing)
print_step 5 10 "創建 Python 虛擬環境..."
if [ -d "$SCRIPT_DIR/venv" ]; then
  print_success "虛擬環境已存在"
else
  python3 -m venv "$SCRIPT_DIR/venv"
  print_success "虛擬環境已創建"
fi

# Step 6: GPU detection
print_step 6 10 "偵測 GPU 支援..."
detect_gpu
# Step 7: Python dependencies (installed inside the virtual environment)
print_step 7 10 "安裝 Python 依賴..."
source "$SCRIPT_DIR/venv/bin/activate"
pip install --upgrade pip setuptools wheel
echo ""
echo "安裝 PaddlePaddle..."
if [ "$USE_GPU" = true ]; then
  echo "安裝 GPU 版本: $PADDLE_PACKAGE"
  pip install "$PADDLE_PACKAGE" -i "$PADDLE_INDEX"
  # WSL CUDA library path: when running under WSL and /usr/lib/wsl/lib
  # exists, put it on LD_LIBRARY_PATH now and persist that in ~/.bashrc.
  if grep -qi microsoft /proc/version && [ -d "/usr/lib/wsl/lib" ]; then
    # The ${VAR:+:$VAR} form avoids a trailing ':' when LD_LIBRARY_PATH is
    # empty; an empty entry makes the dynamic loader search the current
    # directory.
    if ! grep -q "export LD_LIBRARY_PATH=/usr/lib/wsl/lib" ~/.bashrc 2>/dev/null; then
      {
        echo ""
        echo "# WSL CUDA Libraries Path (Added by Tool_OCR setup)"
        echo 'export LD_LIBRARY_PATH=/usr/lib/wsl/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}'
      } >> ~/.bashrc
    fi
    export LD_LIBRARY_PATH="/usr/lib/wsl/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
    print_success "WSL CUDA 路徑已配置"
  fi
else
  echo "安裝 CPU 版本..."
  # Use the package spec chosen by detect_gpu, falling back to the CPU
  # default if it was never set.
  pip install "${PADDLE_PACKAGE:-paddlepaddle>=3.2.1}"
fi
echo ""
echo "安裝其他 Python 依賴..."
pip install -r "$SCRIPT_DIR/requirements.txt"
# Step 8: verify the installation. Each import is attempted on its own and a
# failure only produces a warning.
print_step 8 10 "驗證安裝..."
if ! python -c "import magic; print('✓ python-magic')"; then
  print_warning "python-magic 未正確安裝"
fi
if ! python -c "from weasyprint import HTML; print('✓ WeasyPrint')"; then
  print_warning "WeasyPrint 未正確安裝"
fi
if ! python -c "import cv2; print('✓ OpenCV')"; then
  print_warning "OpenCV 未正確安裝"
fi
if ! python -c "import paddle; print('✓ PaddlePaddle', paddle.__version__)"; then
  print_warning "PaddlePaddle 未正確安裝"
fi

# Step 9: frontend dependencies (an existing node_modules is wiped first so
# the install starts clean)
print_step 9 10 "安裝前端依賴..."
cd "$SCRIPT_DIR/frontend"
if [[ -d node_modules ]]; then
  echo "清理現有 node_modules..."
  rm -rf node_modules package-lock.json
fi
npm cache clean --force
npm install --force
cd "$SCRIPT_DIR"
print_success "前端依賴已安裝"

# Step 10: create runtime directories (relative to SCRIPT_DIR, which is the
# working directory at this point) and initialize the database
print_step 10 10 "創建目錄和初始化數據庫..."
mkdir -p \
  backend/uploads/{temp,processed,images} \
  backend/storage/{markdown,json,exports,results} \
  backend/models/paddleocr \
  backend/logs
init_database
# ===== Done =====
# Green completion banner.
summary_rule="================================"
echo ""
for summary_line in "$summary_rule" " 環境設置完成!" "$summary_rule"; do
  echo -e "${GREEN}${summary_line}${NC}"
done

# Resulting configuration: which PaddlePaddle build was selected.
printf '%s\n' "" "系統配置:"
if [ "$USE_GPU" = true ]; then
  summary_gpu="${GREEN}已啟用${NC}"
  summary_paddle="GPU 版本"
else
  summary_gpu="${YELLOW}未啟用 (CPU 模式)${NC}"
  summary_paddle="CPU 版本"
fi
echo -e " GPU 加速: ${summary_gpu}"
echo " PaddlePaddle: ${summary_paddle}"

# Next steps for the user.
printf '%s\n' \
  "" \
  "下一步操作:" \
  "" \
  "1. 配置環境變量 (如尚未配置):" \
  " cp .env.example .env.local" \
  " # 編輯 .env.local 填入實際配置" \
  "" \
  "2. 啟動應用:" \
  " ./start.sh # 同時啟動前後端" \
  " ./start.sh backend # 僅啟動後端" \
  " ./start.sh frontend # 僅啟動前端" \
  "" \
  "3. 訪問應用:" \
  " 前端: http://localhost:5173" \
  " API文檔: http://localhost:8000/docs" \
  ""

# Shown only for GPU installs under WSL: tells the user to restart the
# terminal so the CUDA path configuration takes effect.
if [ "$USE_GPU" = true ]; then
  if grep -qi microsoft /proc/version; then
    echo -e "${YELLOW}注意: WSL GPU 用戶需要重新啟動終端以應用 CUDA 路徑配置${NC}"
    echo ""
  fi
fi