Files
OCR/setup_dev_env.sh
egg 3f41a33877 docs: update documentation for chart recognition enablement
Updates all project documentation to reflect that chart recognition
is now fully enabled with PaddlePaddle 3.2.1+.

Changes:
- README.md: Remove Known Limitations section about chart recognition,
  update tech stack and prerequisites to include PaddlePaddle 3.2.1+,
  add WSL CUDA configuration notes
- openspec/project.md: Add comprehensive chart recognition feature
  descriptions, update system requirements for GPU/CUDA support
- openspec/changes/add-gpu-acceleration-support/tasks.md: Mark task
  5.4 as completed with resolution details
- openspec/changes/add-gpu-acceleration-support/proposal.md: Update
  Known Issues section to show chart recognition is now resolved
- setup_dev_env.sh: Upgrade PaddlePaddle from 3.0.0 to 3.2.1+, add
  WSL CUDA library path configuration, add chart recognition API
  verification

All documentation now accurately reflects:
- Chart recognition fully enabled
- PaddlePaddle 3.2.1+ with fused_rms_norm_ext API
- WSL CUDA path auto-configuration
- Comprehensive PP-StructureV3 capabilities

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-16 19:04:30 +08:00

341 lines
10 KiB
Bash
Executable File
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/bin/bash
# Tool_OCR development environment setup script (WSL Ubuntu).
set -e # stop at the first command that fails

# Start-up banner.
printf '%s\n' \
  "================================" \
  "Tool_OCR 開發環境設置" \
  "================================" \
  ""

# ANSI colour codes for status messages. They are kept as literal backslash
# text and only turn into escape sequences when printed with `echo -e`.
NC='\033[0m' # No Color
RED='\033[0;31m'
YELLOW='\033[1;33m'
GREEN='\033[0;32m'
# Refuse to run as root; the second message tells the user that the script
# asks for the sudo password itself when it needs one.
[[ "$EUID" -ne 0 ]] || {
  echo -e "${RED}請不要使用 sudo 運行此腳本${NC}"
  echo "腳本會在需要時提示輸入 sudo 密碼"
  exit 1
}
# Steps 1-3: refresh the apt index, then install the Python toolchain and the
# system-level packages. The progress labels use /9 because the script has
# nine numbered steps in total.
echo -e "${YELLOW}[1/9] 更新系統套件列表...${NC}"
sudo apt update
echo ""

echo -e "${YELLOW}[2/9] 安裝 Python 開發工具...${NC}"
sudo apt install -y \
  python3-pip \
  python3-venv \
  python3-dev \
  build-essential \
  pkg-config
echo ""

echo -e "${YELLOW}[3/9] 安裝系統層級依賴...${NC}"
sudo apt install -y \
  pandoc \
  libmagic1 \
  libmagic-dev \
  fonts-noto-cjk \
  fonts-noto-cjk-extra \
  fonts-liberation \
  libpango-1.0-0 \
  libpangocairo-1.0-0 \
  libcairo2 \
  libcairo2-dev \
  libgdk-pixbuf2.0-0 \
  libgdk-pixbuf-2.0-dev \
  libffi-dev \
  libffi8 \
  shared-mime-info \
  poppler-utils \
  libgl1 \
  libglib2.0-0 \
  libglib2.0-dev \
  libgomp1 \
  libjpeg-dev \
  libpng-dev \
  libtiff-dev \
  libopencv-dev \
  libsqlite3-dev \
  libreoffice-core-nogui \
  libreoffice-writer-nogui \
  libreoffice-impress-nogui \
  ca-certificates \
  curl \
  wget \
  libxml2 \
  libxslt1-dev \
  python3-cffi
echo ""
# Step 4: install nvm if needed, then the current Node.js LTS release.
echo -e "${YELLOW}[4/9] 安裝 Node.js 和 npm...${NC}"
# nvm counts as installed when its directory exists under $HOME.
if [ ! -d "$HOME/.nvm" ]; then
  echo "安裝 nvm..."
  curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.0/install.sh | bash
else
  echo "nvm 已安裝"
fi
# Load nvm into this shell (needed in both the fresh-install and the
# already-installed case, so it is done once here).
export NVM_DIR="$HOME/.nvm"
[ -s "$NVM_DIR/nvm.sh" ] && \. "$NVM_DIR/nvm.sh"
# Fail with a clear message instead of a bare "nvm: command not found" when
# the download or the load above did not produce a usable nvm.
if ! command -v nvm > /dev/null 2>&1; then
  echo -e "${RED}無法載入 nvm: $NVM_DIR/nvm.sh${NC}" >&2
  exit 1
fi
# Install and select the Node.js LTS release.
echo "安裝 Node.js LTS..."
nvm install --lts
nvm use --lts
echo ""
# Step 5: create the Python virtual environment in ./venv unless that
# directory already exists. The label uses /9 to match the nine-step total.
echo -e "${YELLOW}[5/9] 創建 Python 虛擬環境...${NC}"
if [ ! -d "venv" ]; then
  python3 -m venv venv
  echo "虛擬環境已創建"
else
  echo "虛擬環境已存在"
fi
echo ""
# Step 6 header; the detection function itself is defined right below.
echo -e "${YELLOW}[6/9] 偵測 GPU 和 CUDA 支援...${NC}"
#######################################
# Detect an NVIDIA GPU and choose the PaddlePaddle package to install.
# The CUDA version is taken from the "CUDA Version: X.Y" text that
# nvidia-smi prints. Every failure falls back to the CPU package.
# Globals:
#   GREEN, YELLOW, NC (read)   - colour codes used in messages
#   USE_GPU (written)          - "true" if a GPU build was selected, else "false"
#   PADDLE_PACKAGE (written)   - pip requirement specifier
#   PADDLE_INDEX (written)     - package index URL for GPU builds, empty for CPU
#   CUDA_VERSION, CUDA_MAJOR, CUDA_MINOR (written)
# Arguments: none
# Outputs:   progress messages on stdout
# Returns:   0
#######################################
detect_gpu() {
  local gpu_list cuda_series cuda_compat

  # Start from the CPU defaults so stale values (for example a PADDLE_INDEX
  # inherited from the environment) never leak into a CPU install.
  USE_GPU=false
  PADDLE_PACKAGE="paddlepaddle>=3.2.1"
  PADDLE_INDEX=""
  CUDA_VERSION=""
  CUDA_MAJOR=""
  CUDA_MINOR=""

  if ! command -v nvidia-smi &> /dev/null; then
    echo -e "${YELLOW} 未偵測到 NVIDIA GPU 或 nvidia-smi${NC}"
    echo "將安裝 CPU 版本的 PaddlePaddle"
    return 0
  fi

  # The binary can exist while the query itself fails. Running it inside a
  # condition keeps `set -e` from aborting the whole setup in that case.
  if ! gpu_list=$(nvidia-smi --query-gpu=name,memory.total --format=csv,noheader); then
    echo -e "${YELLOW}⚠ nvidia-smi 執行失敗,無法存取 NVIDIA GPU${NC}"
    echo "將安裝 CPU 版本的 PaddlePaddle"
    return 0
  fi
  echo -e "${GREEN}✓ 偵測到 NVIDIA GPU${NC}"
  if [ -n "$gpu_list" ]; then
    printf '%s\n' "$gpu_list"
  fi

  # Match the labelled value instead of a field position, and accept only a
  # numeric version so the integer comparisons below cannot see text.
  CUDA_VERSION=$(nvidia-smi \
    | sed -n 's/.*CUDA Version: *\([0-9][0-9.]*\).*/\1/p' \
    | head -n 1) || CUDA_VERSION=""
  if [ -z "$CUDA_VERSION" ]; then
    echo -e "${YELLOW}⚠ 無法獲取 CUDA 版本${NC}"
    echo "將安裝 CPU 版本"
    return 0
  fi
  echo -e "${GREEN}✓ CUDA 版本: $CUDA_VERSION${NC}"

  CUDA_MAJOR=${CUDA_VERSION%%.*}
  CUDA_MINOR=$(echo "$CUDA_VERSION" | cut -d. -f2)

  # Pick the package index by CUDA major version.
  if [ "$CUDA_MAJOR" -ge 13 ]; then
    cuda_series="13.x"
    cuda_compat="12.6+"
    PADDLE_INDEX="https://www.paddlepaddle.org.cn/packages/stable/cu126/"
  elif [ "$CUDA_MAJOR" -eq 12 ]; then
    cuda_series="12.x"
    cuda_compat="12.3+"
    PADDLE_INDEX="https://www.paddlepaddle.org.cn/packages/stable/cu123/"
  elif [ "$CUDA_MAJOR" -eq 11 ]; then
    cuda_series="11.x"
    cuda_compat="11.8+"
    PADDLE_INDEX="https://www.paddlepaddle.org.cn/packages/stable/cu118/"
  else
    echo -e "${YELLOW}⚠ CUDA 版本不支援 ($CUDA_VERSION)${NC}"
    echo "將安裝 CPU 版本"
    return 0
  fi

  echo "將安裝 PaddlePaddle GPU 版本 (CUDA $cuda_series)"
  echo "使用版本 3.2.1+ (兼容 CUDA $cuda_compat, 支援圖表識別)"
  USE_GPU=true
  PADDLE_PACKAGE="paddlepaddle-gpu>=3.2.1"
}
# Run the GPU probe; the branches below read USE_GPU, PADDLE_PACKAGE and
# PADDLE_INDEX.
detect_gpu
echo ""
echo -e "${YELLOW}[7/9] 安裝 Python 依賴...${NC}"
. venv/bin/activate
pip install --upgrade pip setuptools wheel
# PaddlePaddle is installed first, before the rest of the requirements.
echo ""
echo -e "${YELLOW}安裝 PaddlePaddle...${NC}"
if [ "$USE_GPU" != true ]; then
  echo "安裝 CPU 版本..."
  pip install 'paddlepaddle>=3.2.1'
elif [ -z "$PADDLE_INDEX" ]; then
  # GPU build without a dedicated index: use pip's default index.
  echo "安裝 GPU 加速版本: $PADDLE_PACKAGE"
  pip install "$PADDLE_PACKAGE"
else
  echo "安裝 GPU 加速版本: $PADDLE_PACKAGE"
  echo "使用官方索引: $PADDLE_INDEX"
  pip install "$PADDLE_PACKAGE" -i "$PADDLE_INDEX"
fi
# WSL CUDA library path setup (GPU installs only).
# Adds /usr/lib/wsl/lib to LD_LIBRARY_PATH, both persistently via ~/.bashrc
# and for the remainder of this script.
if [ "$USE_GPU" = true ]; then
  echo ""
  echo -e "${YELLOW}配置 WSL CUDA 庫路徑...${NC}"
  # The script treats a kernel version string containing "microsoft" as WSL.
  if grep -qi microsoft /proc/version; then
    echo "偵測到 WSL 環境,配置 CUDA 庫路徑..."
    if [ -d "/usr/lib/wsl/lib" ]; then
      # Only append to ~/.bashrc once; the check matches the line prefix.
      if ! grep -q "export LD_LIBRARY_PATH=/usr/lib/wsl/lib" ~/.bashrc; then
        {
          echo ""
          echo "# WSL CUDA Libraries Path for PaddlePaddle GPU support (Added by Tool_OCR setup)"
          # Written literally on purpose so it is expanded when ~/.bashrc runs.
          # The ${VAR:+:...} form adds the ":" separator only when
          # LD_LIBRARY_PATH already has a value; a trailing ":" would add an
          # empty entry, which the dynamic loader reads as the current directory.
          # shellcheck disable=SC2016
          echo 'export LD_LIBRARY_PATH=/usr/lib/wsl/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}'
        } >> ~/.bashrc
        echo "✓ 已將 WSL CUDA 路徑添加到 ~/.bashrc"
      else
        echo "✓ WSL CUDA 路徑已存在於 ~/.bashrc"
      fi
      # Apply to the current session as well, again without a trailing colon.
      export LD_LIBRARY_PATH="/usr/lib/wsl/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
      echo "✓ 已應用 CUDA 路徑到當前環境"
    else
      echo -e "${YELLOW}⚠ 未找到 /usr/lib/wsl/lib 目錄${NC}"
      echo "請確認 NVIDIA CUDA 驅動已安裝於 WSL"
    fi
  else
    echo "非 WSL 環境,跳過 WSL CUDA 路徑配置"
  fi
fi
# Install the remaining Python dependencies from requirements.txt.
# NOTE(review): the previous comment said paddlepaddle in requirements.txt is
# skipped, but the file is handed to pip unfiltered - confirm that
# requirements.txt does not pin a conflicting paddlepaddle build.
echo ""
echo -e "${YELLOW}安裝其他 Python 依賴...${NC}"
pip install -r requirements.txt

echo ""
echo -e "${YELLOW}測試關鍵套件...${NC}"
# Run one import statement ($1) and report success or failure under the
# label $2. A failed import is reported but does not stop the script.
_try_import() {
  python -c "$1; print('✓ $2')" || echo "✗ $2 failed"
}
_try_import "import magic" "python-magic"
_try_import "from weasyprint import HTML" "WeasyPrint"
_try_import "import cv2" "OpenCV"
# Verify the PaddlePaddle install and report whether GPUs are usable.
# The embedded Python must be indented to parse; without indentation the
# interpreter stops with an IndentationError and only the fallback message
# after `||` is shown. A failure here is reported but does not stop the script.
echo ""
echo -e "${YELLOW}驗證 PaddlePaddle 設置...${NC}"
python -c "
import paddle
print('✓ PaddlePaddle 版本:', paddle.__version__)
try:
    if paddle.is_compiled_with_cuda():
        gpu_count = paddle.device.cuda.device_count()
        if gpu_count > 0:
            print('✓ GPU 加速: 已啟用')
            print('✓ GPU 數量:', gpu_count)
            for i in range(gpu_count):
                gpu_name = paddle.device.cuda.get_device_properties(i).name
                print(f' GPU {i}: {gpu_name}')
        else:
            print(' GPU 加速: CUDA 已編譯但無可用 GPU')
    else:
        print(' GPU 加速: 未啟用 (CPU 模式)')
except Exception as e:
    print('⚠ GPU 檢測失敗:', str(e))
    print(' 將使用 CPU 模式')
" || echo "⚠ PaddlePaddle 驗證失敗,但可繼續使用"
# Verify that the chart-recognition API is present in the installed
# PaddlePaddle. A failure is reported but does not stop the script.
# The status text is built by a helper rather than inline in the f-string:
# reusing the enclosing quote character inside an f-string expression is a
# SyntaxError on Python versions before 3.12.
echo ""
echo -e "${YELLOW}驗證圖表識別 API...${NC}"
python -c "
import paddle.incubate.nn.functional as F


def status(ok):
    return '✅ 可用' if ok else '❌ 不可用'


# Check which fused RMS-norm entry points exist.
has_base = hasattr(F, 'fused_rms_norm')
has_ext = hasattr(F, 'fused_rms_norm_ext')
print('📊 圖表識別 API 檢查:')
print(f' - fused_rms_norm: {status(has_base)}')
print(f' - fused_rms_norm_ext: {status(has_ext)}')
if has_ext:
    print('🎉 圖表識別功能: ✅ 可啟用')
else:
    print('⚠️ 圖表識別功能: ❌ 不可用 (需要 PaddlePaddle 3.2.0+)')
" || echo "⚠ 圖表識別 API 驗證失敗"
echo ""
echo -e "${YELLOW}[8/9] 安裝前端依賴...${NC}"
# Work inside frontend/ in a subshell so the working directory of the main
# script is untouched afterwards.
(
  cd frontend
  # When a previous install exists, remove it together with the lock file.
  if [ -d "node_modules" ]; then
    echo "清理現有 node_modules..."
    rm -rf node_modules package-lock.json
  fi
  # Clear the npm cache.
  npm cache clean --force
  # Install dependencies; --force is used to avoid lock problems.
  echo "安裝前端依賴..."
  npm install --force
)
echo ""
echo -e "${YELLOW}[9/9] 創建必要的目錄...${NC}"
# Create every runtime directory under backend/ in one call; -p makes this
# safe to repeat when they already exist.
mkdir -p \
  backend/uploads/temp \
  backend/uploads/processed \
  backend/uploads/images \
  backend/storage/markdown \
  backend/storage/json \
  backend/storage/exports \
  backend/models/paddleocr \
  backend/logs
# Final summary: completion banner, the selected configuration, and the
# follow-up commands for the user.
echo ""
echo -e "${GREEN}================================${NC}"
echo -e "${GREEN}環境設置完成!${NC}"
echo -e "${GREEN}================================${NC}"
echo ""
echo "系統配置:"
case "$USE_GPU" in
  true)
    echo -e " GPU 加速: ${GREEN}已啟用${NC}"
    echo " PaddlePaddle: GPU 版本 (3.2.1+)"
    echo -e " 圖表識別: ${GREEN}已啟用${NC}"
    if grep -qi microsoft /proc/version; then
      echo " WSL CUDA 路徑: 已配置於 ~/.bashrc"
    fi
    ;;
  *)
    echo -e " GPU 加速: ${YELLOW}未啟用 (CPU 模式)${NC}"
    echo " PaddlePaddle: CPU 版本 (3.2.1+)"
    echo -e " 圖表識別: ${GREEN}已啟用${NC} (CPU 模式)"
    ;;
esac
# Next-step instructions, printed verbatim (quoted delimiter: no expansion).
cat <<'EOF'

下一步操作:
1. 初始化數據庫:
 source venv/bin/activate
 cd backend
 alembic upgrade head
 python create_test_user.py
 cd ..

2. 啟動後端:
 ./start_backend.sh

3. 啟動前端 (新終端):
 ./start_frontend.sh

4. 訪問應用:
 前端: http://localhost:5173
 API文檔: http://localhost:8000/docs
 健康檢查: http://localhost:8000/health

EOF
# Remind WSL GPU users that the ~/.bashrc change needs a new shell.
if [ "$USE_GPU" = true ]; then
  if grep -qi microsoft /proc/version; then
    echo "注意: WSL GPU 用戶需要重新啟動終端或執行 'source ~/.bashrc' 以應用 CUDA 路徑配置"
    echo ""
  fi
fi