The PaddleOCR-VL chart recognition model requires the `fused_rms_norm_ext` API, which is not available in the PaddlePaddle 3.0.0 stable release.

Changes:
- Set use_chart_recognition=False in PP-StructureV3 initialization
- Remove the unsupported show_log parameter from PaddleOCR 3.x API calls
- Document the known limitation in the openspec proposal
- Add limitation documentation to the README
- Update tasks.md with a documentation task for known issues

Impact:
- Layout analysis still detects/extracts charts as images ✓
- Tables, formulas, and text recognition work normally ✓
- Deep chart understanding (type detection, data extraction) disabled ✗
- Chart-to-structured-data conversion disabled ✗

Workaround: Charts are saved as image files for manual review.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
279 lines
8.0 KiB
Bash
Executable File
279 lines
8.0 KiB
Bash
Executable File
#!/bin/bash
# Tool_OCR development-environment setup script for WSL Ubuntu.

# Stop at the first command that fails.
set -e

# Opening banner followed by one blank line.
banner_rule="================================"
printf '%s\n' "$banner_rule" "Tool_OCR 開發環境設置" "$banner_rule" ""

# Colour escape sequences; they are interpreted by `echo -e` at the call sites.
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
RED='\033[0;31m'
NC='\033[0m' # No Color

# Refuse to run as root: the script calls sudo itself where it needs it.
if [[ "$EUID" -eq 0 ]]; then
  echo -e "${RED}請不要使用 sudo 運行此腳本${NC}"
  echo "腳本會在需要時提示輸入 sudo 密碼"
  exit 1
fi
|
||
|
||
# Steps 1-3: refresh the package index and install OS-level packages.
# apt-get is used instead of apt because apt's command-line interface is aimed
# at interactive use and is not guaranteed to stay stable for scripts.
# The step counters read N/9 to agree with the later steps of this script.

echo -e "${YELLOW}[1/9] 更新系統套件列表...${NC}"
sudo apt-get update

echo ""
echo -e "${YELLOW}[2/9] 安裝 Python 開發工具...${NC}"
python_build_packages=(
  python3-pip
  python3-venv
  python3-dev
  build-essential
  pkg-config
)
sudo apt-get install -y "${python_build_packages[@]}"

echo ""
echo -e "${YELLOW}[3/9] 安裝系統層級依賴...${NC}"
# Kept as an array so entries can be added or removed one per line without
# having to maintain trailing backslashes.
system_packages=(
  pandoc
  libmagic1
  libmagic-dev
  fonts-noto-cjk
  fonts-noto-cjk-extra
  fonts-liberation
  libpango-1.0-0
  libpangocairo-1.0-0
  libcairo2
  libcairo2-dev
  libgdk-pixbuf2.0-0
  libgdk-pixbuf-2.0-dev
  libffi-dev
  libffi8
  shared-mime-info
  poppler-utils
  libgl1
  libglib2.0-0
  libglib2.0-dev
  libgomp1
  libjpeg-dev
  libpng-dev
  libtiff-dev
  libopencv-dev
  libsqlite3-dev
  libreoffice-core-nogui
  libreoffice-writer-nogui
  libreoffice-impress-nogui
  ca-certificates
  curl
  wget
  libxml2
  libxslt1-dev
  python3-cffi
)
sudo apt-get install -y "${system_packages[@]}"
|
||
|
||
echo ""
echo -e "${YELLOW}[4/9] 安裝 Node.js 和 npm...${NC}"

# Install nvm only when its directory is missing.
# NVM_DIR is deliberately exported AFTER the installer has run, as the
# original script did, so the installer picks its own default location.
nvm_home="$HOME/.nvm"
if [ ! -d "$nvm_home" ]; then
  echo "安裝 nvm..."
  # -f: on an HTTP error, do not pipe the server's error page into bash.
  curl -fsSL https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.0/install.sh | bash
else
  echo "nvm 已安裝"
fi

# Load nvm into this shell (needed in both the fresh-install and the
# already-installed case). Fail here with a clear message if nvm.sh is
# missing, rather than later with "nvm: command not found".
export NVM_DIR="$nvm_home"
if [ ! -s "$NVM_DIR/nvm.sh" ]; then
  echo -e "${RED}找不到 $NVM_DIR/nvm.sh,nvm 安裝失敗${NC}" >&2
  exit 1
fi
\. "$NVM_DIR/nvm.sh"

# Install and activate the current Node.js LTS release.
echo "安裝 Node.js LTS..."
nvm install --lts
nvm use --lts
|
||
|
||
echo ""
echo -e "${YELLOW}[5/9] 創建 Python 虛擬環境...${NC}"
# Check for the activate script rather than just the directory: a leftover or
# half-created venv/ would otherwise be reported as existing and the later
# `source venv/bin/activate` would fail.
if [ -f "venv/bin/activate" ]; then
  echo "虛擬環境已存在"
else
  python3 -m venv venv
  echo "虛擬環境已創建"
fi
|
||
|
||
echo ""
echo -e "${YELLOW}[6/9] 偵測 GPU 和 CUDA 支援...${NC}"

#######################################
# Choose the PaddlePaddle build to install from what nvidia-smi reports.
# Globals:
#   GREEN, YELLOW, NC (read)   colour escapes used in messages
#   USE_GPU (written)          "true" if a GPU build was selected, else "false"
#   PADDLE_PACKAGE (written)   pip requirement string to install
#   PADDLE_INDEX (written)     pip index URL for GPU builds; empty for CPU
#   CUDA_VERSION, CUDA_MAJOR, CUDA_MINOR (written)
#                              parsed CUDA version; empty when unknown
# Arguments: none
# Outputs:   progress messages on stdout
# Returns:   0 always; every failure path falls back to the CPU build
#######################################
detect_gpu() {
  local gpu_info smi_banner cuda_series cuda_compat index_tag
  # Matches e.g. "CUDA Version: 12.4" anywhere in the nvidia-smi banner, so
  # the result does not depend on the column layout of that line.
  local version_re='CUDA Version:[[:space:]]*(([0-9]+)(\.([0-9]+))?)'

  # Start from the CPU configuration and only upgrade it once a supported
  # CUDA version has been confirmed. This also clears any stale PADDLE_INDEX.
  USE_GPU=false
  PADDLE_PACKAGE="paddlepaddle"
  PADDLE_INDEX=""
  CUDA_VERSION=""
  CUDA_MAJOR=""
  CUDA_MINOR=""

  if ! command -v nvidia-smi &> /dev/null; then
    echo -e "${YELLOW}ℹ 未偵測到 NVIDIA GPU 或 nvidia-smi${NC}"
    echo "將安裝 CPU 版本的 PaddlePaddle"
    return 0
  fi

  # nvidia-smi can be on PATH and still fail. Running it as an `if` condition
  # keeps `set -e` from aborting the whole script in that case.
  if ! gpu_info=$(nvidia-smi --query-gpu=name,memory.total --format=csv,noheader); then
    echo -e "${YELLOW}⚠ nvidia-smi 執行失敗,無法偵測 GPU${NC}"
    echo "將安裝 CPU 版本"
    return 0
  fi
  echo -e "${GREEN}✓ 偵測到 NVIDIA GPU${NC}"
  printf '%s\n' "$gpu_info"

  # Read the CUDA version from the plain nvidia-smi banner.
  smi_banner=$(nvidia-smi) || smi_banner=""
  if [[ "$smi_banner" =~ $version_re ]]; then
    CUDA_VERSION=${BASH_REMATCH[1]}
    CUDA_MAJOR=${BASH_REMATCH[2]}
    # Not used for the selection below; kept because it was a global before.
    CUDA_MINOR=${BASH_REMATCH[4]}
  fi

  if [ -z "$CUDA_VERSION" ]; then
    echo -e "${YELLOW}⚠ 無法獲取 CUDA 版本${NC}"
    echo "將安裝 CPU 版本"
    return 0
  fi
  echo -e "${GREEN}✓ CUDA 版本: $CUDA_VERSION${NC}"

  # Map the CUDA major version to one of the PaddlePaddle wheel indexes.
  # NOTE(review): the minor version is ignored, so any 12.x selects cu123.
  if [ "$CUDA_MAJOR" -ge 13 ]; then
    cuda_series=13 cuda_compat=12.6 index_tag=cu126
  elif [ "$CUDA_MAJOR" -eq 12 ]; then
    cuda_series=12 cuda_compat=12.3 index_tag=cu123
  elif [ "$CUDA_MAJOR" -eq 11 ]; then
    cuda_series=11 cuda_compat=11.8 index_tag=cu118
  else
    echo -e "${YELLOW}⚠ CUDA 版本不支援 ($CUDA_VERSION)${NC}"
    echo "將安裝 CPU 版本"
    return 0
  fi

  echo "將安裝 PaddlePaddle GPU 版本 (CUDA ${cuda_series}.x)"
  echo "使用穩定版本 3.0.0 (兼容 CUDA ${cuda_compat}+)"
  USE_GPU=true
  PADDLE_PACKAGE="paddlepaddle-gpu==3.0.0"
  PADDLE_INDEX="https://www.paddlepaddle.org.cn/packages/stable/${index_tag}/"
  return 0
}
|
||
|
||
# Run GPU detection; it sets USE_GPU, PADDLE_PACKAGE and PADDLE_INDEX.
detect_gpu

echo ""
echo -e "${YELLOW}[7/9] 安裝 Python 依賴...${NC}"
source venv/bin/activate
pip install --upgrade pip setuptools wheel

# Install PaddlePaddle first, so the build chosen by detect_gpu is the one
# that ends up in the virtual environment.
echo ""
echo -e "${YELLOW}安裝 PaddlePaddle...${NC}"
if [ "$USE_GPU" = true ]; then
  echo "安裝 GPU 加速版本: $PADDLE_PACKAGE"
  if [ -n "$PADDLE_INDEX" ]; then
    echo "使用官方索引: $PADDLE_INDEX"
    pip install "$PADDLE_PACKAGE" -i "$PADDLE_INDEX"
  else
    pip install "$PADDLE_PACKAGE"
  fi
else
  echo "安裝 CPU 版本..."
  pip install "${PADDLE_PACKAGE:-paddlepaddle}"
fi

# Install the remaining dependencies.
echo ""
echo -e "${YELLOW}安裝其他 Python 依賴...${NC}"
if [ "$USE_GPU" = true ]; then
  # In GPU mode, skip any paddlepaddle / paddlepaddle-gpu requirement line so
  # that pip does not install another Paddle wheel over the GPU build chosen
  # above. The filtered copy is created next to requirements.txt so relative
  # references inside it keep resolving.
  filtered_requirements=$(mktemp ./requirements.no-paddle.XXXXXX)
  # grep exits 1 when it selects no lines; only a status above 1 is an error.
  grep -Eiv '^[[:space:]]*paddlepaddle(-gpu)?([^[:alnum:]._-]|$)' requirements.txt \
    > "$filtered_requirements" || [ "$?" -eq 1 ]
  install_status=0
  pip install -r "$filtered_requirements" || install_status=$?
  rm -f -- "$filtered_requirements"
  [ "$install_status" -eq 0 ] || exit "$install_status"
else
  pip install -r requirements.txt
fi
|
||
|
||
echo ""
echo -e "${YELLOW}測試關鍵套件...${NC}"

# Run one import statement with the active Python and print a ✓ line on
# success or a ✗ line on failure. A failed import does not stop the script.
probe_import() {
  local statement=$1
  local label=$2
  python -c "${statement}; print('✓ ${label}')" || echo "✗ ${label} failed"
}

probe_import "import magic" "python-magic"
probe_import "from weasyprint import HTML" "WeasyPrint"
probe_import "import cv2" "OpenCV"

# Report the installed PaddlePaddle version and whether it can see a GPU.
# Best effort: if the Python snippet itself fails, only a warning is printed.
echo ""
echo -e "${YELLOW}驗證 PaddlePaddle 設置...${NC}"
python -c "
import paddle
print('✓ PaddlePaddle 版本:', paddle.__version__)
try:
    if not paddle.is_compiled_with_cuda():
        print('ℹ GPU 加速: 未啟用 (CPU 模式)')
    else:
        gpu_count = paddle.device.cuda.device_count()
        if gpu_count > 0:
            print('✓ GPU 加速: 已啟用')
            print('✓ GPU 數量:', gpu_count)
            for i in range(gpu_count):
                gpu_name = paddle.device.cuda.get_device_properties(i).name
                print(f' GPU {i}: {gpu_name}')
        else:
            print('ℹ GPU 加速: CUDA 已編譯但無可用 GPU')
except Exception as e:
    print('⚠ GPU 檢測失敗:', str(e))
    print('ℹ 將使用 CPU 模式')
" || echo "⚠ PaddlePaddle 驗證失敗,但可繼續使用"
|
||
|
||
echo ""
echo -e "${YELLOW}[8/9] 安裝前端依賴...${NC}"

# The npm work runs in a subshell, so the working directory of the main
# script is unchanged afterwards. `set -e` still applies inside the subshell,
# and a failure there also stops the outer script.
(
  cd frontend

  # Remove a previous install together with its lock file.
  # NOTE(review): deleting package-lock.json discards pinned versions; this
  # mirrors the existing behaviour and is left as is.
  if [ -d "node_modules" ]; then
    echo "清理現有 node_modules..."
    rm -rf node_modules package-lock.json
  fi

  # Clear the npm cache.
  npm cache clean --force

  # Install dependencies (--force is used to avoid lock problems).
  echo "安裝前端依賴..."
  npm install --force
)
|
||
|
||
echo ""
echo -e "${YELLOW}[9/9] 創建必要的目錄...${NC}"

# Directories created under backend/; `mkdir -p` also creates the parents and
# is a no-op for directories that already exist.
runtime_dirs=(
  backend/uploads/temp
  backend/uploads/processed
  backend/uploads/images
  backend/storage/markdown
  backend/storage/json
  backend/storage/exports
  backend/models/paddleocr
  backend/logs
)
mkdir -p "${runtime_dirs[@]}"
|
||
|
||
echo ""
# Closing banner, printed in green.
for banner_line in "================================" "環境設置完成!" "================================"; do
  echo -e "${GREEN}${banner_line}${NC}"
done
echo ""

# Report which PaddlePaddle flavour was selected (USE_GPU is set earlier).
echo "系統配置:"
if [ "$USE_GPU" != true ]; then
  echo -e " GPU 加速: ${YELLOW}未啟用 (CPU 模式)${NC}"
  echo " PaddlePaddle: CPU 版本"
else
  echo -e " GPU 加速: ${GREEN}已啟用${NC}"
  echo " PaddlePaddle: GPU 版本"
fi

# Next-step instructions, printed verbatim (quoted delimiter: no expansion).
cat <<'EOF'

下一步操作:
1. 初始化數據庫:
 source venv/bin/activate
 cd backend
 alembic upgrade head
 python create_test_user.py
 cd ..

2. 啟動後端:
 ./start_backend.sh

3. 啟動前端 (新終端):
 ./start_frontend.sh

4. 訪問應用:
 前端: http://localhost:5173
 API文檔: http://localhost:8000/docs
 健康檢查: http://localhost:8000/health

EOF
|