Files
OCR/setup_dev_env.sh
egg b048f2d640 fix: disable chart recognition due to PaddlePaddle 3.0.0 API limitation
PaddleOCR-VL chart recognition model requires `fused_rms_norm_ext` API
which is not available in PaddlePaddle 3.0.0 stable release.

Changes:
- Set use_chart_recognition=False in PP-StructureV3 initialization
- Remove unsupported show_log parameter from PaddleOCR 3.x API calls
- Document known limitation in openspec proposal
- Add limitation documentation to README
- Update tasks.md with documentation task for known issues

Impact:
- Layout analysis still detects/extracts charts as images ✓
- Tables, formulas, and text recognition work normally ✓
- Deep chart understanding (type detection, data extraction) disabled ✗
- Chart to structured data conversion disabled ✗

Workaround: Charts saved as image files for manual review

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-14 13:16:17 +08:00

279 lines
8.0 KiB
Bash
Executable File
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/bin/bash
# Tool_OCR development-environment setup script for WSL Ubuntu.
set -e # stop at the first command that fails

# ANSI colour escapes. They are stored as literal backslash sequences, so
# they only take effect when printed with `echo -e`.
NC='\033[0m' # No Color (reset)
RED='\033[0;31m'
YELLOW='\033[1;33m'
GREEN='\033[0;32m'

# Opening banner, followed by one blank line.
printf '%s\n' \
  "================================" \
  "Tool_OCR 開發環境設置" \
  "================================" \
  ""

# Refuse to run as root: the script calls sudo itself for the steps that
# need elevated privileges.
if [[ "$EUID" -eq 0 ]]; then
  echo -e "${RED}請不要使用 sudo 運行此腳本${NC}"
  echo "腳本會在需要時提示輸入 sudo 密碼"
  exit 1
fi
# ---------------------------------------------------------------------------
# Steps 1-5 of 9: system packages, Node.js (through nvm) and the Python venv.
# The script has nine steps in total, so the counters below read N/9.
# apt-get is used instead of apt because apt's command-line interface is not
# guaranteed to stay stable for use in scripts.
# ---------------------------------------------------------------------------
echo -e "${YELLOW}[1/9] 更新系統套件列表...${NC}"
sudo apt-get update
echo ""

echo -e "${YELLOW}[2/9] 安裝 Python 開發工具...${NC}"
sudo apt-get install -y \
  python3-pip \
  python3-venv \
  python3-dev \
  build-essential \
  pkg-config
echo ""

echo -e "${YELLOW}[3/9] 安裝系統層級依賴...${NC}"
sudo apt-get install -y \
  pandoc \
  libmagic1 \
  libmagic-dev \
  fonts-noto-cjk \
  fonts-noto-cjk-extra \
  fonts-liberation \
  libpango-1.0-0 \
  libpangocairo-1.0-0 \
  libcairo2 \
  libcairo2-dev \
  libgdk-pixbuf2.0-0 \
  libgdk-pixbuf-2.0-dev \
  libffi-dev \
  libffi8 \
  shared-mime-info \
  poppler-utils \
  libgl1 \
  libglib2.0-0 \
  libglib2.0-dev \
  libgomp1 \
  libjpeg-dev \
  libpng-dev \
  libtiff-dev \
  libopencv-dev \
  libsqlite3-dev \
  libreoffice-core-nogui \
  libreoffice-writer-nogui \
  libreoffice-impress-nogui \
  ca-certificates \
  curl \
  wget \
  libxml2 \
  libxslt1-dev \
  python3-cffi
echo ""

echo -e "${YELLOW}[4/9] 安裝 Node.js 和 npm...${NC}"
# Install nvm only when it is not already present.
if [ ! -d "$HOME/.nvm" ]; then
  echo "安裝 nvm..."
  # The script runs without pipefail, so a plain `curl | bash` would succeed
  # even if the download failed. Run the pipeline in a subshell with pipefail
  # and make curl fail on HTTP errors (-f) so that case aborts the setup.
  (
    set -o pipefail
    curl -fsSL https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.0/install.sh | bash
  )
else
  echo "nvm 已安裝"
fi
# Load nvm into this shell. NVM_DIR is exported only after the installer has
# run, exactly as before, so the installer still picks its own default.
export NVM_DIR="$HOME/.nvm"
[ -s "$NVM_DIR/nvm.sh" ] && \. "$NVM_DIR/nvm.sh"

# Install and select the current Node.js LTS release.
echo "安裝 Node.js LTS..."
nvm install --lts
nvm use --lts
echo ""

echo -e "${YELLOW}[5/9] 創建 Python 虛擬環境...${NC}"
# Key on the activate script rather than the directory: a later step sources
# venv/bin/activate, so a half-created venv directory must be (re)built.
if [ ! -f "venv/bin/activate" ]; then
  python3 -m venv venv
  echo "虛擬環境已創建"
else
  echo "虛擬環境已存在"
fi
echo ""
echo -e "${YELLOW}[6/9] 偵測 GPU 和 CUDA 支援...${NC}"
#######################################
# Detect an NVIDIA GPU and choose the matching PaddlePaddle build.
# Globals written:
#   USE_GPU        - "true" when a GPU wheel was selected, else "false"
#   PADDLE_PACKAGE - pip requirement to install
#   PADDLE_INDEX   - wheel index URL for GPU builds, empty for the CPU build
#   CUDA_VERSION   - version parsed from the nvidia-smi banner (may be empty)
#   CUDA_MAJOR     - major component of CUDA_VERSION (may be empty)
# Globals read: GREEN, YELLOW, NC (colour escapes)
# Outputs: progress messages on stdout
# Returns: 0 - every detection failure falls back to the CPU build
#######################################
detect_gpu() {
  local cuda_series min_cuda wheel_tag

  # Start from the CPU defaults so every early return leaves a consistent
  # state and no stale PADDLE_INDEX survives from the environment.
  USE_GPU=false
  PADDLE_PACKAGE="paddlepaddle"
  PADDLE_INDEX=""
  CUDA_VERSION=""
  CUDA_MAJOR=""

  # No nvidia-smi on PATH: treat the machine as CPU-only.
  if ! command -v nvidia-smi &> /dev/null; then
    echo -e "${YELLOW} 未偵測到 NVIDIA GPU 或 nvidia-smi${NC}"
    echo "將安裝 CPU 版本的 PaddlePaddle"
    return 0
  fi

  echo -e "${GREEN}✓ 偵測到 NVIDIA GPU${NC}"
  # nvidia-smi can be installed and still fail (for example when no driver is
  # reachable). The caller runs under `set -e`, so tolerate the failure here
  # and let the version check below fall back to the CPU build.
  nvidia-smi --query-gpu=name,memory.total --format=csv,noheader || true

  # Read the CUDA version from the banner by pattern instead of by field
  # position, and accept only a numeric value.
  CUDA_VERSION=$(nvidia-smi \
    | sed -n 's/.*CUDA Version: *\([0-9][0-9.]*\).*/\1/p' \
    | head -n 1) || true

  if [ -z "$CUDA_VERSION" ]; then
    echo -e "${YELLOW}⚠ 無法獲取 CUDA 版本${NC}"
    echo "將安裝 CPU 版本"
    return 0
  fi

  echo -e "${GREEN}✓ CUDA 版本: $CUDA_VERSION${NC}"
  CUDA_MAJOR=${CUDA_VERSION%%.*}

  # Map the CUDA major version to the PaddlePaddle 3.0.0 wheel index.
  if [ "$CUDA_MAJOR" -ge 13 ]; then
    cuda_series="13.x"
    min_cuda="12.6"
    wheel_tag="cu126"
  elif [ "$CUDA_MAJOR" -eq 12 ]; then
    cuda_series="12.x"
    min_cuda="12.3"
    wheel_tag="cu123"
  elif [ "$CUDA_MAJOR" -eq 11 ]; then
    cuda_series="11.x"
    min_cuda="11.8"
    wheel_tag="cu118"
  else
    echo -e "${YELLOW}⚠ CUDA 版本不支援 ($CUDA_VERSION)${NC}"
    echo "將安裝 CPU 版本"
    return 0
  fi

  echo "將安裝 PaddlePaddle GPU 版本 (CUDA ${cuda_series})"
  echo "使用穩定版本 3.0.0 (兼容 CUDA ${min_cuda}+)"
  USE_GPU=true
  PADDLE_PACKAGE="paddlepaddle-gpu==3.0.0"
  PADDLE_INDEX="https://www.paddlepaddle.org.cn/packages/stable/${wheel_tag}/"
}
# Run GPU detection; it sets USE_GPU, PADDLE_PACKAGE and, for GPU builds,
# PADDLE_INDEX.
detect_gpu
echo ""
echo -e "${YELLOW}[7/9] 安裝 Python 依賴...${NC}"
source venv/bin/activate
pip install --upgrade pip setuptools wheel

# Install PaddlePaddle first so the build chosen by detect_gpu is in place
# before the remaining requirements are resolved.
echo ""
echo -e "${YELLOW}安裝 PaddlePaddle...${NC}"
if [ "$USE_GPU" = true ]; then
  echo "安裝 GPU 加速版本: $PADDLE_PACKAGE"
  if [ -n "$PADDLE_INDEX" ]; then
    echo "使用官方索引: $PADDLE_INDEX"
    pip install "$PADDLE_PACKAGE" -i "$PADDLE_INDEX"
  else
    pip install "$PADDLE_PACKAGE"
  fi
else
  echo "安裝 CPU 版本..."
  pip install paddlepaddle
fi

# Install the remaining dependencies. The original comment states that any
# paddlepaddle entry in requirements.txt is to be skipped; that is done here
# for GPU installs, where such an entry would pull the CPU wheel on top of
# the GPU build. CPU installs keep the file untouched so a version pin in it
# still applies.
# NOTE(review): requirements.txt is not visible here - confirm whether it
# lists paddlepaddle at all.
echo ""
echo -e "${YELLOW}安裝其他 Python 依賴...${NC}"
if [ "$USE_GPU" = true ]; then
  # Keep the filtered copy next to requirements.txt so relative references
  # inside the file keep resolving from the same directory.
  filtered_requirements=$(mktemp ./requirements.nopaddle.XXXXXX)

  # grep exits 1 when it outputs no lines (fine) and >1 on a real error.
  grep_status=0
  grep -viE '^[[:space:]]*paddlepaddle([-_.]gpu)?([^A-Za-z0-9._-]|$)' \
    requirements.txt > "$filtered_requirements" || grep_status=$?
  if [ "$grep_status" -gt 1 ]; then
    rm -f -- "$filtered_requirements"
    exit "$grep_status"
  fi

  # Capture pip's status so the temporary file is removed on failure too.
  pip_status=0
  pip install -r "$filtered_requirements" || pip_status=$?
  rm -f -- "$filtered_requirements"
  [ "$pip_status" -eq 0 ] || exit "$pip_status"
else
  pip install -r requirements.txt
fi
echo ""
echo -e "${YELLOW}測試關鍵套件...${NC}"
# Import smoke tests. A failed import only prints a marker; it does not stop
# the setup.
python -c "import magic; print('✓ python-magic')" || echo "✗ python-magic failed"
python -c "from weasyprint import HTML; print('✓ WeasyPrint')" || echo "✗ WeasyPrint failed"
python -c "import cv2; print('✓ OpenCV')" || echo "✗ OpenCV failed"

# Report the installed PaddlePaddle version and whether it can see a GPU.
# The embedded Python must keep its block indentation; without it the
# snippet fails to compile and the fallback message is always printed.
echo ""
echo -e "${YELLOW}驗證 PaddlePaddle 設置...${NC}"
python -c "
import paddle
print('✓ PaddlePaddle 版本:', paddle.__version__)
try:
    if paddle.is_compiled_with_cuda():
        gpu_count = paddle.device.cuda.device_count()
        if gpu_count > 0:
            print('✓ GPU 加速: 已啟用')
            print('✓ GPU 數量:', gpu_count)
            for i in range(gpu_count):
                gpu_name = paddle.device.cuda.get_device_properties(i).name
                print(f' GPU {i}: {gpu_name}')
        else:
            print(' GPU 加速: CUDA 已編譯但無可用 GPU')
    else:
        print(' GPU 加速: 未啟用 (CPU 模式)')
except Exception as e:
    print('⚠ GPU 檢測失敗:', str(e))
    print(' 將使用 CPU 模式')
" || echo "⚠ PaddlePaddle 驗證失敗,但可繼續使用"
echo ""
echo -e "${YELLOW}[8/9] 安裝前端依賴...${NC}"
# Do the frontend work in a subshell: the directory change stays local to
# it, and because the script runs under `set -e` a failure inside still
# aborts the whole setup.
(
  cd frontend

  # Drop a previous install together with its lock file.
  if [[ -d node_modules ]]; then
    echo "清理現有 node_modules..."
    rm -rf node_modules package-lock.json
  fi

  # Empty the npm cache before installing.
  npm cache clean --force

  # Install with --force, as the original does to get past lock problems.
  echo "安裝前端依賴..."
  npm install --force
)
echo ""
echo -e "${YELLOW}[9/9] 創建必要的目錄...${NC}"
# Create every runtime directory the backend expects; existing ones are
# left as they are.
for runtime_dir in \
  backend/uploads/temp backend/uploads/processed backend/uploads/images \
  backend/storage/markdown backend/storage/json backend/storage/exports \
  backend/models/paddleocr \
  backend/logs; do
  mkdir -p "$runtime_dir"
done

# Completion banner, printed in green.
echo ""
for banner_line in \
  "================================" \
  "環境設置完成!" \
  "================================"; do
  echo -e "${GREEN}${banner_line}${NC}"
done
echo ""

# Summarise which PaddlePaddle build was selected earlier in the script.
echo "系統配置:"
if [ "$USE_GPU" = true ]; then
  gpu_status="${GREEN}已啟用${NC}"
  paddle_build="GPU 版本"
else
  gpu_status="${YELLOW}未啟用 (CPU 模式)${NC}"
  paddle_build="CPU 版本"
fi
echo -e " GPU 加速: ${gpu_status}"
echo " PaddlePaddle: ${paddle_build}"

# Next-step instructions, emitted verbatim (quoted delimiter: no expansion).
cat <<'NEXT_STEPS'

下一步操作:
1. 初始化數據庫:
 source venv/bin/activate
 cd backend
 alembic upgrade head
 python create_test_user.py
 cd ..

2. 啟動後端:
 ./start_backend.sh

3. 啟動前端 (新終端):
 ./start_frontend.sh

4. 訪問應用:
 前端: http://localhost:5173
 API文檔: http://localhost:8000/docs
 健康檢查: http://localhost:8000/health

NEXT_STEPS