feat: migrate to WSL Ubuntu native development environment
從 Docker/macOS+Conda 部署遷移到 WSL2 Ubuntu 原生開發環境 主要變更: - 移除所有 Docker 相關配置檔案 (Dockerfile, docker-compose.yml, .dockerignore 等) - 移除 macOS/Conda 設置腳本 (SETUP.md, setup_conda.sh) - 新增 WSL Ubuntu 自動化環境設置腳本 (setup_dev_env.sh) - 新增後端/前端快速啟動腳本 (start_backend.sh, start_frontend.sh) - 統一開發端口配置 (backend: 8000, frontend: 5173) - 改進資料庫連接穩定性(連接池、超時設置、重試機制) - 更新專案文檔以反映當前 WSL 開發環境 Technical improvements: - Database connection pooling with health checks and auto-reconnection - Retry logic for long-running OCR tasks to prevent DB timeouts - Extended JWT token expiration to 24 hours - Support for Office documents (pptx, docx) via LibreOffice headless - Comprehensive system dependency installation in single script Environment: - OS: WSL2 Ubuntu 24.04 - Python: 3.12 (venv) - Node.js: 24.x LTS (nvm) - Backend Port: 8000 - Frontend Port: 5173 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -53,7 +53,13 @@
|
||||
"Bash(done)",
|
||||
"Bash(git add:*)",
|
||||
"Bash(git commit:*)",
|
||||
"Bash(git push)"
|
||||
"Bash(git push)",
|
||||
"Bash(docker --version:*)",
|
||||
"Bash(dpkg:*)",
|
||||
"Bash(pip3:*)",
|
||||
"Bash(chmod:*)",
|
||||
"Bash(sudo apt install:*)",
|
||||
"Bash(/usr/bin/soffice:*)"
|
||||
],
|
||||
"deny": [],
|
||||
"ask": []
|
||||
|
||||
@@ -1,87 +0,0 @@
|
||||
# Git
|
||||
.git
|
||||
.gitignore
|
||||
.gitattributes
|
||||
|
||||
# Python
|
||||
__pycache__
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
*.so
|
||||
.Python
|
||||
env/
|
||||
venv/
|
||||
ENV/
|
||||
*.egg-info/
|
||||
dist/
|
||||
build/
|
||||
*.egg
|
||||
.pytest_cache/
|
||||
.coverage
|
||||
htmlcov/
|
||||
|
||||
# Node
|
||||
node_modules/
|
||||
npm-debug.log*
|
||||
yarn-debug.log*
|
||||
yarn-error.log*
|
||||
.npm
|
||||
.yarn
|
||||
|
||||
# IDE
|
||||
.vscode/
|
||||
.idea/
|
||||
*.swp
|
||||
*.swo
|
||||
*~
|
||||
.DS_Store
|
||||
|
||||
# Environment files
|
||||
.env
|
||||
.env.local
|
||||
.env.*.local
|
||||
|
||||
# Logs
|
||||
logs/
|
||||
*.log
|
||||
|
||||
# Data directories (will be mounted as volumes)
|
||||
data/
|
||||
uploads/
|
||||
storage/
|
||||
models/
|
||||
|
||||
# Backend specific
|
||||
backend/uploads/
|
||||
backend/storage/
|
||||
backend/models/
|
||||
backend/logs/
|
||||
backend/__pycache__/
|
||||
backend/*.egg-info/
|
||||
|
||||
# Frontend specific
|
||||
frontend/node_modules/
|
||||
frontend/dist/
|
||||
frontend/.vite/
|
||||
frontend/.cache/
|
||||
|
||||
# Documentation (not needed in container)
|
||||
*.md
|
||||
!README.md
|
||||
docs/
|
||||
demo_docs/
|
||||
|
||||
# Claude and OpenSpec
|
||||
.claude/
|
||||
openspec/
|
||||
|
||||
# OS
|
||||
Thumbs.db
|
||||
Desktop.ini
|
||||
|
||||
# Temporary files
|
||||
*.tmp
|
||||
*.bak
|
||||
*.swp
|
||||
temp/
|
||||
tmp/
|
||||
82
.env.docker
82
.env.docker
@@ -1,82 +0,0 @@
|
||||
# Tool_OCR - Docker Environment Configuration
|
||||
# Copy this file to .env when deploying with Docker
|
||||
|
||||
# ===== Database Configuration =====
|
||||
MYSQL_HOST=mysql.theaken.com
|
||||
MYSQL_PORT=33306
|
||||
MYSQL_USER=A060
|
||||
MYSQL_PASSWORD=WLeSCi0yhtc7
|
||||
MYSQL_DATABASE=db_A060
|
||||
|
||||
# ===== Application Configuration =====
|
||||
# External port (exposed to host)
|
||||
FRONTEND_PORT=12015
|
||||
|
||||
# Security (IMPORTANT: Change SECRET_KEY in production!)
|
||||
SECRET_KEY=your-secret-key-here-please-change-this-to-random-string
|
||||
ALGORITHM=HS256
|
||||
ACCESS_TOKEN_EXPIRE_MINUTES=1440
|
||||
|
||||
# ===== OCR Configuration =====
|
||||
# PaddleOCR model directory (inside container)
|
||||
PADDLEOCR_MODEL_DIR=/app/backend/models/paddleocr
|
||||
# Supported languages (comma-separated)
|
||||
OCR_LANGUAGES=ch,en,japan,korean
|
||||
# Default confidence threshold
|
||||
OCR_CONFIDENCE_THRESHOLD=0.5
|
||||
# Maximum concurrent OCR workers
|
||||
MAX_OCR_WORKERS=4
|
||||
|
||||
# ===== File Upload Configuration =====
|
||||
# Maximum file size in bytes (50MB default)
|
||||
MAX_UPLOAD_SIZE=52428800
|
||||
# Allowed file extensions (comma-separated)
|
||||
ALLOWED_EXTENSIONS=png,jpg,jpeg,pdf,bmp,tiff,doc,docx,ppt,pptx
|
||||
# Upload directories (inside container)
|
||||
UPLOAD_DIR=/app/backend/uploads
|
||||
TEMP_DIR=/app/backend/uploads/temp
|
||||
PROCESSED_DIR=/app/backend/uploads/processed
|
||||
IMAGES_DIR=/app/backend/uploads/images
|
||||
|
||||
# ===== Export Configuration =====
|
||||
# Storage directories (inside container)
|
||||
STORAGE_DIR=/app/backend/storage
|
||||
MARKDOWN_DIR=/app/backend/storage/markdown
|
||||
JSON_DIR=/app/backend/storage/json
|
||||
EXPORTS_DIR=/app/backend/storage/exports
|
||||
|
||||
# ===== PDF Generation Configuration =====
|
||||
# Pandoc path (inside container)
|
||||
PANDOC_PATH=/usr/bin/pandoc
|
||||
# Font directory (inside container)
|
||||
FONT_DIR=/usr/share/fonts
|
||||
# Default PDF page size
|
||||
PDF_PAGE_SIZE=A4
|
||||
# Default PDF margins (mm)
|
||||
PDF_MARGIN_TOP=20
|
||||
PDF_MARGIN_BOTTOM=20
|
||||
PDF_MARGIN_LEFT=20
|
||||
PDF_MARGIN_RIGHT=20
|
||||
|
||||
# ===== Translation Configuration (Reserved) =====
|
||||
# Enable translation feature (reserved for future)
|
||||
ENABLE_TRANSLATION=false
|
||||
# Translation engine: offline (argostranslate) or api (future)
|
||||
TRANSLATION_ENGINE=offline
|
||||
# Argostranslate models directory (inside container)
|
||||
ARGOSTRANSLATE_MODELS_DIR=/app/backend/models/argostranslate
|
||||
|
||||
# ===== Background Tasks Configuration =====
|
||||
# Task queue type: memory (default) or redis (future)
|
||||
TASK_QUEUE_TYPE=memory
|
||||
# Redis URL (if using redis)
|
||||
# REDIS_URL=redis://localhost:6379/0
|
||||
|
||||
# ===== CORS Configuration =====
|
||||
# Allowed origins (comma-separated, * for all)
|
||||
# For Docker, use the external URL
|
||||
CORS_ORIGINS=http://localhost:12015,http://127.0.0.1:12015
|
||||
|
||||
# ===== Logging Configuration =====
|
||||
LOG_LEVEL=INFO
|
||||
LOG_FILE=/app/backend/logs/app.log
|
||||
@@ -1,524 +0,0 @@
|
||||
# Tool_OCR Docker 部署指南
|
||||
|
||||
## 架構說明
|
||||
|
||||
Tool_OCR 使用統一容器架構,將前端和後端封裝在同一個容器中:
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────┐
|
||||
│ Container (tool_ocr) │
|
||||
│ │
|
||||
│ ┌──────────────────────────────┐ │
|
||||
│ │ Nginx :12010 (External) │ │
|
||||
│ │ - Frontend Static Files │ │
|
||||
│ │ - Reverse Proxy for API │ │
|
||||
│ └─────────┬────────────────────┘ │
|
||||
│ │ proxy_pass │
|
||||
│ ▼ │
|
||||
│ ┌──────────────────────────────┐ │
|
||||
│ │ FastAPI :8000 (Internal) │ │
|
||||
│ │ - OCR Processing │ │
|
||||
│ │ - File Management │ │
|
||||
│ │ - Export Services │ │
|
||||
│ └──────────────────────────────┘ │
|
||||
│ │
|
||||
│ Supervisor manages both services │
|
||||
└─────────────────────────────────────┘
|
||||
│
|
||||
│ Port 12010 only!
|
||||
▼
|
||||
External Access
|
||||
```
|
||||
|
||||
### 優勢
|
||||
|
||||
1. **單一端口**: 只需要暴露一個端口 (12010)
|
||||
2. **簡化部署**: 一個容器包含完整應用
|
||||
3. **統一管理**: Supervisor 管理所有服務
|
||||
4. **生產就緒**: Nginx 提供高性能靜態文件服務和反向代理
|
||||
|
||||
## 快速開始
|
||||
|
||||
### 前置要求
|
||||
|
||||
- Docker Engine 20.10+
|
||||
- Docker Compose 2.0+
|
||||
- 至少 4GB 可用內存
|
||||
- 至少 10GB 可用磁碟空間
|
||||
|
||||
### 1. 準備環境配置
|
||||
|
||||
**複製環境配置範本:**
|
||||
|
||||
Linux/Mac:
|
||||
```bash
|
||||
cp .env.docker .env
|
||||
```
|
||||
|
||||
Windows (PowerShell):
|
||||
```powershell
|
||||
Copy-Item .env.docker .env
|
||||
```
|
||||
|
||||
**編輯 `.env` 文件,至少修改以下重要配置:**
|
||||
|
||||
```bash
|
||||
# 修改為安全的密鑰
|
||||
SECRET_KEY=your-very-secure-random-key-here
|
||||
|
||||
# 根據需要調整端口
|
||||
FRONTEND_PORT=12010
|
||||
|
||||
# 根據實際情況配置 CORS
|
||||
CORS_ORIGINS=http://your-domain.com:12010,http://localhost:12010
|
||||
```
|
||||
|
||||
### 2. 創建數據目錄
|
||||
|
||||
Linux/Mac:
|
||||
```bash
|
||||
mkdir -p data/{uploads,storage,models,logs}
|
||||
```
|
||||
|
||||
Windows (PowerShell):
|
||||
```powershell
|
||||
mkdir -p data/uploads, data/storage, data/models, data/logs
|
||||
```
|
||||
|
||||
或使用跨平台命令:
|
||||
```bash
|
||||
mkdir -p data/uploads data/storage data/models data/logs
|
||||
```
|
||||
|
||||
### 3. 構建並啟動容器
|
||||
|
||||
```bash
|
||||
# 構建映像
|
||||
docker compose build
|
||||
|
||||
# 啟動服務
|
||||
docker compose up -d
|
||||
|
||||
# 查看日誌
|
||||
docker compose logs -f
|
||||
```
|
||||
|
||||
> 注意:舊版本 Docker 使用 `docker-compose`(帶連字符),新版本使用 `docker compose`(無連字符)。兩者都支持。
|
||||
|
||||
### 4. 驗證部署
|
||||
|
||||
Linux/Mac:
|
||||
```bash
|
||||
# 檢查健康狀態
|
||||
curl http://localhost:12010/health
|
||||
|
||||
# 訪問 API 文檔
|
||||
open http://localhost:12010/docs
|
||||
|
||||
# 訪問前端界面
|
||||
open http://localhost:12010
|
||||
```
|
||||
|
||||
Windows (PowerShell):
|
||||
```powershell
|
||||
# 檢查健康狀態
|
||||
curl http://localhost:12010/health
|
||||
|
||||
# 在瀏覽器中打開
|
||||
Start-Process "http://localhost:12010"
|
||||
Start-Process "http://localhost:12010/docs"
|
||||
```
|
||||
|
||||
## 管理命令
|
||||
|
||||
> 提示:以下命令在 Windows、Linux 和 Mac 上通用。如果您使用舊版 Docker,將 `docker compose` 替換為 `docker-compose`。
|
||||
|
||||
### 查看狀態
|
||||
|
||||
```bash
|
||||
# 查看容器狀態
|
||||
docker compose ps
|
||||
|
||||
# 查看實時日誌
|
||||
docker compose logs -f
|
||||
|
||||
# 查看特定服務日誌
|
||||
docker compose exec tool_ocr tail -f /var/log/nginx/tool_ocr_access.log
|
||||
docker compose exec tool_ocr tail -f /app/backend/logs/app.log
|
||||
```
|
||||
|
||||
### 重啟服務
|
||||
|
||||
```bash
|
||||
# 重啟容器
|
||||
docker compose restart
|
||||
|
||||
# 重啟 Nginx (容器內)
|
||||
docker compose exec tool_ocr supervisorctl restart nginx
|
||||
|
||||
# 重啟 Backend (容器內)
|
||||
docker compose exec tool_ocr supervisorctl restart backend
|
||||
```
|
||||
|
||||
### 停止和清理
|
||||
|
||||
```bash
|
||||
# 停止服務
|
||||
docker compose stop
|
||||
|
||||
# 停止並移除容器
|
||||
docker compose down
|
||||
|
||||
# 完全清理(包括數據卷)⚠️ 慎用
|
||||
docker compose down -v
|
||||
```
|
||||
|
||||
### 進入容器調試
|
||||
|
||||
```bash
|
||||
# 進入容器 shell
|
||||
docker compose exec tool_ocr bash
|
||||
|
||||
# 查看 Supervisor 狀態
|
||||
docker compose exec tool_ocr supervisorctl status
|
||||
|
||||
# 查看進程
|
||||
docker compose exec tool_ocr ps aux
|
||||
```
|
||||
|
||||
## 數據持久化
|
||||
|
||||
以下目錄會持久化到主機的 `./data/` 目錄:
|
||||
|
||||
| 容器內路徑 | 主機路徑 | 說明 |
|
||||
|-----------|---------|------|
|
||||
| `/app/backend/uploads` | `./data/uploads` | 上傳文件 |
|
||||
| `/app/backend/storage` | `./data/storage` | 處理結果 |
|
||||
| `/app/backend/models` | `./data/models` | OCR 模型 |
|
||||
| `/app/backend/logs` | `./data/logs` | 應用日誌 |
|
||||
|
||||
### 備份數據
|
||||
|
||||
Linux/Mac:
|
||||
```bash
|
||||
# 備份所有數據
|
||||
tar -czf tool_ocr_backup_$(date +%Y%m%d).tar.gz data/
|
||||
|
||||
# 只備份重要數據
|
||||
tar -czf tool_ocr_data_$(date +%Y%m%d).tar.gz data/uploads data/storage
|
||||
```
|
||||
|
||||
Windows (PowerShell):
|
||||
```powershell
|
||||
# 備份所有數據(使用 PowerShell 內建的 Compress-Archive;亦可改用 7-Zip)
|
||||
$date = Get-Date -Format "yyyyMMdd"
|
||||
Compress-Archive -Path data -DestinationPath "tool_ocr_backup_$date.zip"
|
||||
|
||||
# 只備份重要數據
|
||||
Compress-Archive -Path data/uploads, data/storage -DestinationPath "tool_ocr_data_$date.zip"
|
||||
```
|
||||
|
||||
### 恢復數據
|
||||
|
||||
Linux/Mac:
|
||||
```bash
|
||||
# 停止容器
|
||||
docker compose stop
|
||||
|
||||
# 恢復數據
|
||||
tar -xzf tool_ocr_backup_20250113.tar.gz
|
||||
|
||||
# 啟動容器
|
||||
docker compose up -d
|
||||
```
|
||||
|
||||
Windows (PowerShell):
|
||||
```powershell
|
||||
# 停止容器
|
||||
docker compose stop
|
||||
|
||||
# 恢復數據
|
||||
Expand-Archive -Path tool_ocr_backup_20250113.zip -DestinationPath . -Force
|
||||
|
||||
# 啟動容器
|
||||
docker compose up -d
|
||||
```
|
||||
|
||||
## 1Panel 部署指南
|
||||
|
||||
### 1. 準備項目文件
|
||||
|
||||
在 1Panel 的應用目錄中創建項目:
|
||||
|
||||
```bash
|
||||
cd /opt/1panel/apps
|
||||
mkdir -p tool_ocr
|
||||
cd tool_ocr
|
||||
|
||||
# 上傳項目文件
|
||||
# - Dockerfile
|
||||
# - docker-compose.yml
|
||||
# - docker/ 目錄
|
||||
# - backend/ 目錄
|
||||
# - frontend/ 目錄
|
||||
# - requirements.txt
|
||||
# - .env
|
||||
```
|
||||
|
||||
### 2. 在 1Panel 中創建應用
|
||||
|
||||
1. 登入 1Panel 管理面板
|
||||
2. 進入「應用商店」→「自定義應用」
|
||||
3. 選擇「Docker Compose」
|
||||
4. 上傳或粘貼 `docker-compose.yml` 內容
|
||||
5. 配置環境變量
|
||||
6. 點擊「創建」
|
||||
|
||||
### 3. 配置反向代理(可選)
|
||||
|
||||
如果需要通過域名訪問:
|
||||
|
||||
1. 在 1Panel 中創建網站
|
||||
2. 配置反向代理:
|
||||
- 目標地址: `http://127.0.0.1:12010`
|
||||
- 啟用 WebSocket 支援(如需要)
|
||||
|
||||
### 4. 配置 SSL 證書(可選)
|
||||
|
||||
1. 在 1Panel 網站設置中
|
||||
2. 申請或上傳 SSL 證書
|
||||
3. 啟用 HTTPS
|
||||
|
||||
## 更新部署
|
||||
|
||||
### 更新代碼
|
||||
|
||||
```bash
|
||||
# 停止容器
|
||||
docker compose stop
|
||||
|
||||
# 拉取最新代碼
|
||||
git pull
|
||||
|
||||
# 重新構建映像
|
||||
docker compose build --no-cache
|
||||
|
||||
# 啟動容器
|
||||
docker compose up -d
|
||||
|
||||
# 查看日誌確認啟動成功
|
||||
docker compose logs -f
|
||||
```
|
||||
|
||||
### 數據庫遷移
|
||||
|
||||
如果有數據庫結構變更:
|
||||
|
||||
```bash
|
||||
# 進入容器
|
||||
docker compose exec tool_ocr bash
|
||||
|
||||
# 運行遷移
|
||||
cd /app/backend
|
||||
alembic upgrade head
|
||||
|
||||
# 退出容器
|
||||
exit
|
||||
```
|
||||
|
||||
## 故障排除
|
||||
|
||||
### 1. 容器無法啟動
|
||||
|
||||
```bash
|
||||
# 查看詳細錯誤
|
||||
docker compose logs
|
||||
```
|
||||
|
||||
檢查端口占用:
|
||||
|
||||
Linux/Mac:
|
||||
```bash
|
||||
netstat -tuln | grep 12010
|
||||
# 或
|
||||
lsof -i :12010
|
||||
```
|
||||
|
||||
Windows (PowerShell):
|
||||
```powershell
|
||||
netstat -ano | findstr 12010
|
||||
# 或
|
||||
Get-NetTCPConnection -LocalPort 12010
|
||||
```
|
||||
|
||||
檢查磁碟空間:
|
||||
|
||||
Linux/Mac:
|
||||
```bash
|
||||
df -h
|
||||
```
|
||||
|
||||
Windows (PowerShell):
|
||||
```powershell
|
||||
Get-PSDrive
|
||||
```
|
||||
|
||||
### 2. Nginx 無法啟動
|
||||
|
||||
```bash
|
||||
# 檢查 Nginx 配置語法
|
||||
docker compose exec tool_ocr nginx -t
|
||||
|
||||
# 查看 Nginx 錯誤日誌
|
||||
docker compose exec tool_ocr cat /var/log/nginx/error.log
|
||||
```
|
||||
|
||||
### 3. Backend API 無法訪問
|
||||
|
||||
```bash
|
||||
# 檢查 Backend 是否運行
|
||||
docker compose exec tool_ocr supervisorctl status backend
|
||||
|
||||
# 查看 Backend 日誌
|
||||
docker compose exec tool_ocr cat /app/backend/logs/app.log
|
||||
|
||||
# 重啟 Backend
|
||||
docker compose exec tool_ocr supervisorctl restart backend
|
||||
```
|
||||
|
||||
### 4. 數據庫連接失敗
|
||||
|
||||
```bash
|
||||
# 測試數據庫連接
|
||||
docker compose exec tool_ocr python -c "
|
||||
from app.core.database import engine
|
||||
try:
|
||||
with engine.connect() as conn:
|
||||
print('Database connection successful!')
|
||||
except Exception as e:
|
||||
print(f'Database connection failed: {e}')
|
||||
"
|
||||
```
|
||||
|
||||
### 5. OCR 處理失敗
|
||||
|
||||
```bash
|
||||
# 檢查 PaddleOCR 模型
|
||||
docker compose exec tool_ocr ls -la /app/backend/models/paddleocr/
|
||||
|
||||
# 測試 OCR 功能
|
||||
docker compose exec tool_ocr python -c "
|
||||
from paddleocr import PaddleOCR
|
||||
ocr = PaddleOCR(lang='ch')
|
||||
print('PaddleOCR initialized successfully!')
|
||||
"
|
||||
```
|
||||
|
||||
### 6. 前端頁面無法訪問
|
||||
|
||||
```bash
|
||||
# 檢查前端文件是否存在
|
||||
docker compose exec tool_ocr ls -la /app/frontend/dist/
|
||||
|
||||
# 檢查 Nginx 配置
|
||||
docker compose exec tool_ocr cat /etc/nginx/conf.d/default.conf
|
||||
```
|
||||
|
||||
## 性能優化
|
||||
|
||||
### 1. 調整 OCR 工作進程數
|
||||
|
||||
根據 CPU 核心數調整:
|
||||
|
||||
```bash
|
||||
# 在 .env 中設置
|
||||
MAX_OCR_WORKERS=8 # 建議設置為 CPU 核心數
|
||||
```
|
||||
|
||||
### 2. 調整 Nginx Worker 進程數
|
||||
|
||||
編輯 `docker/nginx.conf`:
|
||||
|
||||
```nginx
|
||||
worker_processes auto; # 自動根據 CPU 核心數
|
||||
```
|
||||
|
||||
### 3. 優化 Upload 大小限制
|
||||
|
||||
根據實際需求調整:
|
||||
|
||||
```bash
|
||||
# 在 .env 中設置(以字節為單位)
|
||||
MAX_UPLOAD_SIZE=104857600 # 100MB
|
||||
```
|
||||
|
||||
同時修改 `docker/nginx.conf`:
|
||||
|
||||
```nginx
|
||||
client_max_body_size 100M;
|
||||
```
|
||||
|
||||
## 監控和日誌
|
||||
|
||||
### 日誌位置
|
||||
|
||||
| 服務 | 容器內路徑 | 主機路徑 |
|
||||
|------|-----------|---------|
|
||||
| Nginx Access | `/var/log/nginx/tool_ocr_access.log` | - |
|
||||
| Nginx Error | `/var/log/nginx/tool_ocr_error.log` | - |
|
||||
| Backend | `/app/backend/logs/app.log` | `./data/logs/app.log` |
|
||||
| Supervisor | `/var/log/supervisor/supervisord.log` | - |
|
||||
|
||||
### 日誌輪轉
|
||||
|
||||
建議配置日誌輪轉以防止日誌文件過大:
|
||||
|
||||
```bash
|
||||
# 創建 logrotate 配置(主機上)
|
||||
cat > /etc/logrotate.d/tool_ocr << 'EOF'
|
||||
/path/to/tool_ocr/data/logs/*.log {
|
||||
daily
|
||||
rotate 7
|
||||
compress
|
||||
delaycompress
|
||||
notifempty
|
||||
create 0644 root root
|
||||
}
|
||||
EOF
|
||||
```
|
||||
|
||||
## 安全建議
|
||||
|
||||
1. **修改默認密鑰**: 務必修改 `.env` 中的 `SECRET_KEY`
|
||||
2. **使用 HTTPS**: 在生產環境中啟用 SSL/TLS
|
||||
3. **限制 CORS**: 只允許可信的來源
|
||||
4. **定期更新**: 及時更新 Docker 映像和依賴
|
||||
5. **備份數據**: 定期備份重要數據
|
||||
6. **監控日誌**: 定期檢查日誌中的異常活動
|
||||
|
||||
## 常見問題
|
||||
|
||||
### Q: 如何修改對外端口?
|
||||
|
||||
A: 修改 `.env` 中的 `FRONTEND_PORT` 和 `docker-compose.yml` 中的端口映射。
|
||||
|
||||
### Q: 如何增加上傳文件大小限制?
|
||||
|
||||
A: 修改 `.env` 中的 `MAX_UPLOAD_SIZE` 和 `docker/nginx.conf` 中的 `client_max_body_size`。
|
||||
|
||||
### Q: 如何連接外部 MySQL 數據庫?
|
||||
|
||||
A: 在 `.env` 中配置正確的數據庫連接信息。
|
||||
|
||||
### Q: 如何查看詳細的錯誤信息?
|
||||
|
||||
A: 設置 `.env` 中的 `LOG_LEVEL=DEBUG` 並重啟容器。
|
||||
|
||||
## 聯繫支援
|
||||
|
||||
如果遇到問題,請:
|
||||
|
||||
1. 查看日誌: `docker compose logs -f`(舊版 Docker 請使用 `docker-compose logs -f`)
|
||||
2. 檢查配置: 確認 `.env` 文件正確
|
||||
3. 查看文檔: 參考本文檔的故障排除部分
|
||||
4. 提交 Issue: 在項目倉庫提交問題報告
|
||||
131
Dockerfile
131
Dockerfile
@@ -1,131 +0,0 @@
|
||||
# ============================================
|
||||
# Tool_OCR - Unified Docker Image
|
||||
# Frontend (React + Vite) + Backend (FastAPI)
|
||||
# Served by Nginx with reverse proxy
|
||||
# ============================================
|
||||
|
||||
# ============================================
|
||||
# Stage 1: Build Frontend
|
||||
# ============================================
|
||||
FROM node:20-alpine AS frontend-builder
|
||||
|
||||
WORKDIR /app/frontend
|
||||
|
||||
# Copy package files
|
||||
COPY frontend/package*.json ./
|
||||
|
||||
# Install all dependencies (including devDependencies for build)
|
||||
RUN npm ci
|
||||
|
||||
# Copy frontend source
|
||||
COPY frontend/ ./
|
||||
|
||||
# Create production environment file
|
||||
RUN echo "VITE_API_BASE_URL=" > .env.production
|
||||
|
||||
# Build frontend for production
|
||||
RUN npm run build
|
||||
|
||||
|
||||
# ============================================
|
||||
# Stage 2: Build Backend + Final Image
|
||||
# ============================================
|
||||
FROM python:3.10-slim-bookworm
|
||||
|
||||
# Set working directory
|
||||
WORKDIR /app
|
||||
|
||||
# Set environment variables
|
||||
ENV PYTHONUNBUFFERED=1 \
|
||||
PYTHONDONTWRITEBYTECODE=1 \
|
||||
PIP_NO_CACHE_DIR=1 \
|
||||
PIP_DISABLE_PIP_VERSION_CHECK=1 \
|
||||
DEBIAN_FRONTEND=noninteractive
|
||||
|
||||
# Install system dependencies
|
||||
# - nginx: web server and reverse proxy
|
||||
# - supervisor: process manager for nginx + uvicorn
|
||||
# - curl: for health checks
|
||||
# - pandoc: for markdown to PDF conversion
|
||||
# - poppler-utils: for pdf2image (PDF processing)
|
||||
# - libpango-1.0-0, libpangocairo-1.0-0: for WeasyPrint
|
||||
# - libgdk-pixbuf2.0-0: for WeasyPrint image handling
|
||||
# - libffi-dev: for cryptography
|
||||
# - fonts-noto-cjk, fonts-noto-cjk-extra: Chinese/Japanese/Korean font support
|
||||
# - libgomp1, libgl1-mesa-glx, libglib2.0-0: for OpenCV and PaddleOCR
|
||||
# - libmagic1: for python-magic file type detection
|
||||
# - libreoffice-writer, libreoffice-impress: for Office document conversion (doc/docx/ppt/pptx)
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
nginx \
|
||||
supervisor \
|
||||
curl \
|
||||
pandoc \
|
||||
poppler-utils \
|
||||
libpango-1.0-0 \
|
||||
libpangocairo-1.0-0 \
|
||||
libgdk-pixbuf2.0-0 \
|
||||
libffi-dev \
|
||||
fonts-noto-cjk \
|
||||
fonts-noto-cjk-extra \
|
||||
libgomp1 \
|
||||
libgl1-mesa-glx \
|
||||
libglib2.0-0 \
|
||||
libmagic1 \
|
||||
libreoffice-writer \
|
||||
libreoffice-impress \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Copy Python requirements
|
||||
COPY requirements.txt .
|
||||
|
||||
# Install Python dependencies with extended timeout
|
||||
# PaddlePaddle is 189MB and may take time to download
|
||||
# Timeout: 600 seconds (10 minutes), Retries: 5
|
||||
RUN pip install --timeout 600 --retries 5 -r requirements.txt
|
||||
|
||||
# Copy backend application
|
||||
COPY backend/ ./backend/
|
||||
|
||||
# Copy frontend build from frontend-builder stage
|
||||
COPY --from=frontend-builder /app/frontend/dist /app/frontend/dist
|
||||
|
||||
# Copy Nginx configuration
|
||||
COPY docker/nginx.conf /etc/nginx/nginx.conf
|
||||
COPY docker/default.conf /etc/nginx/conf.d/default.conf
|
||||
|
||||
# Copy supervisor configuration
|
||||
COPY docker/supervisord.conf /etc/supervisor/conf.d/supervisord.conf
|
||||
|
||||
# Copy startup script and fix line endings (Windows CRLF -> Linux LF)
|
||||
COPY docker/entrypoint.sh /entrypoint.sh
|
||||
RUN sed -i 's/\r$//' /entrypoint.sh && chmod +x /entrypoint.sh
|
||||
|
||||
# Create necessary directories with proper permissions
|
||||
RUN mkdir -p \
|
||||
/app/backend/uploads/temp \
|
||||
/app/backend/uploads/processed \
|
||||
/app/backend/uploads/images \
|
||||
/app/backend/storage/markdown \
|
||||
/app/backend/storage/json \
|
||||
/app/backend/storage/exports \
|
||||
/app/backend/models/paddleocr \
|
||||
/app/backend/logs \
|
||||
/var/log/supervisor \
|
||||
/var/log/nginx \
|
||||
/var/cache/nginx \
|
||||
/var/run \
|
||||
&& chmod -R 755 /app \
|
||||
&& chown -R www-data:www-data /var/log/nginx /var/cache/nginx
|
||||
|
||||
# Expose port (only one port needed!)
|
||||
EXPOSE 12015
|
||||
|
||||
# Health check
|
||||
HEALTHCHECK --interval=30s --timeout=10s --start-period=40s --retries=3 \
|
||||
CMD curl -f http://localhost:12015/health || exit 1
|
||||
|
||||
# Set working directory to backend for Python app
|
||||
WORKDIR /app/backend
|
||||
|
||||
# Use entrypoint script to start supervisor
|
||||
ENTRYPOINT ["/entrypoint.sh"]
|
||||
272
README.md
272
README.md
@@ -11,6 +11,7 @@ A web-based solution to extract text, images, and document structure from multip
|
||||
- 🖼️ **Image Extraction**: Preserve document images alongside text content
|
||||
- 📑 **Batch Processing**: Process multiple files concurrently with progress tracking
|
||||
- 📤 **Multiple Export Formats**: TXT, JSON, Excel, Markdown with images, searchable PDF
|
||||
- 📋 **Office Documents**: DOC, DOCX, PPT, PPTX support via LibreOffice conversion
|
||||
- 🔧 **Flexible Configuration**: Rule-based output formatting
|
||||
- 🌐 **Translation Ready**: Reserved architecture for future translation features
|
||||
|
||||
@@ -22,173 +23,176 @@ A web-based solution to extract text, images, and document structure from multip
|
||||
- **Database**: MySQL via SQLAlchemy
|
||||
- **PDF Generation**: Pandoc + WeasyPrint
|
||||
- **Image Processing**: OpenCV, Pillow, pdf2image
|
||||
- **Office Conversion**: LibreOffice (headless mode)
|
||||
|
||||
### Frontend
|
||||
- **Framework**: React 18 with Vite
|
||||
- **Styling**: TailwindCSS + shadcn/ui
|
||||
- **HTTP Client**: Axios with React Query
|
||||
- **Framework**: React 19 with TypeScript
|
||||
- **Build Tool**: Vite 7
|
||||
- **Styling**: Tailwind CSS v4 + shadcn/ui
|
||||
- **State Management**: React Query + Zustand
|
||||
- **HTTP Client**: Axios
|
||||
|
||||
## Prerequisites
|
||||
|
||||
- **macOS**: Apple Silicon (M1/M2/M3) or Intel
|
||||
- **Python**: 3.10+
|
||||
- **Conda**: Miniconda or Anaconda (will be installed automatically)
|
||||
- **Homebrew**: For system dependencies
|
||||
- **OS**: WSL2 Ubuntu 24.04
|
||||
- **Python**: 3.12+
|
||||
- **Node.js**: 24.x LTS
|
||||
- **MySQL**: External database server (provided)
|
||||
|
||||
## Installation
|
||||
## Quick Start
|
||||
|
||||
### 1. Automated Setup (Recommended)
|
||||
|
||||
```bash
|
||||
# Clone the repository
|
||||
cd /Users/egg/Projects/Tool_OCR
|
||||
|
||||
# Run automated setup script
|
||||
chmod +x setup_conda.sh
|
||||
./setup_conda.sh
|
||||
|
||||
# If Conda was just installed, reload your shell
|
||||
source ~/.zshrc # or source ~/.bash_profile
|
||||
|
||||
# Run the script again to create environment
|
||||
./setup_conda.sh
|
||||
./setup_dev_env.sh
|
||||
```
|
||||
|
||||
### 2. Install Dependencies
|
||||
This script automatically installs:
|
||||
- Python development tools (pip, venv, build-essential)
|
||||
- System dependencies (pandoc, LibreOffice, fonts, etc.)
|
||||
- Node.js (via nvm)
|
||||
- Python packages
|
||||
- Frontend dependencies
|
||||
|
||||
### 2. Initialize Database
|
||||
|
||||
```bash
|
||||
# Activate Conda environment
|
||||
conda activate tool_ocr
|
||||
|
||||
# Install Python dependencies
|
||||
pip install -r requirements.txt
|
||||
|
||||
# Install system dependencies (Pandoc for PDF generation)
|
||||
brew install pandoc
|
||||
|
||||
# Install Chinese fonts for PDF generation (optional)
|
||||
brew install --cask font-noto-sans-cjk
|
||||
# Note: macOS built-in fonts work fine, this is optional
|
||||
```
|
||||
|
||||
### 3. Download PaddleOCR Models
|
||||
|
||||
```bash
|
||||
# Create models directory
|
||||
mkdir -p models/paddleocr
|
||||
|
||||
# Models will be automatically downloaded on first run
|
||||
# (~900MB total, includes PaddleOCR-VL 0.9B model)
|
||||
```
|
||||
|
||||
### 4. Configure Environment
|
||||
|
||||
```bash
|
||||
# Copy environment template
|
||||
cp .env.example .env
|
||||
|
||||
# Edit .env with your settings
|
||||
# Database credentials are pre-configured
|
||||
nano .env
|
||||
```
|
||||
|
||||
### 5. Initialize Database
|
||||
|
||||
```bash
|
||||
# Database schema will be created automatically on first run
|
||||
# Using: mysql.theaken.com:33306/db_A060
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
### Start Backend Server
|
||||
|
||||
```bash
|
||||
# Activate environment
|
||||
conda activate tool_ocr
|
||||
|
||||
# Start FastAPI server
|
||||
source venv/bin/activate
|
||||
cd backend
|
||||
python -m app.main
|
||||
|
||||
# Server runs at: http://localhost:12010
|
||||
# API docs: http://localhost:12010/docs
|
||||
alembic upgrade head
|
||||
python create_test_user.py
|
||||
cd ..
|
||||
```
|
||||
|
||||
### Start Frontend (Coming Soon)
|
||||
Default test user:
|
||||
- Username: `admin`
|
||||
- Password: `admin123`
|
||||
|
||||
### 3. Start Development Servers
|
||||
|
||||
**Backend (Terminal 1):**
|
||||
```bash
|
||||
# Install frontend dependencies
|
||||
cd frontend
|
||||
npm install
|
||||
|
||||
# Start development server
|
||||
npm run dev
|
||||
|
||||
# Frontend runs at: http://localhost:12011
|
||||
./start_backend.sh
|
||||
```
|
||||
|
||||
**Frontend (Terminal 2):**
|
||||
```bash
|
||||
./start_frontend.sh
|
||||
```
|
||||
|
||||
### 4. Access Application
|
||||
|
||||
- **Frontend**: http://localhost:5173
|
||||
- **API Docs**: http://localhost:8000/docs
|
||||
- **Health Check**: http://localhost:8000/health
|
||||
|
||||
## Project Structure
|
||||
|
||||
```
|
||||
Tool_OCR/
|
||||
├── backend/
|
||||
├── backend/ # FastAPI backend
|
||||
│ ├── app/
|
||||
│ │ ├── api/v1/ # API endpoints
|
||||
│ │ ├── core/ # Configuration, database
|
||||
│ │ ├── models/ # Database models
|
||||
│ │ ├── services/ # Business logic
|
||||
│ │ ├── utils/ # Utilities
|
||||
│ │ └── main.py # Application entry point
|
||||
│ ├── alembic/ # Database migrations
|
||||
│ └── tests/ # Test suite
|
||||
├── frontend/
|
||||
│ └── src/ # React application
|
||||
├── uploads/
|
||||
│ ├── temp/ # Temporary uploads
|
||||
│ ├── processed/ # Processed files
|
||||
│ └── images/ # Extracted images
|
||||
├── storage/
|
||||
│ ├── markdown/ # Markdown outputs
|
||||
│ ├── json/ # JSON results
|
||||
│ └── exports/ # Export files
|
||||
├── models/
|
||||
│ └── paddleocr/ # PaddleOCR models
|
||||
├── config/ # Configuration files
|
||||
├── templates/ # PDF templates
|
||||
├── logs/ # Application logs
|
||||
├── requirements.txt # Python dependencies
|
||||
├── setup_conda.sh # Environment setup script
|
||||
├── .env.example # Environment template
|
||||
└── README.md
|
||||
├── frontend/ # React frontend
|
||||
│ ├── src/
|
||||
│ │ ├── components/ # UI components
|
||||
│ │ ├── pages/ # Page components
|
||||
│ │ ├── services/ # API services
|
||||
│ │ └── stores/ # State management
|
||||
│ └── public/ # Static assets
|
||||
├── .env.local # Local development config
|
||||
├── setup_dev_env.sh # Environment setup script
|
||||
├── start_backend.sh # Backend startup script
|
||||
└── start_frontend.sh # Frontend startup script
|
||||
```
|
||||
|
||||
## API Endpoints (Planned)
|
||||
## Configuration
|
||||
|
||||
- `POST /api/v1/ocr/upload` - Upload files for OCR processing
|
||||
- `GET /api/v1/ocr/tasks` - List all OCR tasks
|
||||
- `GET /api/v1/ocr/tasks/{task_id}` - Get task details
|
||||
- `POST /api/v1/ocr/batch` - Create batch processing task
|
||||
- `GET /api/v1/export/{task_id}` - Export results (TXT/JSON/Excel/MD/PDF)
|
||||
- `POST /api/v1/translate/document` - Translate document (reserved, returns 501)
|
||||
Main config file: `.env.local`
|
||||
|
||||
```bash
|
||||
# Database
|
||||
MYSQL_HOST=mysql.theaken.com
|
||||
MYSQL_PORT=33306
|
||||
|
||||
# Application ports
|
||||
BACKEND_PORT=8000
|
||||
FRONTEND_PORT=5173
|
||||
|
||||
# Token expiration (minutes)
|
||||
ACCESS_TOKEN_EXPIRE_MINUTES=1440 # 24 hours
|
||||
|
||||
# Supported file formats
|
||||
ALLOWED_EXTENSIONS=png,jpg,jpeg,pdf,bmp,tiff,doc,docx,ppt,pptx
|
||||
|
||||
# OCR settings
|
||||
OCR_LANGUAGES=ch,en,japan,korean
|
||||
MAX_OCR_WORKERS=4
|
||||
```
|
||||
|
||||
## API Endpoints
|
||||
|
||||
### Authentication
|
||||
- `POST /api/v1/auth/login` - User login
|
||||
|
||||
### File Management
|
||||
- `POST /api/v1/upload` - Upload files
|
||||
- `POST /api/v1/ocr/process` - Start OCR processing
|
||||
- `GET /api/v1/batch/{id}/status` - Get batch status
|
||||
|
||||
### Results & Export
|
||||
- `GET /api/v1/ocr/result/{id}` - Get OCR result
|
||||
- `GET /api/v1/export/pdf/{id}` - Export as PDF
|
||||
|
||||
Full API documentation: http://localhost:8000/docs
|
||||
|
||||
## Supported File Formats
|
||||
|
||||
- **Images**: PNG, JPG, JPEG, BMP, TIFF
|
||||
- **Documents**: PDF
|
||||
- **Office**: DOC, DOCX, PPT, PPTX
|
||||
|
||||
Office files are automatically converted to PDF before OCR processing.
|
||||
|
||||
## Development
|
||||
|
||||
### Run Tests
|
||||
### Backend
|
||||
|
||||
```bash
|
||||
source venv/bin/activate
|
||||
cd backend
|
||||
pytest tests/ -v --cov=app
|
||||
|
||||
# Run tests
|
||||
pytest
|
||||
|
||||
# Database migration
|
||||
alembic revision --autogenerate -m "description"
|
||||
alembic upgrade head
|
||||
|
||||
# Code formatting
|
||||
black app/
|
||||
```
|
||||
|
||||
### Code Quality
|
||||
### Frontend
|
||||
|
||||
```bash
|
||||
# Format code
|
||||
black app/
|
||||
cd frontend
|
||||
|
||||
# Development server
|
||||
npm run dev
|
||||
|
||||
# Build for production
|
||||
npm run build
|
||||
|
||||
# Lint code
|
||||
pylint app/
|
||||
npm run lint
|
||||
```
|
||||
|
||||
## OpenSpec Workflow
|
||||
@@ -208,26 +212,26 @@ cat openspec/changes/add-ocr-batch-processing/tasks.md
|
||||
|
||||
## Roadmap
|
||||
|
||||
- [x] **Phase 0**: Environment setup and configuration
|
||||
- [ ] **Phase 1**: Core OCR with structure extraction
|
||||
- [ ] **Phase 2**: Frontend development
|
||||
- [x] **Phase 0**: Environment setup
|
||||
- [x] **Phase 1**: Core OCR backend (~98% complete)
|
||||
- [x] **Phase 2**: Frontend development (~92% complete)
|
||||
- [ ] **Phase 3**: Testing & optimization
|
||||
- [ ] **Phase 4**: Deployment
|
||||
- [ ] **Phase 4**: Deployment automation
|
||||
- [ ] **Phase 5**: Translation feature (future)
|
||||
|
||||
## Documentation
|
||||
|
||||
- Development specs: [openspec/project.md](openspec/project.md)
|
||||
- Implementation status: [openspec/changes/add-ocr-batch-processing/STATUS.md](openspec/changes/add-ocr-batch-processing/STATUS.md)
|
||||
- Agent instructions: [openspec/AGENTS.md](openspec/AGENTS.md)
|
||||
|
||||
## License
|
||||
|
||||
[To be determined]
|
||||
Internal project use
|
||||
|
||||
## Contributors
|
||||
## Notes
|
||||
|
||||
- Development environment: macOS Apple Silicon
|
||||
- Database: MySQL external server
|
||||
- OCR Engine: PaddleOCR-VL 0.9B with PP-StructureV3
|
||||
|
||||
## Support
|
||||
|
||||
For issues and questions, refer to:
|
||||
- OpenSpec documentation: `openspec/AGENTS.md`
|
||||
- Task breakdown: `openspec/changes/add-ocr-batch-processing/tasks.md`
|
||||
- Specifications: `openspec/changes/add-ocr-batch-processing/specs/`
|
||||
- First OCR run will download PaddleOCR models (~900MB)
|
||||
- Token expiration is set to 24 hours by default
|
||||
- Office conversion requires LibreOffice (installed via setup script)
|
||||
- Development environment: WSL2 Ubuntu 24.04 with Python venv
|
||||
|
||||
395
SETUP.md
395
SETUP.md
@@ -1,395 +0,0 @@
|
||||
# Tool_OCR Setup Guide
|
||||
|
||||
Complete setup instructions for macOS environment.
|
||||
|
||||
## Prerequisites Check
|
||||
|
||||
Before starting, verify you have:
|
||||
- ✅ macOS (Apple Silicon or Intel)
|
||||
- ✅ Terminal access (zsh or bash)
|
||||
- ✅ Internet connection for downloads
|
||||
|
||||
## Step-by-Step Setup
|
||||
|
||||
### Step 1: Install Conda Environment
|
||||
|
||||
Run the automated setup script:
|
||||
|
||||
```bash
|
||||
chmod +x setup_conda.sh
|
||||
./setup_conda.sh
|
||||
```
|
||||
|
||||
**Expected output:**
|
||||
- If Conda is not installed: Downloads and installs Miniconda for Apple Silicon
|
||||
- If Conda is already installed: Creates `tool_ocr` environment with Python 3.10
|
||||
|
||||
**If Conda was just installed:**
|
||||
```bash
|
||||
# Reload your shell to activate Conda
|
||||
source ~/.zshrc # if using zsh (default on macOS)
|
||||
source ~/.bashrc # if using bash
|
||||
|
||||
# Run setup script again to create environment
|
||||
./setup_conda.sh
|
||||
```
|
||||
|
||||
### Step 2: Activate Environment
|
||||
|
||||
```bash
|
||||
conda activate tool_ocr
|
||||
```
|
||||
|
||||
You should see `(tool_ocr)` prefix in your terminal prompt.
|
||||
|
||||
### Step 3: Install Python Dependencies
|
||||
|
||||
```bash
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
**This will install:**
|
||||
- FastAPI and Uvicorn (web framework)
|
||||
- PaddleOCR and PaddlePaddle (OCR engine)
|
||||
- Image processing libraries (Pillow, OpenCV, pdf2image)
|
||||
- PDF generation tools (WeasyPrint, Markdown)
|
||||
- Database tools (SQLAlchemy, PyMySQL, Alembic)
|
||||
- Authentication libraries (python-jose, passlib)
|
||||
- Testing tools (pytest, pytest-asyncio)
|
||||
|
||||
**Installation time:** ~5-10 minutes depending on your internet speed
|
||||
|
||||
### Step 4: Install System Dependencies
|
||||
|
||||
```bash
|
||||
# Install libmagic (required for python-magic file type detection)
|
||||
brew install libmagic
|
||||
|
||||
# Install WeasyPrint dependencies (required for PDF generation)
|
||||
brew install pango gdk-pixbuf libffi
|
||||
|
||||
# Install Pandoc (optional - for enhanced PDF generation)
|
||||
brew install pandoc
|
||||
|
||||
# Install Chinese fonts for PDF output (optional - macOS has built-in Chinese fonts)
|
||||
brew install --cask font-noto-sans-cjk
|
||||
# Note: If the above fails, skip it - macOS built-in fonts (PingFang SC, Heiti TC) work fine
|
||||
```
|
||||
|
||||
**If Homebrew is not installed:**
|
||||
```bash
|
||||
/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"
|
||||
```
|
||||
|
||||
### Step 5: Configure Environment Variables
|
||||
|
||||
```bash
|
||||
# Copy template
|
||||
cp .env.example .env
|
||||
|
||||
# Edit with your preferred editor
|
||||
nano .env
|
||||
# or
|
||||
code .env
|
||||
```
|
||||
|
||||
**Important settings to verify in `.env`:**
|
||||
|
||||
```bash
|
||||
# Database (pre-configured, should work as-is)
|
||||
MYSQL_HOST=mysql.theaken.com
|
||||
MYSQL_PORT=33306
|
||||
MYSQL_USER=A060
|
||||
MYSQL_PASSWORD=WLeSCi0yhtc7
|
||||
MYSQL_DATABASE=db_A060
|
||||
|
||||
# Application ports
|
||||
BACKEND_PORT=12010
|
||||
FRONTEND_PORT=12011
|
||||
|
||||
# Security (CHANGE THIS!)
|
||||
SECRET_KEY=your-secret-key-here-please-change-this-to-random-string
|
||||
```
|
||||
|
||||
**Generate a secure SECRET_KEY:**
|
||||
```bash
|
||||
python -c "import secrets; print(secrets.token_urlsafe(32))"
|
||||
```
|
||||
|
||||
Copy the output and paste it as your `SECRET_KEY` value.
|
||||
|
||||
### Step 6: Set Environment Variable for WeasyPrint
|
||||
|
||||
Add to your shell config (`~/.zshrc` or `~/.bash_profile`):
|
||||
|
||||
```bash
|
||||
export DYLD_LIBRARY_PATH="/opt/homebrew/lib:$DYLD_LIBRARY_PATH"
|
||||
```
|
||||
|
||||
Then reload:
|
||||
```bash
|
||||
source ~/.zshrc # or source ~/.bash_profile
|
||||
```
|
||||
|
||||
### Step 7: Run Service Layer Tests
|
||||
|
||||
Verify all services are working:
|
||||
|
||||
```bash
|
||||
cd backend
|
||||
python test_services.py
|
||||
```
|
||||
|
||||
Expected output:
|
||||
```
|
||||
✓ PASS - database
|
||||
✓ PASS - preprocessor
|
||||
✓ PASS - pdf_generator
|
||||
✓ PASS - file_manager
|
||||
Total: 4-5/5 tests passed
|
||||
```
|
||||
|
||||
**Note:** OCR engine test may fail on first run as PaddleOCR downloads models (~900MB). This is normal.
|
||||
|
||||
### Step 8: Create Directory Structure
|
||||
|
||||
The directories should already exist, but verify:
|
||||
|
||||
```bash
|
||||
ls -la
|
||||
```
|
||||
|
||||
You should see:
|
||||
- `backend/` - FastAPI application
|
||||
- `frontend/` - React application (will be populated later)
|
||||
- `uploads/` - File upload storage
|
||||
- `storage/` - Processed results
|
||||
- `models/` - PaddleOCR models (empty until first run)
|
||||
- `logs/` - Application logs
|
||||
|
||||
### Step 8: Start Backend Server
|
||||
|
||||
```bash
|
||||
cd backend
|
||||
python -m app.main
|
||||
```
|
||||
|
||||
**Expected output:**
|
||||
```
|
||||
INFO: Started server process
|
||||
INFO: Waiting for application startup.
|
||||
INFO: Application startup complete.
|
||||
INFO: Uvicorn running on http://0.0.0.0:12010
|
||||
```
|
||||
|
||||
**Test the server:**
|
||||
Open browser and visit:
|
||||
- http://localhost:12010 - API root
|
||||
- http://localhost:12010/docs - Interactive API documentation
|
||||
- http://localhost:12010/health - Health check endpoint
|
||||
|
||||
### Step 9: Download PaddleOCR Models
|
||||
|
||||
On first OCR request, PaddleOCR will automatically download models (~900MB).
|
||||
|
||||
**To pre-download models manually:**
|
||||
|
||||
```bash
|
||||
python -c "
|
||||
from paddleocr import PaddleOCR
|
||||
ocr = PaddleOCR(use_angle_cls=True, lang='ch', use_gpu=False)
|
||||
print('Models downloaded successfully')
|
||||
"
|
||||
```
|
||||
|
||||
This will download:
|
||||
- Detection model: ch_PP-OCRv4_det
|
||||
- Recognition model: ch_PP-OCRv4_rec
|
||||
- Angle classifier: ch_ppocr_mobile_v2.0_cls
|
||||
|
||||
Models are stored in: `./models/paddleocr/`
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Issue: "conda: command not found"
|
||||
|
||||
**Solution:**
|
||||
```bash
|
||||
# Reload shell configuration
|
||||
source ~/.zshrc # or source ~/.bashrc
|
||||
|
||||
# If still not working, manually add Conda to PATH
|
||||
export PATH="$HOME/miniconda3/bin:$PATH"
|
||||
```
|
||||
|
||||
### Issue: PaddlePaddle installation fails
|
||||
|
||||
**Solution:**
|
||||
```bash
|
||||
# For Apple Silicon Macs, ensure you're using ARM version
|
||||
pip uninstall paddlepaddle
|
||||
pip install paddlepaddle --no-cache-dir
|
||||
```
|
||||
|
||||
### Issue: WeasyPrint fails to install
|
||||
|
||||
**Solution:**
|
||||
```bash
|
||||
# Install required system libraries
|
||||
brew install cairo pango gdk-pixbuf libffi
|
||||
pip install --upgrade weasyprint
|
||||
```
|
||||
|
||||
### Issue: Database connection fails
|
||||
|
||||
**Solution:**
|
||||
```bash
|
||||
# Test database connection
|
||||
python -c "
|
||||
import pymysql
|
||||
conn = pymysql.connect(
|
||||
host='mysql.theaken.com',
|
||||
port=33306,
|
||||
user='A060',
|
||||
password='WLeSCi0yhtc7',
|
||||
database='db_A060'
|
||||
)
|
||||
print('Database connection OK')
|
||||
conn.close()
|
||||
"
|
||||
```
|
||||
|
||||
If this fails, verify:
|
||||
- Internet connection is active
|
||||
- Firewall is not blocking port 33306
|
||||
- Database credentials in `.env` are correct
|
||||
|
||||
### Issue: Port 12010 already in use
|
||||
|
||||
**Solution:**
|
||||
```bash
|
||||
# Find what's using the port
|
||||
lsof -i :12010
|
||||
|
||||
# Kill the process or change port in .env
|
||||
# Edit BACKEND_PORT=12011 (or any available port)
|
||||
```
|
||||
|
||||
## Next Steps
|
||||
|
||||
After successful setup:
|
||||
|
||||
1. ✅ Environment is ready
|
||||
2. ✅ Backend server can start
|
||||
3. ✅ Database connection configured
|
||||
|
||||
**Ready to develop:**
|
||||
- Implement database models (`backend/app/models/`)
|
||||
- Create API endpoints (`backend/app/api/v1/`)
|
||||
- Build OCR service (`backend/app/services/ocr_service.py`)
|
||||
- Develop frontend UI (`frontend/src/`)
|
||||
|
||||
**Start with Phase 1 tasks:**
|
||||
Refer to [openspec/changes/add-ocr-batch-processing/tasks.md](openspec/changes/add-ocr-batch-processing/tasks.md) for detailed implementation tasks.
|
||||
|
||||
## Development Workflow
|
||||
|
||||
```bash
|
||||
# Activate environment
|
||||
conda activate tool_ocr
|
||||
|
||||
# Start backend in development mode (auto-reload)
|
||||
cd backend
|
||||
python -m app.main
|
||||
|
||||
bash -c "source ~/.zshrc && conda activate tool_ocr && export DYLD_LIBRARY_PATH=/opt/homebrew/lib:$DYLD_LIBRARY_PATH && python -m app.main"
|
||||
|
||||
# In another terminal, start frontend
|
||||
cd frontend
|
||||
npm run dev
|
||||
|
||||
# Run tests
|
||||
cd backend
|
||||
pytest tests/ -v
|
||||
|
||||
# Check code style
|
||||
black app/
|
||||
pylint app/
|
||||
```
|
||||
|
||||
## Background Services
|
||||
|
||||
### Automatic Cleanup Scheduler
|
||||
|
||||
The application automatically runs a cleanup scheduler that:
|
||||
- **Runs every**: 1 hour (configurable via `BackgroundTaskManager.cleanup_interval`)
|
||||
- **Deletes files older than**: 24 hours (configurable via `BackgroundTaskManager.file_retention_hours`)
|
||||
- **Cleans up**:
|
||||
- Physical files and directories
|
||||
- Database records (results, files, batches)
|
||||
- Expired batches in COMPLETED, FAILED, or PARTIAL status
|
||||
|
||||
The cleanup scheduler starts automatically when the backend application starts and stops gracefully on shutdown.
|
||||
|
||||
**Monitor cleanup activity:**
|
||||
```bash
|
||||
# Watch cleanup logs in real-time
|
||||
tail -f /tmp/tool_ocr_startup.log | grep cleanup
|
||||
|
||||
# Or check application logs
|
||||
tail -f backend/logs/app.log | grep cleanup
|
||||
```
|
||||
|
||||
### Retry Logic
|
||||
|
||||
OCR processing includes automatic retry logic:
|
||||
- **Maximum retries**: 3 attempts (configurable)
|
||||
- **Retry delay**: 5 seconds between attempts (configurable)
|
||||
- **Tracks**: `retry_count` field in database
|
||||
- **Error handling**: Detailed error messages with retry attempt information
|
||||
|
||||
**Configuration** (in [backend/app/services/background_tasks.py](backend/app/services/background_tasks.py)):
|
||||
```python
|
||||
task_manager = BackgroundTaskManager(
|
||||
max_retries=3, # Number of retry attempts
|
||||
retry_delay=5, # Delay between retries (seconds)
|
||||
cleanup_interval=3600, # Cleanup runs every hour
|
||||
file_retention_hours=24 # Keep files for 24 hours
|
||||
)
|
||||
```
|
||||
|
||||
### Background Task Status
|
||||
|
||||
Check if background services are running:
|
||||
```bash
|
||||
# Check health endpoint
|
||||
curl http://localhost:12010/health
|
||||
|
||||
# Check application startup logs for cleanup scheduler
|
||||
grep "cleanup scheduler" /tmp/tool_ocr_startup.log
|
||||
# Expected output: "Started cleanup scheduler for expired files"
|
||||
# Expected output: "Starting cleanup scheduler (interval: 3600s, retention: 24h)"
|
||||
```
|
||||
|
||||
## Deactivate Environment
|
||||
|
||||
When done working:
|
||||
```bash
|
||||
conda deactivate
|
||||
```
|
||||
|
||||
## Environment Management
|
||||
|
||||
```bash
|
||||
# List Conda environments
|
||||
conda env list
|
||||
|
||||
# Remove environment (if needed)
|
||||
conda env remove -n tool_ocr
|
||||
|
||||
# Export environment
|
||||
conda env export > environment.yml
|
||||
|
||||
# Create from exported environment
|
||||
conda env create -f environment.yml
|
||||
```
|
||||
@@ -14,6 +14,13 @@ engine = create_engine(
|
||||
pool_pre_ping=True, # Enable connection health checks
|
||||
pool_size=10,
|
||||
max_overflow=20,
|
||||
pool_recycle=3600, # Recycle connections every hour
|
||||
pool_timeout=30, # Seconds to wait for a free connection from the pool before giving up
|
||||
connect_args={
|
||||
'connect_timeout': 10,
|
||||
'read_timeout': 30,
|
||||
'write_timeout': 30,
|
||||
},
|
||||
echo=False, # Set to True for SQL query logging
|
||||
)
|
||||
|
||||
|
||||
@@ -168,6 +168,16 @@ class BackgroundTaskManager:
|
||||
ocr_file.completed_at = datetime.utcnow()
|
||||
ocr_file.processing_time = (ocr_file.completed_at - ocr_file.started_at).total_seconds()
|
||||
|
||||
# Commit with retry on connection errors
|
||||
try:
|
||||
db.commit()
|
||||
except Exception as commit_error:
|
||||
logger.warning(f"Commit failed, rolling back and retrying: {commit_error}")
|
||||
db.rollback()
|
||||
db.refresh(ocr_file)
|
||||
ocr_file.status = FileStatus.COMPLETED
|
||||
ocr_file.completed_at = datetime.utcnow()
|
||||
ocr_file.processing_time = (ocr_file.completed_at - ocr_file.started_at).total_seconds()
|
||||
db.commit()
|
||||
|
||||
logger.info(f"Successfully processed file {ocr_file.id} ({ocr_file.original_filename})")
|
||||
@@ -175,17 +185,22 @@ class BackgroundTaskManager:
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Attempt {attempt + 1}/{self.max_retries + 1} failed for file {ocr_file.id}: {e}")
|
||||
db.rollback() # Rollback failed transaction
|
||||
|
||||
if attempt < self.max_retries:
|
||||
# Wait before retry
|
||||
time.sleep(self.retry_delay)
|
||||
else:
|
||||
# Final failure
|
||||
try:
|
||||
ocr_file.status = FileStatus.FAILED
|
||||
ocr_file.error_message = f"Failed after {self.max_retries + 1} attempts: {str(e)}"
|
||||
ocr_file.completed_at = datetime.utcnow()
|
||||
ocr_file.retry_count = attempt
|
||||
db.commit()
|
||||
except Exception as final_error:
|
||||
logger.error(f"Failed to update error status: {final_error}")
|
||||
db.rollback()
|
||||
return False
|
||||
|
||||
return False
|
||||
@@ -375,6 +390,16 @@ def process_batch_files_with_retry(
|
||||
batch.status = BatchStatus.FAILED
|
||||
|
||||
batch.completed_at = datetime.utcnow()
|
||||
|
||||
# Commit with retry on connection errors
|
||||
try:
|
||||
db.commit()
|
||||
except Exception as commit_error:
|
||||
logger.warning(f"Batch commit failed, rolling back and retrying: {commit_error}")
|
||||
db.rollback()
|
||||
batch = db.query(OCRBatch).filter(OCRBatch.id == batch_id).first()
|
||||
if batch:
|
||||
batch.completed_at = datetime.utcnow()
|
||||
db.commit()
|
||||
|
||||
logger.info(
|
||||
@@ -384,6 +409,7 @@ def process_batch_files_with_retry(
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Fatal error processing batch {batch_id}: {e}")
|
||||
db.rollback() # Rollback any failed transaction
|
||||
try:
|
||||
batch = db.query(OCRBatch).filter(OCRBatch.id == batch_id).first()
|
||||
if batch:
|
||||
@@ -392,3 +418,4 @@ def process_batch_files_with_retry(
|
||||
db.commit()
|
||||
except Exception as commit_error:
|
||||
logger.error(f"Error updating batch status: {commit_error}")
|
||||
db.rollback()
|
||||
|
||||
@@ -42,14 +42,16 @@ class OfficeConverter:
|
||||
def _verify_libreoffice(self):
|
||||
"""Verify LibreOffice is installed and accessible"""
|
||||
if not Path(self.libreoffice_path).exists():
|
||||
# Try alternative path for Homebrew installation
|
||||
# Try alternative path (system-wide installation)
|
||||
alt_path = shutil.which("soffice")
|
||||
if alt_path:
|
||||
self.libreoffice_path = alt_path
|
||||
logger.info(f"Using LibreOffice at: {alt_path}")
|
||||
else:
|
||||
raise OfficeConverterError(
|
||||
"LibreOffice not found. Please install LibreOffice: brew install libreoffice"
|
||||
"LibreOffice not found. Please install LibreOffice:\n"
|
||||
" Ubuntu/Debian: sudo apt install -y libreoffice-writer libreoffice-impress libreoffice-core-nogui\n"
|
||||
" macOS: brew install libreoffice"
|
||||
)
|
||||
|
||||
def is_office_document(self, file_path: Path) -> bool:
|
||||
|
||||
@@ -1,97 +0,0 @@
|
||||
services:
|
||||
tool_ocr:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: Dockerfile
|
||||
image: tool_ocr:latest
|
||||
container_name: tool_ocr
|
||||
restart: unless-stopped
|
||||
|
||||
ports:
|
||||
- "12015:12015" # Only one port needed!
|
||||
|
||||
environment:
|
||||
# Database Configuration
|
||||
- MYSQL_HOST=${MYSQL_HOST:-mysql.theaken.com}
|
||||
- MYSQL_PORT=${MYSQL_PORT:-33306}
|
||||
- MYSQL_USER=${MYSQL_USER:-A060}
|
||||
- MYSQL_PASSWORD=${MYSQL_PASSWORD:-WLeSCi0yhtc7}
|
||||
- MYSQL_DATABASE=${MYSQL_DATABASE:-db_A060}
|
||||
|
||||
# Application Configuration
|
||||
- BACKEND_PORT=8000 # Internal backend port
|
||||
- FRONTEND_PORT=12015 # External port
|
||||
- SECRET_KEY=${SECRET_KEY:-your-secret-key-here-please-change-this}
|
||||
- ALGORITHM=${ALGORITHM:-HS256}
|
||||
- ACCESS_TOKEN_EXPIRE_MINUTES=${ACCESS_TOKEN_EXPIRE_MINUTES:-1440}
|
||||
|
||||
# OCR Configuration
|
||||
- PADDLEOCR_MODEL_DIR=/app/backend/models/paddleocr
|
||||
- OCR_LANGUAGES=${OCR_LANGUAGES:-ch,en,japan,korean}
|
||||
- OCR_CONFIDENCE_THRESHOLD=${OCR_CONFIDENCE_THRESHOLD:-0.5}
|
||||
- MAX_OCR_WORKERS=${MAX_OCR_WORKERS:-4}
|
||||
|
||||
# File Upload Configuration
|
||||
- MAX_UPLOAD_SIZE=${MAX_UPLOAD_SIZE:-52428800}
|
||||
- ALLOWED_EXTENSIONS=${ALLOWED_EXTENSIONS:-png,jpg,jpeg,pdf,bmp,tiff,doc,docx,ppt,pptx}
|
||||
- UPLOAD_DIR=/app/backend/uploads
|
||||
- TEMP_DIR=/app/backend/uploads/temp
|
||||
- PROCESSED_DIR=/app/backend/uploads/processed
|
||||
- IMAGES_DIR=/app/backend/uploads/images
|
||||
|
||||
# Export Configuration
|
||||
- STORAGE_DIR=/app/backend/storage
|
||||
- MARKDOWN_DIR=/app/backend/storage/markdown
|
||||
- JSON_DIR=/app/backend/storage/json
|
||||
- EXPORTS_DIR=/app/backend/storage/exports
|
||||
|
||||
# PDF Generation Configuration
|
||||
- PANDOC_PATH=/usr/bin/pandoc
|
||||
- FONT_DIR=/usr/share/fonts
|
||||
- PDF_PAGE_SIZE=${PDF_PAGE_SIZE:-A4}
|
||||
- PDF_MARGIN_TOP=${PDF_MARGIN_TOP:-20}
|
||||
- PDF_MARGIN_BOTTOM=${PDF_MARGIN_BOTTOM:-20}
|
||||
- PDF_MARGIN_LEFT=${PDF_MARGIN_LEFT:-20}
|
||||
- PDF_MARGIN_RIGHT=${PDF_MARGIN_RIGHT:-20}
|
||||
|
||||
# Translation Configuration (Reserved)
|
||||
- ENABLE_TRANSLATION=${ENABLE_TRANSLATION:-false}
|
||||
- TRANSLATION_ENGINE=${TRANSLATION_ENGINE:-offline}
|
||||
- ARGOSTRANSLATE_MODELS_DIR=/app/backend/models/argostranslate
|
||||
|
||||
# Background Tasks Configuration
|
||||
- TASK_QUEUE_TYPE=${TASK_QUEUE_TYPE:-memory}
|
||||
|
||||
# CORS Configuration
|
||||
- CORS_ORIGINS=${CORS_ORIGINS:-http://localhost:12015,http://127.0.0.1:12015}
|
||||
|
||||
# Logging Configuration
|
||||
- LOG_LEVEL=${LOG_LEVEL:-INFO}
|
||||
- LOG_FILE=/app/backend/logs/app.log
|
||||
|
||||
volumes:
|
||||
# Persist data directories
|
||||
- ./data/uploads:/app/backend/uploads
|
||||
- ./data/storage:/app/backend/storage
|
||||
- ./data/models:/app/backend/models
|
||||
- ./data/logs:/app/backend/logs
|
||||
|
||||
networks:
|
||||
- tool_ocr_network
|
||||
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://localhost:12010/health"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
start_period: 40s
|
||||
|
||||
networks:
|
||||
tool_ocr_network:
|
||||
driver: bridge
|
||||
|
||||
volumes:
|
||||
uploads:
|
||||
storage:
|
||||
models:
|
||||
logs:
|
||||
@@ -1,89 +0,0 @@
|
||||
# Nginx Site Configuration for Tool_OCR
|
||||
|
||||
upstream backend {
|
||||
server 127.0.0.1:8000;
|
||||
keepalive 32;
|
||||
}
|
||||
|
||||
server {
|
||||
listen 12015;
|
||||
server_name _;
|
||||
|
||||
# Security headers
|
||||
add_header X-Frame-Options "SAMEORIGIN" always;
|
||||
add_header X-Content-Type-Options "nosniff" always;
|
||||
add_header X-XSS-Protection "1; mode=block" always;
|
||||
|
||||
# Root directory for frontend
|
||||
root /app/frontend/dist;
|
||||
index index.html;
|
||||
|
||||
# Logging
|
||||
access_log /var/log/nginx/tool_ocr_access.log;
|
||||
error_log /var/log/nginx/tool_ocr_error.log;
|
||||
|
||||
# Backend API proxy
|
||||
location /api/ {
|
||||
proxy_pass http://backend/api/;
|
||||
proxy_http_version 1.1;
|
||||
|
||||
# Headers
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
proxy_set_header Connection "";
|
||||
|
||||
# Timeouts
|
||||
proxy_connect_timeout 60s;
|
||||
proxy_send_timeout 300s;
|
||||
proxy_read_timeout 300s;
|
||||
|
||||
# Buffering
|
||||
proxy_buffering off;
|
||||
proxy_request_buffering off;
|
||||
}
|
||||
|
||||
# Health check endpoint (backend)
|
||||
location /health {
|
||||
proxy_pass http://backend/health;
|
||||
proxy_http_version 1.1;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header Connection "";
|
||||
}
|
||||
|
||||
# API docs (backend)
|
||||
location /docs {
|
||||
proxy_pass http://backend/docs;
|
||||
proxy_http_version 1.1;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header Connection "";
|
||||
}
|
||||
|
||||
location /openapi.json {
|
||||
proxy_pass http://backend/openapi.json;
|
||||
proxy_http_version 1.1;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header Connection "";
|
||||
}
|
||||
|
||||
# Frontend static files with caching
|
||||
location /assets/ {
|
||||
expires 1y;
|
||||
add_header Cache-Control "public, immutable";
|
||||
}
|
||||
|
||||
# Frontend - React Router support (SPA fallback)
|
||||
location / {
|
||||
try_files $uri $uri/ /index.html;
|
||||
expires -1;
|
||||
add_header Cache-Control "no-store, no-cache, must-revalidate";
|
||||
}
|
||||
|
||||
# Deny access to hidden files
|
||||
location ~ /\. {
|
||||
deny all;
|
||||
access_log off;
|
||||
log_not_found off;
|
||||
}
|
||||
}
|
||||
@@ -1,41 +0,0 @@
|
||||
#!/bin/bash
|
||||
set -e
|
||||
|
||||
echo "========================================"
|
||||
echo "Tool_OCR Container Starting..."
|
||||
echo "========================================"
|
||||
|
||||
# Wait a moment for system to stabilize
|
||||
sleep 2
|
||||
|
||||
# Run database migrations if needed
|
||||
echo "Checking database migrations..."
|
||||
cd /app/backend
|
||||
if [ -f "alembic.ini" ]; then
|
||||
echo "Running Alembic migrations..."
|
||||
alembic upgrade head || echo "Warning: Migration failed or already up to date"
|
||||
fi
|
||||
|
||||
# Create necessary directories if they don't exist
|
||||
echo "Ensuring directories exist..."
|
||||
mkdir -p \
|
||||
/app/backend/uploads/temp \
|
||||
/app/backend/uploads/processed \
|
||||
/app/backend/uploads/images \
|
||||
/app/backend/storage/markdown \
|
||||
/app/backend/storage/json \
|
||||
/app/backend/storage/exports \
|
||||
/app/backend/models/paddleocr \
|
||||
/app/backend/logs
|
||||
|
||||
# Set permissions
|
||||
chmod -R 755 /app/backend/uploads /app/backend/storage /app/backend/logs
|
||||
|
||||
echo "========================================"
|
||||
echo "Starting services with Supervisor..."
|
||||
echo "- Nginx listening on port 12015"
|
||||
echo "- Backend API on internal port 8000"
|
||||
echo "========================================"
|
||||
|
||||
# Start supervisord
|
||||
exec /usr/bin/supervisord -c /etc/supervisor/conf.d/supervisord.conf
|
||||
@@ -1,40 +0,0 @@
|
||||
# Nginx Main Configuration
|
||||
user www-data;
|
||||
worker_processes auto;
|
||||
pid /var/run/nginx.pid;
|
||||
|
||||
events {
|
||||
worker_connections 1024;
|
||||
use epoll;
|
||||
}
|
||||
|
||||
http {
|
||||
# Basic Settings
|
||||
sendfile on;
|
||||
tcp_nopush on;
|
||||
tcp_nodelay on;
|
||||
keepalive_timeout 65;
|
||||
types_hash_max_size 2048;
|
||||
client_max_body_size 50M; # Match MAX_UPLOAD_SIZE in .env
|
||||
|
||||
# MIME Types
|
||||
include /etc/nginx/mime.types;
|
||||
default_type application/octet-stream;
|
||||
|
||||
# Logging
|
||||
access_log /var/log/nginx/access.log;
|
||||
error_log /var/log/nginx/error.log;
|
||||
|
||||
# Gzip Compression
|
||||
gzip on;
|
||||
gzip_vary on;
|
||||
gzip_proxied any;
|
||||
gzip_comp_level 6;
|
||||
gzip_types text/plain text/css text/xml text/javascript
|
||||
application/json application/javascript application/xml+rss
|
||||
application/rss+xml font/truetype font/opentype
|
||||
application/vnd.ms-fontobject image/svg+xml;
|
||||
|
||||
# Include site configurations
|
||||
include /etc/nginx/conf.d/*.conf;
|
||||
}
|
||||
@@ -1,28 +0,0 @@
|
||||
[supervisord]
|
||||
nodaemon=true
|
||||
user=root
|
||||
logfile=/var/log/supervisor/supervisord.log
|
||||
pidfile=/var/run/supervisord.pid
|
||||
loglevel=info
|
||||
|
||||
[program:nginx]
|
||||
command=/usr/sbin/nginx -g "daemon off;"
|
||||
autostart=true
|
||||
autorestart=true
|
||||
priority=10
|
||||
stdout_logfile=/dev/stdout
|
||||
stdout_logfile_maxbytes=0
|
||||
stderr_logfile=/dev/stderr
|
||||
stderr_logfile_maxbytes=0
|
||||
|
||||
[program:backend]
|
||||
command=python -m uvicorn app.main:app --host 127.0.0.1 --port 8000 --log-level info
|
||||
directory=/app/backend
|
||||
autostart=true
|
||||
autorestart=true
|
||||
priority=20
|
||||
stdout_logfile=/dev/stdout
|
||||
stdout_logfile_maxbytes=0
|
||||
stderr_logfile=/dev/stderr
|
||||
stderr_logfile_maxbytes=0
|
||||
environment=PYTHONUNBUFFERED="1"
|
||||
@@ -1 +1 @@
|
||||
VITE_API_BASE_URL=http://localhost:12015
|
||||
VITE_API_BASE_URL=http://localhost:8000
|
||||
|
||||
@@ -1,7 +0,0 @@
|
||||
# Frontend Environment Variables for Docker Deployment
|
||||
# Copy this to frontend/.env.production for Docker builds
|
||||
|
||||
# API Base URL
|
||||
# In Docker environment, use empty string for same-origin requests
|
||||
# Nginx will proxy /api/* to the backend
|
||||
VITE_API_BASE_URL=
|
||||
53
frontend/package-lock.json
generated
53
frontend/package-lock.json
generated
@@ -86,6 +86,7 @@
|
||||
"integrity": "sha512-e7jT4DxYvIDLk1ZHmU/m/mB19rex9sv0c2ftBtjSBv+kVM/902eh0fINUzD7UwLLNR+jU585GxUJ8/EBfAM5fw==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"@babel/code-frame": "^7.27.1",
|
||||
"@babel/generator": "^7.28.5",
|
||||
@@ -1672,9 +1673,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@tanstack/query-core": {
|
||||
"version": "5.90.7",
|
||||
"resolved": "https://registry.npmjs.org/@tanstack/query-core/-/query-core-5.90.7.tgz",
|
||||
"integrity": "sha512-6PN65csiuTNfBMXqQUxQhCNdtm1rV+9kC9YwWAIKcaxAauq3Wu7p18j3gQY3YIBJU70jT/wzCCZ2uqto/vQgiQ==",
|
||||
"version": "5.90.8",
|
||||
"resolved": "https://registry.npmjs.org/@tanstack/query-core/-/query-core-5.90.8.tgz",
|
||||
"integrity": "sha512-4E0RP/0GJCxSNiRF2kAqE/LQkTJVlL/QNU7gIJSptaseV9HP6kOuA+N11y4bZKZxa3QopK3ZuewwutHx6DqDXQ==",
|
||||
"license": "MIT",
|
||||
"funding": {
|
||||
"type": "github",
|
||||
@@ -1682,12 +1683,12 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@tanstack/react-query": {
|
||||
"version": "5.90.7",
|
||||
"resolved": "https://registry.npmjs.org/@tanstack/react-query/-/react-query-5.90.7.tgz",
|
||||
"integrity": "sha512-wAHc/cgKzW7LZNFloThyHnV/AX9gTg3w5yAv0gvQHPZoCnepwqCMtzbuPbb2UvfvO32XZ46e8bPOYbfZhzVnnQ==",
|
||||
"version": "5.90.8",
|
||||
"resolved": "https://registry.npmjs.org/@tanstack/react-query/-/react-query-5.90.8.tgz",
|
||||
"integrity": "sha512-/3b9QGzkf4rE5/miL6tyhldQRlLXzMHcySOm/2Tm2OLEFE9P1ImkH0+OviDBSvyAvtAOJocar5xhd7vxdLi3aQ==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@tanstack/query-core": "5.90.7"
|
||||
"@tanstack/query-core": "5.90.8"
|
||||
},
|
||||
"funding": {
|
||||
"type": "github",
|
||||
@@ -1803,15 +1804,17 @@
|
||||
"integrity": "sha512-GNWcUTRBgIRJD5zj+Tq0fKOJ5XZajIiBroOF0yvj2bSU1WvNdYS/dn9UxwsujGW4JX06dnHyjV2y9rRaybH0iQ==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"undici-types": "~7.16.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@types/react": {
|
||||
"version": "19.2.3",
|
||||
"resolved": "https://registry.npmjs.org/@types/react/-/react-19.2.3.tgz",
|
||||
"integrity": "sha512-k5dJVszUiNr1DSe8Cs+knKR6IrqhqdhpUwzqhkS8ecQTSf3THNtbfIp/umqHMpX2bv+9dkx3fwDv/86LcSfvSg==",
|
||||
"version": "19.2.4",
|
||||
"resolved": "https://registry.npmjs.org/@types/react/-/react-19.2.4.tgz",
|
||||
"integrity": "sha512-tBFxBp9Nfyy5rsmefN+WXc1JeW/j2BpBHFdLZbEVfs9wn3E3NRFxwV0pJg8M1qQAexFpvz73hJXFofV0ZAu92A==",
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"csstype": "^3.0.2"
|
||||
}
|
||||
@@ -1878,6 +1881,7 @@
|
||||
"integrity": "sha512-tK3GPFWbirvNgsNKto+UmB/cRtn6TZfyw0D6IKrW55n6Vbs7KJoZtI//kpTKzE/DUmmnAFD8/Ca46s7Obs92/w==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"@typescript-eslint/scope-manager": "8.46.4",
|
||||
"@typescript-eslint/types": "8.46.4",
|
||||
@@ -2136,6 +2140,7 @@
|
||||
"integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"bin": {
|
||||
"acorn": "bin/acorn"
|
||||
},
|
||||
@@ -2275,9 +2280,9 @@
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/baseline-browser-mapping": {
|
||||
"version": "2.8.26",
|
||||
"resolved": "https://registry.npmjs.org/baseline-browser-mapping/-/baseline-browser-mapping-2.8.26.tgz",
|
||||
"integrity": "sha512-73lC1ugzwoaWCLJ1LvOgrR5xsMLTqSKIEoMHVtL9E/HNk0PXtTM76ZIm84856/SF7Nv8mPZxKoBsgpm0tR1u1Q==",
|
||||
"version": "2.8.27",
|
||||
"resolved": "https://registry.npmjs.org/baseline-browser-mapping/-/baseline-browser-mapping-2.8.27.tgz",
|
||||
"integrity": "sha512-2CXFpkjVnY2FT+B6GrSYxzYf65BJWEqz5tIRHCvNsZZ2F3CmsCB37h8SpYgKG7y9C4YAeTipIPWG7EmFmhAeXA==",
|
||||
"dev": true,
|
||||
"license": "Apache-2.0",
|
||||
"bin": {
|
||||
@@ -2328,6 +2333,7 @@
|
||||
}
|
||||
],
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"baseline-browser-mapping": "^2.8.25",
|
||||
"caniuse-lite": "^1.0.30001754",
|
||||
@@ -2789,6 +2795,7 @@
|
||||
"integrity": "sha512-BhHmn2yNOFA9H9JmmIVKJmd288g9hrVRDkdoIgRCRuSySRUHH7r/DI6aAXW9T1WwUuY3DFgrcaqB+deURBLR5g==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"@eslint-community/eslint-utils": "^4.8.0",
|
||||
"@eslint-community/regexpp": "^4.12.1",
|
||||
@@ -3413,6 +3420,7 @@
|
||||
}
|
||||
],
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"@babel/runtime": "^7.27.6"
|
||||
},
|
||||
@@ -3581,9 +3589,9 @@
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/js-yaml": {
|
||||
"version": "4.1.0",
|
||||
"resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-4.1.0.tgz",
|
||||
"integrity": "sha512-wpxZs9NoxZaJESJGIZTyDEaYpl0FKSA+FB9aJiyemKhMwkxQg63h4T1KJgUGHpTqPDNRcmmYLugrRjJlBtWvRA==",
|
||||
"version": "4.1.1",
|
||||
"resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-4.1.1.tgz",
|
||||
"integrity": "sha512-qQKT4zQxXl8lLwBtHMWwaTcGfFOZviOJet3Oy/xmGk2gZH677CJM9EvtfdSkgWcATZhj/55JZ0rmy3myCT5lsA==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
@@ -4867,6 +4875,7 @@
|
||||
}
|
||||
],
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"nanoid": "^3.3.11",
|
||||
"picocolors": "^1.1.1",
|
||||
@@ -4956,6 +4965,7 @@
|
||||
"resolved": "https://registry.npmjs.org/react/-/react-19.2.0.tgz",
|
||||
"integrity": "sha512-tmbWg6W31tQLeB5cdIBOicJDJRR2KzXsV7uSK9iNfLWQ5bIZfxuPEHp7M8wiHyHnn0DD1i7w3Zmin0FtkrwoCQ==",
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"engines": {
|
||||
"node": ">=0.10.0"
|
||||
}
|
||||
@@ -4965,6 +4975,7 @@
|
||||
"resolved": "https://registry.npmjs.org/react-dom/-/react-dom-19.2.0.tgz",
|
||||
"integrity": "sha512-UlbRu4cAiGaIewkPyiRGJk0imDN2T3JjieT6spoL2UeSf5od4n5LB/mQ4ejmxhCFT1tYe8IvaFulzynWovsEFQ==",
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"scheduler": "^0.27.0"
|
||||
},
|
||||
@@ -4990,9 +5001,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/react-i18next": {
|
||||
"version": "16.3.0",
|
||||
"resolved": "https://registry.npmjs.org/react-i18next/-/react-i18next-16.3.0.tgz",
|
||||
"integrity": "sha512-XGYIVU6gCOL4UQsfp87WbbvBc2WvgdkEDI8r4TwACzFg1bXY8pd1d9Cw6u9WJ2soTKHKaF1xQEyWA3/dUvtAGw==",
|
||||
"version": "16.3.1",
|
||||
"resolved": "https://registry.npmjs.org/react-i18next/-/react-i18next-16.3.1.tgz",
|
||||
"integrity": "sha512-HbYaBeA58Hg38OzdEvJp4kLIvk10rp9F9Jq+wNkqtqxDXObtdYMSsQnegWgdUVcpZjZuK9ZxehM+Z9BW2Vqgqw==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@babel/runtime": "^7.27.6",
|
||||
@@ -5412,6 +5423,7 @@
|
||||
"integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
},
|
||||
@@ -5490,6 +5502,7 @@
|
||||
"integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==",
|
||||
"devOptional": true,
|
||||
"license": "Apache-2.0",
|
||||
"peer": true,
|
||||
"bin": {
|
||||
"tsc": "bin/tsc",
|
||||
"tsserver": "bin/tsserver"
|
||||
@@ -5700,6 +5713,7 @@
|
||||
"integrity": "sha512-BxAKBWmIbrDgrokdGZH1IgkIk/5mMHDreLDmCJ0qpyJaAteP8NvMhkwr/ZCQNqNH97bw/dANTE9PDzqwJghfMQ==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"esbuild": "^0.25.0",
|
||||
"fdir": "^6.5.0",
|
||||
@@ -5793,6 +5807,7 @@
|
||||
"integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
},
|
||||
|
||||
@@ -19,10 +19,10 @@ import type {
|
||||
/**
|
||||
* API Client Configuration
|
||||
* - In Docker: VITE_API_BASE_URL is empty string, use relative path
|
||||
* - In development: Use VITE_API_BASE_URL from .env or default to localhost:12015
|
||||
* - In development: Use VITE_API_BASE_URL from .env or default to localhost:8000
|
||||
*/
|
||||
const envApiBaseUrl = import.meta.env.VITE_API_BASE_URL
|
||||
const API_BASE_URL = envApiBaseUrl !== undefined ? envApiBaseUrl : 'http://localhost:12015'
|
||||
const API_BASE_URL = envApiBaseUrl !== undefined ? envApiBaseUrl : 'http://localhost:8000'
|
||||
const API_VERSION = 'v1'
|
||||
|
||||
class ApiClient {
|
||||
|
||||
@@ -6,10 +6,10 @@ import path from 'path'
|
||||
export default defineConfig({
|
||||
plugins: [react()],
|
||||
server: {
|
||||
port: 12011,
|
||||
port: 5173,
|
||||
proxy: {
|
||||
'/api': {
|
||||
target: 'http://localhost:12015',
|
||||
target: 'http://localhost:8000',
|
||||
changeOrigin: true,
|
||||
},
|
||||
},
|
||||
|
||||
@@ -14,10 +14,11 @@ Tool_OCR is a web-based application for batch image-to-text conversion with mult
|
||||
## Tech Stack
|
||||
|
||||
### Development Environment
|
||||
- **OS Platform**: Windows 10/11
|
||||
- **Python Version**: 3.10 (via Conda)
|
||||
- **Environment Manager**: Conda
|
||||
- **Virtual Environment Path**: `C:\Users\lin46\.conda\envs\tool_ocr`
|
||||
- **OS Platform**: WSL2 Ubuntu 24.04
|
||||
- **Python Version**: 3.12
|
||||
- **Environment Manager**: Python venv
|
||||
- **Virtual Environment Path**: `./venv`
|
||||
- **Node.js**: 24.x LTS (via nvm)
|
||||
- **IDE Recommended**: VS Code with Python + React extensions
|
||||
|
||||
### Backend Technologies
|
||||
@@ -74,11 +75,15 @@ Tool_OCR is a web-based application for batch image-to-text conversion with mult
|
||||
|
||||
### Environment Setup (Backend)
|
||||
```bash
|
||||
# Create new conda environment
|
||||
conda create -n tool_ocr python=3.10 -y
|
||||
# Run automated setup script (recommended)
|
||||
./setup_dev_env.sh
|
||||
|
||||
# Or manually:
|
||||
# Create Python virtual environment
|
||||
python3 -m venv venv
|
||||
|
||||
# Activate environment
|
||||
conda activate tool_ocr
|
||||
source venv/bin/activate
|
||||
|
||||
# Install dependencies
|
||||
pip install -r requirements.txt
|
||||
|
||||
@@ -1,72 +0,0 @@
|
||||
#!/bin/bash
# Tool_OCR - Conda environment setup script (macOS Apple Silicon).
#
# Behaviour:
#   * If conda is not on PATH, download and install Miniconda into
#     ~/miniconda3, run `conda init` for zsh and bash, then exit so the user
#     can reload the shell and run this script again.
#   * Otherwise (re)create the "tool_ocr" environment with Python 3.10.

set -e  # Exit immediately when a command fails.

echo "==================================="
echo "Tool_OCR - 環境設置"
echo "==================================="

# Check whether Conda is already installed.
if command -v conda &> /dev/null; then
  echo "✓ Conda 已安裝: $(conda --version)"
else
  echo "📦 開始安裝 Miniconda..."

  # Miniconda installer for Apple Silicon.
  MINICONDA_URL="https://repo.anaconda.com/miniconda/Miniconda3-latest-MacOSX-arm64.sh"
  # Use an unpredictable temp file instead of a fixed /tmp name, and remove it
  # on every exit path (including a failed download or install).
  INSTALLER="$(mktemp)"
  trap 'rm -f -- "${INSTALLER}"' EXIT

  echo "下載 Miniconda..."
  # -f: fail on HTTP errors instead of saving an error page that would then be
  # executed by bash; -L: follow redirects.
  curl -fL -o "${INSTALLER}" "${MINICONDA_URL}"

  echo "安裝 Miniconda (默認安裝到 ~/miniconda3)..."
  bash "${INSTALLER}" -b -p "${HOME}/miniconda3"

  # Initialise Conda for both shells the user may be running.
  echo "初始化 Conda..."
  "${HOME}/miniconda3/bin/conda" init zsh bash

  echo "✓ Miniconda 安裝完成!"
  echo ""
  echo "⚠️ 請執行以下命令以載入 Conda:"
  echo " source ~/.zshrc (如果使用 zsh)"
  echo " source ~/.bash_profile (如果使用 bash)"
  echo ""
  echo "然後重新執行此腳本繼續設置。"
  exit 0
fi

# Report the currently active Conda environment (the row marked with '*').
CURRENT_ENV="$(conda info --envs | awk '/\*/ {print $1}')"
echo "當前 Conda 環境: ${CURRENT_ENV}"

# Create the tool_ocr environment, offering to recreate it when it exists.
ENV_NAME="tool_ocr"
if conda env list | grep -q -- "^${ENV_NAME} "; then
  echo "✓ 環境 '${ENV_NAME}' 已存在"
  read -p "是否重新創建? (y/N): " -n 1 -r
  echo
  if [[ "${REPLY}" =~ ^[Yy]$ ]]; then
    echo "移除現有環境..."
    conda env remove -n "${ENV_NAME}" -y
  else
    echo "使用現有環境"
    exit 0
  fi
fi

echo "📦 創建 Conda 環境: ${ENV_NAME} (Python 3.10)..."
conda create -n "${ENV_NAME}" python=3.10 -y

echo ""
echo "✅ Conda 環境設置完成!"
echo ""
echo "下一步:"
echo " 1. 啟動環境: conda activate ${ENV_NAME}"
echo " 2. 安裝依賴: pip install -r requirements.txt"
echo " 3. 下載 PaddleOCR 模型"
echo ""
|
||||
165
setup_dev_env.sh
Executable file
165
setup_dev_env.sh
Executable file
@@ -0,0 +1,165 @@
|
||||
#!/bin/bash
# Tool_OCR development environment setup script for WSL Ubuntu.
#
# Usage: ./setup_dev_env.sh
#   Run as a normal user; sudo is invoked only for the apt steps.
#
# Steps:
#   1. refresh apt package lists
#   2. install Python build tooling
#   3. install system-level libraries (fonts, cairo/pango, OpenCV, LibreOffice…)
#   4. install nvm (if missing) and the current Node.js LTS
#   5. create ./venv
#   6. install requirements.txt and smoke-test key Python packages
#   7. install frontend dependencies
#   8. create backend runtime directories
#
# Optional environment:
#   NVM_VERSION  nvm release tag to install when nvm is missing
#                (default: v0.39.0)

set -e  # Stop on the first failing command.

# venv/, requirements.txt, frontend/ and backend/ are addressed relative to
# the repository root, so switch to the directory holding this script; the
# setup then works regardless of where it was invoked from.
cd "$(dirname "${BASH_SOURCE[0]}")"

NVM_VERSION="${NVM_VERSION:-v0.39.0}"

echo "================================"
echo "Tool_OCR 開發環境設置"
echo "================================"
echo ""

# Color definitions (escape sequences are interpreted by `echo -e`).
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly RED='\033[0;31m'
readonly NC='\033[0m' # No Color

# Print a highlighted "[n/8] message" step header.
print_step() {
  echo -e "${YELLOW}[$1/8] $2${NC}"
}

# Refuse to run as root: venv, nvm and node_modules must belong to the user.
if [[ "${EUID}" -eq 0 ]]; then
  echo -e "${RED}請不要使用 sudo 運行此腳本${NC}" >&2
  echo "腳本會在需要時提示輸入 sudo 密碼" >&2
  exit 1
fi

# apt-get is used instead of apt: apt's command-line interface is documented
# as not stable for scripted use.
print_step 1 "更新系統套件列表..."
sudo apt-get update

echo ""
print_step 2 "安裝 Python 開發工具..."
python_build_packages=(
  python3-pip
  python3-venv
  python3-dev
  build-essential
  pkg-config
)
sudo apt-get install -y "${python_build_packages[@]}"

echo ""
print_step 3 "安裝系統層級依賴..."
system_packages=(
  pandoc
  libmagic1
  libmagic-dev
  fonts-noto-cjk
  fonts-noto-cjk-extra
  fonts-liberation
  libpango-1.0-0
  libpangocairo-1.0-0
  libcairo2
  libcairo2-dev
  libgdk-pixbuf2.0-0
  libgdk-pixbuf-2.0-dev
  libffi-dev
  libffi8
  shared-mime-info
  poppler-utils
  libgl1
  libglib2.0-0
  libglib2.0-dev
  libgomp1
  libjpeg-dev
  libpng-dev
  libtiff-dev
  libopencv-dev
  libsqlite3-dev
  libreoffice-core-nogui
  libreoffice-writer-nogui
  libreoffice-impress-nogui
  ca-certificates
  curl
  wget
  libxml2
  libxslt1-dev
  python3-cffi
)
sudo apt-get install -y "${system_packages[@]}"

echo ""
print_step 4 "安裝 Node.js 和 npm..."
# Install nvm only when it is not already present.
if [[ ! -d "${HOME}/.nvm" ]]; then
  echo "安裝 nvm..."
  # Download to a temp file first instead of piping curl into bash: with a
  # pipe, a failed download feeds bash an empty script that "succeeds".
  nvm_installer="$(mktemp)"
  trap 'rm -f -- "${nvm_installer}"' EXIT
  curl -fsSL -o "${nvm_installer}" \
    "https://raw.githubusercontent.com/nvm-sh/nvm/${NVM_VERSION}/install.sh"
  bash "${nvm_installer}"
  rm -f -- "${nvm_installer}"
  trap - EXIT
else
  echo "nvm 已安裝"
fi

# Load nvm into this shell (it is a shell function, not a binary).
export NVM_DIR="${HOME}/.nvm"
if [[ -s "${NVM_DIR}/nvm.sh" ]]; then
  # shellcheck source=/dev/null
  \. "${NVM_DIR}/nvm.sh"
else
  # Fail here with a clear message rather than later with
  # "nvm: command not found".
  echo -e "${RED}錯誤: 無法載入 nvm (${NVM_DIR}/nvm.sh)${NC}" >&2
  exit 1
fi

# Install and activate the current Node.js LTS release.
echo "安裝 Node.js LTS..."
nvm install --lts
nvm use --lts

echo ""
print_step 5 "創建 Python 虛擬環境..."
if [[ ! -d "venv" ]]; then
  python3 -m venv venv
  echo "虛擬環境已創建"
else
  echo "虛擬環境已存在"
fi

echo ""
print_step 6 "安裝 Python 依賴..."
# shellcheck source=/dev/null
source venv/bin/activate
pip install --upgrade pip setuptools wheel
pip install -r requirements.txt

echo ""
echo -e "${YELLOW}測試關鍵套件...${NC}"
# Best-effort smoke tests: report a failed import but keep going.
python -c "import magic; print('✓ python-magic')" || echo "✗ python-magic failed"
python -c "from weasyprint import HTML; print('✓ WeasyPrint')" || echo "✗ WeasyPrint failed"
python -c "import cv2; print('✓ OpenCV')" || echo "✗ OpenCV failed"

echo ""
print_step 7 "安裝前端依賴..."
cd frontend

# Remove a previous install so stale or locked files cannot break npm.
# NOTE(review): this also deletes package-lock.json, so every setup run
# re-resolves dependency versions and rewrites the tracked lockfile; consider
# keeping the lockfile and using `npm ci` if reproducible installs matter.
if [[ -d "node_modules" ]]; then
  echo "清理現有 node_modules..."
  rm -rf node_modules package-lock.json
fi

# Clear the npm cache.
npm cache clean --force

# Install dependencies (--force is used to get past locking problems).
echo "安裝前端依賴..."
npm install --force

cd ..

echo ""
print_step 8 "創建必要的目錄..."
mkdir -p backend/uploads/{temp,processed,images}
mkdir -p backend/storage/{markdown,json,exports}
mkdir -p backend/models/paddleocr
mkdir -p backend/logs

echo ""
echo -e "${GREEN}================================${NC}"
echo -e "${GREEN}環境設置完成!${NC}"
echo -e "${GREEN}================================${NC}"
echo ""
echo "下一步操作:"
echo "1. 初始化數據庫:"
echo " source venv/bin/activate"
echo " cd backend"
echo " alembic upgrade head"
echo " python create_test_user.py"
echo " cd .."
echo ""
echo "2. 啟動後端:"
echo " ./start_backend.sh"
echo ""
echo "3. 啟動前端 (新終端):"
echo " ./start_frontend.sh"
echo ""
echo "4. 訪問應用:"
echo " 前端: http://localhost:5173"
echo " API文檔: http://localhost:8000/docs"
echo " 健康檢查: http://localhost:8000/health"
echo ""
|
||||
59
start_backend.sh
Executable file
59
start_backend.sh
Executable file
@@ -0,0 +1,59 @@
|
||||
#!/bin/bash
# Tool_OCR - backend development server launcher.
#
# Usage: ./start_backend.sh
# Requires ./venv (created by setup_dev_env.sh) and ./.env.local.

set -e

# venv/, .env.local and backend/ are addressed relative to the repository
# root; switch to the directory holding this script so it can be started from
# any working directory instead of failing with a misleading "venv not found".
cd "$(dirname "${BASH_SOURCE[0]}")"

# Color definitions (escape sequences are interpreted by `echo -e`).
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
RED='\033[0;31m'
NC='\033[0m' # No Color

echo -e "${YELLOW}正在啟動 Tool_OCR 後端開發服務器...${NC}"
echo ""

# The Python virtual environment must exist.
if [[ ! -d "venv" ]]; then
  echo -e "${RED}錯誤: 未找到虛擬環境${NC}" >&2
  echo "請先運行: ./setup_dev_env.sh" >&2
  exit 1
fi

# The local environment configuration must exist.
if [[ ! -f ".env.local" ]]; then
  echo -e "${RED}錯誤: 未找到 .env.local 配置文件${NC}" >&2
  echo "請確保 .env.local 文件存在" >&2
  exit 1
fi

# Activate the virtual environment.
echo -e "${GREEN}啟動 Python 虛擬環境...${NC}"
# shellcheck source=/dev/null
source venv/bin/activate
|
||||
|
||||
#######################################
# Export the KEY=VALUE pairs of a dotenv-style file into the environment.
#
# Replaces `export $(cat file | grep -v '^#' | xargs)`, which word-splits
# values containing spaces, glob-expands values such as `*`, keeps the CR of
# CRLF files, and prints the whole environment when no variable is left.
#
# Parsing rules:
#   * blank lines and lines whose first non-blank character is '#' are skipped
#   * an optional leading `export ` is ignored
#   * the line is split at the first '='; key and value are trimmed
#   * one pair of matching surrounding quotes ("…" or '…') is removed
#   * values are taken literally: no expansion, no command execution
#   * lines without '=' are ignored; invalid names are skipped with a warning
#
# Arguments: $1 - path to the env file
# Outputs:   diagnostics on stderr
# Returns:   0 on success, 1 when the file cannot be read
#######################################
load_env_file() {
  local env_file=$1
  local line key value

  if [[ ! -r "${env_file}" ]]; then
    echo "錯誤: 無法讀取環境變量文件: ${env_file}" >&2
    return 1
  fi

  # `|| [[ -n … ]]` keeps a final line that lacks a trailing newline.
  while IFS= read -r line || [[ -n "${line}" ]]; do
    line=${line%$'\r'}                          # tolerate CRLF line endings
    line="${line#"${line%%[![:space:]]*}"}"     # trim leading whitespace

    if [[ -z "${line}" || "${line}" == \#* ]]; then
      continue
    fi

    if [[ "${line}" == export[[:space:]]* ]]; then
      line=${line#export}
      line="${line#"${line%%[![:space:]]*}"}"
    fi

    if [[ "${line}" != *=* ]]; then
      continue
    fi

    key=${line%%=*}
    value=${line#*=}
    key="${key%"${key##*[![:space:]]}"}"        # trim trailing whitespace
    value="${value#"${value%%[![:space:]]*}"}"  # trim leading whitespace
    value="${value%"${value##*[![:space:]]}"}"  # trim trailing whitespace

    if [[ ! "${key}" =~ ^[A-Za-z_][A-Za-z0-9_]*$ ]]; then
      echo "警告: 略過無效的變量名稱: ${key}" >&2
      continue
    fi

    # Remove one pair of matching surrounding quotes.
    if [[ "${value}" == \"*\" || "${value}" == \'*\' ]]; then
      value=${value:1:${#value}-2}
    fi

    export "${key}=${value}"
  done < "${env_file}"
}

# Load environment variables from .env.local.
echo -e "${GREEN}載入環境變量...${NC}"
load_env_file .env.local
|
||||
|
||||
# Work from the backend package directory.
cd backend

# Make sure the runtime directories exist before the server starts.
echo -e "${GREEN}檢查目錄結構...${NC}"
for runtime_dir in \
  uploads/{temp,processed,images} \
  storage/{markdown,json,exports} \
  models/paddleocr \
  logs; do
  mkdir -p "${runtime_dir}"
done

# Print the start-up banner and the useful URLs, then launch the server.
printf '\n'
printf '%b\n' \
  "${GREEN}================================${NC}" \
  "${GREEN}後端服務器啟動中...${NC}" \
  "${GREEN}================================${NC}"
printf '%s\n' \
  "" \
  "API 文檔: http://localhost:8000/docs" \
  "健康檢查: http://localhost:8000/health" \
  "" \
  "按 Ctrl+C 停止服務器" \
  ""

uvicorn app.main:app --reload --host 0.0.0.0 --port 8000
|
||||
40
start_frontend.sh
Executable file
40
start_frontend.sh
Executable file
@@ -0,0 +1,40 @@
|
||||
#!/bin/bash
# Tool_OCR - frontend development server launcher.
#
# Usage: ./start_frontend.sh
# Requires frontend/node_modules (installed by setup_dev_env.sh) and npm,
# either provided through nvm or already on PATH.

set -e

# frontend/ is addressed relative to the repository root; switch to the
# directory holding this script so it can be started from anywhere.
cd "$(dirname "${BASH_SOURCE[0]}")"

# Color definitions (escape sequences are interpreted by `echo -e`).
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly RED='\033[0;31m'
readonly NC='\033[0m' # No Color

echo -e "${YELLOW}正在啟動 Tool_OCR 前端開發服務器...${NC}"
echo ""

# Frontend dependencies must have been installed.
if [[ ! -d "frontend/node_modules" ]]; then
  echo -e "${RED}錯誤: 未找到 node_modules${NC}" >&2
  echo "請先運行: ./setup_dev_env.sh" >&2
  exit 1
fi

# Load nvm when it is installed; a Node.js that is already on PATH without
# nvm is still accepted, so a missing nvm.sh is not an error by itself.
export NVM_DIR="${HOME}/.nvm"
if [[ -s "${NVM_DIR}/nvm.sh" ]]; then
  # shellcheck source=/dev/null
  \. "${NVM_DIR}/nvm.sh"
fi

# Fail with a clear hint instead of a bare "npm: command not found".
if ! command -v npm > /dev/null; then
  echo -e "${RED}錯誤: 未找到 npm${NC}" >&2
  echo "請先運行: ./setup_dev_env.sh" >&2
  exit 1
fi

# Enter the frontend directory.
cd frontend

# Print the start-up banner, then launch the dev server.
echo ""
echo -e "${GREEN}================================${NC}"
echo -e "${GREEN}前端服務器啟動中...${NC}"
echo -e "${GREEN}================================${NC}"
echo ""
echo "前端界面: http://localhost:5173"
echo ""
echo "按 Ctrl+C 停止服務器"
echo ""

npm run dev
|
||||
Reference in New Issue
Block a user