feat: migrate to WSL Ubuntu native development environment
從 Docker/macOS+Conda 部署遷移到 WSL2 Ubuntu 原生開發環境 主要變更: - 移除所有 Docker 相關配置檔案 (Dockerfile, docker-compose.yml, .dockerignore 等) - 移除 macOS/Conda 設置腳本 (SETUP.md, setup_conda.sh) - 新增 WSL Ubuntu 自動化環境設置腳本 (setup_dev_env.sh) - 新增後端/前端快速啟動腳本 (start_backend.sh, start_frontend.sh) - 統一開發端口配置 (backend: 8000, frontend: 5173) - 改進資料庫連接穩定性(連接池、超時設置、重試機制) - 更新專案文檔以反映當前 WSL 開發環境 Technical improvements: - Database connection pooling with health checks and auto-reconnection - Retry logic for long-running OCR tasks to prevent DB timeouts - Extended JWT token expiration to 24 hours - Support for Office documents (pptx, docx) via LibreOffice headless - Comprehensive system dependency installation in single script Environment: - OS: WSL2 Ubuntu 24.04 - Python: 3.12 (venv) - Node.js: 24.x LTS (nvm) - Backend Port: 8000 - Frontend Port: 5173 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -53,7 +53,13 @@
|
|||||||
"Bash(done)",
|
"Bash(done)",
|
||||||
"Bash(git add:*)",
|
"Bash(git add:*)",
|
||||||
"Bash(git commit:*)",
|
"Bash(git commit:*)",
|
||||||
"Bash(git push)"
|
"Bash(git push)",
|
||||||
|
"Bash(docker --version:*)",
|
||||||
|
"Bash(dpkg:*)",
|
||||||
|
"Bash(pip3:*)",
|
||||||
|
"Bash(chmod:*)",
|
||||||
|
"Bash(sudo apt install:*)",
|
||||||
|
"Bash(/usr/bin/soffice:*)"
|
||||||
],
|
],
|
||||||
"deny": [],
|
"deny": [],
|
||||||
"ask": []
|
"ask": []
|
||||||
|
|||||||
@@ -1,87 +0,0 @@
|
|||||||
# Git
|
|
||||||
.git
|
|
||||||
.gitignore
|
|
||||||
.gitattributes
|
|
||||||
|
|
||||||
# Python
|
|
||||||
__pycache__
|
|
||||||
*.py[cod]
|
|
||||||
*$py.class
|
|
||||||
*.so
|
|
||||||
.Python
|
|
||||||
env/
|
|
||||||
venv/
|
|
||||||
ENV/
|
|
||||||
*.egg-info/
|
|
||||||
dist/
|
|
||||||
build/
|
|
||||||
*.egg
|
|
||||||
.pytest_cache/
|
|
||||||
.coverage
|
|
||||||
htmlcov/
|
|
||||||
|
|
||||||
# Node
|
|
||||||
node_modules/
|
|
||||||
npm-debug.log*
|
|
||||||
yarn-debug.log*
|
|
||||||
yarn-error.log*
|
|
||||||
.npm
|
|
||||||
.yarn
|
|
||||||
|
|
||||||
# IDE
|
|
||||||
.vscode/
|
|
||||||
.idea/
|
|
||||||
*.swp
|
|
||||||
*.swo
|
|
||||||
*~
|
|
||||||
.DS_Store
|
|
||||||
|
|
||||||
# Environment files
|
|
||||||
.env
|
|
||||||
.env.local
|
|
||||||
.env.*.local
|
|
||||||
|
|
||||||
# Logs
|
|
||||||
logs/
|
|
||||||
*.log
|
|
||||||
|
|
||||||
# Data directories (will be mounted as volumes)
|
|
||||||
data/
|
|
||||||
uploads/
|
|
||||||
storage/
|
|
||||||
models/
|
|
||||||
|
|
||||||
# Backend specific
|
|
||||||
backend/uploads/
|
|
||||||
backend/storage/
|
|
||||||
backend/models/
|
|
||||||
backend/logs/
|
|
||||||
backend/__pycache__/
|
|
||||||
backend/*.egg-info/
|
|
||||||
|
|
||||||
# Frontend specific
|
|
||||||
frontend/node_modules/
|
|
||||||
frontend/dist/
|
|
||||||
frontend/.vite/
|
|
||||||
frontend/.cache/
|
|
||||||
|
|
||||||
# Documentation (not needed in container)
|
|
||||||
*.md
|
|
||||||
!README.md
|
|
||||||
docs/
|
|
||||||
demo_docs/
|
|
||||||
|
|
||||||
# Claude and OpenSpec
|
|
||||||
.claude/
|
|
||||||
openspec/
|
|
||||||
|
|
||||||
# OS
|
|
||||||
Thumbs.db
|
|
||||||
Desktop.ini
|
|
||||||
|
|
||||||
# Temporary files
|
|
||||||
*.tmp
|
|
||||||
*.bak
|
|
||||||
*.swp
|
|
||||||
temp/
|
|
||||||
tmp/
|
|
||||||
82
.env.docker
82
.env.docker
@@ -1,82 +0,0 @@
|
|||||||
# Tool_OCR - Docker Environment Configuration
|
|
||||||
# Copy this file to .env when deploying with Docker
|
|
||||||
|
|
||||||
# ===== Database Configuration =====
|
|
||||||
MYSQL_HOST=mysql.theaken.com
|
|
||||||
MYSQL_PORT=33306
|
|
||||||
MYSQL_USER=A060
|
|
||||||
MYSQL_PASSWORD=WLeSCi0yhtc7
|
|
||||||
MYSQL_DATABASE=db_A060
|
|
||||||
|
|
||||||
# ===== Application Configuration =====
|
|
||||||
# External port (exposed to host)
|
|
||||||
FRONTEND_PORT=12015
|
|
||||||
|
|
||||||
# Security (IMPORTANT: Change SECRET_KEY in production!)
|
|
||||||
SECRET_KEY=your-secret-key-here-please-change-this-to-random-string
|
|
||||||
ALGORITHM=HS256
|
|
||||||
ACCESS_TOKEN_EXPIRE_MINUTES=1440
|
|
||||||
|
|
||||||
# ===== OCR Configuration =====
|
|
||||||
# PaddleOCR model directory (inside container)
|
|
||||||
PADDLEOCR_MODEL_DIR=/app/backend/models/paddleocr
|
|
||||||
# Supported languages (comma-separated)
|
|
||||||
OCR_LANGUAGES=ch,en,japan,korean
|
|
||||||
# Default confidence threshold
|
|
||||||
OCR_CONFIDENCE_THRESHOLD=0.5
|
|
||||||
# Maximum concurrent OCR workers
|
|
||||||
MAX_OCR_WORKERS=4
|
|
||||||
|
|
||||||
# ===== File Upload Configuration =====
|
|
||||||
# Maximum file size in bytes (50MB default)
|
|
||||||
MAX_UPLOAD_SIZE=52428800
|
|
||||||
# Allowed file extensions (comma-separated)
|
|
||||||
ALLOWED_EXTENSIONS=png,jpg,jpeg,pdf,bmp,tiff,doc,docx,ppt,pptx
|
|
||||||
# Upload directories (inside container)
|
|
||||||
UPLOAD_DIR=/app/backend/uploads
|
|
||||||
TEMP_DIR=/app/backend/uploads/temp
|
|
||||||
PROCESSED_DIR=/app/backend/uploads/processed
|
|
||||||
IMAGES_DIR=/app/backend/uploads/images
|
|
||||||
|
|
||||||
# ===== Export Configuration =====
|
|
||||||
# Storage directories (inside container)
|
|
||||||
STORAGE_DIR=/app/backend/storage
|
|
||||||
MARKDOWN_DIR=/app/backend/storage/markdown
|
|
||||||
JSON_DIR=/app/backend/storage/json
|
|
||||||
EXPORTS_DIR=/app/backend/storage/exports
|
|
||||||
|
|
||||||
# ===== PDF Generation Configuration =====
|
|
||||||
# Pandoc path (inside container)
|
|
||||||
PANDOC_PATH=/usr/bin/pandoc
|
|
||||||
# Font directory (inside container)
|
|
||||||
FONT_DIR=/usr/share/fonts
|
|
||||||
# Default PDF page size
|
|
||||||
PDF_PAGE_SIZE=A4
|
|
||||||
# Default PDF margins (mm)
|
|
||||||
PDF_MARGIN_TOP=20
|
|
||||||
PDF_MARGIN_BOTTOM=20
|
|
||||||
PDF_MARGIN_LEFT=20
|
|
||||||
PDF_MARGIN_RIGHT=20
|
|
||||||
|
|
||||||
# ===== Translation Configuration (Reserved) =====
|
|
||||||
# Enable translation feature (reserved for future)
|
|
||||||
ENABLE_TRANSLATION=false
|
|
||||||
# Translation engine: offline (argostranslate) or api (future)
|
|
||||||
TRANSLATION_ENGINE=offline
|
|
||||||
# Argostranslate models directory (inside container)
|
|
||||||
ARGOSTRANSLATE_MODELS_DIR=/app/backend/models/argostranslate
|
|
||||||
|
|
||||||
# ===== Background Tasks Configuration =====
|
|
||||||
# Task queue type: memory (default) or redis (future)
|
|
||||||
TASK_QUEUE_TYPE=memory
|
|
||||||
# Redis URL (if using redis)
|
|
||||||
# REDIS_URL=redis://localhost:6379/0
|
|
||||||
|
|
||||||
# ===== CORS Configuration =====
|
|
||||||
# Allowed origins (comma-separated, * for all)
|
|
||||||
# For Docker, use the external URL
|
|
||||||
CORS_ORIGINS=http://localhost:12015,http://127.0.0.1:12015
|
|
||||||
|
|
||||||
# ===== Logging Configuration =====
|
|
||||||
LOG_LEVEL=INFO
|
|
||||||
LOG_FILE=/app/backend/logs/app.log
|
|
||||||
@@ -1,524 +0,0 @@
|
|||||||
# Tool_OCR Docker 部署指南
|
|
||||||
|
|
||||||
## 架構說明
|
|
||||||
|
|
||||||
Tool_OCR 使用統一容器架構,將前端和後端封裝在同一個容器中:
|
|
||||||
|
|
||||||
```
|
|
||||||
┌─────────────────────────────────────┐
|
|
||||||
│ Container (tool_ocr) │
|
|
||||||
│ │
|
|
||||||
│ ┌──────────────────────────────┐ │
|
|
||||||
│ │ Nginx :12010 (External) │ │
|
|
||||||
│ │ - Frontend Static Files │ │
|
|
||||||
│ │ - Reverse Proxy for API │ │
|
|
||||||
│ └─────────┬────────────────────┘ │
|
|
||||||
│ │ proxy_pass │
|
|
||||||
│ ▼ │
|
|
||||||
│ ┌──────────────────────────────┐ │
|
|
||||||
│ │ FastAPI :8000 (Internal) │ │
|
|
||||||
│ │ - OCR Processing │ │
|
|
||||||
│ │ - File Management │ │
|
|
||||||
│ │ - Export Services │ │
|
|
||||||
│ └──────────────────────────────┘ │
|
|
||||||
│ │
|
|
||||||
│ Supervisor manages both services │
|
|
||||||
└─────────────────────────────────────┘
|
|
||||||
│
|
|
||||||
│ Port 12010 only!
|
|
||||||
▼
|
|
||||||
External Access
|
|
||||||
```
|
|
||||||
|
|
||||||
### 優勢
|
|
||||||
|
|
||||||
1. **單一端口**: 只需要暴露一個端口 (12010)
|
|
||||||
2. **簡化部署**: 一個容器包含完整應用
|
|
||||||
3. **統一管理**: Supervisor 管理所有服務
|
|
||||||
4. **生產就緒**: Nginx 提供高性能靜態文件服務和反向代理
|
|
||||||
|
|
||||||
## 快速開始
|
|
||||||
|
|
||||||
### 前置要求
|
|
||||||
|
|
||||||
- Docker Engine 20.10+
|
|
||||||
- Docker Compose 2.0+
|
|
||||||
- 至少 4GB 可用內存
|
|
||||||
- 至少 10GB 可用磁碟空間
|
|
||||||
|
|
||||||
### 1. 準備環境配置
|
|
||||||
|
|
||||||
**複製環境配置範本:**
|
|
||||||
|
|
||||||
Linux/Mac:
|
|
||||||
```bash
|
|
||||||
cp .env.docker .env
|
|
||||||
```
|
|
||||||
|
|
||||||
Windows (PowerShell):
|
|
||||||
```powershell
|
|
||||||
Copy-Item .env.docker .env
|
|
||||||
```
|
|
||||||
|
|
||||||
**編輯 `.env` 文件,至少修改以下重要配置:**
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# 修改為安全的密鑰
|
|
||||||
SECRET_KEY=your-very-secure-random-key-here
|
|
||||||
|
|
||||||
# 根據需要調整端口(注意:.env.docker 範本與 Dockerfile 的預設端口為 12015,本文檔以 12010 為例)
|
|
||||||
FRONTEND_PORT=12010
|
|
||||||
|
|
||||||
# 根據實際情況配置 CORS
|
|
||||||
CORS_ORIGINS=http://your-domain.com:12010,http://localhost:12010
|
|
||||||
```
|
|
||||||
|
|
||||||
### 2. 創建數據目錄
|
|
||||||
|
|
||||||
Linux/Mac:
|
|
||||||
```bash
|
|
||||||
mkdir -p data/{uploads,storage,models,logs}
|
|
||||||
```
|
|
||||||
|
|
||||||
Windows (PowerShell):
|
|
||||||
```powershell
|
|
||||||
mkdir -p data/uploads, data/storage, data/models, data/logs
|
|
||||||
```
|
|
||||||
|
|
||||||
或使用跨平台命令:
|
|
||||||
```bash
|
|
||||||
mkdir -p data/uploads data/storage data/models data/logs
|
|
||||||
```
|
|
||||||
|
|
||||||
### 3. 構建並啟動容器
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# 構建映像
|
|
||||||
docker compose build
|
|
||||||
|
|
||||||
# 啟動服務
|
|
||||||
docker compose up -d
|
|
||||||
|
|
||||||
# 查看日誌
|
|
||||||
docker compose logs -f
|
|
||||||
```
|
|
||||||
|
|
||||||
> 注意:舊版本 Docker 使用 `docker-compose`(帶連字符),新版本使用 `docker compose`(無連字符)。兩者都支持。
|
|
||||||
|
|
||||||
### 4. 驗證部署
|
|
||||||
|
|
||||||
Linux/Mac:
|
|
||||||
```bash
|
|
||||||
# 檢查健康狀態
|
|
||||||
curl http://localhost:12010/health
|
|
||||||
|
|
||||||
# 訪問 API 文檔
|
|
||||||
open http://localhost:12010/docs
|
|
||||||
|
|
||||||
# 訪問前端界面
|
|
||||||
open http://localhost:12010
|
|
||||||
```
|
|
||||||
|
|
||||||
Windows (PowerShell):
|
|
||||||
```powershell
|
|
||||||
# 檢查健康狀態
|
|
||||||
curl http://localhost:12010/health
|
|
||||||
|
|
||||||
# 在瀏覽器中打開
|
|
||||||
Start-Process "http://localhost:12010"
|
|
||||||
Start-Process "http://localhost:12010/docs"
|
|
||||||
```
|
|
||||||
|
|
||||||
## 管理命令
|
|
||||||
|
|
||||||
> 提示:以下命令在 Windows、Linux 和 Mac 上通用。如果您使用舊版 Docker,將 `docker compose` 替換為 `docker-compose`。
|
|
||||||
|
|
||||||
### 查看狀態
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# 查看容器狀態
|
|
||||||
docker compose ps
|
|
||||||
|
|
||||||
# 查看實時日誌
|
|
||||||
docker compose logs -f
|
|
||||||
|
|
||||||
# 查看特定服務日誌
|
|
||||||
docker compose exec tool_ocr tail -f /var/log/nginx/tool_ocr_access.log
|
|
||||||
docker compose exec tool_ocr tail -f /app/backend/logs/app.log
|
|
||||||
```
|
|
||||||
|
|
||||||
### 重啟服務
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# 重啟容器
|
|
||||||
docker compose restart
|
|
||||||
|
|
||||||
# 重啟 Nginx (容器內)
|
|
||||||
docker compose exec tool_ocr supervisorctl restart nginx
|
|
||||||
|
|
||||||
# 重啟 Backend (容器內)
|
|
||||||
docker compose exec tool_ocr supervisorctl restart backend
|
|
||||||
```
|
|
||||||
|
|
||||||
### 停止和清理
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# 停止服務
|
|
||||||
docker compose stop
|
|
||||||
|
|
||||||
# 停止並移除容器
|
|
||||||
docker compose down
|
|
||||||
|
|
||||||
# 完全清理(包括數據卷)⚠️ 慎用
|
|
||||||
docker compose down -v
|
|
||||||
```
|
|
||||||
|
|
||||||
### 進入容器調試
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# 進入容器 shell
|
|
||||||
docker compose exec tool_ocr bash
|
|
||||||
|
|
||||||
# 查看 Supervisor 狀態
|
|
||||||
docker compose exec tool_ocr supervisorctl status
|
|
||||||
|
|
||||||
# 查看進程
|
|
||||||
docker compose exec tool_ocr ps aux
|
|
||||||
```
|
|
||||||
|
|
||||||
## 數據持久化
|
|
||||||
|
|
||||||
以下目錄會持久化到主機的 `./data/` 目錄:
|
|
||||||
|
|
||||||
| 容器內路徑 | 主機路徑 | 說明 |
|
|
||||||
|-----------|---------|------|
|
|
||||||
| `/app/backend/uploads` | `./data/uploads` | 上傳文件 |
|
|
||||||
| `/app/backend/storage` | `./data/storage` | 處理結果 |
|
|
||||||
| `/app/backend/models` | `./data/models` | OCR 模型 |
|
|
||||||
| `/app/backend/logs` | `./data/logs` | 應用日誌 |
|
|
||||||
|
|
||||||
### 備份數據
|
|
||||||
|
|
||||||
Linux/Mac:
|
|
||||||
```bash
|
|
||||||
# 備份所有數據
|
|
||||||
tar -czf tool_ocr_backup_$(date +%Y%m%d).tar.gz data/
|
|
||||||
|
|
||||||
# 只備份重要數據
|
|
||||||
tar -czf tool_ocr_data_$(date +%Y%m%d).tar.gz data/uploads data/storage
|
|
||||||
```
|
|
||||||
|
|
||||||
Windows (PowerShell):
|
|
||||||
```powershell
|
|
||||||
# 備份所有數據(需要安裝 7-Zip 或使用 Compress-Archive)
|
|
||||||
$date = Get-Date -Format "yyyyMMdd"
|
|
||||||
Compress-Archive -Path data -DestinationPath "tool_ocr_backup_$date.zip"
|
|
||||||
|
|
||||||
# 只備份重要數據
|
|
||||||
Compress-Archive -Path data/uploads, data/storage -DestinationPath "tool_ocr_data_$date.zip"
|
|
||||||
```
|
|
||||||
|
|
||||||
### 恢復數據
|
|
||||||
|
|
||||||
Linux/Mac:
|
|
||||||
```bash
|
|
||||||
# 停止容器
|
|
||||||
docker compose stop
|
|
||||||
|
|
||||||
# 恢復數據
|
|
||||||
tar -xzf tool_ocr_backup_20250113.tar.gz
|
|
||||||
|
|
||||||
# 啟動容器
|
|
||||||
docker compose up -d
|
|
||||||
```
|
|
||||||
|
|
||||||
Windows (PowerShell):
|
|
||||||
```powershell
|
|
||||||
# 停止容器
|
|
||||||
docker compose stop
|
|
||||||
|
|
||||||
# 恢復數據
|
|
||||||
Expand-Archive -Path tool_ocr_backup_20250113.zip -DestinationPath . -Force
|
|
||||||
|
|
||||||
# 啟動容器
|
|
||||||
docker compose up -d
|
|
||||||
```
|
|
||||||
|
|
||||||
## 1Panel 部署指南
|
|
||||||
|
|
||||||
### 1. 準備項目文件
|
|
||||||
|
|
||||||
在 1Panel 的應用目錄中創建項目:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
cd /opt/1panel/apps
|
|
||||||
mkdir -p tool_ocr
|
|
||||||
cd tool_ocr
|
|
||||||
|
|
||||||
# 上傳項目文件
|
|
||||||
# - Dockerfile
|
|
||||||
# - docker-compose.yml
|
|
||||||
# - docker/ 目錄
|
|
||||||
# - backend/ 目錄
|
|
||||||
# - frontend/ 目錄
|
|
||||||
# - requirements.txt
|
|
||||||
# - .env
|
|
||||||
```
|
|
||||||
|
|
||||||
### 2. 在 1Panel 中創建應用
|
|
||||||
|
|
||||||
1. 登入 1Panel 管理面板
|
|
||||||
2. 進入「應用商店」→「自定義應用」
|
|
||||||
3. 選擇「Docker Compose」
|
|
||||||
4. 上傳或粘貼 `docker-compose.yml` 內容
|
|
||||||
5. 配置環境變量
|
|
||||||
6. 點擊「創建」
|
|
||||||
|
|
||||||
### 3. 配置反向代理(可選)
|
|
||||||
|
|
||||||
如果需要通過域名訪問:
|
|
||||||
|
|
||||||
1. 在 1Panel 中創建網站
|
|
||||||
2. 配置反向代理:
|
|
||||||
- 目標地址: `http://127.0.0.1:12010`
|
|
||||||
- 啟用 WebSocket 支援(如需要)
|
|
||||||
|
|
||||||
### 4. 配置 SSL 證書(可選)
|
|
||||||
|
|
||||||
1. 在 1Panel 網站設置中
|
|
||||||
2. 申請或上傳 SSL 證書
|
|
||||||
3. 啟用 HTTPS
|
|
||||||
|
|
||||||
## 更新部署
|
|
||||||
|
|
||||||
### 更新代碼
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# 停止容器
|
|
||||||
docker compose stop
|
|
||||||
|
|
||||||
# 拉取最新代碼
|
|
||||||
git pull
|
|
||||||
|
|
||||||
# 重新構建映像
|
|
||||||
docker compose build --no-cache
|
|
||||||
|
|
||||||
# 啟動容器
|
|
||||||
docker compose up -d
|
|
||||||
|
|
||||||
# 查看日誌確認啟動成功
|
|
||||||
docker compose logs -f
|
|
||||||
```
|
|
||||||
|
|
||||||
### 數據庫遷移
|
|
||||||
|
|
||||||
如果有數據庫結構變更:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# 進入容器
|
|
||||||
docker compose exec tool_ocr bash
|
|
||||||
|
|
||||||
# 運行遷移
|
|
||||||
cd /app/backend
|
|
||||||
alembic upgrade head
|
|
||||||
|
|
||||||
# 退出容器
|
|
||||||
exit
|
|
||||||
```
|
|
||||||
|
|
||||||
## 故障排除
|
|
||||||
|
|
||||||
### 1. 容器無法啟動
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# 查看詳細錯誤
|
|
||||||
docker compose logs
|
|
||||||
```
|
|
||||||
|
|
||||||
檢查端口占用:
|
|
||||||
|
|
||||||
Linux/Mac:
|
|
||||||
```bash
|
|
||||||
netstat -tuln | grep 12010
|
|
||||||
# 或
|
|
||||||
lsof -i :12010
|
|
||||||
```
|
|
||||||
|
|
||||||
Windows (PowerShell):
|
|
||||||
```powershell
|
|
||||||
netstat -ano | findstr 12010
|
|
||||||
# 或
|
|
||||||
Get-NetTCPConnection -LocalPort 12010
|
|
||||||
```
|
|
||||||
|
|
||||||
檢查磁碟空間:
|
|
||||||
|
|
||||||
Linux/Mac:
|
|
||||||
```bash
|
|
||||||
df -h
|
|
||||||
```
|
|
||||||
|
|
||||||
Windows (PowerShell):
|
|
||||||
```powershell
|
|
||||||
Get-PSDrive
|
|
||||||
```
|
|
||||||
|
|
||||||
### 2. Nginx 無法啟動
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# 檢查 Nginx 配置語法
|
|
||||||
docker compose exec tool_ocr nginx -t
|
|
||||||
|
|
||||||
# 查看 Nginx 錯誤日誌
|
|
||||||
docker compose exec tool_ocr cat /var/log/nginx/error.log
|
|
||||||
```
|
|
||||||
|
|
||||||
### 3. Backend API 無法訪問
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# 檢查 Backend 是否運行
|
|
||||||
docker compose exec tool_ocr supervisorctl status backend
|
|
||||||
|
|
||||||
# 查看 Backend 日誌
|
|
||||||
docker compose exec tool_ocr cat /app/backend/logs/app.log
|
|
||||||
|
|
||||||
# 重啟 Backend
|
|
||||||
docker compose exec tool_ocr supervisorctl restart backend
|
|
||||||
```
|
|
||||||
|
|
||||||
### 4. 數據庫連接失敗
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# 測試數據庫連接
|
|
||||||
docker compose exec tool_ocr python -c "
|
|
||||||
from app.core.database import engine
|
|
||||||
try:
|
|
||||||
with engine.connect() as conn:
|
|
||||||
print('Database connection successful!')
|
|
||||||
except Exception as e:
|
|
||||||
print(f'Database connection failed: {e}')
|
|
||||||
"
|
|
||||||
```
|
|
||||||
|
|
||||||
### 5. OCR 處理失敗
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# 檢查 PaddleOCR 模型
|
|
||||||
docker compose exec tool_ocr ls -la /app/backend/models/paddleocr/
|
|
||||||
|
|
||||||
# 測試 OCR 功能
|
|
||||||
docker compose exec tool_ocr python -c "
|
|
||||||
from paddleocr import PaddleOCR
|
|
||||||
ocr = PaddleOCR(lang='ch')
|
|
||||||
print('PaddleOCR initialized successfully!')
|
|
||||||
"
|
|
||||||
```
|
|
||||||
|
|
||||||
### 6. 前端頁面無法訪問
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# 檢查前端文件是否存在
|
|
||||||
docker compose exec tool_ocr ls -la /app/frontend/dist/
|
|
||||||
|
|
||||||
# 檢查 Nginx 配置
|
|
||||||
docker compose exec tool_ocr cat /etc/nginx/conf.d/default.conf
|
|
||||||
```
|
|
||||||
|
|
||||||
## 性能優化
|
|
||||||
|
|
||||||
### 1. 調整 OCR 工作進程數
|
|
||||||
|
|
||||||
根據 CPU 核心數調整:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# 在 .env 中設置
|
|
||||||
MAX_OCR_WORKERS=8 # 建議設置為 CPU 核心數
|
|
||||||
```
|
|
||||||
|
|
||||||
### 2. 調整 Nginx Worker 進程數
|
|
||||||
|
|
||||||
編輯 `docker/nginx.conf`:
|
|
||||||
|
|
||||||
```nginx
|
|
||||||
worker_processes auto; # 自動根據 CPU 核心數
|
|
||||||
```
|
|
||||||
|
|
||||||
### 3. 優化 Upload 大小限制
|
|
||||||
|
|
||||||
根據實際需求調整:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# 在 .env 中設置(以字節為單位)
|
|
||||||
MAX_UPLOAD_SIZE=104857600 # 100MB
|
|
||||||
```
|
|
||||||
|
|
||||||
同時修改 `docker/nginx.conf`:
|
|
||||||
|
|
||||||
```nginx
|
|
||||||
client_max_body_size 100M;
|
|
||||||
```
|
|
||||||
|
|
||||||
## 監控和日誌
|
|
||||||
|
|
||||||
### 日誌位置
|
|
||||||
|
|
||||||
| 服務 | 容器內路徑 | 主機路徑 |
|
|
||||||
|------|-----------|---------|
|
|
||||||
| Nginx Access | `/var/log/nginx/tool_ocr_access.log` | - |
|
|
||||||
| Nginx Error | `/var/log/nginx/tool_ocr_error.log` | - |
|
|
||||||
| Backend | `/app/backend/logs/app.log` | `./data/logs/app.log` |
|
|
||||||
| Supervisor | `/var/log/supervisor/supervisord.log` | - |
|
|
||||||
|
|
||||||
### 日誌輪轉
|
|
||||||
|
|
||||||
建議配置日誌輪轉以防止日誌文件過大:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# 創建 logrotate 配置(主機上)
|
|
||||||
cat > /etc/logrotate.d/tool_ocr << 'EOF'
|
|
||||||
/path/to/tool_ocr/data/logs/*.log {
|
|
||||||
daily
|
|
||||||
rotate 7
|
|
||||||
compress
|
|
||||||
delaycompress
|
|
||||||
notifempty
|
|
||||||
create 0644 root root
|
|
||||||
}
|
|
||||||
EOF
|
|
||||||
```
|
|
||||||
|
|
||||||
## 安全建議
|
|
||||||
|
|
||||||
1. **修改默認密鑰**: 務必修改 `.env` 中的 `SECRET_KEY`
|
|
||||||
2. **使用 HTTPS**: 在生產環境中啟用 SSL/TLS
|
|
||||||
3. **限制 CORS**: 只允許可信的來源
|
|
||||||
4. **定期更新**: 及時更新 Docker 映像和依賴
|
|
||||||
5. **備份數據**: 定期備份重要數據
|
|
||||||
6. **監控日誌**: 定期檢查日誌中的異常活動
|
|
||||||
|
|
||||||
## 常見問題
|
|
||||||
|
|
||||||
### Q: 如何修改對外端口?
|
|
||||||
|
|
||||||
A: 修改 `.env` 中的 `FRONTEND_PORT` 和 `docker-compose.yml` 中的端口映射。
|
|
||||||
|
|
||||||
### Q: 如何增加上傳文件大小限制?
|
|
||||||
|
|
||||||
A: 修改 `.env` 中的 `MAX_UPLOAD_SIZE` 和 `docker/nginx.conf` 中的 `client_max_body_size`。
|
|
||||||
|
|
||||||
### Q: 如何連接外部 MySQL 數據庫?
|
|
||||||
|
|
||||||
A: 在 `.env` 中配置正確的數據庫連接信息。
|
|
||||||
|
|
||||||
### Q: 如何查看詳細的錯誤信息?
|
|
||||||
|
|
||||||
A: 設置 `.env` 中的 `LOG_LEVEL=DEBUG` 並重啟容器。
|
|
||||||
|
|
||||||
## 聯繫支援
|
|
||||||
|
|
||||||
如果遇到問題,請:
|
|
||||||
|
|
||||||
1. 查看日誌: `docker compose logs -f`(舊版 Docker 請使用 `docker-compose logs -f`)
|
|
||||||
2. 檢查配置: 確認 `.env` 文件正確
|
|
||||||
3. 查看文檔: 參考本文檔的故障排除部分
|
|
||||||
4. 提交 Issue: 在項目倉庫提交問題報告
|
|
||||||
131
Dockerfile
131
Dockerfile
@@ -1,131 +0,0 @@
|
|||||||
# ============================================
|
|
||||||
# Tool_OCR - Unified Docker Image
|
|
||||||
# Frontend (React + Vite) + Backend (FastAPI)
|
|
||||||
# Served by Nginx with reverse proxy
|
|
||||||
# ============================================
|
|
||||||
|
|
||||||
# ============================================
|
|
||||||
# Stage 1: Build Frontend
|
|
||||||
# ============================================
|
|
||||||
FROM node:20-alpine AS frontend-builder
|
|
||||||
|
|
||||||
WORKDIR /app/frontend
|
|
||||||
|
|
||||||
# Copy package files
|
|
||||||
COPY frontend/package*.json ./
|
|
||||||
|
|
||||||
# Install all dependencies (including devDependencies for build)
|
|
||||||
RUN npm ci
|
|
||||||
|
|
||||||
# Copy frontend source
|
|
||||||
COPY frontend/ ./
|
|
||||||
|
|
||||||
# Create production environment file
|
|
||||||
RUN echo "VITE_API_BASE_URL=" > .env.production
|
|
||||||
|
|
||||||
# Build frontend for production
|
|
||||||
RUN npm run build
|
|
||||||
|
|
||||||
|
|
||||||
# ============================================
|
|
||||||
# Stage 2: Build Backend + Final Image
|
|
||||||
# ============================================
|
|
||||||
FROM python:3.10-slim-bookworm
|
|
||||||
|
|
||||||
# Set working directory
|
|
||||||
WORKDIR /app
|
|
||||||
|
|
||||||
# Set environment variables
|
|
||||||
ENV PYTHONUNBUFFERED=1 \
|
|
||||||
PYTHONDONTWRITEBYTECODE=1 \
|
|
||||||
PIP_NO_CACHE_DIR=1 \
|
|
||||||
PIP_DISABLE_PIP_VERSION_CHECK=1 \
|
|
||||||
DEBIAN_FRONTEND=noninteractive
|
|
||||||
|
|
||||||
# Install system dependencies
|
|
||||||
# - nginx: web server and reverse proxy
|
|
||||||
# - supervisor: process manager for nginx + uvicorn
|
|
||||||
# - curl: for health checks
|
|
||||||
# - pandoc: for markdown to PDF conversion
|
|
||||||
# - poppler-utils: for pdf2image (PDF processing)
|
|
||||||
# - libpango-1.0-0, libpangocairo-1.0-0: for WeasyPrint
|
|
||||||
# - libgdk-pixbuf2.0-0: for WeasyPrint image handling
|
|
||||||
# - libffi-dev: for cryptography
|
|
||||||
# - fonts-noto-cjk: Chinese/Japanese/Korean font support
|
|
||||||
# - libgomp1, libgl1-mesa-glx, libglib2.0-0: for OpenCV and PaddleOCR
|
|
||||||
# - libmagic1: for python-magic file type detection
|
|
||||||
# - libreoffice-writer, libreoffice-impress: for Office document conversion (doc/docx/ppt/pptx)
|
|
||||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
|
||||||
nginx \
|
|
||||||
supervisor \
|
|
||||||
curl \
|
|
||||||
pandoc \
|
|
||||||
poppler-utils \
|
|
||||||
libpango-1.0-0 \
|
|
||||||
libpangocairo-1.0-0 \
|
|
||||||
libgdk-pixbuf2.0-0 \
|
|
||||||
libffi-dev \
|
|
||||||
fonts-noto-cjk \
|
|
||||||
fonts-noto-cjk-extra \
|
|
||||||
libgomp1 \
|
|
||||||
libgl1-mesa-glx \
|
|
||||||
libglib2.0-0 \
|
|
||||||
libmagic1 \
|
|
||||||
libreoffice-writer \
|
|
||||||
libreoffice-impress \
|
|
||||||
&& rm -rf /var/lib/apt/lists/*
|
|
||||||
|
|
||||||
# Copy Python requirements
|
|
||||||
COPY requirements.txt .
|
|
||||||
|
|
||||||
# Install Python dependencies with extended timeout
|
|
||||||
# PaddlePaddle is 189MB and may take time to download
|
|
||||||
# Timeout: 600 seconds (10 minutes), Retries: 5
|
|
||||||
RUN pip install --timeout 600 --retries 5 -r requirements.txt
|
|
||||||
|
|
||||||
# Copy backend application
|
|
||||||
COPY backend/ ./backend/
|
|
||||||
|
|
||||||
# Copy frontend build from frontend-builder stage
|
|
||||||
COPY --from=frontend-builder /app/frontend/dist /app/frontend/dist
|
|
||||||
|
|
||||||
# Copy Nginx configuration
|
|
||||||
COPY docker/nginx.conf /etc/nginx/nginx.conf
|
|
||||||
COPY docker/default.conf /etc/nginx/conf.d/default.conf
|
|
||||||
|
|
||||||
# Copy supervisor configuration
|
|
||||||
COPY docker/supervisord.conf /etc/supervisor/conf.d/supervisord.conf
|
|
||||||
|
|
||||||
# Copy startup script and fix line endings (Windows CRLF -> Linux LF)
|
|
||||||
COPY docker/entrypoint.sh /entrypoint.sh
|
|
||||||
RUN sed -i 's/\r$//' /entrypoint.sh && chmod +x /entrypoint.sh
|
|
||||||
|
|
||||||
# Create necessary directories with proper permissions
|
|
||||||
RUN mkdir -p \
|
|
||||||
/app/backend/uploads/temp \
|
|
||||||
/app/backend/uploads/processed \
|
|
||||||
/app/backend/uploads/images \
|
|
||||||
/app/backend/storage/markdown \
|
|
||||||
/app/backend/storage/json \
|
|
||||||
/app/backend/storage/exports \
|
|
||||||
/app/backend/models/paddleocr \
|
|
||||||
/app/backend/logs \
|
|
||||||
/var/log/supervisor \
|
|
||||||
/var/log/nginx \
|
|
||||||
/var/cache/nginx \
|
|
||||||
/var/run \
|
|
||||||
&& chmod -R 755 /app \
|
|
||||||
&& chown -R www-data:www-data /var/log/nginx /var/cache/nginx
|
|
||||||
|
|
||||||
# Expose port (only one port needed!)
|
|
||||||
EXPOSE 12015
|
|
||||||
|
|
||||||
# Health check
|
|
||||||
HEALTHCHECK --interval=30s --timeout=10s --start-period=40s --retries=3 \
|
|
||||||
CMD curl -f http://localhost:12015/health || exit 1
|
|
||||||
|
|
||||||
# Set working directory to backend for Python app
|
|
||||||
WORKDIR /app/backend
|
|
||||||
|
|
||||||
# Use entrypoint script to start supervisor
|
|
||||||
ENTRYPOINT ["/entrypoint.sh"]
|
|
||||||
272
README.md
272
README.md
@@ -11,6 +11,7 @@ A web-based solution to extract text, images, and document structure from multip
|
|||||||
- 🖼️ **Image Extraction**: Preserve document images alongside text content
|
- 🖼️ **Image Extraction**: Preserve document images alongside text content
|
||||||
- 📑 **Batch Processing**: Process multiple files concurrently with progress tracking
|
- 📑 **Batch Processing**: Process multiple files concurrently with progress tracking
|
||||||
- 📤 **Multiple Export Formats**: TXT, JSON, Excel, Markdown with images, searchable PDF
|
- 📤 **Multiple Export Formats**: TXT, JSON, Excel, Markdown with images, searchable PDF
|
||||||
|
- 📋 **Office Documents**: DOC, DOCX, PPT, PPTX support via LibreOffice conversion
|
||||||
- 🔧 **Flexible Configuration**: Rule-based output formatting
|
- 🔧 **Flexible Configuration**: Rule-based output formatting
|
||||||
- 🌐 **Translation Ready**: Reserved architecture for future translation features
|
- 🌐 **Translation Ready**: Reserved architecture for future translation features
|
||||||
|
|
||||||
@@ -22,173 +23,176 @@ A web-based solution to extract text, images, and document structure from multip
|
|||||||
- **Database**: MySQL via SQLAlchemy
|
- **Database**: MySQL via SQLAlchemy
|
||||||
- **PDF Generation**: Pandoc + WeasyPrint
|
- **PDF Generation**: Pandoc + WeasyPrint
|
||||||
- **Image Processing**: OpenCV, Pillow, pdf2image
|
- **Image Processing**: OpenCV, Pillow, pdf2image
|
||||||
|
- **Office Conversion**: LibreOffice (headless mode)
|
||||||
|
|
||||||
### Frontend
|
### Frontend
|
||||||
- **Framework**: React 18 with Vite
|
- **Framework**: React 19 with TypeScript
|
||||||
- **Styling**: TailwindCSS + shadcn/ui
|
- **Build Tool**: Vite 7
|
||||||
- **HTTP Client**: Axios with React Query
|
- **Styling**: Tailwind CSS v4 + shadcn/ui
|
||||||
|
- **State Management**: React Query + Zustand
|
||||||
|
- **HTTP Client**: Axios
|
||||||
|
|
||||||
## Prerequisites
|
## Prerequisites
|
||||||
|
|
||||||
- **macOS**: Apple Silicon (M1/M2/M3) or Intel
|
- **OS**: WSL2 Ubuntu 24.04
|
||||||
- **Python**: 3.10+
|
- **Python**: 3.12+
|
||||||
- **Conda**: Miniconda or Anaconda (will be installed automatically)
|
- **Node.js**: 24.x LTS
|
||||||
- **Homebrew**: For system dependencies
|
|
||||||
- **MySQL**: External database server (provided)
|
- **MySQL**: External database server (provided)
|
||||||
|
|
||||||
## Installation
|
## Quick Start
|
||||||
|
|
||||||
### 1. Automated Setup (Recommended)
|
### 1. Automated Setup (Recommended)
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Clone the repository
|
|
||||||
cd /Users/egg/Projects/Tool_OCR
|
|
||||||
|
|
||||||
# Run automated setup script
|
# Run automated setup script
|
||||||
chmod +x setup_conda.sh
|
./setup_dev_env.sh
|
||||||
./setup_conda.sh
|
|
||||||
|
|
||||||
# If Conda was just installed, reload your shell
|
|
||||||
source ~/.zshrc # or source ~/.bash_profile
|
|
||||||
|
|
||||||
# Run the script again to create environment
|
|
||||||
./setup_conda.sh
|
|
||||||
```
|
```
|
||||||
|
|
||||||
### 2. Install Dependencies
|
This script automatically installs:
|
||||||
|
- Python development tools (pip, venv, build-essential)
|
||||||
|
- System dependencies (pandoc, LibreOffice, fonts, etc.)
|
||||||
|
- Node.js (via nvm)
|
||||||
|
- Python packages
|
||||||
|
- Frontend dependencies
|
||||||
|
|
||||||
|
### 2. Initialize Database
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Activate Conda environment
|
source venv/bin/activate
|
||||||
conda activate tool_ocr
|
|
||||||
|
|
||||||
# Install Python dependencies
|
|
||||||
pip install -r requirements.txt
|
|
||||||
|
|
||||||
# Install system dependencies (Pandoc for PDF generation)
|
|
||||||
brew install pandoc
|
|
||||||
|
|
||||||
# Install Chinese fonts for PDF generation (optional)
|
|
||||||
brew install --cask font-noto-sans-cjk
|
|
||||||
# Note: macOS built-in fonts work fine, this is optional
|
|
||||||
```
|
|
||||||
|
|
||||||
### 3. Download PaddleOCR Models
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# Create models directory
|
|
||||||
mkdir -p models/paddleocr
|
|
||||||
|
|
||||||
# Models will be automatically downloaded on first run
|
|
||||||
# (~900MB total, includes PaddleOCR-VL 0.9B model)
|
|
||||||
```
|
|
||||||
|
|
||||||
### 4. Configure Environment
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# Copy environment template
|
|
||||||
cp .env.example .env
|
|
||||||
|
|
||||||
# Edit .env with your settings
|
|
||||||
# Database credentials are pre-configured
|
|
||||||
nano .env
|
|
||||||
```
|
|
||||||
|
|
||||||
### 5. Initialize Database
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# Database schema will be created automatically on first run
|
|
||||||
# Using: mysql.theaken.com:33306/db_A060
|
|
||||||
```
|
|
||||||
|
|
||||||
## Usage
|
|
||||||
|
|
||||||
### Start Backend Server
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# Activate environment
|
|
||||||
conda activate tool_ocr
|
|
||||||
|
|
||||||
# Start FastAPI server
|
|
||||||
cd backend
|
cd backend
|
||||||
python -m app.main
|
alembic upgrade head
|
||||||
|
python create_test_user.py
|
||||||
# Server runs at: http://localhost:12010
|
cd ..
|
||||||
# API docs: http://localhost:12010/docs
|
|
||||||
```
|
```
|
||||||
|
|
||||||
### Start Frontend (Coming Soon)
|
Default test user:
|
||||||
|
- Username: `admin`
|
||||||
|
- Password: `admin123`
|
||||||
|
|
||||||
|
### 3. Start Development Servers
|
||||||
|
|
||||||
|
**Backend (Terminal 1):**
|
||||||
```bash
|
```bash
|
||||||
# Install frontend dependencies
|
./start_backend.sh
|
||||||
cd frontend
|
|
||||||
npm install
|
|
||||||
|
|
||||||
# Start development server
|
|
||||||
npm run dev
|
|
||||||
|
|
||||||
# Frontend runs at: http://localhost:12011
|
|
||||||
```
|
```
|
||||||
|
|
||||||
|
**Frontend (Terminal 2):**
|
||||||
|
```bash
|
||||||
|
./start_frontend.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
### 4. Access Application
|
||||||
|
|
||||||
|
- **Frontend**: http://localhost:5173
|
||||||
|
- **API Docs**: http://localhost:8000/docs
|
||||||
|
- **Health Check**: http://localhost:8000/health
|
||||||
|
|
||||||
## Project Structure
|
## Project Structure
|
||||||
|
|
||||||
```
|
```
|
||||||
Tool_OCR/
|
Tool_OCR/
|
||||||
├── backend/
|
├── backend/ # FastAPI backend
|
||||||
│ ├── app/
|
│ ├── app/
|
||||||
│ │ ├── api/v1/ # API endpoints
|
│ │ ├── api/v1/ # API endpoints
|
||||||
│ │ ├── core/ # Configuration, database
|
│ │ ├── core/ # Configuration, database
|
||||||
│ │ ├── models/ # Database models
|
│ │ ├── models/ # Database models
|
||||||
│ │ ├── services/ # Business logic
|
│ │ ├── services/ # Business logic
|
||||||
│ │ ├── utils/ # Utilities
|
|
||||||
│ │ └── main.py # Application entry point
|
│ │ └── main.py # Application entry point
|
||||||
|
│ ├── alembic/ # Database migrations
|
||||||
│ └── tests/ # Test suite
|
│ └── tests/ # Test suite
|
||||||
├── frontend/
|
├── frontend/ # React frontend
|
||||||
│ └── src/ # React application
|
│ ├── src/
|
||||||
├── uploads/
|
│ │ ├── components/ # UI components
|
||||||
│ ├── temp/ # Temporary uploads
|
│ │ ├── pages/ # Page components
|
||||||
│ ├── processed/ # Processed files
|
│ │ ├── services/ # API services
|
||||||
│ └── images/ # Extracted images
|
│ │ └── stores/ # State management
|
||||||
├── storage/
|
│ └── public/ # Static assets
|
||||||
│ ├── markdown/ # Markdown outputs
|
├── .env.local # Local development config
|
||||||
│ ├── json/ # JSON results
|
├── setup_dev_env.sh # Environment setup script
|
||||||
│ └── exports/ # Export files
|
├── start_backend.sh # Backend startup script
|
||||||
├── models/
|
└── start_frontend.sh # Frontend startup script
|
||||||
│ └── paddleocr/ # PaddleOCR models
|
|
||||||
├── config/ # Configuration files
|
|
||||||
├── templates/ # PDF templates
|
|
||||||
├── logs/ # Application logs
|
|
||||||
├── requirements.txt # Python dependencies
|
|
||||||
├── setup_conda.sh # Environment setup script
|
|
||||||
├── .env.example # Environment template
|
|
||||||
└── README.md
|
|
||||||
```
|
```
|
||||||
|
|
||||||
## API Endpoints (Planned)
|
## Configuration
|
||||||
|
|
||||||
- `POST /api/v1/ocr/upload` - Upload files for OCR processing
|
Main config file: `.env.local`
|
||||||
- `GET /api/v1/ocr/tasks` - List all OCR tasks
|
|
||||||
- `GET /api/v1/ocr/tasks/{task_id}` - Get task details
|
```bash
|
||||||
- `POST /api/v1/ocr/batch` - Create batch processing task
|
# Database
|
||||||
- `GET /api/v1/export/{task_id}` - Export results (TXT/JSON/Excel/MD/PDF)
|
MYSQL_HOST=mysql.theaken.com
|
||||||
- `POST /api/v1/translate/document` - Translate document (reserved, returns 501)
|
MYSQL_PORT=33306
|
||||||
|
|
||||||
|
# Application ports
|
||||||
|
BACKEND_PORT=8000
|
||||||
|
FRONTEND_PORT=5173
|
||||||
|
|
||||||
|
# Token expiration (minutes)
|
||||||
|
ACCESS_TOKEN_EXPIRE_MINUTES=1440 # 24 hours
|
||||||
|
|
||||||
|
# Supported file formats
|
||||||
|
ALLOWED_EXTENSIONS=png,jpg,jpeg,pdf,bmp,tiff,doc,docx,ppt,pptx
|
||||||
|
|
||||||
|
# OCR settings
|
||||||
|
OCR_LANGUAGES=ch,en,japan,korean
|
||||||
|
MAX_OCR_WORKERS=4
|
||||||
|
```
|
||||||
|
|
||||||
|
## API Endpoints
|
||||||
|
|
||||||
|
### Authentication
|
||||||
|
- `POST /api/v1/auth/login` - User login
|
||||||
|
|
||||||
|
### File Management
|
||||||
|
- `POST /api/v1/upload` - Upload files
|
||||||
|
- `POST /api/v1/ocr/process` - Start OCR processing
|
||||||
|
- `GET /api/v1/batch/{id}/status` - Get batch status
|
||||||
|
|
||||||
|
### Results & Export
|
||||||
|
- `GET /api/v1/ocr/result/{id}` - Get OCR result
|
||||||
|
- `GET /api/v1/export/pdf/{id}` - Export as PDF
|
||||||
|
|
||||||
|
Full API documentation: http://localhost:8000/docs
|
||||||
|
|
||||||
|
## Supported File Formats
|
||||||
|
|
||||||
|
- **Images**: PNG, JPG, JPEG, BMP, TIFF
|
||||||
|
- **Documents**: PDF
|
||||||
|
- **Office**: DOC, DOCX, PPT, PPTX
|
||||||
|
|
||||||
|
Office files are automatically converted to PDF before OCR processing.
|
||||||
|
|
||||||
## Development
|
## Development
|
||||||
|
|
||||||
### Run Tests
|
### Backend
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
|
source venv/bin/activate
|
||||||
cd backend
|
cd backend
|
||||||
pytest tests/ -v --cov=app
|
|
||||||
|
# Run tests
|
||||||
|
pytest
|
||||||
|
|
||||||
|
# Database migration
|
||||||
|
alembic revision --autogenerate -m "description"
|
||||||
|
alembic upgrade head
|
||||||
|
|
||||||
|
# Code formatting
|
||||||
|
black app/
|
||||||
```
|
```
|
||||||
|
|
||||||
### Code Quality
|
### Frontend
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Format code
|
cd frontend
|
||||||
black app/
|
|
||||||
|
# Development server
|
||||||
|
npm run dev
|
||||||
|
|
||||||
|
# Build for production
|
||||||
|
npm run build
|
||||||
|
|
||||||
# Lint code
|
# Lint code
|
||||||
pylint app/
|
npm run lint
|
||||||
```
|
```
|
||||||
|
|
||||||
## OpenSpec Workflow
|
## OpenSpec Workflow
|
||||||
@@ -208,26 +212,26 @@ cat openspec/changes/add-ocr-batch-processing/tasks.md
|
|||||||
|
|
||||||
## Roadmap
|
## Roadmap
|
||||||
|
|
||||||
- [x] **Phase 0**: Environment setup and configuration
|
- [x] **Phase 0**: Environment setup
|
||||||
- [ ] **Phase 1**: Core OCR with structure extraction
|
- [x] **Phase 1**: Core OCR backend (~98% complete)
|
||||||
- [ ] **Phase 2**: Frontend development
|
- [x] **Phase 2**: Frontend development (~92% complete)
|
||||||
- [ ] **Phase 3**: Testing & optimization
|
- [ ] **Phase 3**: Testing & optimization
|
||||||
- [ ] **Phase 4**: Deployment
|
- [ ] **Phase 4**: Deployment automation
|
||||||
- [ ] **Phase 5**: Translation feature (future)
|
- [ ] **Phase 5**: Translation feature (future)
|
||||||
|
|
||||||
|
## Documentation
|
||||||
|
|
||||||
|
- Development specs: [openspec/project.md](openspec/project.md)
|
||||||
|
- Implementation status: [openspec/changes/add-ocr-batch-processing/STATUS.md](openspec/changes/add-ocr-batch-processing/STATUS.md)
|
||||||
|
- Agent instructions: [openspec/AGENTS.md](openspec/AGENTS.md)
|
||||||
|
|
||||||
## License
|
## License
|
||||||
|
|
||||||
[To be determined]
|
Internal project use
|
||||||
|
|
||||||
## Contributors
|
## Notes
|
||||||
|
|
||||||
- Development environment: macOS Apple Silicon
|
- First OCR run will download PaddleOCR models (~900MB)
|
||||||
- Database: MySQL external server
|
- Token expiration is set to 24 hours by default
|
||||||
- OCR Engine: PaddleOCR-VL 0.9B with PP-StructureV3
|
- Office conversion requires LibreOffice (installed via setup script)
|
||||||
|
- Development environment: WSL2 Ubuntu 24.04 with Python venv
|
||||||
## Support
|
|
||||||
|
|
||||||
For issues and questions, refer to:
|
|
||||||
- OpenSpec documentation: `openspec/AGENTS.md`
|
|
||||||
- Task breakdown: `openspec/changes/add-ocr-batch-processing/tasks.md`
|
|
||||||
- Specifications: `openspec/changes/add-ocr-batch-processing/specs/`
|
|
||||||
|
|||||||
395
SETUP.md
395
SETUP.md
@@ -1,395 +0,0 @@
|
|||||||
# Tool_OCR Setup Guide
|
|
||||||
|
|
||||||
Complete setup instructions for macOS environment.
|
|
||||||
|
|
||||||
## Prerequisites Check
|
|
||||||
|
|
||||||
Before starting, verify you have:
|
|
||||||
- ✅ macOS (Apple Silicon or Intel)
|
|
||||||
- ✅ Terminal access (zsh or bash)
|
|
||||||
- ✅ Internet connection for downloads
|
|
||||||
|
|
||||||
## Step-by-Step Setup
|
|
||||||
|
|
||||||
### Step 1: Install Conda Environment
|
|
||||||
|
|
||||||
Run the automated setup script:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
chmod +x setup_conda.sh
|
|
||||||
./setup_conda.sh
|
|
||||||
```
|
|
||||||
|
|
||||||
**Expected output:**
|
|
||||||
- If Conda is not installed: Downloads and installs Miniconda for Apple Silicon
|
|
||||||
- If Conda is already installed: Creates the `tool_ocr` environment with Python 3.10
|
|
||||||
|
|
||||||
**If Conda was just installed:**
|
|
||||||
```bash
|
|
||||||
# Reload your shell to activate Conda
|
|
||||||
source ~/.zshrc # if using zsh (default on macOS)
|
|
||||||
source ~/.bashrc # if using bash
|
|
||||||
|
|
||||||
# Run setup script again to create environment
|
|
||||||
./setup_conda.sh
|
|
||||||
```
|
|
||||||
|
|
||||||
### Step 2: Activate Environment
|
|
||||||
|
|
||||||
```bash
|
|
||||||
conda activate tool_ocr
|
|
||||||
```
|
|
||||||
|
|
||||||
You should see `(tool_ocr)` prefix in your terminal prompt.
|
|
||||||
|
|
||||||
### Step 3: Install Python Dependencies
|
|
||||||
|
|
||||||
```bash
|
|
||||||
pip install -r requirements.txt
|
|
||||||
```
|
|
||||||
|
|
||||||
**This will install:**
|
|
||||||
- FastAPI and Uvicorn (web framework)
|
|
||||||
- PaddleOCR and PaddlePaddle (OCR engine)
|
|
||||||
- Image processing libraries (Pillow, OpenCV, pdf2image)
|
|
||||||
- PDF generation tools (WeasyPrint, Markdown)
|
|
||||||
- Database tools (SQLAlchemy, PyMySQL, Alembic)
|
|
||||||
- Authentication libraries (python-jose, passlib)
|
|
||||||
- Testing tools (pytest, pytest-asyncio)
|
|
||||||
|
|
||||||
**Installation time:** ~5-10 minutes depending on your internet speed
|
|
||||||
|
|
||||||
### Step 4: Install System Dependencies
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# Install libmagic (required for python-magic file type detection)
|
|
||||||
brew install libmagic
|
|
||||||
|
|
||||||
# Install WeasyPrint dependencies (required for PDF generation)
|
|
||||||
brew install pango gdk-pixbuf libffi
|
|
||||||
|
|
||||||
# Install Pandoc (optional - for enhanced PDF generation)
|
|
||||||
brew install pandoc
|
|
||||||
|
|
||||||
# Install Chinese fonts for PDF output (optional - macOS has built-in Chinese fonts)
|
|
||||||
brew install --cask font-noto-sans-cjk
|
|
||||||
# Note: If above fails, skip it - macOS built-in fonts (PingFang SC, Heiti TC) work fine
|
|
||||||
```
|
|
||||||
|
|
||||||
**If Homebrew is not installed:**
|
|
||||||
```bash
|
|
||||||
/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"
|
|
||||||
```
|
|
||||||
|
|
||||||
### Step 5: Configure Environment Variables
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# Copy template
|
|
||||||
cp .env.example .env
|
|
||||||
|
|
||||||
# Edit with your preferred editor
|
|
||||||
nano .env
|
|
||||||
# or
|
|
||||||
code .env
|
|
||||||
```
|
|
||||||
|
|
||||||
**Important settings to verify in `.env`:**
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# Database (pre-configured, should work as-is)
|
|
||||||
MYSQL_HOST=mysql.theaken.com
|
|
||||||
MYSQL_PORT=33306
|
|
||||||
MYSQL_USER=A060
|
|
||||||
MYSQL_PASSWORD=WLeSCi0yhtc7
|
|
||||||
MYSQL_DATABASE=db_A060
|
|
||||||
|
|
||||||
# Application ports
|
|
||||||
BACKEND_PORT=12010
|
|
||||||
FRONTEND_PORT=12011
|
|
||||||
|
|
||||||
# Security (CHANGE THIS!)
|
|
||||||
SECRET_KEY=your-secret-key-here-please-change-this-to-random-string
|
|
||||||
```
|
|
||||||
|
|
||||||
**Generate a secure SECRET_KEY:**
|
|
||||||
```bash
|
|
||||||
python -c "import secrets; print(secrets.token_urlsafe(32))"
|
|
||||||
```
|
|
||||||
|
|
||||||
Copy the output and paste it as your `SECRET_KEY` value.
|
|
||||||
|
|
||||||
### Step 6: Set Environment Variable for WeasyPrint
|
|
||||||
|
|
||||||
Add to your shell config (`~/.zshrc` or `~/.bash_profile`):
|
|
||||||
|
|
||||||
```bash
|
|
||||||
export DYLD_LIBRARY_PATH="/opt/homebrew/lib:$DYLD_LIBRARY_PATH"
|
|
||||||
```
|
|
||||||
|
|
||||||
Then reload:
|
|
||||||
```bash
|
|
||||||
source ~/.zshrc # or source ~/.bash_profile
|
|
||||||
```
|
|
||||||
|
|
||||||
### Step 7: Run Service Layer Tests
|
|
||||||
|
|
||||||
Verify all services are working:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
cd backend
|
|
||||||
python test_services.py
|
|
||||||
```
|
|
||||||
|
|
||||||
Expected output:
|
|
||||||
```
|
|
||||||
✓ PASS - database
|
|
||||||
✓ PASS - preprocessor
|
|
||||||
✓ PASS - pdf_generator
|
|
||||||
✓ PASS - file_manager
|
|
||||||
Total: 4-5/5 tests passed
|
|
||||||
```
|
|
||||||
|
|
||||||
**Note:** OCR engine test may fail on first run as PaddleOCR downloads models (~900MB). This is normal.
|
|
||||||
|
|
||||||
### Step 8: Create Directory Structure
|
|
||||||
|
|
||||||
The directories should already exist, but verify:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
ls -la
|
|
||||||
```
|
|
||||||
|
|
||||||
You should see:
|
|
||||||
- `backend/` - FastAPI application
|
|
||||||
- `frontend/` - React application (will be populated later)
|
|
||||||
- `uploads/` - File upload storage
|
|
||||||
- `storage/` - Processed results
|
|
||||||
- `models/` - PaddleOCR models (empty until first run)
|
|
||||||
- `logs/` - Application logs
|
|
||||||
|
|
||||||
### Step 9: Start Backend Server
|
|
||||||
|
|
||||||
```bash
|
|
||||||
cd backend
|
|
||||||
python -m app.main
|
|
||||||
```
|
|
||||||
|
|
||||||
**Expected output:**
|
|
||||||
```
|
|
||||||
INFO: Started server process
|
|
||||||
INFO: Waiting for application startup.
|
|
||||||
INFO: Application startup complete.
|
|
||||||
INFO: Uvicorn running on http://0.0.0.0:12010
|
|
||||||
```
|
|
||||||
|
|
||||||
**Test the server:**
|
|
||||||
Open a browser and visit:
|
|
||||||
- http://localhost:12010 - API root
|
|
||||||
- http://localhost:12010/docs - Interactive API documentation
|
|
||||||
- http://localhost:12010/health - Health check endpoint
|
|
||||||
|
|
||||||
### Step 10: Download PaddleOCR Models
|
|
||||||
|
|
||||||
On first OCR request, PaddleOCR will automatically download models (~900MB).
|
|
||||||
|
|
||||||
**To pre-download models manually:**
|
|
||||||
|
|
||||||
```bash
|
|
||||||
python -c "
|
|
||||||
from paddleocr import PaddleOCR
|
|
||||||
ocr = PaddleOCR(use_angle_cls=True, lang='ch', use_gpu=False)
|
|
||||||
print('Models downloaded successfully')
|
|
||||||
"
|
|
||||||
```
|
|
||||||
|
|
||||||
This will download:
|
|
||||||
- Detection model: ch_PP-OCRv4_det
|
|
||||||
- Recognition model: ch_PP-OCRv4_rec
|
|
||||||
- Angle classifier: ch_ppocr_mobile_v2.0_cls
|
|
||||||
|
|
||||||
Models are stored in: `./models/paddleocr/`
|
|
||||||
|
|
||||||
## Troubleshooting
|
|
||||||
|
|
||||||
### Issue: "conda: command not found"
|
|
||||||
|
|
||||||
**Solution:**
|
|
||||||
```bash
|
|
||||||
# Reload shell configuration
|
|
||||||
source ~/.zshrc # or source ~/.bashrc
|
|
||||||
|
|
||||||
# If still not working, manually add Conda to PATH
|
|
||||||
export PATH="$HOME/miniconda3/bin:$PATH"
|
|
||||||
```
|
|
||||||
|
|
||||||
### Issue: PaddlePaddle installation fails
|
|
||||||
|
|
||||||
**Solution:**
|
|
||||||
```bash
|
|
||||||
# For Apple Silicon Macs, ensure you're using ARM version
|
|
||||||
pip uninstall paddlepaddle
|
|
||||||
pip install paddlepaddle --no-cache-dir
|
|
||||||
```
|
|
||||||
|
|
||||||
### Issue: WeasyPrint fails to install
|
|
||||||
|
|
||||||
**Solution:**
|
|
||||||
```bash
|
|
||||||
# Install required system libraries
|
|
||||||
brew install cairo pango gdk-pixbuf libffi
|
|
||||||
pip install --upgrade weasyprint
|
|
||||||
```
|
|
||||||
|
|
||||||
### Issue: Database connection fails
|
|
||||||
|
|
||||||
**Solution:**
|
|
||||||
```bash
|
|
||||||
# Test database connection
|
|
||||||
python -c "
|
|
||||||
import pymysql
|
|
||||||
conn = pymysql.connect(
|
|
||||||
host='mysql.theaken.com',
|
|
||||||
port=33306,
|
|
||||||
user='A060',
|
|
||||||
password='WLeSCi0yhtc7',
|
|
||||||
database='db_A060'
|
|
||||||
)
|
|
||||||
print('Database connection OK')
|
|
||||||
conn.close()
|
|
||||||
"
|
|
||||||
```
|
|
||||||
|
|
||||||
If this fails, verify:
|
|
||||||
- Internet connection is active
|
|
||||||
- Firewall is not blocking port 33306
|
|
||||||
- Database credentials in `.env` are correct
|
|
||||||
|
|
||||||
### Issue: Port 12010 already in use
|
|
||||||
|
|
||||||
**Solution:**
|
|
||||||
```bash
|
|
||||||
# Find what's using the port
|
|
||||||
lsof -i :12010
|
|
||||||
|
|
||||||
# Kill the process or change port in .env
|
|
||||||
# Edit BACKEND_PORT to any other free port (not 12011, which is FRONTEND_PORT)
|
|
||||||
```
|
|
||||||
|
|
||||||
## Next Steps
|
|
||||||
|
|
||||||
After successful setup:
|
|
||||||
|
|
||||||
1. ✅ Environment is ready
|
|
||||||
2. ✅ Backend server can start
|
|
||||||
3. ✅ Database connection configured
|
|
||||||
|
|
||||||
**Ready to develop:**
|
|
||||||
- Implement database models (`backend/app/models/`)
|
|
||||||
- Create API endpoints (`backend/app/api/v1/`)
|
|
||||||
- Build OCR service (`backend/app/services/ocr_service.py`)
|
|
||||||
- Develop frontend UI (`frontend/src/`)
|
|
||||||
|
|
||||||
**Start with Phase 1 tasks:**
|
|
||||||
Refer to [openspec/changes/add-ocr-batch-processing/tasks.md](openspec/changes/add-ocr-batch-processing/tasks.md) for detailed implementation tasks.
|
|
||||||
|
|
||||||
## Development Workflow
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# Activate environment
|
|
||||||
conda activate tool_ocr
|
|
||||||
|
|
||||||
# Start backend in development mode (auto-reload)
|
|
||||||
cd backend
|
|
||||||
python -m app.main
|
|
||||||
|
|
||||||
bash -c "source ~/.zshrc && conda activate tool_ocr && export DYLD_LIBRARY_PATH=/opt/homebrew/lib:$DYLD_LIBRARY_PATH && python -m app.main"
|
|
||||||
|
|
||||||
# In another terminal, start frontend
|
|
||||||
cd frontend
|
|
||||||
npm run dev
|
|
||||||
|
|
||||||
# Run tests
|
|
||||||
cd backend
|
|
||||||
pytest tests/ -v
|
|
||||||
|
|
||||||
# Check code style
|
|
||||||
black app/
|
|
||||||
pylint app/
|
|
||||||
```
|
|
||||||
|
|
||||||
## Background Services
|
|
||||||
|
|
||||||
### Automatic Cleanup Scheduler
|
|
||||||
|
|
||||||
The application automatically runs a cleanup scheduler that:
|
|
||||||
- **Runs every**: 1 hour (configurable via `BackgroundTaskManager.cleanup_interval`)
|
|
||||||
- **Deletes files older than**: 24 hours (configurable via `BackgroundTaskManager.file_retention_hours`)
|
|
||||||
- **Cleans up**:
|
|
||||||
- Physical files and directories
|
|
||||||
- Database records (results, files, batches)
|
|
||||||
- Expired batches in COMPLETED, FAILED, or PARTIAL status
|
|
||||||
|
|
||||||
The cleanup scheduler starts automatically when the backend application starts and stops gracefully on shutdown.
|
|
||||||
|
|
||||||
**Monitor cleanup activity:**
|
|
||||||
```bash
|
|
||||||
# Watch cleanup logs in real-time
|
|
||||||
tail -f /tmp/tool_ocr_startup.log | grep cleanup
|
|
||||||
|
|
||||||
# Or check application logs
|
|
||||||
tail -f backend/logs/app.log | grep cleanup
|
|
||||||
```
|
|
||||||
|
|
||||||
### Retry Logic
|
|
||||||
|
|
||||||
OCR processing includes automatic retry logic:
|
|
||||||
- **Maximum retries**: 3 attempts (configurable)
|
|
||||||
- **Retry delay**: 5 seconds between attempts (configurable)
|
|
||||||
- **Tracks**: `retry_count` field in database
|
|
||||||
- **Error handling**: Detailed error messages with retry attempt information
|
|
||||||
|
|
||||||
**Configuration** (in [backend/app/services/background_tasks.py](backend/app/services/background_tasks.py)):
|
|
||||||
```python
|
|
||||||
task_manager = BackgroundTaskManager(
|
|
||||||
max_retries=3, # Number of retry attempts
|
|
||||||
retry_delay=5, # Delay between retries (seconds)
|
|
||||||
cleanup_interval=3600, # Cleanup runs every hour
|
|
||||||
file_retention_hours=24 # Keep files for 24 hours
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
### Background Task Status
|
|
||||||
|
|
||||||
Check if background services are running:
|
|
||||||
```bash
|
|
||||||
# Check health endpoint
|
|
||||||
curl http://localhost:12010/health
|
|
||||||
|
|
||||||
# Check application startup logs for cleanup scheduler
|
|
||||||
grep "cleanup scheduler" /tmp/tool_ocr_startup.log
|
|
||||||
# Expected output: "Started cleanup scheduler for expired files"
|
|
||||||
# Expected output: "Starting cleanup scheduler (interval: 3600s, retention: 24h)"
|
|
||||||
```
|
|
||||||
|
|
||||||
## Deactivate Environment
|
|
||||||
|
|
||||||
When done working:
|
|
||||||
```bash
|
|
||||||
conda deactivate
|
|
||||||
```
|
|
||||||
|
|
||||||
## Environment Management
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# List Conda environments
|
|
||||||
conda env list
|
|
||||||
|
|
||||||
# Remove environment (if needed)
|
|
||||||
conda env remove -n tool_ocr
|
|
||||||
|
|
||||||
# Export environment
|
|
||||||
conda env export > environment.yml
|
|
||||||
|
|
||||||
# Create from exported environment
|
|
||||||
conda env create -f environment.yml
|
|
||||||
```
|
|
||||||
@@ -14,6 +14,13 @@ engine = create_engine(
|
|||||||
pool_pre_ping=True, # Enable connection health checks
|
pool_pre_ping=True, # Enable connection health checks
|
||||||
pool_size=10,
|
pool_size=10,
|
||||||
max_overflow=20,
|
max_overflow=20,
|
||||||
|
pool_recycle=3600, # Recycle connections every hour
|
||||||
|
pool_timeout=30, # Seconds to wait for a free connection from the pool
|
||||||
|
connect_args={
|
||||||
|
'connect_timeout': 10,
|
||||||
|
'read_timeout': 30,
|
||||||
|
'write_timeout': 30,
|
||||||
|
},
|
||||||
echo=False, # Set to True for SQL query logging
|
echo=False, # Set to True for SQL query logging
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@@ -168,6 +168,16 @@ class BackgroundTaskManager:
|
|||||||
ocr_file.completed_at = datetime.utcnow()
|
ocr_file.completed_at = datetime.utcnow()
|
||||||
ocr_file.processing_time = (ocr_file.completed_at - ocr_file.started_at).total_seconds()
|
ocr_file.processing_time = (ocr_file.completed_at - ocr_file.started_at).total_seconds()
|
||||||
|
|
||||||
|
# Commit with retry on connection errors
|
||||||
|
try:
|
||||||
|
db.commit()
|
||||||
|
except Exception as commit_error:
|
||||||
|
logger.warning(f"Commit failed, rolling back and retrying: {commit_error}")
|
||||||
|
db.rollback()
|
||||||
|
db.refresh(ocr_file)
|
||||||
|
ocr_file.status = FileStatus.COMPLETED
|
||||||
|
ocr_file.completed_at = datetime.utcnow()
|
||||||
|
ocr_file.processing_time = (ocr_file.completed_at - ocr_file.started_at).total_seconds()
|
||||||
db.commit()
|
db.commit()
|
||||||
|
|
||||||
logger.info(f"Successfully processed file {ocr_file.id} ({ocr_file.original_filename})")
|
logger.info(f"Successfully processed file {ocr_file.id} ({ocr_file.original_filename})")
|
||||||
@@ -175,17 +185,22 @@ class BackgroundTaskManager:
|
|||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Attempt {attempt + 1}/{self.max_retries + 1} failed for file {ocr_file.id}: {e}")
|
logger.error(f"Attempt {attempt + 1}/{self.max_retries + 1} failed for file {ocr_file.id}: {e}")
|
||||||
|
db.rollback() # Rollback failed transaction
|
||||||
|
|
||||||
if attempt < self.max_retries:
|
if attempt < self.max_retries:
|
||||||
# Wait before retry
|
# Wait before retry
|
||||||
time.sleep(self.retry_delay)
|
time.sleep(self.retry_delay)
|
||||||
else:
|
else:
|
||||||
# Final failure
|
# Final failure
|
||||||
|
try:
|
||||||
ocr_file.status = FileStatus.FAILED
|
ocr_file.status = FileStatus.FAILED
|
||||||
ocr_file.error_message = f"Failed after {self.max_retries + 1} attempts: {str(e)}"
|
ocr_file.error_message = f"Failed after {self.max_retries + 1} attempts: {str(e)}"
|
||||||
ocr_file.completed_at = datetime.utcnow()
|
ocr_file.completed_at = datetime.utcnow()
|
||||||
ocr_file.retry_count = attempt
|
ocr_file.retry_count = attempt
|
||||||
db.commit()
|
db.commit()
|
||||||
|
except Exception as final_error:
|
||||||
|
logger.error(f"Failed to update error status: {final_error}")
|
||||||
|
db.rollback()
|
||||||
return False
|
return False
|
||||||
|
|
||||||
return False
|
return False
|
||||||
@@ -375,6 +390,16 @@ def process_batch_files_with_retry(
|
|||||||
batch.status = BatchStatus.FAILED
|
batch.status = BatchStatus.FAILED
|
||||||
|
|
||||||
batch.completed_at = datetime.utcnow()
|
batch.completed_at = datetime.utcnow()
|
||||||
|
|
||||||
|
# Commit with retry on connection errors
|
||||||
|
try:
|
||||||
|
db.commit()
|
||||||
|
except Exception as commit_error:
|
||||||
|
logger.warning(f"Batch commit failed, rolling back and retrying: {commit_error}")
|
||||||
|
db.rollback()
|
||||||
|
batch = db.query(OCRBatch).filter(OCRBatch.id == batch_id).first()
|
||||||
|
if batch:
|
||||||
|
batch.completed_at = datetime.utcnow()
|
||||||
db.commit()
|
db.commit()
|
||||||
|
|
||||||
logger.info(
|
logger.info(
|
||||||
@@ -384,6 +409,7 @@ def process_batch_files_with_retry(
|
|||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Fatal error processing batch {batch_id}: {e}")
|
logger.error(f"Fatal error processing batch {batch_id}: {e}")
|
||||||
|
db.rollback() # Rollback any failed transaction
|
||||||
try:
|
try:
|
||||||
batch = db.query(OCRBatch).filter(OCRBatch.id == batch_id).first()
|
batch = db.query(OCRBatch).filter(OCRBatch.id == batch_id).first()
|
||||||
if batch:
|
if batch:
|
||||||
@@ -392,3 +418,4 @@ def process_batch_files_with_retry(
|
|||||||
db.commit()
|
db.commit()
|
||||||
except Exception as commit_error:
|
except Exception as commit_error:
|
||||||
logger.error(f"Error updating batch status: {commit_error}")
|
logger.error(f"Error updating batch status: {commit_error}")
|
||||||
|
db.rollback()
|
||||||
|
|||||||
@@ -42,14 +42,16 @@ class OfficeConverter:
|
|||||||
def _verify_libreoffice(self):
|
def _verify_libreoffice(self):
|
||||||
"""Verify LibreOffice is installed and accessible"""
|
"""Verify LibreOffice is installed and accessible"""
|
||||||
if not Path(self.libreoffice_path).exists():
|
if not Path(self.libreoffice_path).exists():
|
||||||
# Try alternative path for Homebrew installation
|
# Try alternative path (system-wide installation)
|
||||||
alt_path = shutil.which("soffice")
|
alt_path = shutil.which("soffice")
|
||||||
if alt_path:
|
if alt_path:
|
||||||
self.libreoffice_path = alt_path
|
self.libreoffice_path = alt_path
|
||||||
logger.info(f"Using LibreOffice at: {alt_path}")
|
logger.info(f"Using LibreOffice at: {alt_path}")
|
||||||
else:
|
else:
|
||||||
raise OfficeConverterError(
|
raise OfficeConverterError(
|
||||||
"LibreOffice not found. Please install LibreOffice: brew install libreoffice"
|
"LibreOffice not found. Please install LibreOffice:\n"
|
||||||
|
" Ubuntu/Debian: sudo apt install -y libreoffice-writer libreoffice-impress libreoffice-core-nogui\n"
|
||||||
|
" macOS: brew install libreoffice"
|
||||||
)
|
)
|
||||||
|
|
||||||
def is_office_document(self, file_path: Path) -> bool:
|
def is_office_document(self, file_path: Path) -> bool:
|
||||||
|
|||||||
@@ -1,97 +0,0 @@
|
|||||||
services:
|
|
||||||
tool_ocr:
|
|
||||||
build:
|
|
||||||
context: .
|
|
||||||
dockerfile: Dockerfile
|
|
||||||
image: tool_ocr:latest
|
|
||||||
container_name: tool_ocr
|
|
||||||
restart: unless-stopped
|
|
||||||
|
|
||||||
ports:
|
|
||||||
- "12015:12015" # Only one port needed!
|
|
||||||
|
|
||||||
environment:
|
|
||||||
# Database Configuration
|
|
||||||
- MYSQL_HOST=${MYSQL_HOST:-mysql.theaken.com}
|
|
||||||
- MYSQL_PORT=${MYSQL_PORT:-33306}
|
|
||||||
- MYSQL_USER=${MYSQL_USER:-A060}
|
|
||||||
- MYSQL_PASSWORD=${MYSQL_PASSWORD:-WLeSCi0yhtc7}
|
|
||||||
- MYSQL_DATABASE=${MYSQL_DATABASE:-db_A060}
|
|
||||||
|
|
||||||
# Application Configuration
|
|
||||||
- BACKEND_PORT=8000 # Internal backend port
|
|
||||||
- FRONTEND_PORT=12015 # External port
|
|
||||||
- SECRET_KEY=${SECRET_KEY:-your-secret-key-here-please-change-this}
|
|
||||||
- ALGORITHM=${ALGORITHM:-HS256}
|
|
||||||
- ACCESS_TOKEN_EXPIRE_MINUTES=${ACCESS_TOKEN_EXPIRE_MINUTES:-1440}
|
|
||||||
|
|
||||||
# OCR Configuration
|
|
||||||
- PADDLEOCR_MODEL_DIR=/app/backend/models/paddleocr
|
|
||||||
- OCR_LANGUAGES=${OCR_LANGUAGES:-ch,en,japan,korean}
|
|
||||||
- OCR_CONFIDENCE_THRESHOLD=${OCR_CONFIDENCE_THRESHOLD:-0.5}
|
|
||||||
- MAX_OCR_WORKERS=${MAX_OCR_WORKERS:-4}
|
|
||||||
|
|
||||||
# File Upload Configuration
|
|
||||||
- MAX_UPLOAD_SIZE=${MAX_UPLOAD_SIZE:-52428800}
|
|
||||||
- ALLOWED_EXTENSIONS=${ALLOWED_EXTENSIONS:-png,jpg,jpeg,pdf,bmp,tiff,doc,docx,ppt,pptx}
|
|
||||||
- UPLOAD_DIR=/app/backend/uploads
|
|
||||||
- TEMP_DIR=/app/backend/uploads/temp
|
|
||||||
- PROCESSED_DIR=/app/backend/uploads/processed
|
|
||||||
- IMAGES_DIR=/app/backend/uploads/images
|
|
||||||
|
|
||||||
# Export Configuration
|
|
||||||
- STORAGE_DIR=/app/backend/storage
|
|
||||||
- MARKDOWN_DIR=/app/backend/storage/markdown
|
|
||||||
- JSON_DIR=/app/backend/storage/json
|
|
||||||
- EXPORTS_DIR=/app/backend/storage/exports
|
|
||||||
|
|
||||||
# PDF Generation Configuration
|
|
||||||
- PANDOC_PATH=/usr/bin/pandoc
|
|
||||||
- FONT_DIR=/usr/share/fonts
|
|
||||||
- PDF_PAGE_SIZE=${PDF_PAGE_SIZE:-A4}
|
|
||||||
- PDF_MARGIN_TOP=${PDF_MARGIN_TOP:-20}
|
|
||||||
- PDF_MARGIN_BOTTOM=${PDF_MARGIN_BOTTOM:-20}
|
|
||||||
- PDF_MARGIN_LEFT=${PDF_MARGIN_LEFT:-20}
|
|
||||||
- PDF_MARGIN_RIGHT=${PDF_MARGIN_RIGHT:-20}
|
|
||||||
|
|
||||||
# Translation Configuration (Reserved)
|
|
||||||
- ENABLE_TRANSLATION=${ENABLE_TRANSLATION:-false}
|
|
||||||
- TRANSLATION_ENGINE=${TRANSLATION_ENGINE:-offline}
|
|
||||||
- ARGOSTRANSLATE_MODELS_DIR=/app/backend/models/argostranslate
|
|
||||||
|
|
||||||
# Background Tasks Configuration
|
|
||||||
- TASK_QUEUE_TYPE=${TASK_QUEUE_TYPE:-memory}
|
|
||||||
|
|
||||||
# CORS Configuration
|
|
||||||
- CORS_ORIGINS=${CORS_ORIGINS:-http://localhost:12015,http://127.0.0.1:12015}
|
|
||||||
|
|
||||||
# Logging Configuration
|
|
||||||
- LOG_LEVEL=${LOG_LEVEL:-INFO}
|
|
||||||
- LOG_FILE=/app/backend/logs/app.log
|
|
||||||
|
|
||||||
volumes:
|
|
||||||
# Persist data directories
|
|
||||||
- ./data/uploads:/app/backend/uploads
|
|
||||||
- ./data/storage:/app/backend/storage
|
|
||||||
- ./data/models:/app/backend/models
|
|
||||||
- ./data/logs:/app/backend/logs
|
|
||||||
|
|
||||||
networks:
|
|
||||||
- tool_ocr_network
|
|
||||||
|
|
||||||
healthcheck:
|
|
||||||
test: ["CMD", "curl", "-f", "http://localhost:12010/health"]
|
|
||||||
interval: 30s
|
|
||||||
timeout: 10s
|
|
||||||
retries: 3
|
|
||||||
start_period: 40s
|
|
||||||
|
|
||||||
networks:
|
|
||||||
tool_ocr_network:
|
|
||||||
driver: bridge
|
|
||||||
|
|
||||||
volumes:
|
|
||||||
uploads:
|
|
||||||
storage:
|
|
||||||
models:
|
|
||||||
logs:
|
|
||||||
@@ -1,89 +0,0 @@
|
|||||||
# Nginx Site Configuration for Tool_OCR
|
|
||||||
|
|
||||||
upstream backend {
|
|
||||||
server 127.0.0.1:8000;
|
|
||||||
keepalive 32;
|
|
||||||
}
|
|
||||||
|
|
||||||
server {
|
|
||||||
listen 12015;
|
|
||||||
server_name _;
|
|
||||||
|
|
||||||
# Security headers
|
|
||||||
add_header X-Frame-Options "SAMEORIGIN" always;
|
|
||||||
add_header X-Content-Type-Options "nosniff" always;
|
|
||||||
add_header X-XSS-Protection "1; mode=block" always;
|
|
||||||
|
|
||||||
# Root directory for frontend
|
|
||||||
root /app/frontend/dist;
|
|
||||||
index index.html;
|
|
||||||
|
|
||||||
# Logging
|
|
||||||
access_log /var/log/nginx/tool_ocr_access.log;
|
|
||||||
error_log /var/log/nginx/tool_ocr_error.log;
|
|
||||||
|
|
||||||
# Backend API proxy
|
|
||||||
location /api/ {
|
|
||||||
proxy_pass http://backend/api/;
|
|
||||||
proxy_http_version 1.1;
|
|
||||||
|
|
||||||
# Headers
|
|
||||||
proxy_set_header Host $host;
|
|
||||||
proxy_set_header X-Real-IP $remote_addr;
|
|
||||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
|
||||||
proxy_set_header X-Forwarded-Proto $scheme;
|
|
||||||
proxy_set_header Connection "";
|
|
||||||
|
|
||||||
# Timeouts
|
|
||||||
proxy_connect_timeout 60s;
|
|
||||||
proxy_send_timeout 300s;
|
|
||||||
proxy_read_timeout 300s;
|
|
||||||
|
|
||||||
# Buffering
|
|
||||||
proxy_buffering off;
|
|
||||||
proxy_request_buffering off;
|
|
||||||
}
|
|
||||||
|
|
||||||
# Health check endpoint (backend)
|
|
||||||
location /health {
|
|
||||||
proxy_pass http://backend/health;
|
|
||||||
proxy_http_version 1.1;
|
|
||||||
proxy_set_header Host $host;
|
|
||||||
proxy_set_header Connection "";
|
|
||||||
}
|
|
||||||
|
|
||||||
# API docs (backend)
|
|
||||||
location /docs {
|
|
||||||
proxy_pass http://backend/docs;
|
|
||||||
proxy_http_version 1.1;
|
|
||||||
proxy_set_header Host $host;
|
|
||||||
proxy_set_header Connection "";
|
|
||||||
}
|
|
||||||
|
|
||||||
location /openapi.json {
|
|
||||||
proxy_pass http://backend/openapi.json;
|
|
||||||
proxy_http_version 1.1;
|
|
||||||
proxy_set_header Host $host;
|
|
||||||
proxy_set_header Connection "";
|
|
||||||
}
|
|
||||||
|
|
||||||
# Frontend static files with caching
|
|
||||||
location /assets/ {
|
|
||||||
expires 1y;
|
|
||||||
add_header Cache-Control "public, immutable";
|
|
||||||
}
|
|
||||||
|
|
||||||
# Frontend - React Router support (SPA fallback)
|
|
||||||
location / {
|
|
||||||
try_files $uri $uri/ /index.html;
|
|
||||||
expires -1;
|
|
||||||
add_header Cache-Control "no-store, no-cache, must-revalidate";
|
|
||||||
}
|
|
||||||
|
|
||||||
# Deny access to hidden files
|
|
||||||
location ~ /\. {
|
|
||||||
deny all;
|
|
||||||
access_log off;
|
|
||||||
log_not_found off;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,41 +0,0 @@
|
|||||||
#!/bin/bash
# Container entrypoint for Tool_OCR.
#
# Sequence: print a start banner, pause briefly, apply Alembic migrations when
# an alembic.ini is present (a failure is reported but does not stop startup),
# create the runtime directory tree, fix permissions, then replace this shell
# with supervisord.
set -e

readonly BANNER="========================================"
readonly BACKEND_ROOT="/app/backend"

printf '%s\n' "$BANNER" "Tool_OCR Container Starting..." "$BANNER"

# Short grace period before touching anything else.
sleep 2

# Database migrations are only attempted when the project ships alembic.ini.
printf '%s\n' "Checking database migrations..."
cd "$BACKEND_ROOT"
if [[ -f alembic.ini ]]; then
    printf '%s\n' "Running Alembic migrations..."
    if ! alembic upgrade head; then
        # Best effort: keep booting even if the upgrade did not succeed.
        printf '%s\n' "Warning: Migration failed or already up to date"
    fi
fi

# Runtime directories, relative to the backend root.
printf '%s\n' "Ensuring directories exist..."
runtime_dirs=(
    uploads/temp
    uploads/processed
    uploads/images
    storage/markdown
    storage/json
    storage/exports
    models/paddleocr
    logs
)
# Prefix every entry with the backend root and create them in a single call.
mkdir -p "${runtime_dirs[@]/#/$BACKEND_ROOT/}"

# Permissions are applied to uploads, storage and logs only.
chmod -R 755 "$BACKEND_ROOT/uploads" "$BACKEND_ROOT/storage" "$BACKEND_ROOT/logs"

printf '%s\n' \
    "$BANNER" \
    "Starting services with Supervisor..." \
    "- Nginx listening on port 12015" \
    "- Backend API on internal port 8000" \
    "$BANNER"

# Hand the process over to supervisord so it receives container signals.
exec /usr/bin/supervisord -c /etc/supervisor/conf.d/supervisord.conf
|
|
||||||
@@ -1,40 +0,0 @@
|
|||||||
# Nginx Main Configuration
|
|
||||||
user www-data;
|
|
||||||
worker_processes auto;
|
|
||||||
pid /var/run/nginx.pid;
|
|
||||||
|
|
||||||
events {
|
|
||||||
worker_connections 1024;
|
|
||||||
use epoll;
|
|
||||||
}
|
|
||||||
|
|
||||||
http {
|
|
||||||
# Basic Settings
|
|
||||||
sendfile on;
|
|
||||||
tcp_nopush on;
|
|
||||||
tcp_nodelay on;
|
|
||||||
keepalive_timeout 65;
|
|
||||||
types_hash_max_size 2048;
|
|
||||||
client_max_body_size 50M; # Match MAX_UPLOAD_SIZE in .env
|
|
||||||
|
|
||||||
# MIME Types
|
|
||||||
include /etc/nginx/mime.types;
|
|
||||||
default_type application/octet-stream;
|
|
||||||
|
|
||||||
# Logging
|
|
||||||
access_log /var/log/nginx/access.log;
|
|
||||||
error_log /var/log/nginx/error.log;
|
|
||||||
|
|
||||||
# Gzip Compression
|
|
||||||
gzip on;
|
|
||||||
gzip_vary on;
|
|
||||||
gzip_proxied any;
|
|
||||||
gzip_comp_level 6;
|
|
||||||
gzip_types text/plain text/css text/xml text/javascript
|
|
||||||
application/json application/javascript application/xml+rss
|
|
||||||
application/rss+xml font/truetype font/opentype
|
|
||||||
application/vnd.ms-fontobject image/svg+xml;
|
|
||||||
|
|
||||||
# Include site configurations
|
|
||||||
include /etc/nginx/conf.d/*.conf;
|
|
||||||
}
|
|
||||||
@@ -1,28 +0,0 @@
|
|||||||
[supervisord]
|
|
||||||
nodaemon=true
|
|
||||||
user=root
|
|
||||||
logfile=/var/log/supervisor/supervisord.log
|
|
||||||
pidfile=/var/run/supervisord.pid
|
|
||||||
loglevel=info
|
|
||||||
|
|
||||||
[program:nginx]
|
|
||||||
command=/usr/sbin/nginx -g "daemon off;"
|
|
||||||
autostart=true
|
|
||||||
autorestart=true
|
|
||||||
priority=10
|
|
||||||
stdout_logfile=/dev/stdout
|
|
||||||
stdout_logfile_maxbytes=0
|
|
||||||
stderr_logfile=/dev/stderr
|
|
||||||
stderr_logfile_maxbytes=0
|
|
||||||
|
|
||||||
[program:backend]
|
|
||||||
command=python -m uvicorn app.main:app --host 127.0.0.1 --port 8000 --log-level info
|
|
||||||
directory=/app/backend
|
|
||||||
autostart=true
|
|
||||||
autorestart=true
|
|
||||||
priority=20
|
|
||||||
stdout_logfile=/dev/stdout
|
|
||||||
stdout_logfile_maxbytes=0
|
|
||||||
stderr_logfile=/dev/stderr
|
|
||||||
stderr_logfile_maxbytes=0
|
|
||||||
environment=PYTHONUNBUFFERED="1"
|
|
||||||
@@ -1 +1 @@
|
|||||||
VITE_API_BASE_URL=http://localhost:12015
|
VITE_API_BASE_URL=http://localhost:8000
|
||||||
|
|||||||
@@ -1,7 +0,0 @@
|
|||||||
# Frontend Environment Variables for Docker Deployment
|
|
||||||
# Copy this to frontend/.env.production for Docker builds
|
|
||||||
|
|
||||||
# API Base URL
|
|
||||||
# In Docker environment, use empty string for same-origin requests
|
|
||||||
# Nginx will proxy /api/* to the backend
|
|
||||||
VITE_API_BASE_URL=
|
|
||||||
53
frontend/package-lock.json
generated
53
frontend/package-lock.json
generated
@@ -86,6 +86,7 @@
|
|||||||
"integrity": "sha512-e7jT4DxYvIDLk1ZHmU/m/mB19rex9sv0c2ftBtjSBv+kVM/902eh0fINUzD7UwLLNR+jU585GxUJ8/EBfAM5fw==",
|
"integrity": "sha512-e7jT4DxYvIDLk1ZHmU/m/mB19rex9sv0c2ftBtjSBv+kVM/902eh0fINUzD7UwLLNR+jU585GxUJ8/EBfAM5fw==",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
|
"peer": true,
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@babel/code-frame": "^7.27.1",
|
"@babel/code-frame": "^7.27.1",
|
||||||
"@babel/generator": "^7.28.5",
|
"@babel/generator": "^7.28.5",
|
||||||
@@ -1672,9 +1673,9 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/@tanstack/query-core": {
|
"node_modules/@tanstack/query-core": {
|
||||||
"version": "5.90.7",
|
"version": "5.90.8",
|
||||||
"resolved": "https://registry.npmjs.org/@tanstack/query-core/-/query-core-5.90.7.tgz",
|
"resolved": "https://registry.npmjs.org/@tanstack/query-core/-/query-core-5.90.8.tgz",
|
||||||
"integrity": "sha512-6PN65csiuTNfBMXqQUxQhCNdtm1rV+9kC9YwWAIKcaxAauq3Wu7p18j3gQY3YIBJU70jT/wzCCZ2uqto/vQgiQ==",
|
"integrity": "sha512-4E0RP/0GJCxSNiRF2kAqE/LQkTJVlL/QNU7gIJSptaseV9HP6kOuA+N11y4bZKZxa3QopK3ZuewwutHx6DqDXQ==",
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
"funding": {
|
"funding": {
|
||||||
"type": "github",
|
"type": "github",
|
||||||
@@ -1682,12 +1683,12 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/@tanstack/react-query": {
|
"node_modules/@tanstack/react-query": {
|
||||||
"version": "5.90.7",
|
"version": "5.90.8",
|
||||||
"resolved": "https://registry.npmjs.org/@tanstack/react-query/-/react-query-5.90.7.tgz",
|
"resolved": "https://registry.npmjs.org/@tanstack/react-query/-/react-query-5.90.8.tgz",
|
||||||
"integrity": "sha512-wAHc/cgKzW7LZNFloThyHnV/AX9gTg3w5yAv0gvQHPZoCnepwqCMtzbuPbb2UvfvO32XZ46e8bPOYbfZhzVnnQ==",
|
"integrity": "sha512-/3b9QGzkf4rE5/miL6tyhldQRlLXzMHcySOm/2Tm2OLEFE9P1ImkH0+OviDBSvyAvtAOJocar5xhd7vxdLi3aQ==",
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@tanstack/query-core": "5.90.7"
|
"@tanstack/query-core": "5.90.8"
|
||||||
},
|
},
|
||||||
"funding": {
|
"funding": {
|
||||||
"type": "github",
|
"type": "github",
|
||||||
@@ -1803,15 +1804,17 @@
|
|||||||
"integrity": "sha512-GNWcUTRBgIRJD5zj+Tq0fKOJ5XZajIiBroOF0yvj2bSU1WvNdYS/dn9UxwsujGW4JX06dnHyjV2y9rRaybH0iQ==",
|
"integrity": "sha512-GNWcUTRBgIRJD5zj+Tq0fKOJ5XZajIiBroOF0yvj2bSU1WvNdYS/dn9UxwsujGW4JX06dnHyjV2y9rRaybH0iQ==",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
|
"peer": true,
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"undici-types": "~7.16.0"
|
"undici-types": "~7.16.0"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/@types/react": {
|
"node_modules/@types/react": {
|
||||||
"version": "19.2.3",
|
"version": "19.2.4",
|
||||||
"resolved": "https://registry.npmjs.org/@types/react/-/react-19.2.3.tgz",
|
"resolved": "https://registry.npmjs.org/@types/react/-/react-19.2.4.tgz",
|
||||||
"integrity": "sha512-k5dJVszUiNr1DSe8Cs+knKR6IrqhqdhpUwzqhkS8ecQTSf3THNtbfIp/umqHMpX2bv+9dkx3fwDv/86LcSfvSg==",
|
"integrity": "sha512-tBFxBp9Nfyy5rsmefN+WXc1JeW/j2BpBHFdLZbEVfs9wn3E3NRFxwV0pJg8M1qQAexFpvz73hJXFofV0ZAu92A==",
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
|
"peer": true,
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"csstype": "^3.0.2"
|
"csstype": "^3.0.2"
|
||||||
}
|
}
|
||||||
@@ -1878,6 +1881,7 @@
|
|||||||
"integrity": "sha512-tK3GPFWbirvNgsNKto+UmB/cRtn6TZfyw0D6IKrW55n6Vbs7KJoZtI//kpTKzE/DUmmnAFD8/Ca46s7Obs92/w==",
|
"integrity": "sha512-tK3GPFWbirvNgsNKto+UmB/cRtn6TZfyw0D6IKrW55n6Vbs7KJoZtI//kpTKzE/DUmmnAFD8/Ca46s7Obs92/w==",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
|
"peer": true,
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@typescript-eslint/scope-manager": "8.46.4",
|
"@typescript-eslint/scope-manager": "8.46.4",
|
||||||
"@typescript-eslint/types": "8.46.4",
|
"@typescript-eslint/types": "8.46.4",
|
||||||
@@ -2136,6 +2140,7 @@
|
|||||||
"integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==",
|
"integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
|
"peer": true,
|
||||||
"bin": {
|
"bin": {
|
||||||
"acorn": "bin/acorn"
|
"acorn": "bin/acorn"
|
||||||
},
|
},
|
||||||
@@ -2275,9 +2280,9 @@
|
|||||||
"license": "MIT"
|
"license": "MIT"
|
||||||
},
|
},
|
||||||
"node_modules/baseline-browser-mapping": {
|
"node_modules/baseline-browser-mapping": {
|
||||||
"version": "2.8.26",
|
"version": "2.8.27",
|
||||||
"resolved": "https://registry.npmjs.org/baseline-browser-mapping/-/baseline-browser-mapping-2.8.26.tgz",
|
"resolved": "https://registry.npmjs.org/baseline-browser-mapping/-/baseline-browser-mapping-2.8.27.tgz",
|
||||||
"integrity": "sha512-73lC1ugzwoaWCLJ1LvOgrR5xsMLTqSKIEoMHVtL9E/HNk0PXtTM76ZIm84856/SF7Nv8mPZxKoBsgpm0tR1u1Q==",
|
"integrity": "sha512-2CXFpkjVnY2FT+B6GrSYxzYf65BJWEqz5tIRHCvNsZZ2F3CmsCB37h8SpYgKG7y9C4YAeTipIPWG7EmFmhAeXA==",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"license": "Apache-2.0",
|
"license": "Apache-2.0",
|
||||||
"bin": {
|
"bin": {
|
||||||
@@ -2328,6 +2333,7 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
|
"peer": true,
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"baseline-browser-mapping": "^2.8.25",
|
"baseline-browser-mapping": "^2.8.25",
|
||||||
"caniuse-lite": "^1.0.30001754",
|
"caniuse-lite": "^1.0.30001754",
|
||||||
@@ -2789,6 +2795,7 @@
|
|||||||
"integrity": "sha512-BhHmn2yNOFA9H9JmmIVKJmd288g9hrVRDkdoIgRCRuSySRUHH7r/DI6aAXW9T1WwUuY3DFgrcaqB+deURBLR5g==",
|
"integrity": "sha512-BhHmn2yNOFA9H9JmmIVKJmd288g9hrVRDkdoIgRCRuSySRUHH7r/DI6aAXW9T1WwUuY3DFgrcaqB+deURBLR5g==",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
|
"peer": true,
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@eslint-community/eslint-utils": "^4.8.0",
|
"@eslint-community/eslint-utils": "^4.8.0",
|
||||||
"@eslint-community/regexpp": "^4.12.1",
|
"@eslint-community/regexpp": "^4.12.1",
|
||||||
@@ -3413,6 +3420,7 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
|
"peer": true,
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@babel/runtime": "^7.27.6"
|
"@babel/runtime": "^7.27.6"
|
||||||
},
|
},
|
||||||
@@ -3581,9 +3589,9 @@
|
|||||||
"license": "MIT"
|
"license": "MIT"
|
||||||
},
|
},
|
||||||
"node_modules/js-yaml": {
|
"node_modules/js-yaml": {
|
||||||
"version": "4.1.0",
|
"version": "4.1.1",
|
||||||
"resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-4.1.0.tgz",
|
"resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-4.1.1.tgz",
|
||||||
"integrity": "sha512-wpxZs9NoxZaJESJGIZTyDEaYpl0FKSA+FB9aJiyemKhMwkxQg63h4T1KJgUGHpTqPDNRcmmYLugrRjJlBtWvRA==",
|
"integrity": "sha512-qQKT4zQxXl8lLwBtHMWwaTcGfFOZviOJet3Oy/xmGk2gZH677CJM9EvtfdSkgWcATZhj/55JZ0rmy3myCT5lsA==",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
@@ -4867,6 +4875,7 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
|
"peer": true,
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"nanoid": "^3.3.11",
|
"nanoid": "^3.3.11",
|
||||||
"picocolors": "^1.1.1",
|
"picocolors": "^1.1.1",
|
||||||
@@ -4956,6 +4965,7 @@
|
|||||||
"resolved": "https://registry.npmjs.org/react/-/react-19.2.0.tgz",
|
"resolved": "https://registry.npmjs.org/react/-/react-19.2.0.tgz",
|
||||||
"integrity": "sha512-tmbWg6W31tQLeB5cdIBOicJDJRR2KzXsV7uSK9iNfLWQ5bIZfxuPEHp7M8wiHyHnn0DD1i7w3Zmin0FtkrwoCQ==",
|
"integrity": "sha512-tmbWg6W31tQLeB5cdIBOicJDJRR2KzXsV7uSK9iNfLWQ5bIZfxuPEHp7M8wiHyHnn0DD1i7w3Zmin0FtkrwoCQ==",
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
|
"peer": true,
|
||||||
"engines": {
|
"engines": {
|
||||||
"node": ">=0.10.0"
|
"node": ">=0.10.0"
|
||||||
}
|
}
|
||||||
@@ -4965,6 +4975,7 @@
|
|||||||
"resolved": "https://registry.npmjs.org/react-dom/-/react-dom-19.2.0.tgz",
|
"resolved": "https://registry.npmjs.org/react-dom/-/react-dom-19.2.0.tgz",
|
||||||
"integrity": "sha512-UlbRu4cAiGaIewkPyiRGJk0imDN2T3JjieT6spoL2UeSf5od4n5LB/mQ4ejmxhCFT1tYe8IvaFulzynWovsEFQ==",
|
"integrity": "sha512-UlbRu4cAiGaIewkPyiRGJk0imDN2T3JjieT6spoL2UeSf5od4n5LB/mQ4ejmxhCFT1tYe8IvaFulzynWovsEFQ==",
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
|
"peer": true,
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"scheduler": "^0.27.0"
|
"scheduler": "^0.27.0"
|
||||||
},
|
},
|
||||||
@@ -4990,9 +5001,9 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/react-i18next": {
|
"node_modules/react-i18next": {
|
||||||
"version": "16.3.0",
|
"version": "16.3.1",
|
||||||
"resolved": "https://registry.npmjs.org/react-i18next/-/react-i18next-16.3.0.tgz",
|
"resolved": "https://registry.npmjs.org/react-i18next/-/react-i18next-16.3.1.tgz",
|
||||||
"integrity": "sha512-XGYIVU6gCOL4UQsfp87WbbvBc2WvgdkEDI8r4TwACzFg1bXY8pd1d9Cw6u9WJ2soTKHKaF1xQEyWA3/dUvtAGw==",
|
"integrity": "sha512-HbYaBeA58Hg38OzdEvJp4kLIvk10rp9F9Jq+wNkqtqxDXObtdYMSsQnegWgdUVcpZjZuK9ZxehM+Z9BW2Vqgqw==",
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@babel/runtime": "^7.27.6",
|
"@babel/runtime": "^7.27.6",
|
||||||
@@ -5412,6 +5423,7 @@
|
|||||||
"integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==",
|
"integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
|
"peer": true,
|
||||||
"engines": {
|
"engines": {
|
||||||
"node": ">=12"
|
"node": ">=12"
|
||||||
},
|
},
|
||||||
@@ -5490,6 +5502,7 @@
|
|||||||
"integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==",
|
"integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==",
|
||||||
"devOptional": true,
|
"devOptional": true,
|
||||||
"license": "Apache-2.0",
|
"license": "Apache-2.0",
|
||||||
|
"peer": true,
|
||||||
"bin": {
|
"bin": {
|
||||||
"tsc": "bin/tsc",
|
"tsc": "bin/tsc",
|
||||||
"tsserver": "bin/tsserver"
|
"tsserver": "bin/tsserver"
|
||||||
@@ -5700,6 +5713,7 @@
|
|||||||
"integrity": "sha512-BxAKBWmIbrDgrokdGZH1IgkIk/5mMHDreLDmCJ0qpyJaAteP8NvMhkwr/ZCQNqNH97bw/dANTE9PDzqwJghfMQ==",
|
"integrity": "sha512-BxAKBWmIbrDgrokdGZH1IgkIk/5mMHDreLDmCJ0qpyJaAteP8NvMhkwr/ZCQNqNH97bw/dANTE9PDzqwJghfMQ==",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
|
"peer": true,
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"esbuild": "^0.25.0",
|
"esbuild": "^0.25.0",
|
||||||
"fdir": "^6.5.0",
|
"fdir": "^6.5.0",
|
||||||
@@ -5793,6 +5807,7 @@
|
|||||||
"integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==",
|
"integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
|
"peer": true,
|
||||||
"engines": {
|
"engines": {
|
||||||
"node": ">=12"
|
"node": ">=12"
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -19,10 +19,10 @@ import type {
|
|||||||
/**
|
/**
|
||||||
* API Client Configuration
|
* API Client Configuration
|
||||||
* - In Docker: VITE_API_BASE_URL is empty string, use relative path
|
* - In Docker: VITE_API_BASE_URL is empty string, use relative path
|
||||||
* - In development: Use VITE_API_BASE_URL from .env or default to localhost:12015
|
* - In development: Use VITE_API_BASE_URL from .env or default to localhost:8000
|
||||||
*/
|
*/
|
||||||
const envApiBaseUrl = import.meta.env.VITE_API_BASE_URL
|
const envApiBaseUrl = import.meta.env.VITE_API_BASE_URL
|
||||||
const API_BASE_URL = envApiBaseUrl !== undefined ? envApiBaseUrl : 'http://localhost:12015'
|
const API_BASE_URL = envApiBaseUrl !== undefined ? envApiBaseUrl : 'http://localhost:8000'
|
||||||
const API_VERSION = 'v1'
|
const API_VERSION = 'v1'
|
||||||
|
|
||||||
class ApiClient {
|
class ApiClient {
|
||||||
|
|||||||
@@ -6,10 +6,10 @@ import path from 'path'
|
|||||||
export default defineConfig({
|
export default defineConfig({
|
||||||
plugins: [react()],
|
plugins: [react()],
|
||||||
server: {
|
server: {
|
||||||
port: 12011,
|
port: 5173,
|
||||||
proxy: {
|
proxy: {
|
||||||
'/api': {
|
'/api': {
|
||||||
target: 'http://localhost:12015',
|
target: 'http://localhost:8000',
|
||||||
changeOrigin: true,
|
changeOrigin: true,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -14,10 +14,11 @@ Tool_OCR is a web-based application for batch image-to-text conversion with mult
|
|||||||
## Tech Stack
|
## Tech Stack
|
||||||
|
|
||||||
### Development Environment
|
### Development Environment
|
||||||
- **OS Platform**: Windows 10/11
|
- **OS Platform**: WSL2 Ubuntu 24.04
|
||||||
- **Python Version**: 3.10 (via Conda)
|
- **Python Version**: 3.12
|
||||||
- **Environment Manager**: Conda
|
- **Environment Manager**: Python venv
|
||||||
- **Virtual Environment Path**: `C:\Users\lin46\.conda\envs\tool_ocr`
|
- **Virtual Environment Path**: `./venv`
|
||||||
|
- **Node.js**: 24.x LTS (via nvm)
|
||||||
- **IDE Recommended**: VS Code with Python + React extensions
|
- **IDE Recommended**: VS Code with Python + React extensions
|
||||||
|
|
||||||
### Backend Technologies
|
### Backend Technologies
|
||||||
@@ -74,11 +75,15 @@ Tool_OCR is a web-based application for batch image-to-text conversion with mult
|
|||||||
|
|
||||||
### Environment Setup (Backend)
|
### Environment Setup (Backend)
|
||||||
```bash
|
```bash
|
||||||
# Create new conda environment
|
# Run automated setup script (recommended)
|
||||||
conda create -n tool_ocr python=3.10 -y
|
./setup_dev_env.sh
|
||||||
|
|
||||||
|
# Or manually:
|
||||||
|
# Create Python virtual environment
|
||||||
|
python3 -m venv venv
|
||||||
|
|
||||||
# Activate environment
|
# Activate environment
|
||||||
conda activate tool_ocr
|
source venv/bin/activate
|
||||||
|
|
||||||
# Install dependencies
|
# Install dependencies
|
||||||
pip install -r requirements.txt
|
pip install -r requirements.txt
|
||||||
|
|||||||
@@ -1,72 +0,0 @@
|
|||||||
#!/bin/bash
# Tool_OCR - Conda environment setup script (macOS Apple Silicon)
#
# First run without Conda: downloads and installs Miniconda into
# ~/miniconda3, runs `conda init` for zsh and bash, then exits so the user can
# reload their shell and run the script again.
# Run with Conda available: creates (or optionally recreates) the `tool_ocr`
# environment with Python 3.10.

set -e  # abort on the first failing command

echo "==================================="
echo "Tool_OCR - 環境設置"
echo "==================================="

# Check whether Conda is already installed
if command -v conda &> /dev/null; then
    echo "✓ Conda 已安裝: $(conda --version)"
else
    echo "📦 開始安裝 Miniconda..."

    # Miniconda installer for Apple Silicon
    MINICONDA_URL="https://repo.anaconda.com/miniconda/Miniconda3-latest-MacOSX-arm64.sh"
    # Use an unpredictable temp file rather than a fixed /tmp name, and make
    # sure it is removed on every exit path (including a failed download).
    INSTALLER="$(mktemp "${TMPDIR:-/tmp}/miniconda_installer.XXXXXX")"
    trap 'rm -f -- "$INSTALLER"' EXIT

    echo "下載 Miniconda..."
    # -f: fail on HTTP errors instead of saving an error page that would then
    #     be executed by bash; -L: follow redirects.
    curl -fL -o "$INSTALLER" "$MINICONDA_URL"

    echo "安裝 Miniconda (默認安裝到 ~/miniconda3)..."
    bash "$INSTALLER" -b -p "$HOME/miniconda3"

    # Initialise Conda for both shells
    echo "初始化 Conda..."
    "$HOME/miniconda3/bin/conda" init zsh bash

    # The installer file is cleaned up by the EXIT trap above.

    echo "✓ Miniconda 安裝完成!"
    echo ""
    echo "⚠️ 請執行以下命令以載入 Conda:"
    echo " source ~/.zshrc (如果使用 zsh)"
    echo " source ~/.bash_profile (如果使用 bash)"
    echo ""
    echo "然後重新執行此腳本繼續設置。"
    exit 0
fi

# Report the currently active environment (the row marked with '*')
CURRENT_ENV=$(conda info --envs | grep -F '*' | awk '{print $1}')
echo "當前 Conda 環境: $CURRENT_ENV"

# Create the tool_ocr environment
ENV_NAME="tool_ocr"
if conda env list | grep -q "^$ENV_NAME "; then
    echo "✓ 環境 '$ENV_NAME' 已存在"
    read -p "是否重新創建? (y/N): " -n 1 -r
    echo
    if [[ $REPLY =~ ^[Yy]$ ]]; then
        echo "移除現有環境..."
        conda env remove -n "$ENV_NAME" -y
    else
        echo "使用現有環境"
        exit 0
    fi
fi

echo "📦 創建 Conda 環境: $ENV_NAME (Python 3.10)..."
conda create -n "$ENV_NAME" python=3.10 -y

echo ""
echo "✅ Conda 環境設置完成!"
echo ""
echo "下一步:"
echo " 1. 啟動環境: conda activate $ENV_NAME"
echo " 2. 安裝依賴: pip install -r requirements.txt"
echo " 3. 下載 PaddleOCR 模型"
echo ""
|
|
||||||
165
setup_dev_env.sh
Executable file
165
setup_dev_env.sh
Executable file
@@ -0,0 +1,165 @@
|
|||||||
|
#!/bin/bash
# Tool_OCR - WSL Ubuntu development environment setup script
#
# Installs system packages via apt, installs nvm + Node.js LTS, creates the
# Python virtual environment in ./venv, installs Python and frontend
# dependencies, and creates the backend working directories.
#
# Must be run as a regular user; sudo is invoked where required.

set -e  # stop on the first failing command

echo "================================"
echo "Tool_OCR 開發環境設置"
echo "================================"
echo ""

# Colour definitions
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
RED='\033[0;31m'
NC='\033[0m' # No Color

# Refuse to run as root: venv, nvm and node_modules must belong to the user.
if [ "$EUID" -eq 0 ]; then
    echo -e "${RED}請不要使用 sudo 運行此腳本${NC}"
    echo "腳本會在需要時提示輸入 sudo 密碼"
    exit 1
fi

# All paths below (venv, requirements.txt, frontend/, backend/) are relative
# to the repository root, so anchor the working directory to this script's
# location instead of trusting the caller's current directory.
cd "$(dirname "${BASH_SOURCE[0]}")"

echo -e "${YELLOW}[1/8] 更新系統套件列表...${NC}"
sudo apt update

echo ""
echo -e "${YELLOW}[2/8] 安裝 Python 開發工具...${NC}"
sudo apt install -y \
    python3-pip \
    python3-venv \
    python3-dev \
    build-essential \
    pkg-config

echo ""
echo -e "${YELLOW}[3/8] 安裝系統層級依賴...${NC}"
sudo apt install -y \
    pandoc \
    libmagic1 \
    libmagic-dev \
    fonts-noto-cjk \
    fonts-noto-cjk-extra \
    fonts-liberation \
    libpango-1.0-0 \
    libpangocairo-1.0-0 \
    libcairo2 \
    libcairo2-dev \
    libgdk-pixbuf2.0-0 \
    libgdk-pixbuf-2.0-dev \
    libffi-dev \
    libffi8 \
    shared-mime-info \
    poppler-utils \
    libgl1 \
    libglib2.0-0 \
    libglib2.0-dev \
    libgomp1 \
    libjpeg-dev \
    libpng-dev \
    libtiff-dev \
    libopencv-dev \
    libsqlite3-dev \
    libreoffice-core-nogui \
    libreoffice-writer-nogui \
    libreoffice-impress-nogui \
    ca-certificates \
    curl \
    wget \
    libxml2 \
    libxslt1-dev \
    python3-cffi

echo ""
echo -e "${YELLOW}[4/8] 安裝 Node.js 和 npm...${NC}"
# Install nvm only when it is not present yet
if [ ! -d "$HOME/.nvm" ]; then
    echo "安裝 nvm..."
    curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.0/install.sh | bash
else
    echo "nvm 已安裝"
fi

# Load nvm into this shell (needed in both the fresh-install and the
# already-installed case).
export NVM_DIR="$HOME/.nvm"
[ -s "$NVM_DIR/nvm.sh" ] && \. "$NVM_DIR/nvm.sh"

# Fail with a clear message if the download or the sourcing did not leave us
# with a usable `nvm` function, instead of a bare "command not found" below.
if ! command -v nvm > /dev/null 2>&1; then
    echo -e "${RED}錯誤: nvm 載入失敗 (${NVM_DIR}/nvm.sh)${NC}"
    exit 1
fi

# Install and select Node.js LTS
echo "安裝 Node.js LTS..."
nvm install --lts
nvm use --lts

echo ""
echo -e "${YELLOW}[5/8] 創建 Python 虛擬環境...${NC}"
if [ ! -d "venv" ]; then
    python3 -m venv venv
    echo "虛擬環境已創建"
else
    echo "虛擬環境已存在"
fi

echo ""
echo -e "${YELLOW}[6/8] 安裝 Python 依賴...${NC}"
source venv/bin/activate
pip install --upgrade pip setuptools wheel
pip install -r requirements.txt

echo ""
echo -e "${YELLOW}測試關鍵套件...${NC}"
# Smoke tests are informational only: a failed import is reported, not fatal.
python -c "import magic; print('✓ python-magic')" || echo "✗ python-magic failed"
python -c "from weasyprint import HTML; print('✓ WeasyPrint')" || echo "✗ WeasyPrint failed"
python -c "import cv2; print('✓ OpenCV')" || echo "✗ OpenCV failed"

echo ""
echo -e "${YELLOW}[7/8] 安裝前端依賴...${NC}"
cd frontend

# Remove a previous install tree. The committed package-lock.json is kept on
# purpose so every developer resolves exactly the same dependency versions.
if [ -d "node_modules" ]; then
    echo "清理現有 node_modules..."
    rm -rf node_modules
fi

# Clear the npm cache
npm cache clean --force

# Install dependencies: reproducible install from the lockfile when there is
# one, otherwise fall back to a regular (forced) install.
echo "安裝前端依賴..."
if [ -f "package-lock.json" ]; then
    npm ci
else
    npm install --force
fi

cd ..

echo ""
echo -e "${YELLOW}[8/8] 創建必要的目錄...${NC}"
mkdir -p backend/uploads/{temp,processed,images}
mkdir -p backend/storage/{markdown,json,exports}
mkdir -p backend/models/paddleocr
mkdir -p backend/logs

echo ""
echo -e "${GREEN}================================${NC}"
echo -e "${GREEN}環境設置完成!${NC}"
echo -e "${GREEN}================================${NC}"
echo ""
echo "下一步操作:"
echo "1. 初始化數據庫:"
echo " source venv/bin/activate"
echo " cd backend"
echo " alembic upgrade head"
echo " python create_test_user.py"
echo " cd .."
echo ""
echo "2. 啟動後端:"
echo " ./start_backend.sh"
echo ""
echo "3. 啟動前端 (新終端):"
echo " ./start_frontend.sh"
echo ""
echo "4. 訪問應用:"
echo " 前端: http://localhost:5173"
echo " API文檔: http://localhost:8000/docs"
echo " 健康檢查: http://localhost:8000/health"
echo ""
|
||||||
59
start_backend.sh
Executable file
59
start_backend.sh
Executable file
@@ -0,0 +1,59 @@
|
|||||||
|
#!/bin/bash
# Tool_OCR - backend development server launcher
#
# Activates ./venv, exports the variables defined in ./.env.local, makes sure
# the backend working directories exist, then runs uvicorn with auto-reload
# on 0.0.0.0:8000.

set -e

# Colour definitions
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
RED='\033[0;31m'
NC='\033[0m' # No Color

#######################################
# Export every KEY=VALUE assignment found in a dotenv-style file.
#
# Blank lines, comment lines and anything that is not an assignment are
# skipped. A trailing carriage return (CRLF files edited on Windows) is
# dropped. Values wrapped in matching single or double quotes are taken
# verbatim without the quotes; for unquoted values an inline " # comment" and
# trailing whitespace are removed. Values are never word-split or expanded.
#
# Arguments: $1 - path to the env file
# Returns:   0 on success, non-zero if the file cannot be read
#######################################
load_env_file() {
    local env_file=$1
    local line key value
    local assign_re='^[[:space:]]*(export[[:space:]]+)?([A-Za-z_][A-Za-z0-9_]*)=(.*)$'
    local dquoted_re='^"(.*)"[[:space:]]*$'
    local squoted_re="^'(.*)'[[:space:]]*\$"

    # `|| [[ -n "$line" ]]` keeps a final line that has no trailing newline.
    while IFS= read -r line || [[ -n "$line" ]]; do
        line=${line%$'\r'}
        [[ $line =~ $assign_re ]] || continue
        key=${BASH_REMATCH[2]}
        value=${BASH_REMATCH[3]}

        if [[ $value =~ $dquoted_re ]] || [[ $value =~ $squoted_re ]]; then
            value=${BASH_REMATCH[1]}
        else
            # Strip an inline comment (whitespace followed by '#'), then any
            # trailing whitespace.
            value=${value%%[[:space:]]#*}
            value=${value%"${value##*[![:space:]]}"}
        fi

        export "$key=$value"
    done < "$env_file"
}

echo -e "${YELLOW}正在啟動 Tool_OCR 後端開發服務器...${NC}"
echo ""

# venv, .env.local and backend/ are looked up relative to the repository
# root, so anchor the working directory to this script's location.
cd "$(dirname "${BASH_SOURCE[0]}")"

# Check the virtual environment
if [ ! -d "venv" ]; then
    echo -e "${RED}錯誤: 未找到虛擬環境${NC}"
    echo "請先運行: ./setup_dev_env.sh"
    exit 1
fi

# Check .env.local
if [ ! -f ".env.local" ]; then
    echo -e "${RED}錯誤: 未找到 .env.local 配置文件${NC}"
    echo "請確保 .env.local 文件存在"
    exit 1
fi

# Activate the virtual environment
echo -e "${GREEN}啟動 Python 虛擬環境...${NC}"
source venv/bin/activate

# Load environment variables
echo -e "${GREEN}載入環境變量...${NC}"
load_env_file .env.local

# Enter the backend directory
cd backend

# Make sure the working directories exist
echo -e "${GREEN}檢查目錄結構...${NC}"
mkdir -p uploads/{temp,processed,images}
mkdir -p storage/{markdown,json,exports}
mkdir -p models/paddleocr
mkdir -p logs

# Start the backend server
echo ""
echo -e "${GREEN}================================${NC}"
echo -e "${GREEN}後端服務器啟動中...${NC}"
echo -e "${GREEN}================================${NC}"
echo ""
echo "API 文檔: http://localhost:8000/docs"
echo "健康檢查: http://localhost:8000/health"
echo ""
echo "按 Ctrl+C 停止服務器"
echo ""

uvicorn app.main:app --reload --host 0.0.0.0 --port 8000
|
||||||
40
start_frontend.sh
Executable file
40
start_frontend.sh
Executable file
@@ -0,0 +1,40 @@
|
|||||||
|
#!/bin/bash
# Tool_OCR - frontend development server launcher
#
# Verifies that frontend dependencies are installed, loads nvm when it is
# available, then runs `npm run dev` inside ./frontend.

set -e

# Colour definitions
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
RED='\033[0;31m'
NC='\033[0m' # No Color

echo -e "${YELLOW}正在啟動 Tool_OCR 前端開發服務器...${NC}"
echo ""

# frontend/ is looked up relative to the repository root, so anchor the
# working directory to this script's location rather than the caller's cwd.
cd "$(dirname "${BASH_SOURCE[0]}")"

# Check node_modules
if [ ! -d "frontend/node_modules" ]; then
    echo -e "${RED}錯誤: 未找到 node_modules${NC}"
    echo "請先運行: ./setup_dev_env.sh"
    exit 1
fi

# Load nvm (skipped silently when nvm.sh is absent, e.g. system-wide Node.js)
export NVM_DIR="$HOME/.nvm"
[ -s "$NVM_DIR/nvm.sh" ] && \. "$NVM_DIR/nvm.sh"

# Give a clear message when neither nvm nor the system provides npm, instead
# of a bare "command not found" after the banner.
if ! command -v npm > /dev/null 2>&1; then
    echo -e "${RED}錯誤: 未找到 npm${NC}"
    echo "請先運行: ./setup_dev_env.sh"
    exit 1
fi

# Enter the frontend directory
cd frontend

# Start the frontend server
echo ""
echo -e "${GREEN}================================${NC}"
echo -e "${GREEN}前端服務器啟動中...${NC}"
echo -e "${GREEN}================================${NC}"
echo ""
echo "前端界面: http://localhost:5173"
echo ""
echo "按 Ctrl+C 停止服務器"
echo ""

npm run dev
|
||||||
Reference in New Issue
Block a user