diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..58f7bf8 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,87 @@ +# Git +.git +.gitignore +.gitattributes + +# Python +__pycache__ +*.py[cod] +*$py.class +*.so +.Python +env/ +venv/ +ENV/ +*.egg-info/ +dist/ +build/ +*.egg +.pytest_cache/ +.coverage +htmlcov/ + +# Node +node_modules/ +npm-debug.log* +yarn-debug.log* +yarn-error.log* +.npm +.yarn + +# IDE +.vscode/ +.idea/ +*.swp +*.swo +*~ +.DS_Store + +# Environment files +.env +.env.local +.env.*.local + +# Logs +logs/ +*.log + +# Data directories (will be mounted as volumes) +data/ +uploads/ +storage/ +models/ + +# Backend specific +backend/uploads/ +backend/storage/ +backend/models/ +backend/logs/ +backend/__pycache__/ +backend/*.egg-info/ + +# Frontend specific +frontend/node_modules/ +frontend/dist/ +frontend/.vite/ +frontend/.cache/ + +# Documentation (not needed in container) +*.md +!README.md +docs/ +demo_docs/ + +# Claude and OpenSpec +.claude/ +openspec/ + +# OS +Thumbs.db +Desktop.ini + +# Temporary files +*.tmp +*.bak +*.swp +temp/ +tmp/ diff --git a/.env b/.env index e45f23c..398acb0 100644 --- a/.env +++ b/.env @@ -1,5 +1,5 @@ -# Tool_OCR - Environment Configuration Template -# Copy this file to .env and fill in your actual values +# Tool_OCR - Docker Environment Configuration +# Copy this file to .env when deploying with Docker # ===== Database Configuration ===== MYSQL_HOST=mysql.theaken.com @@ -9,18 +9,17 @@ MYSQL_PASSWORD=WLeSCi0yhtc7 MYSQL_DATABASE=db_A060 # ===== Application Configuration ===== -# Server ports -BACKEND_PORT=12010 -FRONTEND_PORT=12011 +# External port (exposed to host) +FRONTEND_PORT=12010 -# Security +# Security (IMPORTANT: Change SECRET_KEY in production!) 
SECRET_KEY=your-secret-key-here-please-change-this-to-random-string ALGORITHM=HS256 ACCESS_TOKEN_EXPIRE_MINUTES=1440 # ===== OCR Configuration ===== -# PaddleOCR model directory -PADDLEOCR_MODEL_DIR=./models/paddleocr +# PaddleOCR model directory (inside container) +PADDLEOCR_MODEL_DIR=/app/backend/models/paddleocr # Supported languages (comma-separated) OCR_LANGUAGES=ch,en,japan,korean # Default confidence threshold @@ -33,24 +32,24 @@ MAX_OCR_WORKERS=4 MAX_UPLOAD_SIZE=52428800 # Allowed file extensions (comma-separated) ALLOWED_EXTENSIONS=png,jpg,jpeg,pdf,bmp,tiff,doc,docx,ppt,pptx -# Upload directories -UPLOAD_DIR=./uploads -TEMP_DIR=./uploads/temp -PROCESSED_DIR=./uploads/processed -IMAGES_DIR=./uploads/images +# Upload directories (inside container) +UPLOAD_DIR=/app/backend/uploads +TEMP_DIR=/app/backend/uploads/temp +PROCESSED_DIR=/app/backend/uploads/processed +IMAGES_DIR=/app/backend/uploads/images # ===== Export Configuration ===== -# Storage directories -STORAGE_DIR=./storage -MARKDOWN_DIR=./storage/markdown -JSON_DIR=./storage/json -EXPORTS_DIR=./storage/exports +# Storage directories (inside container) +STORAGE_DIR=/app/backend/storage +MARKDOWN_DIR=/app/backend/storage/markdown +JSON_DIR=/app/backend/storage/json +EXPORTS_DIR=/app/backend/storage/exports # ===== PDF Generation Configuration ===== -# Pandoc path (auto-detected if installed via brew) -PANDOC_PATH=/opt/homebrew/bin/pandoc -# WeasyPrint font directory -FONT_DIR=/System/Library/Fonts +# Pandoc path (inside container) +PANDOC_PATH=/usr/bin/pandoc +# Font directory (inside container) +FONT_DIR=/usr/share/fonts # Default PDF page size PDF_PAGE_SIZE=A4 # Default PDF margins (mm) @@ -64,8 +63,8 @@ PDF_MARGIN_RIGHT=20 ENABLE_TRANSLATION=false # Translation engine: offline (argostranslate) or api (future) TRANSLATION_ENGINE=offline -# Argostranslate models directory -ARGOSTRANSLATE_MODELS_DIR=./models/argostranslate +# Argostranslate models directory (inside container) 
+ARGOSTRANSLATE_MODELS_DIR=/app/backend/models/argostranslate # ===== Background Tasks Configuration ===== # Task queue type: memory (default) or redis (future) @@ -75,8 +74,9 @@ TASK_QUEUE_TYPE=memory # ===== CORS Configuration ===== # Allowed origins (comma-separated, * for all) -CORS_ORIGINS=http://localhost:12011,http://127.0.0.1:12011 +# For Docker, use the external URL +CORS_ORIGINS=http://localhost:12010,http://127.0.0.1:12010 # ===== Logging Configuration ===== LOG_LEVEL=INFO -LOG_FILE=./logs/app.log +LOG_FILE=/app/backend/logs/app.log diff --git a/.env.docker b/.env.docker new file mode 100644 index 0000000..4037ff1 --- /dev/null +++ b/.env.docker @@ -0,0 +1,82 @@ +# Tool_OCR - Docker Environment Configuration +# Copy this file to .env when deploying with Docker + +# ===== Database Configuration ===== +MYSQL_HOST=mysql.theaken.com +MYSQL_PORT=33306 +MYSQL_USER=A060 +MYSQL_PASSWORD=WLeSCi0yhtc7 +MYSQL_DATABASE=db_A060 + +# ===== Application Configuration ===== +# External port (exposed to host) +FRONTEND_PORT=12015 + +# Security (IMPORTANT: Change SECRET_KEY in production!) 
+SECRET_KEY=your-secret-key-here-please-change-this-to-random-string +ALGORITHM=HS256 +ACCESS_TOKEN_EXPIRE_MINUTES=1440 + +# ===== OCR Configuration ===== +# PaddleOCR model directory (inside container) +PADDLEOCR_MODEL_DIR=/app/backend/models/paddleocr +# Supported languages (comma-separated) +OCR_LANGUAGES=ch,en,japan,korean +# Default confidence threshold +OCR_CONFIDENCE_THRESHOLD=0.5 +# Maximum concurrent OCR workers +MAX_OCR_WORKERS=4 + +# ===== File Upload Configuration ===== +# Maximum file size in bytes (50MB default) +MAX_UPLOAD_SIZE=52428800 +# Allowed file extensions (comma-separated) +ALLOWED_EXTENSIONS=png,jpg,jpeg,pdf,bmp,tiff,doc,docx,ppt,pptx +# Upload directories (inside container) +UPLOAD_DIR=/app/backend/uploads +TEMP_DIR=/app/backend/uploads/temp +PROCESSED_DIR=/app/backend/uploads/processed +IMAGES_DIR=/app/backend/uploads/images + +# ===== Export Configuration ===== +# Storage directories (inside container) +STORAGE_DIR=/app/backend/storage +MARKDOWN_DIR=/app/backend/storage/markdown +JSON_DIR=/app/backend/storage/json +EXPORTS_DIR=/app/backend/storage/exports + +# ===== PDF Generation Configuration ===== +# Pandoc path (inside container) +PANDOC_PATH=/usr/bin/pandoc +# Font directory (inside container) +FONT_DIR=/usr/share/fonts +# Default PDF page size +PDF_PAGE_SIZE=A4 +# Default PDF margins (mm) +PDF_MARGIN_TOP=20 +PDF_MARGIN_BOTTOM=20 +PDF_MARGIN_LEFT=20 +PDF_MARGIN_RIGHT=20 + +# ===== Translation Configuration (Reserved) ===== +# Enable translation feature (reserved for future) +ENABLE_TRANSLATION=false +# Translation engine: offline (argostranslate) or api (future) +TRANSLATION_ENGINE=offline +# Argostranslate models directory (inside container) +ARGOSTRANSLATE_MODELS_DIR=/app/backend/models/argostranslate + +# ===== Background Tasks Configuration ===== +# Task queue type: memory (default) or redis (future) +TASK_QUEUE_TYPE=memory +# Redis URL (if using redis) +# REDIS_URL=redis://localhost:6379/0 + +# ===== CORS Configuration ===== +# 
Allowed origins (comma-separated, * for all) +# For Docker, use the external URL +CORS_ORIGINS=http://localhost:12015,http://127.0.0.1:12015 + +# ===== Logging Configuration ===== +LOG_LEVEL=INFO +LOG_FILE=/app/backend/logs/app.log diff --git a/API_FIX_SUMMARY.md b/API_FIX_SUMMARY.md deleted file mode 100644 index 2fd561f..0000000 --- a/API_FIX_SUMMARY.md +++ /dev/null @@ -1,368 +0,0 @@ -# API 前後端不一致問題修正總結 - -## 修正日期 -2025-01-13 - -## 修正概覽 - -本次修正針對前後端 API 整合的 6 個主要問題進行了全面修復,確保前後端資料結構完全一致。 - ---- - -## 已修正的問題 - -### ✅ 問題 1: 登入回應結構不一致 - -**修正內容**: -- 在前端 `LoginResponse` 型別新增 `expires_in` 欄位 - -**修改檔案**: -- [frontend/src/types/api.ts:12-16](frontend/src/types/api.ts#L12-L16) - -**變更**: -```typescript -// 修正前 -export interface LoginResponse { - access_token: string - token_type: string -} - -// 修正後 -export interface LoginResponse { - access_token: string - token_type: string - expires_in: number // Token expiration time in seconds -} -``` - -**影響**: -- ✅ 前端現在可以接收 Token 過期時間 -- ✅ 可實作 Token 自動續期功能 -- ✅ 可提前提醒使用者 Token 即將過期 - ---- - -### ✅ 問題 2: OCR 任務狀態 API 不存在 - -**修正內容**: -- 移除前端的 `getTaskStatus()` 方法 -- 統一使用 `getBatchStatus()` 進行批次狀態追蹤 -- 從 import 中移除 `TaskStatus` 型別 - -**修改檔案**: -- [frontend/src/services/api.ts:3-18](frontend/src/services/api.ts#L3-L18) - 移除 TaskStatus import -- [frontend/src/services/api.ts:153-160](frontend/src/services/api.ts#L153-L160) - 移除 getTaskStatus 方法 - -**變更**: -```typescript -// 修正前 -import type { - // ... 其他型別 - TaskStatus, - // ... 
-} - -async getTaskStatus(taskId: string): Promise<TaskStatus> { - const response = await this.client.get(`/ocr/status/${taskId}`) - return response.data -} - -// 修正後 -// TaskStatus 已從 import 移除 -// getTaskStatus 方法已刪除 -// 統一使用 getBatchStatus() 追蹤狀態 -``` - -**影響**: -- ✅ 避免呼叫不存在的 API 端點 (404 錯誤) -- ✅ 統一使用批次狀態管理 -- ✅ 簡化狀態追蹤邏輯 - ---- - -### ✅ 問題 3: OCR 處理請求/回應模型不符 - -**修正內容**: -- 修改 `ProcessRequest` 型別,改用 `detect_layout` 取代 `confidence_threshold` -- 修改 `ProcessResponse` 型別,新增 `message` 和 `total_files` 欄位,移除 `task_id` - -**修改檔案**: -- [frontend/src/types/api.ts:37-50](frontend/src/types/api.ts#L37-L50) - -**變更**: -```typescript -// 修正前 -export interface ProcessRequest { - batch_id: number - lang?: string - confidence_threshold?: number // ❌ 後端不支援此欄位 -} - -export interface ProcessResponse { - task_id: string // ❌ 後端不回傳此欄位 - batch_id: number - status: string -} - -// 修正後 -export interface ProcessRequest { - batch_id: number - lang?: string - detect_layout?: boolean // ✅ 與後端一致 -} - -export interface ProcessResponse { - message: string // ✅ 新增 - batch_id: number - total_files: number // ✅ 新增 - status: string - // task_id 已移除 -} -``` - -**影響**: -- ✅ 前端可正確傳遞版面偵測參數給後端 -- ✅ 前端可接收處理訊息和檔案總數 -- ✅ 避免型別檢查錯誤 -- ✅ 避免驗證失敗 - ---- - -### ✅ 問題 4: 上傳檔案欄位命名不一致 - -**修正內容**: -- 將 `FileInfo.format` 改名為 `FileInfo.file_format` - -**修改檔案**: -- [frontend/src/types/api.ts:28-35](frontend/src/types/api.ts#L28-L35) - -**變更**: -```typescript -// 修正前 -export interface FileInfo { - id: number - filename: string - file_size: number - format: string // ❌ 與後端欄位名稱不同 - status: 'pending' | 'processing' | 'completed' | 'failed' -} - -// 修正後 -export interface FileInfo { - id: number - filename: string - file_size: number - file_format: string // ✅ 與後端一致 - status: 'pending' | 'processing' | 'completed' | 'failed' -} -``` - -**影響**: -- ✅ 前端可直接使用後端回傳的 `file_format` 欄位 -- ✅ 無需額外的欄位映射或轉換 -- ✅ UI 可正確顯示檔案格式 - ---- - -### ✅ 問題 5: CSS 模板清單缺少 filename - -**修正內容**: -- 移除 `CSSTemplate.filename` 欄位定義 -- 改用 `name` 作為模板識別碼 - -**修改檔案**: 
-- [frontend/src/types/api.ts:135-139](frontend/src/types/api.ts#L135-L139) - -**變更**: -```typescript -// 修正前 -export interface CSSTemplate { - name: string - description: string - filename: string // ❌ 後端實際不回傳此欄位 -} - -// 修正後 -export interface CSSTemplate { - name: string - description: string - // filename 已移除,使用 name 作為識別碼 -} -``` - -**影響**: -- ✅ 避免 `filename` 為 undefined 的問題 -- ✅ 使用 `name` 作為 `