# ============================================
# Tool_OCR - Unified Docker Image
# Frontend (React + Vite) + Backend (FastAPI)
# Served by Nginx with reverse proxy
# ============================================

# ============================================
# Stage 1: Build Frontend
# ============================================
FROM node:20-alpine AS frontend-builder

WORKDIR /app/frontend

# Copy only the package manifests first so the dependency layer stays cached
# until package.json / package-lock.json change.
COPY frontend/package*.json ./

# Install all dependencies (devDependencies are required for the build)
RUN npm ci

# Copy frontend source
COPY frontend/ ./

# Create the production environment file with an empty VITE_API_BASE_URL.
# This runs after the source copy, so it overwrites any .env.production that
# ships inside frontend/.
# NOTE(review): presumably an empty base URL makes the frontend call the API
# on the same origin through the Nginx reverse proxy - confirm against the
# frontend code.
RUN echo "VITE_API_BASE_URL=" > .env.production

# Build frontend for production (output is copied from /app/frontend/dist below)
RUN npm run build

# ============================================
# Stage 2: Build Backend + Final Image
# ============================================
FROM python:3.10-slim-bookworm

# Set working directory
WORKDIR /app

# Runtime environment variables
ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    PIP_NO_CACHE_DIR=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1

# Build-time only: keeps apt non-interactive during RUN steps without leaking
# the setting into the environment of the running container.
ARG DEBIAN_FRONTEND=noninteractive

# Install system dependencies (sorted alphabetically)
# - curl: for health checks
# - fonts-noto-cjk, fonts-noto-cjk-extra: Chinese/Japanese/Korean font support
# - libffi-dev: for cryptography
# - libgdk-pixbuf2.0-0: for WeasyPrint image handling
# - libgomp1, libgl1-mesa-glx, libglib2.0-0: for OpenCV and PaddleOCR
# - libmagic1: for python-magic file type detection
# - libpango-1.0-0, libpangocairo-1.0-0: for WeasyPrint
# - libreoffice-writer, libreoffice-impress: for Office document conversion (doc/docx/ppt/pptx)
# - nginx: web server and reverse proxy
# - pandoc: for markdown to PDF conversion
# - poppler-utils: for pdf2image (PDF processing)
# - supervisor: process manager for nginx + uvicorn
# NOTE(review): libgl1-mesa-glx is a legacy package name; verify it is still
# available before moving this image to a newer Debian release.
RUN apt-get update && apt-get install -y --no-install-recommends \
        curl \
        fonts-noto-cjk \
        fonts-noto-cjk-extra \
        libffi-dev \
        libgdk-pixbuf2.0-0 \
        libgl1-mesa-glx \
        libglib2.0-0 \
        libgomp1 \
        libmagic1 \
        libpango-1.0-0 \
        libpangocairo-1.0-0 \
        libreoffice-impress \
        libreoffice-writer \
        nginx \
        pandoc \
        poppler-utils \
        supervisor \
    && rm -rf /var/lib/apt/lists/*

# Copy Python requirements on their own so the dependency layer is reused
# when only application code changes.
COPY requirements.txt .

# Install Python dependencies with extended timeout
# PaddlePaddle is 189MB and may take time to download
# Timeout: 600 seconds (10 minutes), Retries: 5
RUN pip install --timeout 600 --retries 5 -r requirements.txt

# Copy backend application
COPY backend/ ./backend/

# Copy frontend build from frontend-builder stage
COPY --from=frontend-builder /app/frontend/dist /app/frontend/dist

# Copy Nginx configuration
COPY docker/nginx.conf /etc/nginx/nginx.conf
COPY docker/default.conf /etc/nginx/conf.d/default.conf

# Copy supervisor configuration
COPY docker/supervisord.conf /etc/supervisor/conf.d/supervisord.conf

# Copy startup script and fix line endings (Windows CRLF -> Linux LF)
COPY docker/entrypoint.sh /entrypoint.sh
RUN sed -i 's/\r$//' /entrypoint.sh && chmod +x /entrypoint.sh

# Create necessary directories with proper permissions.
# Nginx log and cache directories are handed to www-data; everything under
# /app is set to mode 755.
RUN mkdir -p \
        /app/backend/uploads/temp \
        /app/backend/uploads/processed \
        /app/backend/uploads/images \
        /app/backend/storage/markdown \
        /app/backend/storage/json \
        /app/backend/storage/exports \
        /app/backend/models/paddleocr \
        /app/backend/logs \
        /var/log/supervisor \
        /var/log/nginx \
        /var/cache/nginx \
        /var/run \
    && chmod -R 755 /app \
    && chown -R www-data:www-data /var/log/nginx /var/cache/nginx

# Expose port (only one port needed!)
EXPOSE 12015

# Health check against the /health endpoint on the exposed port.
# -sS hides the progress meter but still prints errors to the health log.
HEALTHCHECK --interval=30s --timeout=10s --start-period=40s --retries=3 \
    CMD curl -fsS http://localhost:12015/health || exit 1

# Set working directory to backend for Python app
WORKDIR /app/backend

# NOTE(review): there is no USER directive, so the entrypoint runs as root.
# The supervisor/nginx configs are not visible here - confirm what they need
# before switching to a non-root user.

# Use entrypoint script to start supervisor
ENTRYPOINT ["/entrypoint.sh"]