# Tool_OCR - Backend Dependencies
# Python 3.10+
#
# Install with: pip install -r requirements.txt
# PaddlePaddle itself is NOT listed below; install it separately (see "OCR Engine").

# ===== Core Framework =====
fastapi==0.115.0
uvicorn[standard]==0.32.0
pydantic==2.9.2
pydantic-settings==2.6.1
email-validator>=2.0.0  # For pydantic EmailStr validation

# ===== OCR Engine =====
paddleocr>=3.0.0
paddlex[ocr]>=3.0.0  # Required for PP-StructureV3 layout analysis
# PaddlePaddle Installation (NOT available on PyPI for 3.x):
#   GPU (CUDA 12.6): pip install paddlepaddle-gpu==3.2.0 -i https://www.paddlepaddle.org.cn/packages/stable/cu126/
#   GPU (CUDA 12.9): pip install paddlepaddle-gpu==3.2.0 -i https://www.paddlepaddle.org.cn/packages/stable/cu129/
#   CPU:             pip install paddlepaddle -i https://www.paddlepaddle.org.cn/packages/stable/cpu/

# ===== Image Processing =====
pillow>=10.0.0
pdf2image>=1.17.0
opencv-python>=4.8.0

# ===== PDF Generation =====
weasyprint>=60.0
markdown>=3.5.0
reportlab>=4.0.0  # Layout-preserving PDF generation with precise coordinate control
PyPDF2>=3.0.0  # Extract dimensions from source PDF files
# Note: pandoc needs to be installed via brew (brew install pandoc)

# ===== Direct PDF Extraction (Dual-track Processing) =====
PyMuPDF>=1.23.0  # Primary library for editable PDF text/structure extraction
pdfplumber>=0.10.0  # Fallback for table extraction and validation

# ===== Data Export =====
pandas>=2.1.0
openpyxl>=3.1.0  # Excel support

# ===== Database =====
sqlalchemy>=2.0.0
pymysql>=1.1.0
alembic>=1.13.0

# ===== Authentication =====
python-jose[cryptography]>=3.3.0
passlib[bcrypt]>=1.7.4
bcrypt==4.2.1  # Pin to 4.2.1 for passlib compatibility
python-multipart>=0.0.6

# ===== Configuration =====
python-dotenv>=1.0.0
pyyaml>=6.0

# ===== HTTP Client =====
httpx>=0.25.0
requests>=2.31.0

# ===== Background Tasks (Optional) =====
# redis>=5.0.0  # Uncomment if using Redis for task queue
# celery>=5.3.0  # Uncomment if using Celery

# ===== Translation =====
# Translation will use external API (to be implemented)
# See openspec/changes/add-document-translation/ for proposal

# ===== Development Tools =====
pytest>=7.4.0
pytest-asyncio>=0.21.0
pytest-cov>=4.1.0
black>=23.9.0
pylint>=3.0.0

# ===== Utilities =====
# Note: python-magic requires libmagic (apt install libmagic1 on Linux)
python-magic>=0.4.27  # File type detection
beautifulsoup4>=4.12.0  # HTML table parsing for OCR track