This commit is contained in:
beabigegg
2025-11-12 22:53:17 +08:00
commit da700721fa
130 changed files with 23393 additions and 0 deletions

126
backend/app/core/config.py Normal file
View File

@@ -0,0 +1,126 @@
"""
Tool_OCR - Configuration Management
Loads environment variables and provides centralized configuration
"""
from typing import List
from pydantic_settings import BaseSettings
from pydantic import Field
from pathlib import Path
class Settings(BaseSettings):
"""Application settings loaded from environment variables"""
# ===== Database Configuration =====
mysql_host: str = Field(default="mysql.theaken.com")
mysql_port: int = Field(default=33306)
mysql_user: str = Field(default="A060")
mysql_password: str = Field(default="")
mysql_database: str = Field(default="db_A060")
@property
def database_url(self) -> str:
"""Construct SQLAlchemy database URL"""
return (
f"mysql+pymysql://{self.mysql_user}:{self.mysql_password}"
f"@{self.mysql_host}:{self.mysql_port}/{self.mysql_database}"
)
# ===== Application Configuration =====
backend_port: int = Field(default=12010)
frontend_port: int = Field(default=12011)
secret_key: str = Field(default="your-secret-key-change-this")
algorithm: str = Field(default="HS256")
access_token_expire_minutes: int = Field(default=1440) # 24 hours
# ===== OCR Configuration =====
paddleocr_model_dir: str = Field(default="./models/paddleocr")
ocr_languages: str = Field(default="ch,en,japan,korean")
ocr_confidence_threshold: float = Field(default=0.5)
max_ocr_workers: int = Field(default=4)
@property
def ocr_languages_list(self) -> List[str]:
"""Get OCR languages as list"""
return [lang.strip() for lang in self.ocr_languages.split(",")]
# ===== File Upload Configuration =====
max_upload_size: int = Field(default=52428800) # 50MB
allowed_extensions: str = Field(default="png,jpg,jpeg,pdf,bmp,tiff,doc,docx,ppt,pptx")
upload_dir: str = Field(default="./uploads")
temp_dir: str = Field(default="./uploads/temp")
processed_dir: str = Field(default="./uploads/processed")
images_dir: str = Field(default="./uploads/images")
@property
def allowed_extensions_list(self) -> List[str]:
"""Get allowed extensions as list"""
return [ext.strip() for ext in self.allowed_extensions.split(",")]
# ===== Export Configuration =====
storage_dir: str = Field(default="./storage")
markdown_dir: str = Field(default="./storage/markdown")
json_dir: str = Field(default="./storage/json")
exports_dir: str = Field(default="./storage/exports")
# ===== PDF Generation Configuration =====
pandoc_path: str = Field(default="/opt/homebrew/bin/pandoc")
font_dir: str = Field(default="/System/Library/Fonts")
pdf_page_size: str = Field(default="A4")
pdf_margin_top: int = Field(default=20)
pdf_margin_bottom: int = Field(default=20)
pdf_margin_left: int = Field(default=20)
pdf_margin_right: int = Field(default=20)
# ===== Translation Configuration (Reserved) =====
enable_translation: bool = Field(default=False)
translation_engine: str = Field(default="offline")
argostranslate_models_dir: str = Field(default="./models/argostranslate")
# ===== Background Tasks Configuration =====
task_queue_type: str = Field(default="memory")
redis_url: str = Field(default="redis://localhost:6379/0")
# ===== CORS Configuration =====
cors_origins: str = Field(default="http://localhost:12011,http://127.0.0.1:12011")
@property
def cors_origins_list(self) -> List[str]:
"""Get CORS origins as list"""
return [origin.strip() for origin in self.cors_origins.split(",")]
# ===== Logging Configuration =====
log_level: str = Field(default="INFO")
log_file: str = Field(default="./logs/app.log")
class Config:
# Look for .env in project root (one level up from backend/)
env_file = str(Path(__file__).resolve().parent.parent.parent.parent / ".env")
env_file_encoding = "utf-8"
case_sensitive = False
def ensure_directories(self):
"""Create all necessary directories if they don't exist"""
dirs = [
self.upload_dir,
self.temp_dir,
self.processed_dir,
self.images_dir,
self.storage_dir,
self.markdown_dir,
self.json_dir,
self.exports_dir,
self.paddleocr_model_dir,
Path(self.log_file).parent,
]
if self.enable_translation and self.translation_engine == "offline":
dirs.append(self.argostranslate_models_dir)
for dir_path in dirs:
Path(dir_path).mkdir(parents=True, exist_ok=True)
# Global settings instance
settings = Settings()

View File

@@ -0,0 +1,41 @@
"""
Tool_OCR - Database Connection Management
SQLAlchemy setup with async support
"""
from sqlalchemy import create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
from app.core.config import settings
# Create database engine
engine = create_engine(
settings.database_url,
pool_pre_ping=True, # Enable connection health checks
pool_size=10,
max_overflow=20,
echo=False, # Set to True for SQL query logging
)
# Create session factory
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
# Base class for all models
Base = declarative_base()
# Dependency to get database session
def get_db():
"""
Database session dependency for FastAPI endpoints
Usage:
@app.get("/endpoint")
def endpoint(db: Session = Depends(get_db)):
# Use db session here
"""
db = SessionLocal()
try:
yield db
finally:
db.close()

138
backend/app/core/deps.py Normal file
View File

@@ -0,0 +1,138 @@
"""
Tool_OCR - FastAPI Dependencies
Authentication and database session dependencies
"""
from typing import Generator, Optional
import logging
from fastapi import Depends, HTTPException, status
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
from sqlalchemy.orm import Session
from app.core.database import SessionLocal
from app.core.security import decode_access_token
from app.models.user import User
logger = logging.getLogger(__name__)
# HTTP Bearer token security scheme
security = HTTPBearer()
def get_db() -> Generator:
"""
Database session dependency
Yields:
Session: SQLAlchemy database session
"""
db = SessionLocal()
try:
yield db
finally:
db.close()
def get_current_user(
credentials: HTTPAuthorizationCredentials = Depends(security),
db: Session = Depends(get_db)
) -> User:
"""
Get current authenticated user from JWT token
Args:
credentials: HTTP Bearer credentials
db: Database session
Returns:
User: Current user object
Raises:
HTTPException: If token is invalid or user not found
"""
credentials_exception = HTTPException(
status_code=status.HTTP_401_UNAUTHORIZED,
detail="Could not validate credentials",
headers={"WWW-Authenticate": "Bearer"},
)
# Extract token
token = credentials.credentials
# Decode token
payload = decode_access_token(token)
if payload is None:
raise credentials_exception
# Extract user ID from token (convert from string to int)
user_id_str: Optional[str] = payload.get("sub")
if user_id_str is None:
raise credentials_exception
try:
user_id: int = int(user_id_str)
except (ValueError, TypeError):
raise credentials_exception
# Query user from database
user = db.query(User).filter(User.id == user_id).first()
if user is None:
raise credentials_exception
# Check if user is active
if not user.is_active:
raise HTTPException(
status_code=status.HTTP_403_FORBIDDEN,
detail="Inactive user"
)
return user
def get_current_active_user(
current_user: User = Depends(get_current_user)
) -> User:
"""
Get current active user
Args:
current_user: Current user from get_current_user
Returns:
User: Current active user
Raises:
HTTPException: If user is inactive
"""
if not current_user.is_active:
raise HTTPException(
status_code=status.HTTP_403_FORBIDDEN,
detail="Inactive user"
)
return current_user
def get_current_admin_user(
current_user: User = Depends(get_current_user)
) -> User:
"""
Get current admin user
Args:
current_user: Current user from get_current_user
Returns:
User: Current admin user
Raises:
HTTPException: If user is not admin
"""
if not current_user.is_admin:
raise HTTPException(
status_code=status.HTTP_403_FORBIDDEN,
detail="Not enough privileges"
)
return current_user

View File

@@ -0,0 +1,89 @@
"""
Tool_OCR - Security Utilities
JWT token generation and password hashing
"""
from datetime import datetime, timedelta
from typing import Optional
import logging
from jose import JWTError, jwt
from passlib.context import CryptContext
from app.core.config import settings
logger = logging.getLogger(__name__)
# Password hashing context
pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")
def verify_password(plain_password: str, hashed_password: str) -> bool:
"""
Verify a password against a hash
Args:
plain_password: Plain text password
hashed_password: Hashed password from database
Returns:
bool: True if password matches, False otherwise
"""
return pwd_context.verify(plain_password, hashed_password)
def get_password_hash(password: str) -> str:
"""
Hash a password
Args:
password: Plain text password
Returns:
str: Hashed password
"""
return pwd_context.hash(password)
def create_access_token(data: dict, expires_delta: Optional[timedelta] = None) -> str:
"""
Create JWT access token
Args:
data: Data to encode in token (typically {"sub": user_id})
expires_delta: Optional expiration time delta
Returns:
str: Encoded JWT token
"""
to_encode = data.copy()
if expires_delta:
expire = datetime.utcnow() + expires_delta
else:
expire = datetime.utcnow() + timedelta(minutes=settings.access_token_expire_minutes)
to_encode.update({"exp": expire})
encoded_jwt = jwt.encode(to_encode, settings.secret_key, algorithm=settings.algorithm)
return encoded_jwt
def decode_access_token(token: str) -> Optional[dict]:
"""
Decode and verify JWT access token
Args:
token: JWT token string
Returns:
dict: Decoded token payload, or None if invalid
"""
try:
payload = jwt.decode(token, settings.secret_key, algorithms=[settings.algorithm])
return payload
except JWTError as e:
logger.warning(f"JWT decode error: {e}")
return None