# OCR/backend/app/core/config.py

"""
Tool_OCR - Configuration Management
Loads environment variables and provides centralized configuration
"""

from pathlib import Path
from typing import List, Optional
from urllib.parse import quote_plus

from pydantic import Field
from pydantic_settings import BaseSettings


def _split_csv(value: str) -> List[str]:
    """Split a comma-separated string into stripped, non-empty items."""
    return [item for item in (part.strip() for part in value.split(",")) if item]


class Settings(BaseSettings):
    """Application settings loaded from environment variables.

    Every field declares a default, so the class can be instantiated without
    any environment at all. Values are read from the process environment and
    from the ``.env`` file named in ``Config`` (names matched
    case-insensitively, see ``Config.case_sensitive``).

    Comma-separated string fields (``ocr_languages``, ``allowed_extensions``,
    ``cors_origins``) are exposed as lists through the matching ``*_list``
    properties.
    """

    # ===== Database Configuration =====
    mysql_host: str = Field(default="mysql.theaken.com")
    mysql_port: int = Field(default=33306)
    mysql_user: str = Field(default="A060")
    mysql_password: str = Field(default="")
    mysql_database: str = Field(default="db_A060")

    @property
    def database_url(self) -> str:
        """Construct the SQLAlchemy database URL.

        The user name and password are percent-encoded so that credentials
        containing URL-reserved characters (``@``, ``:``, ``/``, ``#``, ``%``,
        ...) do not corrupt the URL. Supply the credentials raw (not
        pre-encoded) in the environment.

        Returns:
            A ``mysql+pymysql://user:password@host:port/database`` URL.
        """
        user = quote_plus(self.mysql_user)
        password = quote_plus(self.mysql_password)
        return (
            f"mysql+pymysql://{user}:{password}"
            f"@{self.mysql_host}:{self.mysql_port}/{self.mysql_database}"
        )

    # ===== Application Configuration =====
    backend_port: int = Field(default=12010)
    frontend_port: int = Field(default=12011)
    # NOTE(review): placeholder default; override via environment in any real deployment.
    secret_key: str = Field(default="your-secret-key-change-this")
    algorithm: str = Field(default="HS256")
    access_token_expire_minutes: int = Field(default=1440)  # 24 hours

    # ===== External Authentication Configuration =====
    external_auth_api_url: str = Field(default="https://pj-auth-api.vercel.app")
    external_auth_endpoint: str = Field(default="/api/auth/login")
    external_auth_timeout: int = Field(default=30)
    token_refresh_buffer: int = Field(default=300)  # Refresh tokens 5 minutes before expiry

    @property
    def external_auth_full_url(self) -> str:
        """Construct the full external authentication URL.

        Trailing slashes on the base URL are removed before the endpoint is
        appended; the endpoint itself is used verbatim.
        """
        return f"{self.external_auth_api_url.rstrip('/')}{self.external_auth_endpoint}"

    # ===== Task Management Configuration =====
    database_table_prefix: str = Field(default="tool_ocr_")
    enable_task_history: bool = Field(default=True)
    task_retention_days: int = Field(default=30)
    max_tasks_per_user: int = Field(default=1000)

    # ===== OCR Configuration =====
    paddleocr_model_dir: str = Field(default="./models/paddleocr")
    ocr_languages: str = Field(default="ch,en,japan,korean")
    ocr_confidence_threshold: float = Field(default=0.5)
    max_ocr_workers: int = Field(default=4)

    @property
    def ocr_languages_list(self) -> List[str]:
        """Get OCR languages as a list (whitespace stripped, empty entries dropped)."""
        return _split_csv(self.ocr_languages)

    # ===== GPU Acceleration Configuration =====
    # Basic GPU settings
    force_cpu_mode: bool = Field(default=False)
    gpu_memory_fraction: float = Field(default=0.7)  # Optimized for RTX 4060 8GB
    gpu_device_id: int = Field(default=0)

    # Memory management for RTX 4060 8GB
    gpu_memory_limit_mb: int = Field(default=6144)  # 6GB max for models (leave 2GB buffer)
    gpu_memory_reserve_mb: int = Field(default=512)  # Reserve for CUDA overhead
    enable_memory_optimization: bool = Field(default=True)

    # Model loading and caching
    enable_lazy_model_loading: bool = Field(default=True)  # Load models on demand
    enable_model_cache: bool = Field(default=True)
    model_cache_limit_mb: int = Field(default=4096)  # Max 4GB for cached models
    auto_unload_unused_models: bool = Field(default=True)  # Unload unused language models
    model_idle_timeout_seconds: int = Field(default=300)  # Unload after 5 min idle

    # Batch processing configuration
    enable_batch_processing: bool = Field(default=True)
    inference_batch_size: int = Field(default=1)  # Conservative for 8GB VRAM
    max_concurrent_pages: int = Field(default=2)  # Process 2 pages concurrently

    # PP-StructureV3 optimization
    enable_chart_recognition: bool = Field(default=True)  # Chart/diagram recognition
    enable_formula_recognition: bool = Field(default=True)  # Math formula recognition
    enable_table_recognition: bool = Field(default=True)  # Table structure recognition
    enable_seal_recognition: bool = Field(default=True)  # Seal/stamp recognition
    enable_text_recognition: bool = Field(default=True)  # General text recognition

    # PP-StructureV3 Preprocessing (Stage 1)
    use_doc_orientation_classify: bool = Field(default=True)  # Auto-detect and correct document rotation
    use_doc_unwarping: bool = Field(default=True)  # Correct document warping from photos
    use_textline_orientation: bool = Field(default=True)  # Detect textline orientation
    layout_detection_threshold: float = Field(default=0.2)  # Lower threshold for more sensitive detection
    layout_nms_threshold: float = Field(default=0.2)  # Lower NMS to preserve more individual elements
    layout_merge_mode: str = Field(default="small")  # Use 'small' to minimize bbox merging
    layout_unclip_ratio: float = Field(default=1.2)  # Smaller unclip to preserve element boundaries
    text_det_thresh: float = Field(default=0.2)  # More sensitive text detection
    text_det_box_thresh: float = Field(default=0.3)  # Lower box threshold for better detection
    text_det_unclip_ratio: float = Field(default=1.2)  # Smaller unclip for tighter text boxes

    # Layout Detection Model Configuration (Stage 3)
    # Available models:
    # - None (default): Use PP-StructureV3's built-in model (PubLayNet-based)
    # - "PP-DocLayout_plus-L": Best for Chinese docs (83.2% mAP, 20 categories) - complex layouts
    # - "PP-DocLayout-L": High accuracy (90.4% mAP, 23 categories) - general purpose
    # - "picodet_lcnet_x1_0_fgd_layout_cdla": CDLA-based model for Chinese document layout
    layout_detection_model_name: Optional[str] = Field(
        default="PP-DocLayout_plus-L",
        description="Layout detection model name. PP-DocLayout_plus-L recommended for complex Chinese documents."
    )
    layout_detection_model_dir: Optional[str] = Field(
        default=None,
        description="Custom layout detection model directory. If None, downloads official model."
    )

    # Table Structure Recognition Model Configuration (Stage 4)
    # PP-StructureV3 uses separate models for wired (bordered) and wireless (borderless) tables
    # Both models should be configured for comprehensive table detection
    # Available models:
    # - "SLANeXt_wired": Best for wired/bordered tables (69.65% accuracy, 351MB)
    # - "SLANeXt_wireless": Best for wireless/borderless tables (69.65% accuracy, 351MB)
    # - "SLANet": Legacy model (59.52% accuracy, 6.9MB)
    # - "SLANet_plus": Improved legacy (63.69% accuracy, 6.9MB)
    wired_table_model_name: Optional[str] = Field(
        default="SLANeXt_wired",
        description="Table structure model for bordered tables. SLANeXt_wired recommended."
    )
    wireless_table_model_name: Optional[str] = Field(
        default="SLANeXt_wireless",
        description="Table structure model for borderless tables. SLANeXt_wireless recommended."
    )

    # Formula Recognition Model Configuration (Stage 4)
    # Available models:
    # - "PP-FormulaNet_plus-L": Best for Chinese formulas (90.64% Chinese, 92.22% English BLEU)
    # - "PP-FormulaNet-L": Good for English formulas (90.36% English BLEU)
    # - "PP-FormulaNet-S": Fast inference (87% English BLEU)
    formula_recognition_model_name: Optional[str] = Field(
        default="PP-FormulaNet_plus-L",
        description="Formula recognition model. PP-FormulaNet_plus-L recommended for Chinese formula support."
    )

    # ===== Layout Preprocessing Configuration =====
    # Image preprocessing to enhance layout detection for documents with faint lines/borders
    # Preprocessing only affects layout detection input; original image is preserved for extraction
    layout_preprocessing_mode: str = Field(
        default="auto",
        description="Preprocessing mode: 'auto' (analyze and apply), 'manual' (use config), 'disabled'"
    )
    layout_preprocessing_contrast: str = Field(
        default="clahe",
        description="Contrast enhancement method: 'none', 'histogram', 'clahe' (recommended)"
    )
    layout_preprocessing_sharpen: bool = Field(
        default=True,
        description="Enable sharpening to enhance faint lines and borders"
    )
    layout_preprocessing_binarize: bool = Field(
        default=False,
        description="Enable binarization (aggressive, use for very low contrast documents only)"
    )
    # Auto-detection thresholds
    layout_preprocessing_contrast_threshold: float = Field(
        default=40.0,
        description="Contrast (std dev) below this triggers CLAHE in auto mode"
    )
    layout_preprocessing_edge_threshold: float = Field(
        default=15.0,
        description="Edge strength below this triggers sharpening in auto mode"
    )
    layout_preprocessing_binarize_threshold: float = Field(
        default=20.0,
        description="Contrast below this triggers binarization in auto mode"
    )

    # ===== Gap Filling Configuration =====
    # Supplements PP-StructureV3 output with raw OCR regions when detection is incomplete
    gap_filling_enabled: bool = Field(default=True)  # Enable gap filling for OCR track
    gap_filling_coverage_threshold: float = Field(default=0.7)  # Activate when coverage < 70%
    gap_filling_iou_threshold: float = Field(default=0.15)  # IoU threshold for coverage detection
    gap_filling_confidence_threshold: float = Field(default=0.3)  # Min confidence for raw OCR regions
    gap_filling_dedup_iou_threshold: float = Field(default=0.5)  # IoU threshold for deduplication

    # ===== Debug Configuration =====
    # Enable debug outputs for PP-StructureV3 analysis
    pp_structure_debug_enabled: bool = Field(default=True)  # Save debug files for PP-StructureV3
    pp_structure_debug_visualization: bool = Field(default=True)  # Generate visualization images

    # Performance tuning
    use_fp16_inference: bool = Field(default=False)  # Half-precision (if supported)
    enable_cudnn_benchmark: bool = Field(default=True)  # Optimize convolution algorithms
    num_threads: int = Field(default=4)  # CPU threads for preprocessing

    # ===== Enhanced Memory Management Configuration =====
    # Memory thresholds (as ratio of total GPU memory)
    memory_warning_threshold: float = Field(default=0.80)  # 80% - start warning
    memory_critical_threshold: float = Field(default=0.95)  # 95% - throttle operations
    memory_emergency_threshold: float = Field(default=0.98)  # 98% - emergency cleanup

    # Memory monitoring
    memory_check_interval_seconds: int = Field(default=30)  # Background check interval
    enable_memory_alerts: bool = Field(default=True)  # Enable memory alerts

    # Model lifecycle management
    enable_model_lifecycle_management: bool = Field(default=True)  # Use ModelManager
    pp_structure_idle_timeout_seconds: int = Field(default=300)  # Unload PP-Structure after idle
    structure_model_memory_mb: int = Field(default=2000)  # Estimated memory for PP-StructureV3
    ocr_model_memory_mb: int = Field(default=500)  # Estimated memory per OCR language model

    # Service pool configuration
    enable_service_pool: bool = Field(default=True)  # Use OCRServicePool
    max_services_per_device: int = Field(default=1)  # Max OCRService per GPU
    max_total_services: int = Field(default=2)  # Max total OCRService instances
    service_acquire_timeout_seconds: float = Field(default=300.0)  # Timeout for acquiring service
    max_queue_size: int = Field(default=50)  # Max pending tasks per device

    # Concurrency control
    max_concurrent_predictions: int = Field(default=2)  # Max concurrent PP-StructureV3 predictions
    enable_cpu_fallback: bool = Field(default=True)  # Fall back to CPU when GPU memory low

    # Emergency recovery
    enable_emergency_cleanup: bool = Field(default=True)  # Auto-cleanup on memory pressure
    enable_worker_restart: bool = Field(default=False)  # Restart workers on OOM (requires supervisor)

    # ===== File Upload Configuration =====
    max_upload_size: int = Field(default=52428800)  # 50MB
    allowed_extensions: str = Field(default="png,jpg,jpeg,pdf,bmp,tiff,doc,docx,ppt,pptx")
    upload_dir: str = Field(default="./uploads")
    temp_dir: str = Field(default="./uploads/temp")
    processed_dir: str = Field(default="./uploads/processed")
    images_dir: str = Field(default="./uploads/images")

    @property
    def allowed_extensions_list(self) -> List[str]:
        """Get allowed extensions as a list (whitespace stripped, empty entries dropped)."""
        return _split_csv(self.allowed_extensions)

    # ===== Export Configuration =====
    storage_dir: str = Field(default="./storage")
    markdown_dir: str = Field(default="./storage/markdown")
    json_dir: str = Field(default="./storage/json")
    exports_dir: str = Field(default="./storage/exports")
    result_dir: str = Field(default="./storage/results")

    # ===== PDF Generation Configuration =====
    pandoc_path: str = Field(default="/opt/homebrew/bin/pandoc")
    font_dir: str = Field(default="/System/Library/Fonts")
    pdf_page_size: str = Field(default="A4")
    pdf_margin_top: int = Field(default=20)
    pdf_margin_bottom: int = Field(default=20)
    pdf_margin_left: int = Field(default=20)
    pdf_margin_right: int = Field(default=20)

    # ===== Layout-Preserving PDF Configuration =====
    chinese_font_path: str = Field(default="./backend/fonts/NotoSansSC-Regular.ttf")
    pdf_font_size_base: int = Field(default=12)
    pdf_enable_bbox_debug: bool = Field(default=False)  # Draw bounding boxes for debugging

    # ===== Translation Configuration (Reserved) =====
    enable_translation: bool = Field(default=False)
    translation_engine: str = Field(default="offline")
    argostranslate_models_dir: str = Field(default="./models/argostranslate")

    # ===== Background Tasks Configuration =====
    task_queue_type: str = Field(default="memory")
    redis_url: str = Field(default="redis://localhost:6379/0")

    # ===== CORS Configuration =====
    cors_origins: str = Field(default="http://localhost:12011,http://127.0.0.1:12011")

    @property
    def cors_origins_list(self) -> List[str]:
        """Get CORS origins as a list (whitespace stripped, empty entries dropped)."""
        return _split_csv(self.cors_origins)

    # ===== Logging Configuration =====
    log_level: str = Field(default="INFO")
    log_file: str = Field(default="./logs/app.log")

    class Config:
        # Look for .env in project root (one level up from backend/),
        # i.e. four directory levels above this file.
        env_file = str(Path(__file__).resolve().parent.parent.parent.parent / ".env")
        env_file_encoding = "utf-8"
        case_sensitive = False

    def ensure_directories(self) -> None:
        """Create all necessary directories if they don't exist.

        Covers the upload, storage and model directories plus the parent
        directory of ``log_file``. The Argos Translate model directory is
        included only when translation is enabled with the ``"offline"``
        engine. Missing parents are created; existing directories are left
        untouched.
        """
        dirs = [
            self.upload_dir,
            self.temp_dir,
            self.processed_dir,
            self.images_dir,
            self.storage_dir,
            self.markdown_dir,
            self.json_dir,
            self.exports_dir,
            self.result_dir,
            self.paddleocr_model_dir,
            Path(self.log_file).parent,
        ]

        if self.enable_translation and self.translation_engine == "offline":
            dirs.append(self.argostranslate_models_dir)

        for dir_path in dirs:
            Path(dir_path).mkdir(parents=True, exist_ok=True)


# Global settings instance: constructed once, when this module is first imported.
settings = Settings()