feat: implement hybrid image extraction and memory management

Backend:
- Add hybrid image extraction for Direct track (inline image blocks)
- Add render_inline_image_regions() fallback when OCR doesn't find images
- Add check_document_for_missing_images() for detecting missing images
- Add memory management system (MemoryGuard, ModelManager, ServicePool)
- Update pdf_generator_service to handle HYBRID processing track
- Add ElementType.LOGO for logo extraction

Frontend:
- Fix PDF viewer re-rendering issues with memoization
- Add TaskNotFound component and useTaskValidation hook
- Disable StrictMode due to react-pdf incompatibility
- Fix task detail and results page loading states

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-26 10:56:22 +08:00
parent ba8ddf2b68
commit 1afdb822c3
26 changed files with 8273 additions and 366 deletions
--- a/backend/app/core/config.py
+++ b/backend/app/core/config.py
@@ -104,6 +104,37 @@ class Settings(BaseSettings):
    enable_cudnn_benchmark: bool = Field(default=True)  # Optimize convolution algorithms
    num_threads: int = Field(default=4)  # CPU threads for preprocessing

+    # ===== Enhanced Memory Management Configuration =====
+    # Memory thresholds (as ratio of total GPU memory)
+    memory_warning_threshold: float = Field(default=0.80)  # 80% - start warning
+    memory_critical_threshold: float = Field(default=0.95)  # 95% - throttle operations
+    memory_emergency_threshold: float = Field(default=0.98)  # 98% - emergency cleanup
+
+    # Memory monitoring
+    memory_check_interval_seconds: int = Field(default=30)  # Background check interval
+    enable_memory_alerts: bool = Field(default=True)  # Enable memory alerts
+
+    # Model lifecycle management
+    enable_model_lifecycle_management: bool = Field(default=True)  # Use ModelManager
+    pp_structure_idle_timeout_seconds: int = Field(default=300)  # Unload PP-Structure after idle
+    structure_model_memory_mb: int = Field(default=2000)  # Estimated memory for PP-StructureV3
+    ocr_model_memory_mb: int = Field(default=500)  # Estimated memory per OCR language model
+
+    # Service pool configuration
+    enable_service_pool: bool = Field(default=True)  # Use OCRServicePool
+    max_services_per_device: int = Field(default=1)  # Max OCRService per GPU
+    max_total_services: int = Field(default=2)  # Max total OCRService instances
+    service_acquire_timeout_seconds: float = Field(default=300.0)  # Timeout for acquiring service
+    max_queue_size: int = Field(default=50)  # Max pending tasks per device
+
+    # Concurrency control
+    max_concurrent_predictions: int = Field(default=2)  # Max concurrent PP-StructureV3 predictions
+    enable_cpu_fallback: bool = Field(default=True)  # Fall back to CPU when GPU memory low
+
+    # Emergency recovery
+    enable_emergency_cleanup: bool = Field(default=True)  # Auto-cleanup on memory pressure
+    enable_worker_restart: bool = Field(default=False)  # Restart workers on OOM (requires supervisor)
+
    # ===== File Upload Configuration =====
    max_upload_size: int = Field(default=52428800)  # 50MB
    allowed_extensions: str = Field(default="png,jpg,jpeg,pdf,bmp,tiff,doc,docx,ppt,pptx")