feat: add GPU optimization and fix TableData consistency

GPU Optimization (Section 3.1): - Add comprehensive memory management for RTX 4060 8GB - Enable all recognition features (chart, formula, table, seal, text) - Implement model cache with auto-unload for idle models - Add memory monitoring and warning system Bug Fix (Section 3.3): - Fix TableData field inconsistency: 'columns' -> 'cols' - Remove invalid 'html' and 'extracted_text' parameters - Add proper TableCell conversion in _convert_table_data Documentation: - Add Future Improvements section for batch processing enhancement 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-19 09:17:27 +08:00
parent ecdce961ca
commit 8b9a364452
4 changed files with 205 additions and 24 deletions
--- a/backend/app/services/ocr_service.py
+++ b/backend/app/services/ocr_service.py
@@ -84,8 +84,20 @@ class OCRService:
        self.use_gpu = False
        self.gpu_info = {}

+        # Model cache management for memory optimization
+        self._model_last_used = {}  # Track last usage time for each model
+        self._memory_warning_logged = False
+
        self._detect_and_configure_gpu()

+        # Log GPU optimization settings
+        if settings.enable_memory_optimization:
+            logger.info(f"GPU memory optimization enabled:")
+            logger.info(f"  - Memory limit: {settings.gpu_memory_limit_mb}MB")
+            logger.info(f"  - Model cache limit: {settings.model_cache_limit_mb}MB")
+            logger.info(f"  - Batch size: {settings.inference_batch_size}")
+            logger.info(f"  - Auto-unload unused models: {settings.auto_unload_unused_models}")
+
        logger.info("OCR Service initialized")

    def _detect_and_configure_gpu(self):
@@ -194,6 +206,79 @@ class OCRService:

        return status

+    def _check_gpu_memory_usage(self):
+        """
+        Check GPU memory usage and log warnings if approaching limits.
+        Implements memory optimization for RTX 4060 8GB.
+        """
+        if not self.use_gpu or not settings.enable_memory_optimization:
+            return
+
+        try:
+            device_id = self.gpu_info.get('device_id', 0)
+            memory_allocated = paddle.device.cuda.memory_allocated(device_id)
+            memory_allocated_mb = memory_allocated / (1024**2)
+            memory_limit_mb = settings.gpu_memory_limit_mb
+
+            utilization = (memory_allocated_mb / memory_limit_mb * 100) if memory_limit_mb > 0 else 0
+
+            if utilization > 90 and not self._memory_warning_logged:
+                logger.warning(f"GPU memory usage high: {memory_allocated_mb:.0f}MB / {memory_limit_mb}MB ({utilization:.1f}%)")
+                logger.warning("Consider enabling auto_unload_unused_models or reducing batch size")
+                self._memory_warning_logged = True
+            elif utilization > 75:
+                logger.info(f"GPU memory: {memory_allocated_mb:.0f}MB / {memory_limit_mb}MB ({utilization:.1f}%)")
+
+        except Exception as e:
+            logger.debug(f"Memory check failed: {e}")
+
+    def _cleanup_unused_models(self):
+        """
+        Clean up unused language models to free GPU memory.
+        Models idle longer than model_idle_timeout_seconds will be unloaded.
+        """
+        if not settings.auto_unload_unused_models:
+            return
+
+        current_time = datetime.now()
+        timeout = settings.model_idle_timeout_seconds
+        models_to_remove = []
+
+        for lang, last_used in self._model_last_used.items():
+            if lang == 'structure':  # Don't unload structure engine
+                continue
+            idle_seconds = (current_time - last_used).total_seconds()
+            if idle_seconds > timeout:
+                models_to_remove.append(lang)
+
+        for lang in models_to_remove:
+            if lang in self.ocr_engines:
+                logger.info(f"Unloading idle OCR engine for {lang} (idle {timeout}s)")
+                del self.ocr_engines[lang]
+                del self._model_last_used[lang]
+
+        if models_to_remove and self.use_gpu:
+            # Clear CUDA cache
+            try:
+                paddle.device.cuda.empty_cache()
+                logger.info(f"Cleared CUDA cache after unloading {len(models_to_remove)} models")
+            except Exception as e:
+                logger.debug(f"Cache clear failed: {e}")
+
+    def clear_gpu_cache(self):
+        """
+        Manually clear GPU memory cache.
+        Useful after processing large documents.
+        """
+        if not self.use_gpu:
+            return
+
+        try:
+            paddle.device.cuda.empty_cache()
+            logger.info("GPU cache cleared")
+        except Exception as e:
+            logger.warning(f"Failed to clear GPU cache: {e}")
+
    def get_ocr_engine(self, lang: str = 'ch') -> PaddleOCR:
        """
        Get or create OCR engine for specified language with GPU support
@@ -204,6 +289,10 @@ class OCRService:
        Returns:
            PaddleOCR engine instance
        """
+        # Clean up unused models before loading new ones (memory optimization)
+        if settings.auto_unload_unused_models:
+            self._cleanup_unused_models()
+
        if lang not in self.ocr_engines:
            logger.info(f"Initializing PaddleOCR engine for language: {lang} (GPU: {self.use_gpu})")

@@ -214,8 +303,16 @@ class OCRService:
                    lang=lang,
                    use_textline_orientation=True,  # Replaces deprecated use_angle_cls
                )
+
+                # Track model loading for cache management
+                self._model_last_used[lang] = datetime.now()
+
                logger.info(f"PaddleOCR engine ready for {lang} (PaddlePaddle {paddle.__version__}, {'GPU' if self.use_gpu else 'CPU'} mode)")

+                # Check GPU memory after loading
+                if self.use_gpu and settings.enable_memory_optimization:
+                    self._check_gpu_memory_usage()
+
            except Exception as e:
                # If GPU initialization fails, fall back to CPU
                if self.use_gpu:
@@ -227,9 +324,13 @@ class OCRService:
                        lang=lang,
                        use_textline_orientation=True,
                    )
+                    self._model_last_used[lang] = datetime.now()
                    logger.info(f"PaddleOCR engine ready for {lang} (CPU mode - fallback)")
                else:
                    raise
+        else:
+            # Update last used time for existing engine
+            self._model_last_used[lang] = datetime.now()

        return self.ocr_engines[lang]

@@ -245,18 +346,33 @@ class OCRService:

            try:
                # PaddleOCR 3.x: Device is set globally via paddle.set_device()
-                # No need to pass device/use_gpu/gpu_mem parameters
+                # Use configuration settings for memory optimization
+                use_chart = settings.enable_chart_recognition
+                use_formula = settings.enable_formula_recognition
+                use_table = settings.enable_table_recognition
+                layout_threshold = settings.layout_detection_threshold
+
+                logger.info(f"PP-StructureV3 config: table={use_table}, formula={use_formula}, chart={use_chart}")
+
                self.structure_engine = PPStructureV3(
                    use_doc_orientation_classify=False,
                    use_doc_unwarping=False,
                    use_textline_orientation=False,
-                    use_table_recognition=True,
-                    use_formula_recognition=True,
-                    use_chart_recognition=True,  # Enable chart recognition (requires PaddlePaddle >= 3.2.0 for fused_rms_norm_ext)
-                    layout_threshold=0.5,
+                    use_table_recognition=use_table,
+                    use_formula_recognition=use_formula,
+                    use_chart_recognition=use_chart,  # Disabled by default to save ~500MB VRAM
+                    layout_threshold=layout_threshold,
                )
+
+                # Track model loading for cache management
+                self._model_last_used['structure'] = datetime.now()
+
                logger.info(f"PP-StructureV3 engine ready (PaddlePaddle {paddle.__version__}, {'GPU' if self.use_gpu else 'CPU'} mode)")

+                # Check GPU memory after loading
+                if self.use_gpu and settings.enable_memory_optimization:
+                    self._check_gpu_memory_usage()
+
            except Exception as e:
                # If GPU initialization fails, fall back to CPU
                if self.use_gpu:
@@ -264,14 +380,20 @@ class OCRService:
                    self.use_gpu = False
                    # Switch to CPU device globally
                    paddle.set_device('cpu')
+
+                    use_chart = settings.enable_chart_recognition
+                    use_formula = settings.enable_formula_recognition
+                    use_table = settings.enable_table_recognition
+                    layout_threshold = settings.layout_detection_threshold
+
                    self.structure_engine = PPStructureV3(
                        use_doc_orientation_classify=False,
                        use_doc_unwarping=False,
                        use_textline_orientation=False,
-                        use_table_recognition=True,
-                        use_formula_recognition=True,
-                        use_chart_recognition=True,  # Enable chart recognition (CPU fallback mode)
-                        layout_threshold=0.5,
+                        use_table_recognition=use_table,
+                        use_formula_recognition=use_formula,
+                        use_chart_recognition=use_chart,
+                        layout_threshold=layout_threshold,
                    )
                    logger.info("PP-StructureV3 engine ready (CPU mode - fallback)")
                else: