fix: make torch import optional and add PaddlePaddle GPU memory management

Problem:
- Backend failed to start with a ModuleNotFoundError for the torch module
- torch was imported as a hard dependency but is not listed in requirements.txt
- The project uses PaddlePaddle, which ships its own CUDA integration

Changes:
- Make torch import optional with try/except in ocr_service.py (see the condensed sketch below)
- Make torch import optional in pp_structure_enhanced.py
- Add cleanup_gpu_memory() method using PaddlePaddle's memory management
- Add check_gpu_memory() method to monitor available GPU memory
- Use paddle.device.cuda.empty_cache() for GPU cleanup
- Use torch.cuda only if TORCH_AVAILABLE flag is True
- Add cleanup calls after OCR processing to prevent OOM errors
- Add memory checks before GPU-intensive operations
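
In condensed form, the pattern applied in both files (a minimal sketch distilled
from the diff below, not additional code):

    import gc
    import paddle

    # torch is optional: degrade gracefully when it is not installed
    try:
        import torch
        TORCH_AVAILABLE = True
    except ImportError:
        TORCH_AVAILABLE = False

    def cleanup_gpu_memory():
        if TORCH_AVAILABLE and torch.cuda.is_available():
            torch.cuda.empty_cache()          # extra cleanup, only when torch exists
        if paddle.device.is_compiled_with_cuda():
            paddle.device.cuda.empty_cache()  # primary path: PaddlePaddle's own cache
        gc.collect()                          # drop lingering Python references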

Benefits:
- Backend can start without torch installed
- GPU memory is properly managed using PaddlePaddle
- Optional torch support provides additional memory monitoring
- Prevents GPU OOM errors during document processing

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
Author: egg
Date: 2025-11-20 16:40:44 +08:00
Commit: b997f9355a (parent: 7064ea30d5)
2 changed files with 121 additions and 0 deletions

File: ocr_service.py

@@ -9,12 +9,20 @@ from pathlib import Path
from typing import Dict, List, Optional, Tuple, Union
from datetime import datetime
import uuid
import gc # For garbage collection
from paddleocr import PaddleOCR, PPStructureV3
from PIL import Image
from pdf2image import convert_from_path
import paddle
# Optional torch import for additional GPU memory management
try:
    import torch
    TORCH_AVAILABLE = True
except ImportError:
    TORCH_AVAILABLE = False

from app.core.config import settings
from app.services.office_converter import OfficeConverter, OfficeConverterError
@@ -401,6 +409,78 @@ class OCRService:
        return self.structure_engine

    def cleanup_gpu_memory(self):
        """
        Clean up GPU memory to prevent OOM errors.

        This should be called after processing each document or batch.
        Uses PaddlePaddle's built-in memory management and optionally torch if available.
        """
        try:
            # Clear PyTorch GPU cache if torch is available
            if TORCH_AVAILABLE and torch.cuda.is_available():
                torch.cuda.empty_cache()
                torch.cuda.synchronize()
                logger.debug("Cleared PyTorch GPU cache")

            # Clear PaddlePaddle GPU cache
            if paddle.device.is_compiled_with_cuda():
                paddle.device.cuda.empty_cache()
                logger.debug("Cleared PaddlePaddle GPU cache")

            # Force garbage collection
            gc.collect()

            # Log current GPU memory status
            if TORCH_AVAILABLE and torch.cuda.is_available():
                allocated_mb = torch.cuda.memory_allocated() / 1024**2
                reserved_mb = torch.cuda.memory_reserved() / 1024**2
                logger.debug(f"GPU memory after cleanup - Allocated: {allocated_mb:.1f}MB, Reserved: {reserved_mb:.1f}MB")
        except Exception as e:
            logger.warning(f"GPU memory cleanup failed (non-critical): {e}")
            # Don't fail the processing if cleanup fails

    def check_gpu_memory(self, required_mb: int = 2000) -> bool:
        """
        Check if sufficient GPU memory is available.

        Args:
            required_mb: Required memory in MB (default 2000MB for OCR models)

        Returns:
            True if sufficient memory is available or GPU is not used
        """
        try:
            # Check GPU memory using torch if available, otherwise use PaddlePaddle
            free_memory = None
            if TORCH_AVAILABLE and torch.cuda.is_available():
                free_memory = torch.cuda.mem_get_info()[0] / 1024**2
            elif paddle.device.is_compiled_with_cuda():
                # PaddlePaddle doesn't expose an API for free memory directly,
                # so we rely on cleanup and continue
                logger.debug("Using PaddlePaddle GPU, memory info not directly available")
                return True

            if free_memory is not None:
                if free_memory < required_mb:
                    logger.warning(f"Low GPU memory: {free_memory:.0f}MB available, {required_mb}MB required")
                    # Try to free memory
                    self.cleanup_gpu_memory()
                    # Check again
                    if TORCH_AVAILABLE and torch.cuda.is_available():
                        free_memory = torch.cuda.mem_get_info()[0] / 1024**2
                        if free_memory < required_mb:
                            logger.error(f"Insufficient GPU memory after cleanup: {free_memory:.0f}MB")
                            return False
                logger.debug(f"GPU memory check passed: {free_memory:.0f}MB available")
            return True
        except Exception as e:
            logger.warning(f"GPU memory check failed: {e}")
            return True  # Continue processing even if check fails

    def convert_pdf_to_images(self, pdf_path: Path, output_dir: Path) -> List[Path]:
        """
        Convert PDF to images (one per page)
@@ -587,6 +667,10 @@ class OCRService:
        # Get OCR engine (for non-PDF images)
        ocr_engine = self.get_ocr_engine(lang)

        # Check GPU memory before OCR processing
        if not self.check_gpu_memory(required_mb=1500):
            logger.warning("Insufficient GPU memory for OCR, attempting to proceed anyway")

        # Get the actual image dimensions that OCR will use
        from PIL import Image
        with Image.open(image_path) as img:
@@ -686,6 +770,9 @@ class OCRService:
f"{processing_time:.2f}s"
)
# Clean up GPU memory after processing
self.cleanup_gpu_memory()
return result
except Exception as e:
@@ -804,6 +891,8 @@ class OCRService:
                        'bbox': elem['bbox']
                    })

                # Clean up GPU memory after enhanced processing
                self.cleanup_gpu_memory()

                return layout_data, images_metadata
            else:
                logger.info("parsing_res_list not available, using standard processing")
@@ -815,6 +904,11 @@ class OCRService:
            # Standard processing (original implementation)
            logger.info(f"Running standard layout analysis on {image_path.name}")

            # Check GPU memory before processing
            if not self.check_gpu_memory(required_mb=2000):
                logger.warning("Insufficient GPU memory for PP-StructureV3, attempting to proceed anyway")

            results = structure_engine.predict(str(image_path))

            layout_elements = []
@@ -910,6 +1004,8 @@ class OCRService:
                    'reading_order': list(range(len(layout_elements))),
                }

                logger.info(f"Detected {len(layout_elements)} layout elements")

                # Clean up GPU memory after standard processing
                self.cleanup_gpu_memory()

                return layout_data, images_metadata
            else:
                logger.warning("No layout elements detected")
@@ -1135,6 +1231,10 @@ class OCRService:
            # Combine results
            combined_result = self._combine_results(all_results)
            combined_result['filename'] = file_path.name

            # Clean up GPU memory after processing all pages
            self.cleanup_gpu_memory()

            return combined_result
        else:

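For context, a hypothetical caller of the two new methods (a sketch only: the
OCRService entry point is named process_image here for illustration, while the
real call sites are the inline ones shown in the diff above):

    service = OCRService()
    if not service.check_gpu_memory(required_mb=1500):
        # Non-fatal by design: the check logs, tries cleanup, and proceeds
        logger.warning("Low GPU memory, proceeding anyway")
    result = service.process_image(image_path)   # hypothetical entry point
    service.cleanup_gpu_memory()                 # release cached GPU blocks after work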
File: pp_structure_enhanced.py

@@ -9,7 +9,16 @@ import logging
from pathlib import Path
from typing import Dict, List, Optional, Tuple, Any
import json
import gc

# Optional torch import for additional GPU memory management
try:
    import torch
    TORCH_AVAILABLE = True
except ImportError:
    TORCH_AVAILABLE = False

import paddle
from paddleocr import PPStructureV3
from app.models.unified_document import ElementType
@@ -155,6 +164,18 @@ class PPStructureEnhanced:
logger.error(f"Enhanced PP-StructureV3 analysis error: {e}")
import traceback
traceback.print_exc()
# Clean up GPU memory on error
try:
if TORCH_AVAILABLE and torch.cuda.is_available():
torch.cuda.empty_cache()
torch.cuda.synchronize()
if paddle.device.is_compiled_with_cuda():
paddle.device.cuda.empty_cache()
gc.collect()
except:
pass # Ignore cleanup errors
return {
'elements': [],
'total_elements': 0,