fix: make torch import optional and add PaddlePaddle GPU memory management

Problem:
- Backend failed to start with ModuleNotFoundError for the torch module
- torch was imported as a hard dependency but is not listed in requirements.txt
- The project uses PaddlePaddle, which has its own CUDA implementation

Changes:
- Make torch import optional with try/except in ocr_service.py
- Make torch import optional in pp_structure_enhanced.py
- Add cleanup_gpu_memory() method using PaddlePaddle's memory management
- Add check_gpu_memory() method to monitor available GPU memory
- Use paddle.device.cuda.empty_cache() for GPU cleanup
- Use torch.cuda only when the TORCH_AVAILABLE flag is True (see the condensed sketch after this list)
- Add cleanup calls after OCR processing to prevent OOM errors
- Add memory checks before GPU-intensive operations
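
Condensed, the guard pattern these changes apply looks like this (illustrative sketch only; the real methods below also synchronize, log, and force garbage collection):

    try:
        import torch  # optional; used only for extra memory monitoring
        TORCH_AVAILABLE = True
    except ImportError:
        TORCH_AVAILABLE = False

    import paddle

    def cleanup_gpu_memory():
        # torch path runs only when torch is installed and CUDA is usable
        if TORCH_AVAILABLE and torch.cuda.is_available():
            torch.cuda.empty_cache()
        # PaddlePaddle path is the primary cleanup mechanism
        if paddle.device.is_compiled_with_cuda():
            paddle.device.cuda.empty_cache()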

Benefits:
- Backend can start without torch installed
- GPU memory is properly managed using PaddlePaddle
- Optional torch support provides additional memory monitoring
- Prevents GPU OOM errors during document processing

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
Author: egg
Date: 2025-11-20 16:40:44 +08:00
Commit: b997f9355a (parent 7064ea30d5)
2 changed files with 121 additions and 0 deletions

File: ocr_service.py

@@ -9,12 +9,20 @@ from pathlib import Path
from typing import Dict, List, Optional, Tuple, Union
from datetime import datetime
import uuid
import gc  # For garbage collection

from paddleocr import PaddleOCR, PPStructureV3
from PIL import Image
from pdf2image import convert_from_path
import paddle

# Optional torch import for additional GPU memory management
try:
    import torch
    TORCH_AVAILABLE = True
except ImportError:
    TORCH_AVAILABLE = False

from app.core.config import settings
from app.services.office_converter import OfficeConverter, OfficeConverterError
@@ -401,6 +409,78 @@ class OCRService:
        return self.structure_engine

    def cleanup_gpu_memory(self):
        """
        Clean up GPU memory to prevent OOM errors.

        This should be called after processing each document or batch.
        Uses PaddlePaddle's built-in memory management and optionally torch if available.
        """
        try:
            # Clear PyTorch GPU cache if torch is available
            if TORCH_AVAILABLE and torch.cuda.is_available():
                torch.cuda.empty_cache()
                torch.cuda.synchronize()
                logger.debug("Cleared PyTorch GPU cache")

            # Clear PaddlePaddle GPU cache
            if paddle.device.is_compiled_with_cuda():
                paddle.device.cuda.empty_cache()
                logger.debug("Cleared PaddlePaddle GPU cache")

            # Force garbage collection
            gc.collect()

            # Log current GPU memory status
            if TORCH_AVAILABLE and torch.cuda.is_available():
                allocated_mb = torch.cuda.memory_allocated() / 1024**2
                reserved_mb = torch.cuda.memory_reserved() / 1024**2
                logger.debug(f"GPU memory after cleanup - Allocated: {allocated_mb:.1f}MB, Reserved: {reserved_mb:.1f}MB")
        except Exception as e:
            logger.warning(f"GPU memory cleanup failed (non-critical): {e}")
            # Don't fail the processing if cleanup fails

    def check_gpu_memory(self, required_mb: int = 2000) -> bool:
        """
        Check if sufficient GPU memory is available.

        Args:
            required_mb: Required memory in MB (default 2000MB for OCR models)

        Returns:
            True if sufficient memory is available or GPU is not used
        """
        try:
            # Check GPU memory using torch if available, otherwise use PaddlePaddle
            free_memory = None
            if TORCH_AVAILABLE and torch.cuda.is_available():
                free_memory = torch.cuda.mem_get_info()[0] / 1024**2
            elif paddle.device.is_compiled_with_cuda():
                # PaddlePaddle doesn't have a direct API to get free memory,
                # so we rely on cleanup and continue
                logger.debug("Using PaddlePaddle GPU, memory info not directly available")
                return True

            if free_memory is not None:
                if free_memory < required_mb:
                    logger.warning(f"Low GPU memory: {free_memory:.0f}MB available, {required_mb}MB required")
                    # Try to free memory
                    self.cleanup_gpu_memory()
                    # Check again
                    if TORCH_AVAILABLE and torch.cuda.is_available():
                        free_memory = torch.cuda.mem_get_info()[0] / 1024**2
                    if free_memory < required_mb:
                        logger.error(f"Insufficient GPU memory after cleanup: {free_memory:.0f}MB")
                        return False
                logger.debug(f"GPU memory check passed: {free_memory:.0f}MB available")
            return True
        except Exception as e:
            logger.warning(f"GPU memory check failed: {e}")
            return True  # Continue processing even if check fails

    def convert_pdf_to_images(self, pdf_path: Path, output_dir: Path) -> List[Path]:
        """
        Convert PDF to images (one per page)
@@ -587,6 +667,10 @@ class OCRService:
            # Get OCR engine (for non-PDF images)
            ocr_engine = self.get_ocr_engine(lang)

            # Check GPU memory before OCR processing
            if not self.check_gpu_memory(required_mb=1500):
                logger.warning("Insufficient GPU memory for OCR, attempting to proceed anyway")

            # Get the actual image dimensions that OCR will use
            from PIL import Image
            with Image.open(image_path) as img:
@@ -686,6 +770,9 @@ class OCRService:
f"{processing_time:.2f}s" f"{processing_time:.2f}s"
) )
# Clean up GPU memory after processing
self.cleanup_gpu_memory()
return result return result
except Exception as e: except Exception as e:
@@ -804,6 +891,8 @@ class OCRService:
                        'bbox': elem['bbox']
                    })

                # Clean up GPU memory after enhanced processing
                self.cleanup_gpu_memory()

                return layout_data, images_metadata
            else:
                logger.info("parsing_res_list not available, using standard processing")
@@ -815,6 +904,11 @@ class OCRService:
            # Standard processing (original implementation)
            logger.info(f"Running standard layout analysis on {image_path.name}")

            # Check GPU memory before processing
            if not self.check_gpu_memory(required_mb=2000):
                logger.warning("Insufficient GPU memory for PP-StructureV3, attempting to proceed anyway")

            results = structure_engine.predict(str(image_path))

            layout_elements = []
@@ -910,6 +1004,8 @@ class OCRService:
                    'reading_order': list(range(len(layout_elements))),
                }

                logger.info(f"Detected {len(layout_elements)} layout elements")

                # Clean up GPU memory after standard processing
                self.cleanup_gpu_memory()

                return layout_data, images_metadata
            else:
                logger.warning("No layout elements detected")
@@ -1135,6 +1231,10 @@ class OCRService:
            # Combine results
            combined_result = self._combine_results(all_results)
            combined_result['filename'] = file_path.name

            # Clean up GPU memory after processing all pages
            self.cleanup_gpu_memory()

            return combined_result
        else:
File: pp_structure_enhanced.py

@@ -9,7 +9,16 @@ import logging
from pathlib import Path
from typing import Dict, List, Optional, Tuple, Any
import json
import gc

# Optional torch import for additional GPU memory management
try:
    import torch
    TORCH_AVAILABLE = True
except ImportError:
    TORCH_AVAILABLE = False

import paddle

from paddleocr import PPStructureV3

from app.models.unified_document import ElementType
@@ -155,6 +164,18 @@ class PPStructureEnhanced:
logger.error(f"Enhanced PP-StructureV3 analysis error: {e}") logger.error(f"Enhanced PP-StructureV3 analysis error: {e}")
import traceback import traceback
traceback.print_exc() traceback.print_exc()
# Clean up GPU memory on error
try:
if TORCH_AVAILABLE and torch.cuda.is_available():
torch.cuda.empty_cache()
torch.cuda.synchronize()
if paddle.device.is_compiled_with_cuda():
paddle.device.cuda.empty_cache()
gc.collect()
except:
pass # Ignore cleanup errors
return { return {
'elements': [], 'elements': [],
'total_elements': 0, 'total_elements': 0,