diff --git a/backend/app/services/ocr_service.py b/backend/app/services/ocr_service.py
index b64377f..db0b881 100644
--- a/backend/app/services/ocr_service.py
+++ b/backend/app/services/ocr_service.py
@@ -9,12 +9,20 @@
 from pathlib import Path
 from typing import Dict, List, Optional, Tuple, Union
 from datetime import datetime
 import uuid
+import gc  # For garbage collection
 
 from paddleocr import PaddleOCR, PPStructureV3
 from PIL import Image
 from pdf2image import convert_from_path
 import paddle
 
+# Optional torch import for additional GPU memory management
+try:
+    import torch
+    TORCH_AVAILABLE = True
+except ImportError:
+    TORCH_AVAILABLE = False
+
 from app.core.config import settings
 from app.services.office_converter import OfficeConverter, OfficeConverterError
@@ -401,6 +409,78 @@ class OCRService:
 
         return self.structure_engine
 
+    def cleanup_gpu_memory(self):
+        """
+        Clean up GPU memory to prevent OOM errors.
+
+        This should be called after processing each document or batch.
+        Uses PaddlePaddle's built-in memory management, plus torch's if it is installed.
+        """
+        try:
+            # Clear the PyTorch GPU cache if torch is available
+            if TORCH_AVAILABLE and torch.cuda.is_available():
+                torch.cuda.empty_cache()
+                torch.cuda.synchronize()
+                logger.debug("Cleared PyTorch GPU cache")
+
+            # Clear the PaddlePaddle GPU cache
+            if paddle.device.is_compiled_with_cuda():
+                paddle.device.cuda.empty_cache()
+                logger.debug("Cleared PaddlePaddle GPU cache")
+
+            # Force garbage collection
+            gc.collect()
+
+            # Log the current GPU memory status
+            if TORCH_AVAILABLE and torch.cuda.is_available():
+                allocated_mb = torch.cuda.memory_allocated() / 1024**2
+                reserved_mb = torch.cuda.memory_reserved() / 1024**2
+                logger.debug(f"GPU memory after cleanup - Allocated: {allocated_mb:.1f}MB, Reserved: {reserved_mb:.1f}MB")
+
+        except Exception as e:
+            logger.warning(f"GPU memory cleanup failed (non-critical): {e}")
+            # Don't fail the processing if cleanup fails
+
+    def check_gpu_memory(self, required_mb: int = 2000) -> bool:
+        """
+        Check if sufficient GPU memory is available.
+
+        Args:
+            required_mb: Required memory in MB (default 2000MB for OCR models)
+
+        Returns:
+            True if sufficient memory is available or no GPU is in use
+        """
+        try:
+            # Check GPU memory using torch if available, otherwise use PaddlePaddle
+            free_memory = None
+
+            if TORCH_AVAILABLE and torch.cuda.is_available():
+                free_memory = torch.cuda.mem_get_info()[0] / 1024**2
+            elif paddle.device.is_compiled_with_cuda():
+                # PaddlePaddle doesn't have a direct API to get free memory,
+                # so we rely on cleanup and continue
+                logger.debug("Using PaddlePaddle GPU, memory info not directly available")
+                return True
+
+            if free_memory is not None:
+                if free_memory < required_mb:
+                    logger.warning(f"Low GPU memory: {free_memory:.0f}MB available, {required_mb}MB required")
+                    # Try to free memory
+                    self.cleanup_gpu_memory()
+                    # Check again
+                    if TORCH_AVAILABLE and torch.cuda.is_available():
+                        free_memory = torch.cuda.mem_get_info()[0] / 1024**2
+                        if free_memory < required_mb:
+                            logger.error(f"Insufficient GPU memory after cleanup: {free_memory:.0f}MB")
+                            return False
+                logger.debug(f"GPU memory check passed: {free_memory:.0f}MB available")
+
+            return True
+        except Exception as e:
+            logger.warning(f"GPU memory check failed: {e}")
+            return True  # Continue processing even if the check fails
+
     def convert_pdf_to_images(self, pdf_path: Path, output_dir: Path) -> List[Path]:
         """
         Convert PDF to images (one per page)
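Taken together, the two new methods are meant to bracket every heavy OCR call: check before, release after. A minimal sketch of that calling pattern as a caller might use it (the entry point process_image and the input path below are placeholders, not names from this patch):

    from pathlib import Path

    from app.services.ocr_service import OCRService

    service = OCRService()  # constructor arguments, if any, omitted
    image = Path("/tmp/page_001.png")  # placeholder input

    # check_gpu_memory() never raises: it warns, optionally triggers a
    # cleanup, and returns a bool, leaving the caller to decide whether
    # low memory is fatal.
    if not service.check_gpu_memory(required_mb=1500):
        print("low GPU memory; continuing anyway")

    try:
        result = service.process_image(image, lang="en")  # hypothetical entry point
    finally:
        # Release cached GPU memory on success and on error alike.
        service.cleanup_gpu_memory()

The remaining hunks wire these same calls into the service's own processing paths: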
@@ -587,6 +667,10 @@ class OCRService:
         # Get OCR engine (for non-PDF images)
         ocr_engine = self.get_ocr_engine(lang)
 
+        # Check GPU memory before OCR processing
+        if not self.check_gpu_memory(required_mb=1500):
+            logger.warning("Insufficient GPU memory for OCR, attempting to proceed anyway")
+
         # Get the actual image dimensions that OCR will use
         from PIL import Image
         with Image.open(image_path) as img:
@@ -686,6 +770,9 @@ class OCRService:
                 f"{processing_time:.2f}s"
             )
 
+            # Clean up GPU memory after processing
+            self.cleanup_gpu_memory()
+
             return result
 
         except Exception as e:
@@ -804,6 +891,8 @@ class OCRService:
                         'bbox': elem['bbox']
                     })
 
+                # Clean up GPU memory after enhanced processing
+                self.cleanup_gpu_memory()
                 return layout_data, images_metadata
             else:
                 logger.info("parsing_res_list not available, using standard processing")
@@ -815,6 +904,11 @@ class OCRService:
 
         # Standard processing (original implementation)
         logger.info(f"Running standard layout analysis on {image_path.name}")
+
+        # Check GPU memory before processing
+        if not self.check_gpu_memory(required_mb=2000):
+            logger.warning("Insufficient GPU memory for PP-StructureV3, attempting to proceed anyway")
+
         results = structure_engine.predict(str(image_path))
 
         layout_elements = []
@@ -910,6 +1004,8 @@ class OCRService:
                 'reading_order': list(range(len(layout_elements))),
             }
             logger.info(f"Detected {len(layout_elements)} layout elements")
+            # Clean up GPU memory after standard processing
+            self.cleanup_gpu_memory()
             return layout_data, images_metadata
         else:
             logger.warning("No layout elements detected")
@@ -1135,6 +1231,10 @@ class OCRService:
 
             # Combine results
             combined_result = self._combine_results(all_results)
             combined_result['filename'] = file_path.name
+
+            # Clean up GPU memory after processing all pages
+            self.cleanup_gpu_memory()
+
             return combined_result
         else:
diff --git a/backend/app/services/pp_structure_enhanced.py b/backend/app/services/pp_structure_enhanced.py
index f1339d5..c886dd6 100644
--- a/backend/app/services/pp_structure_enhanced.py
+++ b/backend/app/services/pp_structure_enhanced.py
@@ -9,7 +9,16 @@
 import logging
 from pathlib import Path
 from typing import Dict, List, Optional, Tuple, Any
 import json
+import gc
+# Optional torch import for additional GPU memory management
+try:
+    import torch
+    TORCH_AVAILABLE = True
+except ImportError:
+    TORCH_AVAILABLE = False
+
+import paddle
 from paddleocr import PPStructureV3
 
 from app.models.unified_document import ElementType
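Both services now lean on the same primitive: torch.cuda.empty_cache() returns blocks that the caching allocator holds but no live tensor uses, which is also why the cleanup logging above reports allocated and reserved separately. A standalone sketch, independent of this repository, that makes the effect visible (assumes a CUDA-enabled build of torch):

    import gc

    import torch

    def report(label: str) -> None:
        # memory_allocated: bytes held by live tensors.
        # memory_reserved: bytes held by the caching allocator,
        # including freed-but-cached blocks.
        allocated = torch.cuda.memory_allocated() / 1024**2
        reserved = torch.cuda.memory_reserved() / 1024**2
        print(f"{label}: allocated={allocated:.1f}MB reserved={reserved:.1f}MB")

    if torch.cuda.is_available():
        x = torch.empty(256, 1024, 1024, device="cuda")  # ~1 GiB of float32
        report("after alloc")
        del x
        gc.collect()
        report("after del")          # allocated drops; reserved stays cached
        torch.cuda.empty_cache()
        report("after empty_cache")  # reserved is returned to the driver

The second hunk applies the same release sequence on the error path: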
@@ -155,6 +164,18 @@ class PPStructureEnhanced:
             logger.error(f"Enhanced PP-StructureV3 analysis error: {e}")
             import traceback
             traceback.print_exc()
+
+            # Clean up GPU memory on error
+            try:
+                if TORCH_AVAILABLE and torch.cuda.is_available():
+                    torch.cuda.empty_cache()
+                    torch.cuda.synchronize()
+                if paddle.device.is_compiled_with_cuda():
+                    paddle.device.cuda.empty_cache()
+                gc.collect()
+            except Exception:
+                pass  # Ignore cleanup errors
+
             return {
                 'elements': [],
                 'total_elements': 0,
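The error-path block above repeats the body of OCRService.cleanup_gpu_memory almost line for line. A natural follow-up, sketched here as a suggestion rather than as part of the patch, is to hoist the sequence into one shared helper and call it from both services; the module path app/services/gpu_memory.py and the name release_gpu_caches are hypothetical:

    # app/services/gpu_memory.py (hypothetical location)
    import gc
    import logging

    import paddle

    try:
        import torch
        TORCH_AVAILABLE = True
    except ImportError:
        TORCH_AVAILABLE = False

    logger = logging.getLogger(__name__)


    def release_gpu_caches() -> None:
        """Best-effort release of cached GPU memory; logs instead of raising."""
        try:
            if TORCH_AVAILABLE and torch.cuda.is_available():
                torch.cuda.empty_cache()
                torch.cuda.synchronize()
            if paddle.device.is_compiled_with_cuda():
                paddle.device.cuda.empty_cache()
            gc.collect()
        except Exception as e:
            logger.warning(f"GPU cache release failed (non-critical): {e}")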