fix: make torch import optional and add PaddlePaddle GPU memory management
Problem:
- Backend failed to start with ModuleNotFoundError for the torch module
- torch was imported as a hard dependency but is not in requirements.txt
- The project uses PaddlePaddle, which has its own CUDA implementation

Changes:
- Make the torch import optional with try/except in ocr_service.py
- Make the torch import optional in pp_structure_enhanced.py
- Add a cleanup_gpu_memory() method using PaddlePaddle's memory management
- Add a check_gpu_memory() method to monitor available GPU memory
- Use paddle.device.cuda.empty_cache() for GPU cleanup
- Use torch.cuda only if the TORCH_AVAILABLE flag is True
- Add cleanup calls after OCR processing to prevent OOM errors
- Add memory checks before GPU-intensive operations

Benefits:
- Backend can start without torch installed
- GPU memory is properly managed using PaddlePaddle
- Optional torch support provides additional memory monitoring
- Prevents GPU OOM errors during document processing

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
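The new methods are meant to bracket GPU-heavy work: check memory before an OCR pass, release it afterwards. A minimal caller-side sketch of that pattern (illustrative only; run_ocr and image_path are hypothetical stand-ins, and the import path is assumed from the service imports in the diff below):

# Hypothetical caller-side sketch; the diff below shows the real call sites.
from app.services.ocr_service import OCRService  # path assumed, not confirmed by this diff

service = OCRService()

# Advisory check: a False result warns but does not abort processing.
if not service.check_gpu_memory(required_mb=2000):
    print("Low GPU memory, proceeding anyway")

try:
    result = run_ocr(service, image_path)  # run_ocr/image_path are hypothetical stand-ins
finally:
    service.cleanup_gpu_memory()  # release cached GPU blocks even if OCR raised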
ocr_service.py
@@ -9,12 +9,20 @@ from pathlib import Path
from typing import Dict, List, Optional, Tuple, Union
from datetime import datetime
import uuid
import gc  # For garbage collection

from paddleocr import PaddleOCR, PPStructureV3
from PIL import Image
from pdf2image import convert_from_path
import paddle

# Optional torch import for additional GPU memory management
try:
    import torch
    TORCH_AVAILABLE = True
except ImportError:
    TORCH_AVAILABLE = False

from app.core.config import settings
from app.services.office_converter import OfficeConverter, OfficeConverterError
@@ -401,6 +409,78 @@ class OCRService:

        return self.structure_engine

    def cleanup_gpu_memory(self):
        """
        Clean up GPU memory to prevent OOM errors.

        This should be called after processing each document or batch.
        Uses PaddlePaddle's built-in memory management and optionally torch if available.
        """
        try:
            # Clear PyTorch GPU cache if torch is available
            if TORCH_AVAILABLE and torch.cuda.is_available():
                torch.cuda.empty_cache()
                torch.cuda.synchronize()
                logger.debug("Cleared PyTorch GPU cache")

            # Clear PaddlePaddle GPU cache
            if paddle.device.is_compiled_with_cuda():
                paddle.device.cuda.empty_cache()
                logger.debug("Cleared PaddlePaddle GPU cache")

            # Force garbage collection
            gc.collect()

            # Log current GPU memory status
            if TORCH_AVAILABLE and torch.cuda.is_available():
                allocated_mb = torch.cuda.memory_allocated() / 1024**2
                reserved_mb = torch.cuda.memory_reserved() / 1024**2
                logger.debug(f"GPU memory after cleanup - Allocated: {allocated_mb:.1f}MB, Reserved: {reserved_mb:.1f}MB")

        except Exception as e:
            logger.warning(f"GPU memory cleanup failed (non-critical): {e}")
            # Don't fail the processing if cleanup fails

    def check_gpu_memory(self, required_mb: int = 2000) -> bool:
        """
        Check if sufficient GPU memory is available.

        Args:
            required_mb: Required memory in MB (default 2000MB for OCR models)

        Returns:
            True if sufficient memory is available or GPU is not used
        """
        try:
            # Check GPU memory using torch if available, otherwise use PaddlePaddle
            free_memory = None

            if TORCH_AVAILABLE and torch.cuda.is_available():
                free_memory = torch.cuda.mem_get_info()[0] / 1024**2
            elif paddle.device.is_compiled_with_cuda():
                # PaddlePaddle doesn't have a direct API to get free memory,
                # so we rely on cleanup and continue
                logger.debug("Using PaddlePaddle GPU, memory info not directly available")
                return True

            if free_memory is not None:
                if free_memory < required_mb:
                    logger.warning(f"Low GPU memory: {free_memory:.0f}MB available, {required_mb}MB required")
                    # Try to free memory
                    self.cleanup_gpu_memory()
                    # Check again
                    if TORCH_AVAILABLE and torch.cuda.is_available():
                        free_memory = torch.cuda.mem_get_info()[0] / 1024**2
                        if free_memory < required_mb:
                            logger.error(f"Insufficient GPU memory after cleanup: {free_memory:.0f}MB")
                            return False
                logger.debug(f"GPU memory check passed: {free_memory:.0f}MB available")

            return True
        except Exception as e:
            logger.warning(f"GPU memory check failed: {e}")
            return True  # Continue processing even if check fails

    def convert_pdf_to_images(self, pdf_path: Path, output_dir: Path) -> List[Path]:
        """
        Convert PDF to images (one per page)
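One limitation acknowledged in the hunk above: the Paddle-only branch returns True unconditionally, because PaddlePaddle exposes allocated/reserved counters but no free-memory query. When torch is absent, NVML can supply an actual free-memory figure; a minimal sketch assuming the pynvml package (nvidia-ml-py) and GPU index 0, neither of which is part of this commit:

# Sketch: query free GPU memory via NVML, usable when torch is not installed.
# Assumes `pynvml` (nvidia-ml-py) is installed; not part of this diff.
import pynvml

def free_gpu_memory_mb(device_index: int = 0) -> float:
    pynvml.nvmlInit()
    try:
        handle = pynvml.nvmlDeviceGetHandleByIndex(device_index)
        info = pynvml.nvmlDeviceGetMemoryInfo(handle)
        return info.free / 1024**2  # NVML reports bytes
    finally:
        pynvml.nvmlShutdown()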
@@ -587,6 +667,10 @@ class OCRService:
        # Get OCR engine (for non-PDF images)
        ocr_engine = self.get_ocr_engine(lang)

        # Check GPU memory before OCR processing
        if not self.check_gpu_memory(required_mb=1500):
            logger.warning("Insufficient GPU memory for OCR, attempting to proceed anyway")

        # Get the actual image dimensions that OCR will use
        from PIL import Image
        with Image.open(image_path) as img:
@@ -686,6 +770,9 @@ class OCRService:
                f"{processing_time:.2f}s"
            )

            # Clean up GPU memory after processing
            self.cleanup_gpu_memory()

            return result

        except Exception as e:
@@ -804,6 +891,8 @@ class OCRService:
                        'bbox': elem['bbox']
                    })

                # Clean up GPU memory after enhanced processing
                self.cleanup_gpu_memory()
                return layout_data, images_metadata
            else:
                logger.info("parsing_res_list not available, using standard processing")
@@ -815,6 +904,11 @@ class OCRService:

        # Standard processing (original implementation)
        logger.info(f"Running standard layout analysis on {image_path.name}")

        # Check GPU memory before processing
        if not self.check_gpu_memory(required_mb=2000):
            logger.warning("Insufficient GPU memory for PP-StructureV3, attempting to proceed anyway")

        results = structure_engine.predict(str(image_path))

        layout_elements = []
@@ -910,6 +1004,8 @@ class OCRService:
                'reading_order': list(range(len(layout_elements))),
            }
            logger.info(f"Detected {len(layout_elements)} layout elements")
            # Clean up GPU memory after standard processing
            self.cleanup_gpu_memory()
            return layout_data, images_metadata
        else:
            logger.warning("No layout elements detected")
@@ -1135,6 +1231,10 @@ class OCRService:
            # Combine results
            combined_result = self._combine_results(all_results)
            combined_result['filename'] = file_path.name

            # Clean up GPU memory after processing all pages
            self.cleanup_gpu_memory()

            return combined_result

        else:
pp_structure_enhanced.py
@@ -9,7 +9,16 @@ import logging
from pathlib import Path
from typing import Dict, List, Optional, Tuple, Any
import json
import gc

# Optional torch import for additional GPU memory management
try:
    import torch
    TORCH_AVAILABLE = True
except ImportError:
    TORCH_AVAILABLE = False

import paddle
from paddleocr import PPStructureV3
from app.models.unified_document import ElementType
@@ -155,6 +164,18 @@ class PPStructureEnhanced:
            logger.error(f"Enhanced PP-StructureV3 analysis error: {e}")
            import traceback
            traceback.print_exc()

            # Clean up GPU memory on error
            try:
                if TORCH_AVAILABLE and torch.cuda.is_available():
                    torch.cuda.empty_cache()
                    torch.cuda.synchronize()
                if paddle.device.is_compiled_with_cuda():
                    paddle.device.cuda.empty_cache()
                gc.collect()
            except Exception:
                pass  # Ignore cleanup errors

            return {
                'elements': [],
                'total_elements': 0,
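The guarded torch/paddle cleanup block now appears in both ocr_service.py and pp_structure_enhanced.py. A natural follow-up would be to factor it into a shared helper; a sketch under that assumption (module and function names are hypothetical, not part of this commit):

# gpu_memory.py (hypothetical shared module, not part of this commit)
import gc

import paddle

try:
    import torch
    TORCH_AVAILABLE = True
except ImportError:
    TORCH_AVAILABLE = False


def release_gpu_memory() -> None:
    """Best-effort release of cached GPU memory from torch and paddle."""
    if TORCH_AVAILABLE and torch.cuda.is_available():
        torch.cuda.empty_cache()
        torch.cuda.synchronize()
    if paddle.device.is_compiled_with_cuda():
        paddle.device.cuda.empty_cache()
    gc.collect()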