diff --git a/backend/app/services/ocr_service.py b/backend/app/services/ocr_service.py
index b64377f..db0b881 100644
--- a/backend/app/services/ocr_service.py
+++ b/backend/app/services/ocr_service.py
@@ -9,12 +9,20 @@
 from pathlib import Path
 from typing import Dict, List, Optional, Tuple, Union
 from datetime import datetime
 import uuid
+import gc  # For garbage collection
 
 from paddleocr import PaddleOCR, PPStructureV3
 from PIL import Image
 from pdf2image import convert_from_path
 import paddle
 
+# Optional torch import for additional GPU memory management
+try:
+    import torch
+    TORCH_AVAILABLE = True
+except ImportError:
+    TORCH_AVAILABLE = False
+
 from app.core.config import settings
 from app.services.office_converter import OfficeConverter, OfficeConverterError
@@ -401,6 +409,78 @@ class OCRService:
 
         return self.structure_engine
 
+    def cleanup_gpu_memory(self):
+        """
+        Clean up GPU memory to prevent OOM errors.
+
+        This should be called after processing each document or batch.
+        Uses PaddlePaddle's built-in memory management, plus torch's if it is installed.
+        """
+        try:
+            # Clear the PyTorch GPU cache if torch is available
+            if TORCH_AVAILABLE and torch.cuda.is_available():
+                torch.cuda.empty_cache()
+                torch.cuda.synchronize()
+                logger.debug("Cleared PyTorch GPU cache")
+
+            # Clear the PaddlePaddle GPU cache
+            if paddle.device.is_compiled_with_cuda():
+                paddle.device.cuda.empty_cache()
+                logger.debug("Cleared PaddlePaddle GPU cache")
+
+            # Force garbage collection
+            gc.collect()
+
+            # Log the current GPU memory status
+            if TORCH_AVAILABLE and torch.cuda.is_available():
+                allocated_mb = torch.cuda.memory_allocated() / 1024**2
+                reserved_mb = torch.cuda.memory_reserved() / 1024**2
+                logger.debug(f"GPU memory after cleanup - Allocated: {allocated_mb:.1f}MB, Reserved: {reserved_mb:.1f}MB")
+
+        except Exception as e:
+            logger.warning(f"GPU memory cleanup failed (non-critical): {e}")
+            # Don't fail the processing if cleanup fails
+
+    def check_gpu_memory(self, required_mb: int = 2000) -> bool:
+        """
+        Check if sufficient GPU memory is available.
+
+        Args:
+            required_mb: Required memory in MB (default 2000MB for OCR models)
+
+        Returns:
+            True if sufficient memory is available or no GPU is in use
+        """
+        try:
+            # Check GPU memory using torch if available, otherwise use PaddlePaddle
+            free_memory = None
+
+            if TORCH_AVAILABLE and torch.cuda.is_available():
+                free_memory = torch.cuda.mem_get_info()[0] / 1024**2
+            elif paddle.device.is_compiled_with_cuda():
+                # PaddlePaddle doesn't have a direct API to get free memory,
+                # so we rely on cleanup and continue
+                logger.debug("Using PaddlePaddle GPU, memory info not directly available")
+                return True
+
+            if free_memory is not None:
+                if free_memory < required_mb:
+                    logger.warning(f"Low GPU memory: {free_memory:.0f}MB available, {required_mb}MB required")
+                    # Try to free memory
+                    self.cleanup_gpu_memory()
+                    # Check again
+                    if TORCH_AVAILABLE and torch.cuda.is_available():
+                        free_memory = torch.cuda.mem_get_info()[0] / 1024**2
+                        if free_memory < required_mb:
+                            logger.error(f"Insufficient GPU memory after cleanup: {free_memory:.0f}MB")
+                            return False
+                logger.debug(f"GPU memory check passed: {free_memory:.0f}MB available")
+
+            return True
+        except Exception as e:
+            logger.warning(f"GPU memory check failed: {e}")
+            return True  # Continue processing even if the check fails
+
     def convert_pdf_to_images(self, pdf_path: Path, output_dir: Path) -> List[Path]:
         """
         Convert PDF to images (one per page)
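Taken together, the two new methods are meant to bracket every heavy OCR call: check before, release after. A minimal sketch of that calling pattern as a caller might use it (the entry point process_image and the input path below are placeholders, not names from this patch):

    from pathlib import Path

    from app.services.ocr_service import OCRService

    service = OCRService()  # constructor arguments, if any, omitted
    image = Path("/tmp/page_001.png")  # placeholder input

    # check_gpu_memory() never raises: it warns, optionally triggers a
    # cleanup, and returns a bool, leaving the caller to decide whether
    # low memory is fatal.
    if not service.check_gpu_memory(required_mb=1500):
        print("low GPU memory; continuing anyway")

    try:
        result = service.process_image(image, lang="en")  # hypothetical entry point
    finally:
        # Release cached GPU memory on success and on error alike.
        service.cleanup_gpu_memory()

The remaining hunks wire these same calls into the service's own processing paths: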
@@ -587,6 +667,10 @@ class OCRService:
         # Get OCR engine (for non-PDF images)
         ocr_engine = self.get_ocr_engine(lang)
 
+        # Check GPU memory before OCR processing
+        if not self.check_gpu_memory(required_mb=1500):
+            logger.warning("Insufficient GPU memory for OCR, attempting to proceed anyway")
+
         # Get the actual image dimensions that OCR will use
         from PIL import Image
         with Image.open(image_path) as img:
@@ -686,6 +770,9 @@ class OCRService:
                 f"{processing_time:.2f}s"
             )
 
+            # Clean up GPU memory after processing
+            self.cleanup_gpu_memory()
+
             return result
 
         except Exception as e:
@@ -804,6 +891,8 @@ class OCRService:
                         'bbox': elem['bbox']
                     })
 
+                # Clean up GPU memory after enhanced processing
+                self.cleanup_gpu_memory()
                 return layout_data, images_metadata
             else:
                 logger.info("parsing_res_list not available, using standard processing")
@@ -815,6 +904,11 @@ class OCRService:
 
         # Standard processing (original implementation)
         logger.info(f"Running standard layout analysis on {image_path.name}")
+
+        # Check GPU memory before processing
+        if not self.check_gpu_memory(required_mb=2000):
+            logger.warning("Insufficient GPU memory for PP-StructureV3, attempting to proceed anyway")
+
         results = structure_engine.predict(str(image_path))
 
         layout_elements = []
@@ -910,6 +1004,8 @@ class OCRService:
                 'reading_order': list(range(len(layout_elements))),
             }
             logger.info(f"Detected {len(layout_elements)} layout elements")
+            # Clean up GPU memory after standard processing
+            self.cleanup_gpu_memory()
             return layout_data, images_metadata
         else:
             logger.warning("No layout elements detected")
@@ -1135,6 +1231,10 @@ class OCRService:
 
             # Combine results
             combined_result = self._combine_results(all_results)
             combined_result['filename'] = file_path.name
+
+            # Clean up GPU memory after processing all pages
+            self.cleanup_gpu_memory()
+
             return combined_result
         else:
diff --git a/backend/app/services/pp_structure_enhanced.py b/backend/app/services/pp_structure_enhanced.py
index f1339d5..c886dd6 100644
--- a/backend/app/services/pp_structure_enhanced.py
+++ b/backend/app/services/pp_structure_enhanced.py
@@ -9,7 +9,16 @@
 import logging
 from pathlib import Path
 from typing import Dict, List, Optional, Tuple, Any
 import json
+import gc
+# Optional torch import for additional GPU memory management
+try:
+    import torch
+    TORCH_AVAILABLE = True
+except ImportError:
+    TORCH_AVAILABLE = False
+
+import paddle
 from paddleocr import PPStructureV3
 
 from app.models.unified_document import ElementType
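Both services now lean on the same primitive: torch.cuda.empty_cache() returns blocks that the caching allocator holds but no live tensor uses, which is also why the cleanup logging above reports allocated and reserved separately. A standalone sketch, independent of this repository, that makes the effect visible (assumes a CUDA-enabled build of torch):

    import gc

    import torch

    def report(label: str) -> None:
        # memory_allocated: bytes held by live tensors.
        # memory_reserved: bytes held by the caching allocator,
        # including freed-but-cached blocks.
        allocated = torch.cuda.memory_allocated() / 1024**2
        reserved = torch.cuda.memory_reserved() / 1024**2
        print(f"{label}: allocated={allocated:.1f}MB reserved={reserved:.1f}MB")

    if torch.cuda.is_available():
        x = torch.empty(256, 1024, 1024, device="cuda")  # ~1 GiB of float32
        report("after alloc")
        del x
        gc.collect()
        report("after del")          # allocated drops; reserved stays cached
        torch.cuda.empty_cache()
        report("after empty_cache")  # reserved is returned to the driver

The second hunk applies the same release sequence on the error path: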
@@ -155,6 +164,18 @@ class PPStructureEnhanced:
             logger.error(f"Enhanced PP-StructureV3 analysis error: {e}")
             import traceback
             traceback.print_exc()
+
+            # Clean up GPU memory on error
+            try:
+                if TORCH_AVAILABLE and torch.cuda.is_available():
+                    torch.cuda.empty_cache()
+                    torch.cuda.synchronize()
+                if paddle.device.is_compiled_with_cuda():
+                    paddle.device.cuda.empty_cache()
+                gc.collect()
+            except Exception:
+                pass  # Ignore cleanup errors
+
             return {
                 'elements': [],
                 'total_elements': 0,
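The error-path block above repeats the body of OCRService.cleanup_gpu_memory almost line for line. A natural follow-up, sketched here as a suggestion rather than as part of the patch, is to hoist the sequence into one shared helper and call it from both services; the module path app/services/gpu_memory.py and the name release_gpu_caches are hypothetical:

    # app/services/gpu_memory.py (hypothetical location)
    import gc
    import logging

    import paddle

    try:
        import torch
        TORCH_AVAILABLE = True
    except ImportError:
        TORCH_AVAILABLE = False

    logger = logging.getLogger(__name__)


    def release_gpu_caches() -> None:
        """Best-effort release of cached GPU memory; logs instead of raising."""
        try:
            if TORCH_AVAILABLE and torch.cuda.is_available():
                torch.cuda.empty_cache()
                torch.cuda.synchronize()
            if paddle.device.is_compiled_with_cuda():
                paddle.device.cuda.empty_cache()
            gc.collect()
        except Exception as e:
            logger.warning(f"GPU cache release failed (non-critical): {e}")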