Backend:
- Add hybrid image extraction for Direct track (inline image blocks)
- Add render_inline_image_regions() fallback when OCR doesn't find images
- Add check_document_for_missing_images() for detecting missing images
- Add memory management system (MemoryGuard, ModelManager, ServicePool)
- Update pdf_generator_service to handle HYBRID processing track
- Add ElementType.LOGO for logo extraction

Frontend:
- Fix PDF viewer re-rendering issues with memoization
- Add TaskNotFound component and useTaskValidation hook
- Disable StrictMode due to react-pdf incompatibility
- Fix task detail and results page loading states

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
381 lines
13 KiB
Python
381 lines
13 KiB
Python
"""
|
|
Tests for OCR Service Memory Integration

Tests the integration of MemoryGuard with OCRService patterns,
including pre-operation memory checks and CPU fallback logic.
|
|
"""
|
|
|
|
import pytest
|
|
from unittest.mock import Mock, patch, MagicMock
|
|
import sys
|
|
|
|
# Register a stand-in for ``paddle`` before memory_manager is imported.
# All CUDA queries are pinned to "no GPU, nothing allocated".
# NOTE(review): this replaces sys.modules['paddle'] for the whole test
# session and is never undone -- confirm no other test needs the real package.
paddle_mock = MagicMock()
paddle_mock.configure_mock(**{
    "is_compiled_with_cuda.return_value": False,
    "device.cuda.device_count.return_value": 0,
    "device.cuda.memory_allocated.return_value": 0,
    "device.cuda.memory_reserved.return_value": 0,
    "device.cuda.empty_cache": MagicMock(),
})
sys.modules['paddle'] = paddle_mock
|
|
|
|
from app.services.memory_manager import (
|
|
MemoryGuard,
|
|
MemoryConfig,
|
|
MemoryStats,
|
|
)
|
|
|
|
|
|
class TestMemoryGuardIntegration:
    """Tests for MemoryGuard integration patterns used in OCRService.

    Each test pins ``get_memory_stats`` to a fixed ``MemoryStats`` snapshot
    and checks the availability flag returned by ``check_memory``.
    """

    def setup_method(self):
        """Create the shared config and a fresh guard for each test."""
        self.config = MemoryConfig(
            warning_threshold=0.80,
            critical_threshold=0.95,
            emergency_threshold=0.98,
            enable_cpu_fallback=True,
        )
        self.guard = MemoryGuard(self.config)

    def teardown_method(self):
        """Shut the guard down after every test.

        Doing this here rather than at the end of each test body guarantees
        the guard is shut down even when an assertion fails.
        """
        self.guard.shutdown()

    def _check_with_stats(self, required_mb, **stats_fields):
        """Run ``check_memory`` while ``get_memory_stats`` is pinned.

        Args:
            required_mb: Value forwarded as ``check_memory(required_mb=...)``.
            **stats_fields: Keyword arguments used to build the
                ``MemoryStats`` snapshot that the patched method returns.

        Returns:
            The ``(is_available, stats)`` tuple produced by ``check_memory``.
        """
        with patch.object(self.guard, 'get_memory_stats') as mock_stats:
            mock_stats.return_value = MemoryStats(**stats_fields)
            return self.guard.check_memory(required_mb=required_mb)

    def test_memory_check_below_threshold_allows_processing(self):
        """Test that memory check returns True when below thresholds"""
        # Usage ratio 0.50 is below the 0.80 warning threshold.
        is_available, stats = self._check_with_stats(
            2000,
            gpu_used_ratio=0.50,
            gpu_free_mb=4000,
            gpu_total_mb=8000,
        )

        assert is_available is True
        assert stats.gpu_free_mb >= 2000

    def test_memory_check_above_critical_blocks_processing(self):
        """Test that memory check returns False when above critical threshold"""
        # Usage ratio 0.96 is above the 0.95 critical threshold.
        is_available, _stats = self._check_with_stats(
            1000,
            gpu_used_ratio=0.96,
            gpu_free_mb=320,
            gpu_total_mb=8000,
        )

        assert is_available is False

    def test_memory_check_insufficient_free_memory(self):
        """Test that memory check returns False when free memory < required"""
        # Ratio 0.70 is below every threshold, but only 500 MB is free
        # against a 1000 MB request.
        is_available, _stats = self._check_with_stats(
            1000,
            gpu_used_ratio=0.70,
            gpu_free_mb=500,
            gpu_total_mb=8000,
        )

        # Should return False (not enough free memory)
        assert is_available is False
|
|
|
|
|
|
class TestCPUFallbackPattern:
    """Tests for CPU fallback pattern as used in OCRService.

    All tests drive one shared stand-in service, so the simulated
    activation/restoration logic cannot drift between test cases.
    """

    class _MockOCRService:
        """Stand-in reproducing the fallback / restore state handling.

        Simulates the pattern these tests attribute to
        ``OCRService._activate_cpu_fallback`` and
        ``OCRService._restore_gpu_mode``; it does not exercise the real
        service.

        Args:
            cpu_fallback_active: Start already in CPU fallback mode
                (``use_gpu`` is then False).
            gpu_info: Initial ``gpu_info`` dict; defaults to
                ``{'device_id': 0}``.
        """

        def __init__(self, cpu_fallback_active=False, gpu_info=None):
            self._cpu_fallback_active = cpu_fallback_active
            self.use_gpu = not cpu_fallback_active
            self.gpu_available = True
            self.gpu_info = {'device_id': 0} if gpu_info is None else gpu_info
            self._memory_guard = Mock()

        def _activate_cpu_fallback(self):
            """Switch to CPU mode once; repeated calls are no-ops."""
            if self._cpu_fallback_active:
                return

            self._cpu_fallback_active = True
            self.use_gpu = False
            self.gpu_info['cpu_fallback'] = True
            self.gpu_info['fallback_reason'] = 'GPU memory insufficient'

            if self._memory_guard:
                self._memory_guard.clear_gpu_cache()

        def _restore_gpu_mode(self):
            """Return to GPU mode if the guard reports enough free memory."""
            if not self._cpu_fallback_active:
                return

            if not self.gpu_available:
                return

            # Check if GPU memory is now available
            if self._memory_guard:
                is_available, _stats = self._memory_guard.check_memory(required_mb=2000)
                if is_available:
                    self._cpu_fallback_active = False
                    self.use_gpu = True
                    self.gpu_info.pop('cpu_fallback', None)
                    self.gpu_info.pop('fallback_reason', None)

    def test_cpu_fallback_activation_pattern(self):
        """Test the CPU fallback activation pattern"""
        service = self._MockOCRService()

        # Verify initial state
        assert service._cpu_fallback_active is False
        assert service.use_gpu is True

        # Activate fallback
        service._activate_cpu_fallback()

        # Verify fallback state
        assert service._cpu_fallback_active is True
        assert service.use_gpu is False
        assert service.gpu_info.get('cpu_fallback') is True
        assert service.gpu_info.get('fallback_reason') == 'GPU memory insufficient'
        service._memory_guard.clear_gpu_cache.assert_called_once()

    def test_cpu_fallback_idempotent(self):
        """Test that CPU fallback activation is idempotent"""
        service = self._MockOCRService(gpu_info={})

        # Activate twice
        service._activate_cpu_fallback()
        service._activate_cpu_fallback()

        # clear_gpu_cache should only be called once
        assert service._memory_guard.clear_gpu_cache.call_count == 1
        assert service._cpu_fallback_active is True
        assert service.use_gpu is False

    def test_gpu_mode_restoration_pattern(self):
        """Test the GPU mode restoration pattern"""
        service = self._MockOCRService(
            cpu_fallback_active=True,
            gpu_info={
                'device_id': 0,
                'cpu_fallback': True,
                'fallback_reason': 'test',
            },
        )

        # Mock memory guard to indicate sufficient memory
        mock_stats = Mock()
        mock_stats.gpu_free_mb = 5000
        service._memory_guard.check_memory.return_value = (True, mock_stats)

        # Restore GPU mode
        service._restore_gpu_mode()

        # Verify GPU mode restored and both fallback markers removed
        service._memory_guard.check_memory.assert_called_once_with(required_mb=2000)
        assert service._cpu_fallback_active is False
        assert service.use_gpu is True
        assert 'cpu_fallback' not in service.gpu_info
        assert 'fallback_reason' not in service.gpu_info
        assert service.gpu_info == {'device_id': 0}

    def test_gpu_mode_not_restored_when_memory_still_low(self):
        """Test that GPU mode is not restored when memory is still low"""
        service = self._MockOCRService(
            cpu_fallback_active=True,
            gpu_info={'cpu_fallback': True},
        )

        # Mock memory guard to indicate insufficient memory
        mock_stats = Mock()
        mock_stats.gpu_free_mb = 500
        service._memory_guard.check_memory.return_value = (False, mock_stats)

        # Try to restore GPU mode
        service._restore_gpu_mode()

        # Verify still in fallback mode and gpu_info left untouched
        service._memory_guard.check_memory.assert_called_once_with(required_mb=2000)
        assert service._cpu_fallback_active is True
        assert service.use_gpu is False
        assert service.gpu_info == {'cpu_fallback': True}
|
|
|
|
|
|
class TestPreOperationMemoryCheckPattern:
    """Tests for pre-operation memory check pattern as used in OCRService"""

    def test_pre_operation_check_with_fallback(self):
        """Test the pre-operation memory check pattern with fallback"""
        guard = MemoryGuard(MemoryConfig(
            warning_threshold=0.80,
            critical_threshold=0.95,
            enable_cpu_fallback=True,
        ))

        # Simulate the pattern:
        # 1. Check if in CPU fallback mode
        # 2. Try to restore GPU mode if memory available
        # 3. Perform memory check for operation

        class MockService:
            def __init__(self):
                self._cpu_fallback_active = False
                self.use_gpu = True
                self.gpu_available = True
                self._memory_guard = guard

            def _restore_gpu_mode(self):
                pass  # Simplified

            def pre_operation_check(self, required_mb: int) -> bool:
                # Try restore first
                if self._cpu_fallback_active:
                    self._restore_gpu_mode()

                # Perform memory check
                if not self.use_gpu:
                    return True  # CPU mode, no GPU check needed

                is_available, _stats = self._memory_guard.check_memory(required_mb=required_mb)
                return is_available

        service = MockService()

        # try/finally so the guard is shut down even if the assertion fails.
        try:
            # Mock sufficient memory
            with patch.object(guard, 'get_memory_stats') as mock_stats:
                mock_stats.return_value = MemoryStats(
                    gpu_used_ratio=0.50,
                    gpu_free_mb=4000,
                    gpu_total_mb=8000,
                )

                result = service.pre_operation_check(required_mb=2000)
                assert result is True
        finally:
            guard.shutdown()

    def test_pre_operation_check_returns_true_in_cpu_mode(self):
        """Test that pre-operation check returns True when in CPU mode"""
        class MockService:
            def __init__(self):
                self._cpu_fallback_active = True
                self.use_gpu = False
                self._memory_guard = Mock()

            def pre_operation_check(self, required_mb: int) -> bool:
                if not self.use_gpu:
                    return True  # CPU mode, no GPU check needed
                return False

        service = MockService()
        result = service.pre_operation_check(required_mb=5000)

        # Should return True because we're in CPU mode
        assert result is True
        # Memory guard should not be called
        service._memory_guard.check_memory.assert_not_called()
|
|
|
|
|
|
class TestMemoryCheckWithCleanup:
    """Tests for memory check with cleanup pattern"""

    def test_memory_check_triggers_cleanup_on_failure(self):
        """Test that memory check triggers cleanup when insufficient"""
        guard = MemoryGuard(MemoryConfig(
            warning_threshold=0.80,
            critical_threshold=0.95,
        ))

        # A Mock records the cleanup call so the call count can be asserted.
        cleanup = Mock()

        class MockService:
            def __init__(self):
                self._memory_guard = guard
                self.cleanup_func = cleanup

            def check_gpu_memory(self, required_mb: int) -> bool:
                is_available, _stats = self._memory_guard.check_memory(required_mb=required_mb)

                if not is_available:
                    # Trigger cleanup
                    self.cleanup_func()
                    self._memory_guard.clear_gpu_cache()
                    return False

                return True

        service = MockService()

        # try/finally so the guard is shut down even if an assertion fails.
        try:
            # Pin the stats for the whole call: usage ratio 0.96 is above
            # the 0.95 critical threshold, and 300 MB free is below the
            # 1000 MB request.
            with patch.object(guard, 'get_memory_stats') as mock_stats:
                mock_stats.return_value = MemoryStats(
                    gpu_used_ratio=0.96,
                    gpu_free_mb=300,
                    gpu_total_mb=8000,
                )

                result = service.check_gpu_memory(required_mb=1000)

            # Cleanup should have been triggered exactly once
            cleanup.assert_called_once()
            assert result is False
        finally:
            guard.shutdown()
|
|
|
|
|
|
if __name__ == "__main__":
    # Propagate pytest's exit code so a direct run of this file reports
    # failures through the process exit status.
    raise SystemExit(pytest.main([__file__, "-v"]))
|