feat: implement hybrid image extraction and memory management

Backend:
- Add hybrid image extraction for Direct track (inline image blocks)
- Add render_inline_image_regions() fallback when OCR doesn't find images
- Add check_document_for_missing_images() for detecting missing images
- Add memory management system (MemoryGuard, ModelManager, ServicePool)
- Update pdf_generator_service to handle HYBRID processing track
- Add ElementType.LOGO for logo extraction

Frontend:
- Fix PDF viewer re-rendering issues with memoization
- Add TaskNotFound component and useTaskValidation hook
- Disable StrictMode due to react-pdf incompatibility
- Fix task detail and results page loading states

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
egg
2025-11-26 10:56:22 +08:00
parent ba8ddf2b68
commit 1afdb822c3
26 changed files with 8273 additions and 366 deletions

View File

@@ -0,0 +1,380 @@
"""
Tests for OCR Service Memory Integration
Tests the integration of MemoryGuard with OCRService patterns,
including pre-operation memory checks and CPU fallback logic.
"""
import pytest
from unittest.mock import Mock, patch, MagicMock
import sys
# Register a stand-in for paddle before memory_manager is imported.
# The stub reports a build without CUDA: no devices, and zero bytes
# allocated or reserved.
paddle_mock = MagicMock()
paddle_mock.configure_mock(**{
    'is_compiled_with_cuda.return_value': False,
    'device.cuda.device_count.return_value': 0,
    'device.cuda.memory_allocated.return_value': 0,
    'device.cuda.memory_reserved.return_value': 0,
    'device.cuda.empty_cache': MagicMock(),
})
sys.modules['paddle'] = paddle_mock
from app.services.memory_manager import (
MemoryGuard,
MemoryConfig,
MemoryStats,
)
class TestMemoryGuardIntegration:
    """Tests for MemoryGuard integration patterns used in OCRService.

    Every test gets its own guard, created in ``setup_method`` and shut
    down in ``teardown_method`` so the shutdown still happens when an
    assertion in the test body fails.
    """

    def setup_method(self):
        """Build the shared config and a fresh guard for each test."""
        self.config = MemoryConfig(
            warning_threshold=0.80,
            critical_threshold=0.95,
            emergency_threshold=0.98,
            enable_cpu_fallback=True,
        )
        self.guard = MemoryGuard(self.config)

    def teardown_method(self):
        """Shut the guard down after each test, pass or fail."""
        self.guard.shutdown()

    def _check_with_stats(self, required_mb, **stats_fields):
        """Run ``check_memory`` while ``get_memory_stats`` is patched.

        Args:
            required_mb: Value forwarded as ``required_mb`` to
                ``check_memory``.
            **stats_fields: Keyword arguments used to build the
                ``MemoryStats`` instance the patched method returns.

        Returns:
            Whatever ``check_memory`` returns; the tests unpack it as
            ``(is_available, stats)``.
        """
        with patch.object(self.guard, 'get_memory_stats') as mock_stats:
            mock_stats.return_value = MemoryStats(**stats_fields)
            return self.guard.check_memory(required_mb=required_mb)

    def test_memory_check_below_threshold_allows_processing(self):
        """Test that memory check returns True when below thresholds"""
        # Stats below the warning threshold with ample free memory
        is_available, stats = self._check_with_stats(
            2000,
            gpu_used_ratio=0.50,
            gpu_free_mb=4000,
            gpu_total_mb=8000,
        )
        assert is_available is True
        assert stats.gpu_free_mb >= 2000

    def test_memory_check_above_critical_blocks_processing(self):
        """Test that memory check returns False when above critical threshold"""
        # Stats above the critical threshold
        is_available, _stats = self._check_with_stats(
            1000,
            gpu_used_ratio=0.96,
            gpu_free_mb=320,
            gpu_total_mb=8000,
        )
        assert is_available is False

    def test_memory_check_insufficient_free_memory(self):
        """Test that memory check returns False when free memory < required"""
        # Usage ratio is below critical, but free memory is under the request
        is_available, _stats = self._check_with_stats(
            1000,
            gpu_used_ratio=0.70,
            gpu_free_mb=500,
            gpu_total_mb=8000,
        )
        assert is_available is False
class TestCPUFallbackPattern:
    """Tests for CPU fallback pattern as used in OCRService.

    All tests exercise one shared stub so the simulated activation and
    restoration logic cannot drift between test cases.
    """

    class _ServiceStub:
        """Stand-in that reproduces the fallback/restore pattern under test.

        NOTE(review): this simulates the pattern the tests attribute to
        OCRService._activate_cpu_fallback / _restore_gpu_mode; the real
        service is not imported here, so confirm the two stay in sync.
        """

        def __init__(self, *, cpu_fallback_active=False, gpu_available=True,
                     gpu_info=None):
            """Create the stub.

            Args:
                cpu_fallback_active: Start in CPU fallback mode; ``use_gpu``
                    starts as the opposite of this flag.
                gpu_available: Whether restoration may be attempted at all.
                gpu_info: Initial info dict; defaults to ``{'device_id': 0}``.
            """
            self._cpu_fallback_active = cpu_fallback_active
            self.use_gpu = not cpu_fallback_active
            self.gpu_available = gpu_available
            # A fresh dict per instance avoids sharing a mutable default.
            self.gpu_info = {'device_id': 0} if gpu_info is None else gpu_info
            self._memory_guard = Mock()

        def _activate_cpu_fallback(self):
            """Switch to CPU mode once; repeated calls are no-ops."""
            if self._cpu_fallback_active:
                return
            self._cpu_fallback_active = True
            self.use_gpu = False
            self.gpu_info['cpu_fallback'] = True
            self.gpu_info['fallback_reason'] = 'GPU memory insufficient'
            if self._memory_guard:
                self._memory_guard.clear_gpu_cache()

        def _restore_gpu_mode(self):
            """Return to GPU mode when the guard reports enough memory."""
            if not self._cpu_fallback_active or not self.gpu_available:
                return
            if self._memory_guard:
                is_available, _stats = self._memory_guard.check_memory(
                    required_mb=2000
                )
                if is_available:
                    self._cpu_fallback_active = False
                    self.use_gpu = True
                    self.gpu_info.pop('cpu_fallback', None)
                    self.gpu_info.pop('fallback_reason', None)

    def test_cpu_fallback_activation_pattern(self):
        """Test the CPU fallback activation pattern"""
        service = self._ServiceStub()

        # Verify initial state
        assert service._cpu_fallback_active is False
        assert service.use_gpu is True

        # Activate fallback
        service._activate_cpu_fallback()

        # Verify fallback state, including the recorded reason
        assert service._cpu_fallback_active is True
        assert service.use_gpu is False
        assert service.gpu_info.get('cpu_fallback') is True
        assert service.gpu_info.get('fallback_reason') == 'GPU memory insufficient'
        service._memory_guard.clear_gpu_cache.assert_called_once()

    def test_cpu_fallback_idempotent(self):
        """Test that CPU fallback activation is idempotent"""
        service = self._ServiceStub(gpu_info={})

        # Activate twice
        service._activate_cpu_fallback()
        service._activate_cpu_fallback()

        # clear_gpu_cache should only be called once
        assert service._memory_guard.clear_gpu_cache.call_count == 1

    def test_gpu_mode_restoration_pattern(self):
        """Test the GPU mode restoration pattern"""
        service = self._ServiceStub(
            cpu_fallback_active=True,
            gpu_info={
                'device_id': 0,
                'cpu_fallback': True,
                'fallback_reason': 'test',
            },
        )

        # Mock memory guard to indicate sufficient memory
        service._memory_guard.check_memory.return_value = (
            True,
            Mock(gpu_free_mb=5000),
        )

        # Restore GPU mode
        service._restore_gpu_mode()

        # Verify GPU mode restored and both fallback markers removed
        assert service._cpu_fallback_active is False
        assert service.use_gpu is True
        assert 'cpu_fallback' not in service.gpu_info
        assert 'fallback_reason' not in service.gpu_info
        service._memory_guard.check_memory.assert_called_once_with(required_mb=2000)

    def test_gpu_mode_not_restored_when_memory_still_low(self):
        """Test that GPU mode is not restored when memory is still low"""
        service = self._ServiceStub(
            cpu_fallback_active=True,
            gpu_info={'cpu_fallback': True},
        )

        # Mock memory guard to indicate insufficient memory
        service._memory_guard.check_memory.return_value = (
            False,
            Mock(gpu_free_mb=500),
        )

        # Try to restore GPU mode
        service._restore_gpu_mode()

        # Verify still in fallback mode, with the marker left in place
        assert service._cpu_fallback_active is True
        assert service.use_gpu is False
        assert service.gpu_info.get('cpu_fallback') is True
        service._memory_guard.check_memory.assert_called_once_with(required_mb=2000)
class TestPreOperationMemoryCheckPattern:
"""Tests for pre-operation memory check pattern as used in OCRService"""
def test_pre_operation_check_with_fallback(self):
"""Test the pre-operation memory check pattern with fallback"""
guard = MemoryGuard(MemoryConfig(
warning_threshold=0.80,
critical_threshold=0.95,
enable_cpu_fallback=True,
))
# Simulate the pattern:
# 1. Check if in CPU fallback mode
# 2. Try to restore GPU mode if memory available
# 3. Perform memory check for operation
class MockService:
def __init__(self):
self._cpu_fallback_active = False
self.use_gpu = True
self.gpu_available = True
self._memory_guard = guard
def _restore_gpu_mode(self):
pass # Simplified
def pre_operation_check(self, required_mb: int) -> bool:
# Try restore first
if self._cpu_fallback_active:
self._restore_gpu_mode()
# Perform memory check
if not self.use_gpu:
return True # CPU mode, no GPU check needed
is_available, stats = self._memory_guard.check_memory(required_mb=required_mb)
return is_available
service = MockService()
# Mock sufficient memory
with patch.object(guard, 'get_memory_stats') as mock_stats:
mock_stats.return_value = MemoryStats(
gpu_used_ratio=0.50,
gpu_free_mb=4000,
gpu_total_mb=8000,
)
result = service.pre_operation_check(required_mb=2000)
assert result is True
guard.shutdown()
def test_pre_operation_check_returns_true_in_cpu_mode(self):
"""Test that pre-operation check returns True when in CPU mode"""
class MockService:
def __init__(self):
self._cpu_fallback_active = True
self.use_gpu = False
self._memory_guard = Mock()
def pre_operation_check(self, required_mb: int) -> bool:
if not self.use_gpu:
return True # CPU mode, no GPU check needed
return False
service = MockService()
result = service.pre_operation_check(required_mb=5000)
# Should return True because we're in CPU mode
assert result is True
# Memory guard should not be called
service._memory_guard.check_memory.assert_not_called()
class TestMemoryCheckWithCleanup:
    """Tests for memory check with cleanup pattern"""

    def test_memory_check_triggers_cleanup_on_failure(self):
        """Test that memory check triggers cleanup when insufficient"""
        guard = MemoryGuard(MemoryConfig(
            warning_threshold=0.80,
            critical_threshold=0.95,
        ))
        # A Mock records the cleanup call so it can be asserted directly.
        cleanup_hook = Mock()

        class MockService:
            """Stand-in that runs cleanup when the guard refuses the request."""

            def __init__(self):
                self._memory_guard = guard
                self.cleanup_func = cleanup_hook

            def check_gpu_memory(self, required_mb: int) -> bool:
                is_available, _stats = self._memory_guard.check_memory(
                    required_mb=required_mb
                )
                if is_available:
                    return True
                # Trigger cleanup
                self.cleanup_func()
                self._memory_guard.clear_gpu_cache()
                return False

        try:
            service = MockService()
            # Report low memory for the duration of the check
            with patch.object(guard, 'get_memory_stats') as mock_stats:
                mock_stats.return_value = MemoryStats(
                    gpu_used_ratio=0.96,
                    gpu_free_mb=300,
                    gpu_total_mb=8000,
                )
                result = service.check_gpu_memory(required_mb=1000)

            # Cleanup should have been triggered exactly once
            cleanup_hook.assert_called_once_with()
            assert result is False
        finally:
            # Shut the guard down even when an assertion above fails.
            guard.shutdown()
if __name__ == "__main__":
    # Propagate pytest's exit code so running this file directly reports
    # test failures through the process exit status.
    raise SystemExit(pytest.main([__file__, "-v"]))