feat: simplify layout model selection and archive proposals
Changes:
- Replace the PP-Structure 7-slider parameter UI with a simple 3-option layout model selector
- Add layout model mapping: chinese (PP-DocLayout-S), default (PubLayNet), cdla
- Add LayoutModelSelector component and zh-TW translations
- Fix "default" model behavior with sentinel value for PubLayNet
- Add gap filling service for OCR track coverage improvement
- Add PP-Structure debug utilities
- Archive completed/incomplete proposals:
  - add-ocr-track-gap-filling (complete)
  - fix-ocr-track-table-rendering (incomplete)
  - simplify-ppstructure-model-selection (22/25 tasks)
- Add new layout model tests, archive old PP-Structure param tests
- Update OpenSpec ocr-processing spec with layout model requirements

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
244
backend/tests/services/test_layout_model.py
Normal file
244
backend/tests/services/test_layout_model.py
Normal file
@@ -0,0 +1,244 @@
|
||||
"""
|
||||
Unit tests for Layout Model Selection feature in OCR Service.
|
||||
|
||||
This replaces the deprecated PP-StructureV3 parameter tests.
|
||||
"""
|
||||
|
||||
import pytest
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from unittest.mock import Mock, patch, MagicMock
|
||||
|
||||
# Heavy third-party packages are replaced in sys.modules with mocks.
# This has to happen before OCRService is imported further down.
sys.modules.update(
    {module_name: MagicMock() for module_name in ('paddleocr', 'PIL', 'pdf2image')}
)

# Stand-in for paddle: exposes a version string and reports the CPU device.
paddle_mock = MagicMock()
paddle_mock.__version__ = '2.5.0'
paddle_mock.configure_mock(**{
    'device.get_device.return_value': 'cpu',
    'device.get_available_device.return_value': 'cpu',
})
sys.modules['paddle'] = paddle_mock

# Stand-in for torch: CUDA is reported as unavailable.
torch_mock = MagicMock()
torch_mock.configure_mock(**{'cuda.is_available.return_value': False})
sys.modules['torch'] = torch_mock
|
||||
|
||||
from app.services.ocr_service import OCRService, LAYOUT_MODEL_MAPPING, _USE_PUBLAYNET_DEFAULT
|
||||
from app.core.config import settings
|
||||
|
||||
|
||||
class TestLayoutModelMapping:
    """Checks on the LAYOUT_MODEL_MAPPING lookup table."""

    def test_layout_model_mapping_exists(self):
        """The three selectable layout models are all keys of the mapping."""
        for model_key in ('chinese', 'default', 'cdla'):
            assert model_key in LAYOUT_MODEL_MAPPING

    def test_chinese_model_maps_to_pp_doclayout(self):
        """'chinese' resolves to PP-DocLayout-S."""
        mapped_name = LAYOUT_MODEL_MAPPING['chinese']
        assert mapped_name == 'PP-DocLayout-S'

    def test_default_model_maps_to_publaynet_sentinel(self):
        """'default' resolves to the _USE_PUBLAYNET_DEFAULT sentinel."""
        # The sentinel stands for "use the PubLayNet default, no custom model".
        mapped_name = LAYOUT_MODEL_MAPPING['default']
        assert mapped_name == _USE_PUBLAYNET_DEFAULT

    def test_cdla_model_maps_to_picodet(self):
        """'cdla' resolves to picodet_lcnet_x1_0_fgd_layout_cdla."""
        mapped_name = LAYOUT_MODEL_MAPPING['cdla']
        assert mapped_name == 'picodet_lcnet_x1_0_fgd_layout_cdla'
|
||||
|
||||
|
||||
class TestLayoutModelEngine:
    """Engine construction for each selectable layout model."""

    @staticmethod
    def _requested_model_name(layout_model):
        """Create a structure engine for *layout_model* with PPStructureV3 mocked.

        The service's ``structure_engine`` attribute is patched to ``None`` for
        the duration of the call. Asserts that the mocked PPStructureV3 was
        called exactly once and returns the ``layout_detection_model_name``
        keyword it received, or ``None`` if that keyword was not passed.
        """
        service = OCRService()
        with patch.object(service, 'structure_engine', None), \
                patch('app.services.ocr_service.PPStructureV3') as ppstructure_cls:
            ppstructure_cls.return_value = Mock()
            service._ensure_structure_engine(layout_model=layout_model)
            ppstructure_cls.assert_called_once()
            return ppstructure_cls.call_args[1].get('layout_detection_model_name')

    def test_chinese_model_creates_engine_with_pp_doclayout(self):
        """'chinese' requests the PP-DocLayout-S model."""
        assert self._requested_model_name('chinese') == 'PP-DocLayout-S'

    def test_default_model_creates_engine_without_model_name(self):
        """'default' passes no model name, or passes it as None."""
        assert self._requested_model_name('default') is None

    def test_cdla_model_creates_engine_with_picodet(self):
        """'cdla' requests picodet_lcnet_x1_0_fgd_layout_cdla."""
        assert self._requested_model_name('cdla') == 'picodet_lcnet_x1_0_fgd_layout_cdla'

    def test_none_layout_model_uses_chinese_default(self):
        """layout_model=None is expected to select the 'chinese' model."""
        assert self._requested_model_name(None) == 'PP-DocLayout-S'
|
||||
|
||||
|
||||
class TestLayoutModelCaching:
    """Engine reuse versus re-creation across layout model requests."""

    def test_same_layout_model_uses_cached_engine(self):
        """Two requests for 'chinese' build one engine and return the same object."""
        service = OCRService()

        with patch('app.services.ocr_service.PPStructureV3') as ppstructure_cls:
            ppstructure_cls.return_value = Mock()

            # Ask for the same model twice in a row.
            first, second = [
                service._ensure_structure_engine(layout_model='chinese')
                for _ in range(2)
            ]

            assert ppstructure_cls.call_count == 1
            assert first is second

    def test_different_layout_model_creates_new_engine(self):
        """Switching from 'chinese' to 'cdla' constructs a second, distinct engine."""
        service = OCRService()

        with patch('app.services.ocr_service.PPStructureV3') as ppstructure_cls:
            # Each constructor call hands back a different mock engine.
            ppstructure_cls.side_effect = [Mock(), Mock()]

            chinese_engine = service._ensure_structure_engine(layout_model='chinese')
            cdla_engine = service._ensure_structure_engine(layout_model='cdla')

            assert ppstructure_cls.call_count == 2
            assert chinese_engine is not cdla_engine
|
||||
|
||||
|
||||
class TestLayoutModelFlow:
    """How the layout_model argument reaches the PPStructureV3 constructor."""

    @staticmethod
    def _model_name_passed_to_engine(layout_model):
        """Call _ensure_structure_engine on a new OCRService with PPStructureV3 mocked.

        Asserts the mocked constructor was called exactly once and returns the
        ``layout_detection_model_name`` keyword it received (``None`` if absent).
        """
        service = OCRService()
        with patch('app.services.ocr_service.PPStructureV3') as ppstructure_cls:
            ppstructure_cls.return_value = Mock()
            service._ensure_structure_engine(layout_model=layout_model)
            ppstructure_cls.assert_called_once()
            return ppstructure_cls.call_args[1].get('layout_detection_model_name')

    def test_layout_model_passed_to_engine_creation(self):
        """An explicit 'cdla' request reaches the engine as the picodet CDLA model."""
        requested = self._model_name_passed_to_engine('cdla')
        assert requested == 'picodet_lcnet_x1_0_fgd_layout_cdla'

    def test_layout_model_default_behavior(self):
        """layout_model=None results in the configured default model name."""
        requested = self._model_name_passed_to_engine(None)
        assert requested == settings.layout_detection_model_name

    def test_layout_model_unknown_value_falls_back(self):
        """An unrecognised layout model falls back to the configured default."""
        requested = self._model_name_passed_to_engine('unknown_model')
        assert requested == settings.layout_detection_model_name
|
||||
|
||||
|
||||
class TestLayoutModelLogging:
    """Logging emitted while selecting a layout model."""

    def test_layout_model_is_logged(self):
        """At least one info log mentions 'cdla' or 'layout' when building the engine."""
        service = OCRService()

        with patch('app.services.ocr_service.PPStructureV3') as ppstructure_cls, \
                patch('app.services.ocr_service.logger') as logger_mock:
            ppstructure_cls.return_value = Mock()

            service._ensure_structure_engine(layout_model='cdla')

            assert logger_mock.info.call_count >= 1

            # Compare case-insensitively against the rendered info() calls.
            rendered_calls = [
                str(entry).lower() for entry in logger_mock.info.call_args_list
            ]
            assert any('cdla' in text or 'layout' in text for text in rendered_calls)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Direct execution: run pytest verbosely on this file only.
    pytest_args = [__file__, '-v']
    pytest.main(pytest_args)
|
||||
@@ -1,299 +0,0 @@
|
||||
"""
|
||||
Unit tests for PP-StructureV3 parameter customization
|
||||
"""
|
||||
|
||||
import pytest
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from unittest.mock import Mock, patch, MagicMock
|
||||
|
||||
# Heavy third-party packages are replaced in sys.modules with mocks.
# This has to happen before OCRService is imported further down.
sys.modules.update(
    {module_name: MagicMock() for module_name in ('paddleocr', 'PIL', 'pdf2image')}
)

# Stand-in for paddle: exposes a version string and reports the CPU device.
paddle_mock = MagicMock()
paddle_mock.__version__ = '2.5.0'
paddle_mock.configure_mock(**{
    'device.get_device.return_value': 'cpu',
    'device.get_available_device.return_value': 'cpu',
})
sys.modules['paddle'] = paddle_mock

# Stand-in for torch: CUDA is reported as unavailable.
torch_mock = MagicMock()
torch_mock.configure_mock(**{'cuda.is_available.return_value': False})
sys.modules['torch'] = torch_mock
|
||||
|
||||
from app.services.ocr_service import OCRService
|
||||
from app.core.config import settings
|
||||
|
||||
|
||||
class TestPPStructureParamsValidation:
    """Defaults, overrides and caching for custom PP-StructureV3 parameters."""

    def test_default_parameters_used_when_none_provided(self):
        """With custom_params=None the engine is built from the settings values."""
        service = OCRService()

        with patch.object(service, 'structure_engine', None), \
                patch('app.services.ocr_service.PPStructureV3') as ppstructure_cls:
            ppstructure_cls.return_value = Mock()

            service._ensure_structure_engine(custom_params=None)

            ppstructure_cls.assert_called_once()
            passed = ppstructure_cls.call_args[1]

            assert passed['layout_threshold'] == settings.layout_detection_threshold
            assert passed['layout_nms'] == settings.layout_nms_threshold
            assert passed['text_det_thresh'] == settings.text_det_thresh

    def test_custom_parameters_override_defaults(self):
        """Every supplied custom parameter replaces its default in the engine kwargs."""
        service = OCRService()
        overrides = {
            'layout_detection_threshold': 0.1,
            'layout_nms_threshold': 0.15,
            'text_det_thresh': 0.25,
            'layout_merge_bboxes_mode': 'large',
        }
        # Engine keyword names differ from the custom-parameter names for the
        # two layout thresholds, so the expectations are listed separately.
        expected_kwargs = {
            'layout_threshold': 0.1,
            'layout_nms': 0.15,
            'text_det_thresh': 0.25,
            'layout_merge_bboxes_mode': 'large',
        }

        with patch('app.services.ocr_service.PPStructureV3') as ppstructure_cls:
            ppstructure_cls.return_value = Mock()

            service._ensure_structure_engine(custom_params=overrides)

            passed = ppstructure_cls.call_args[1]
            for keyword, value in expected_kwargs.items():
                assert passed[keyword] == value

    def test_partial_custom_parameters(self):
        """A partial override is used as given; the rest come from settings."""
        service = OCRService()
        overrides = {'layout_detection_threshold': 0.15}

        with patch('app.services.ocr_service.PPStructureV3') as ppstructure_cls:
            ppstructure_cls.return_value = Mock()

            service._ensure_structure_engine(custom_params=overrides)

            passed = ppstructure_cls.call_args[1]

            # The one overridden value.
            assert passed['layout_threshold'] == 0.15
            # Values left to their defaults.
            assert passed['layout_nms'] == settings.layout_nms_threshold
            assert passed['text_det_thresh'] == settings.text_det_thresh

    def test_custom_params_do_not_cache_engine(self):
        """Repeating the same custom params still constructs a new engine each time."""
        service = OCRService()
        overrides = {'layout_detection_threshold': 0.1}

        with patch('app.services.ocr_service.PPStructureV3') as ppstructure_cls:
            built_engines = [Mock(), Mock()]
            ppstructure_cls.side_effect = built_engines

            first = service._ensure_structure_engine(custom_params=overrides)
            second = service._ensure_structure_engine(custom_params=overrides)

            assert ppstructure_cls.call_count == 2
            assert first is built_engines[0]
            assert second is built_engines[1]

    def test_default_params_use_cached_engine(self):
        """Two calls without custom params build one engine and share it."""
        service = OCRService()

        with patch('app.services.ocr_service.PPStructureV3') as ppstructure_cls:
            ppstructure_cls.return_value = Mock()

            first, second = [
                service._ensure_structure_engine(custom_params=None)
                for _ in range(2)
            ]

            assert ppstructure_cls.call_count == 1
            assert first is second

    def test_invalid_custom_params_fallback_to_default(self):
        """If building with custom params raises, the cached default engine is returned."""
        service = OCRService()

        with patch('app.services.ocr_service.PPStructureV3') as ppstructure_cls:
            cached_default = Mock()
            ppstructure_cls.return_value = cached_default

            # Populate the cache with the default engine first.
            service._ensure_structure_engine(custom_params=None)

            # From here on, every constructor call fails.
            ppstructure_cls.side_effect = ValueError("Invalid parameter")

            fallback = service._ensure_structure_engine(custom_params={'invalid': 'params'})

            assert fallback is cached_default
|
||||
|
||||
|
||||
class TestPPStructureParamsFlow:
    """Propagation of pp_structure_params through the processing entry points."""

    def test_params_flow_through_process_image(self):
        """process_image forwards pp_structure_params to analyze_layout."""
        service = OCRService()
        overrides = {'layout_detection_threshold': 0.12}

        with patch.object(service, 'get_ocr_engine') as get_ocr, \
                patch.object(service, 'analyze_layout') as analyze:
            analyze.return_value = (None, [])

            # OCR engine stub returning a single recognised text box.
            ocr_engine = Mock()
            ocr_engine.ocr.return_value = [[[[0, 0], [100, 0], [100, 50], [0, 50]], ('test', 0.9)]]
            get_ocr.return_value = ocr_engine

            service.process_image(
                image_path=Path('/tmp/test.jpg'),
                detect_layout=True,
                pp_structure_params=overrides,
            )

            analyze.assert_called_once()
            assert analyze.call_args[1]['pp_structure_params'] == overrides

    def test_params_flow_through_process_with_dual_track(self):
        """With the OCR track forced, params reach process_file_traditional."""
        service = OCRService()
        service.dual_track_enabled = True
        overrides = {'text_det_thresh': 0.15}

        with patch.object(service, 'process_file_traditional') as traditional, \
                patch('app.services.ocr_service.DocumentTypeDetector') as detector_cls:
            # Detector stub that recommends the OCR track.
            recommendation = Mock(track='ocr', confidence=0.9, reason='Test', metadata={})
            detector_cls.return_value = Mock(**{'detect.return_value': recommendation})

            traditional.return_value = {'status': 'success'}

            service.process_with_dual_track(
                file_path=Path('/tmp/test.pdf'),
                force_track='ocr',
                pp_structure_params=overrides,
            )

            traditional.assert_called_once()
            assert traditional.call_args[1]['pp_structure_params'] == overrides

    def test_params_not_passed_to_direct_track(self):
        """On the direct track, extract() receives no pp_structure_params keyword."""
        service = OCRService()
        service.dual_track_enabled = True
        overrides = {'layout_detection_threshold': 0.1}

        with patch('app.services.ocr_service.DocumentTypeDetector') as detector_cls, \
                patch('app.services.ocr_service.DirectExtractionEngine') as direct_cls:
            # Detector stub that recommends the direct-extraction track.
            recommendation = Mock(
                track='direct',
                confidence=0.95,
                reason='Editable PDF',
                metadata={},
            )
            detector_cls.return_value = Mock(**{'detect.return_value': recommendation})

            # Direct extraction stub returning a minimal document object.
            extracted_document = Mock(
                document_id='test-id',
                metadata=Mock(processing_track='direct'),
            )
            direct_engine = Mock(**{'extract.return_value': extracted_document})
            direct_cls.return_value = direct_engine

            service.process_with_dual_track(
                file_path=Path('/tmp/test.pdf'),
                pp_structure_params=overrides,
            )

            # Direct extraction ran, and was not given the PP-StructureV3 params.
            direct_engine.extract.assert_called_once()
            assert 'pp_structure_params' not in direct_engine.extract.call_args[1]
|
||||
|
||||
|
||||
class TestPPStructureParamsLogging:
    """Logging emitted when custom PP-StructureV3 parameters are used."""

    def test_custom_params_are_logged(self):
        """At least two info logs are emitted and one of them mentions 'custom'."""
        service = OCRService()
        overrides = {
            'layout_detection_threshold': 0.1,
            'text_det_thresh': 0.15,
        }

        with patch('app.services.ocr_service.PPStructureV3') as ppstructure_cls, \
                patch('app.services.ocr_service.logger') as logger_mock:
            ppstructure_cls.return_value = Mock()

            service._ensure_structure_engine(custom_params=overrides)

            assert logger_mock.info.call_count >= 2

            # Compare case-insensitively against the rendered info() calls.
            rendered_calls = [
                str(entry).lower() for entry in logger_mock.info.call_args_list
            ]
            assert any('custom' in text for text in rendered_calls)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Direct execution: run pytest verbosely on this file only.
    pytest_args = [__file__, '-v']
    pytest.main(pytest_args)
|
||||
Reference in New Issue
Block a user