feat: upgrade PP-StructureV3 models to latest versions

- Layout: PP-DocLayout-S → PP-DocLayout_plus-L (83.2% mAP)
- Table: Single model → Dual SLANeXt (wired/wireless)
- Formula: PP-FormulaNet_plus-L for enhanced recognition
- Add preprocessing flags support (orientation, unwarping)
- Update frontend i18n descriptions

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
egg
2025-11-27 14:21:24 +08:00
parent 59206a6ab8
commit 6235280c45
9 changed files with 504 additions and 25 deletions

View File

@@ -40,8 +40,8 @@ class TestLayoutModelMapping:
assert 'cdla' in LAYOUT_MODEL_MAPPING
def test_chinese_model_maps_to_pp_doclayout(self):
-"""Verify 'chinese' maps to PP-DocLayout-S"""
-assert LAYOUT_MODEL_MAPPING['chinese'] == 'PP-DocLayout-S'
+"""Verify 'chinese' maps to PP-DocLayout_plus-L"""
+assert LAYOUT_MODEL_MAPPING['chinese'] == 'PP-DocLayout_plus-L'
def test_default_model_maps_to_publaynet_sentinel(self):
"""Verify 'default' maps to sentinel value for PubLayNet default"""
@@ -57,7 +57,7 @@ class TestLayoutModelEngine:
"""Test engine creation with different layout models"""
def test_chinese_model_creates_engine_with_pp_doclayout(self):
-"""Verify 'chinese' layout model uses PP-DocLayout-S"""
+"""Verify 'chinese' layout model uses PP-DocLayout_plus-L"""
ocr_service = OCRService()
with patch.object(ocr_service, 'structure_engine', None):
@@ -70,7 +70,7 @@ class TestLayoutModelEngine:
mock_ppstructure.assert_called_once()
call_kwargs = mock_ppstructure.call_args[1]
-assert call_kwargs.get('layout_detection_model_name') == 'PP-DocLayout-S'
+assert call_kwargs.get('layout_detection_model_name') == 'PP-DocLayout_plus-L'
def test_default_model_creates_engine_without_model_name(self):
"""Verify 'default' layout model does not specify model name (uses default)"""
@@ -121,7 +121,7 @@ class TestLayoutModelEngine:
call_kwargs = mock_ppstructure.call_args[1]
# Should use 'chinese' model as default
-assert call_kwargs.get('layout_detection_model_name') == 'PP-DocLayout-S'
+assert call_kwargs.get('layout_detection_model_name') == 'PP-DocLayout_plus-L'
class TestLayoutModelCaching: