feat: simplify layout model selection and archive proposals
Changes:
- Replace PP-Structure 7-slider parameter UI with a simple 3-option layout model selector
- Add layout model mapping: chinese (PP-DocLayout-S), default (PubLayNet), cdla
- Add LayoutModelSelector component and zh-TW translations
- Fix "default" model behavior with sentinel value for PubLayNet
- Add gap filling service for OCR track coverage improvement
- Add PP-Structure debug utilities
- Archive completed/incomplete proposals:
  - add-ocr-track-gap-filling (complete)
  - fix-ocr-track-table-rendering (incomplete)
  - simplify-ppstructure-model-selection (22/25 tasks)
- Add new layout model tests, archive old PP-Structure param tests
- Update OpenSpec ocr-processing spec with layout model requirements

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
349
backend/tests/archived/test_ppstructure_params_api.py
Normal file
349
backend/tests/archived/test_ppstructure_params_api.py
Normal file
@@ -0,0 +1,349 @@
|
||||
"""
|
||||
API integration tests for PP-StructureV3 parameter customization
|
||||
"""
|
||||
|
||||
import pytest
|
||||
import json
|
||||
from fastapi.testclient import TestClient
|
||||
from pathlib import Path
|
||||
from unittest.mock import Mock, patch
|
||||
from app.main import app
|
||||
from app.core.database import get_db
|
||||
from app.models.user import User
|
||||
from app.models.task import Task, TaskStatus, TaskFile
|
||||
|
||||
|
||||
@pytest.fixture
def client():
    """Provide a ``TestClient`` wrapping the application under test."""
    test_client = TestClient(app)
    return test_client
|
||||
|
||||
|
||||
@pytest.fixture
def test_user(db_session):
    """Persist one active user through ``db_session`` and return it."""
    account_fields = {
        "email": "test@example.com",
        "hashed_password": "test_hash",
        "is_active": True,
    }
    account = User(**account_fields)

    db_session.add(account)
    db_session.commit()
    # Re-read the row after the commit so callers get the stored state.
    db_session.refresh(account)
    return account
|
||||
|
||||
|
||||
@pytest.fixture
def test_task(db_session, test_user):
    """Persist a pending task owned by ``test_user`` plus one attached file.

    The task row is committed and refreshed first because the file record
    references ``task.id``.
    """
    pending_task = Task(
        user_id=test_user.id,
        task_id="test-task-123",
        filename="test.pdf",
        status=TaskStatus.PENDING,
    )
    db_session.add(pending_task)
    db_session.commit()
    db_session.refresh(pending_task)

    # Attach the uploaded-file record to the task created above.
    upload_fields = {
        "task_id": pending_task.id,
        "original_name": "test.pdf",
        "stored_path": "/tmp/test.pdf",
        "file_size": 1024,
        "mime_type": "application/pdf",
    }
    db_session.add(TaskFile(**upload_fields))
    db_session.commit()

    return pending_task
|
||||
|
||||
|
||||
@pytest.fixture
def auth_headers(test_user):
    """Return request headers carrying a placeholder bearer token."""
    # The token is a fixed dummy string standing in for a JWT.
    headers = {"Authorization": "Bearer test_token"}
    return headers
|
||||
|
||||
|
||||
class TestProcessingOptionsSchema:
    """Schema-level checks for ``ProcessingOptions`` and ``PPStructureV3Params``."""

    def test_processing_options_accepts_pp_structure_params(self):
        """ProcessingOptions keeps the nested PPStructureV3Params it is given."""
        from app.schemas.task import ProcessingOptions, PPStructureV3Params

        custom = PPStructureV3Params(
            layout_detection_threshold=0.15,
            layout_nms_threshold=0.2,
            text_det_thresh=0.25,
            layout_merge_bboxes_mode='small',
        )
        opts = ProcessingOptions(
            use_dual_track=True,
            language='ch',
            pp_structure_params=custom,
        )

        assert opts.pp_structure_params is not None
        assert opts.pp_structure_params.layout_detection_threshold == 0.15

    def test_ppstructure_params_validation_min_max(self):
        """Out-of-range thresholds are rejected; in-range values are kept."""
        from app.schemas.task import PPStructureV3Params
        from pydantic import ValidationError

        # A threshold above 1 must fail validation.
        with pytest.raises(ValidationError):
            PPStructureV3Params(layout_detection_threshold=1.5)

        # A threshold below 0 must fail validation.
        with pytest.raises(ValidationError):
            PPStructureV3Params(layout_nms_threshold=-0.1)

        # Values inside the range are stored unchanged.
        in_range = PPStructureV3Params(
            layout_detection_threshold=0.5,
            layout_nms_threshold=0.3,
        )
        assert in_range.layout_detection_threshold == 0.5

    def test_ppstructure_params_merge_mode_validation(self):
        """Only the known merge modes pass validation."""
        from app.schemas.task import PPStructureV3Params
        from pydantic import ValidationError

        accepted_modes = ('small', 'large', 'union')
        for merge_mode in accepted_modes:
            built = PPStructureV3Params(layout_merge_bboxes_mode=merge_mode)
            assert built.layout_merge_bboxes_mode == merge_mode

        # Any other string is rejected.
        with pytest.raises(ValidationError):
            PPStructureV3Params(layout_merge_bboxes_mode='invalid')

    def test_ppstructure_params_optional_fields(self):
        """Every field may be omitted; unset fields drop out of the dump."""
        from app.schemas.task import PPStructureV3Params

        # No arguments at all: nothing survives ``exclude_none``.
        empty = PPStructureV3Params()
        assert empty.model_dump(exclude_none=True) == {}

        # One argument: only that key is present in the dump.
        partial = PPStructureV3Params(layout_detection_threshold=0.2)
        dumped = partial.model_dump(exclude_none=True)
        assert 'layout_detection_threshold' in dumped
        assert 'layout_nms_threshold' not in dumped
|
||||
|
||||
|
||||
class TestStartTaskEndpoint:
    """Test /tasks/{task_id}/start endpoint with PP-StructureV3 params"""

    @pytest.fixture(autouse=True)
    def _override_dependencies(self, test_task, db_session):
        """Point the app's DB and auth dependencies at the test fixtures.

        Installed automatically for every test in this class. The overrides
        are removed in fixture teardown, which pytest runs even when the test
        body fails, so a failing assertion can no longer leak overrides into
        later tests.
        """
        from app.core.deps import get_current_user

        def override_get_db():
            yield db_session

        def override_get_current_user():
            return test_task.user

        app.dependency_overrides[get_db] = override_get_db
        app.dependency_overrides[get_current_user] = override_get_current_user

        yield

        # Clears every override, matching the cleanup each test used to do
        # inline at the end of its body.
        app.dependency_overrides.clear()

    @staticmethod
    def _post_start(client, task, body):
        """POST ``body`` as JSON to the start endpoint of ``task``."""
        return client.post(f"/api/v2/tasks/{task.task_id}/start", json=body)

    @patch('app.routers.tasks.process_task_ocr')
    def test_start_task_with_custom_params(self, mock_process_ocr, client, test_task, auth_headers, db_session):
        """Verify custom PP-StructureV3 params are accepted and passed to OCR service"""
        request_body = {
            "use_dual_track": True,
            "language": "ch",
            "pp_structure_params": {
                "layout_detection_threshold": 0.15,
                "layout_nms_threshold": 0.2,
                "text_det_thresh": 0.25,
                "layout_merge_bboxes_mode": "small",
            },
        }

        response = self._post_start(client, test_task, request_body)

        assert response.status_code == 200
        data = response.json()
        assert data['status'] == 'processing'

        # The background task must receive the custom params as a keyword.
        mock_process_ocr.assert_called_once()
        call_kwargs = mock_process_ocr.call_args[1]

        assert 'pp_structure_params' in call_kwargs
        assert call_kwargs['pp_structure_params']['layout_detection_threshold'] == 0.15
        assert call_kwargs['pp_structure_params']['text_det_thresh'] == 0.25

    @patch('app.routers.tasks.process_task_ocr')
    def test_start_task_without_custom_params(self, mock_process_ocr, client, test_task, auth_headers, db_session):
        """Verify task can start without custom params (backward compatibility)"""
        request_body = {
            "use_dual_track": True,
            "language": "ch",
        }

        response = self._post_start(client, test_task, request_body)

        assert response.status_code == 200

        mock_process_ocr.assert_called_once()
        call_kwargs = mock_process_ocr.call_args[1]

        # pp_structure_params was not provided, so it is forwarded as None.
        assert call_kwargs['pp_structure_params'] is None

    @patch('app.routers.tasks.process_task_ocr')
    def test_start_task_with_partial_params(self, mock_process_ocr, client, test_task, auth_headers, db_session):
        """Verify partial custom params are accepted"""
        request_body = {
            "use_dual_track": True,
            "pp_structure_params": {
                # Only one param is set; the others are omitted on purpose.
                "layout_detection_threshold": 0.1,
            },
        }

        response = self._post_start(client, test_task, request_body)

        assert response.status_code == 200

        # Only the specified param may appear in what the OCR task receives.
        mock_process_ocr.assert_called_once()
        call_kwargs = mock_process_ocr.call_args[1]
        pp_params = call_kwargs['pp_structure_params']

        assert 'layout_detection_threshold' in pp_params
        assert 'layout_nms_threshold' not in pp_params

    def test_start_task_with_invalid_params(self, client, test_task, db_session):
        """Verify invalid params return 422 validation error"""
        request_body = {
            "use_dual_track": True,
            "pp_structure_params": {
                "layout_detection_threshold": 1.5,  # Invalid: above 1
            },
        }

        response = self._post_start(client, test_task, request_body)

        # Request validation should reject the body.
        assert response.status_code == 422
|
||||
|
||||
|
||||
class TestOpenAPISchema:
    """Checks that the generated OpenAPI document exposes the PP-StructureV3 params."""

    def test_openapi_schema_includes_ppstructure_params(self, client):
        """The OpenAPI components list all seven params and the options field."""
        resp = client.get("/openapi.json")
        assert resp.status_code == 200

        component_schemas = resp.json()['components']['schemas']

        # The params model must be published as its own component.
        assert 'PPStructureV3Params' in component_schemas
        documented = component_schemas['PPStructureV3Params']['properties']

        # Each of the seven tunable parameters must be documented.
        expected_fields = (
            'layout_detection_threshold',
            'layout_nms_threshold',
            'layout_merge_bboxes_mode',
            'layout_unclip_ratio',
            'text_det_thresh',
            'text_det_box_thresh',
            'text_det_unclip_ratio',
        )
        for field_name in expected_fields:
            assert field_name in documented

        # ProcessingOptions must reference the params object.
        option_properties = component_schemas['ProcessingOptions']['properties']
        assert 'pp_structure_params' in option_properties
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Exit with pytest's return code so a failing run is visible to the
    # calling shell or CI job instead of always exiting with status 0.
    raise SystemExit(pytest.main([__file__, '-v']))
|
||||
Reference in New Issue
Block a user