feat: simplify layout model selection and archive proposals
Changes:

- Replace PP-Structure 7-slider parameter UI with a simple 3-option layout model selector
- Add layout model mapping: chinese (PP-DocLayout-S), default (PubLayNet), cdla (see the sketch below)
- Add LayoutModelSelector component and zh-TW translations
- Fix "default" model behavior with a sentinel value for PubLayNet
- Add gap-filling service to improve OCR track coverage (illustrated below)
- Add PP-Structure debug utilities
- Archive completed/incomplete proposals:
  - add-ocr-track-gap-filling (complete)
  - fix-ocr-track-table-rendering (incomplete)
  - simplify-ppstructure-model-selection (22/25 tasks)
- Add new layout model tests, archive old PP-Structure param tests
- Update OpenSpec ocr-processing spec with layout model requirements

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
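For context, a minimal sketch of how the 3-option selector could map onto concrete layout models, assuming a sentinel value so "default" (PubLayNet) can be distinguished from an explicit model choice. The names `LAYOUT_MODEL_MAP`, `PUBLAYNET_SENTINEL`, and `resolve_layout_model` are illustrative assumptions, not the identifiers used in this commit:

```python
# Hypothetical sketch of the 3-option layout model mapping described above.
# Names are illustrative; the real component/service may differ.

LAYOUT_MODEL_MAP = {
    "chinese": "PP-DocLayout-S",
    "default": None,   # sentinel: resolved to PubLayNet downstream
    "cdla": "cdla",
}

PUBLAYNET_SENTINEL = "PubLayNet"


def resolve_layout_model(selection: str) -> str:
    """Map a UI selection to a concrete layout model name."""
    if selection not in LAYOUT_MODEL_MAP:
        raise ValueError(f"Unknown layout model selection: {selection}")
    model = LAYOUT_MODEL_MAP[selection]
    # The sentinel lets the backend tell "user picked default" apart
    # from "user picked an explicit model".
    return PUBLAYNET_SENTINEL if model is None else model
```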
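The gap-filling service is only described at this level of detail in the commit message. As a rough, hypothetical illustration (the bounding-box shape, `find_vertical_gaps`, and `min_gap` are all assumptions, not the actual service), such a pass might locate vertical spans of a page not covered by any detected OCR region and flag them for a second pass:

```python
# Hypothetical illustration of OCR-track gap filling: find vertical gaps
# between detected regions so a second pass can re-OCR them.
# The actual service in this commit may work differently.
from typing import List, Tuple

BBox = Tuple[float, float, float, float]  # (x0, y0, x1, y1)


def find_vertical_gaps(page_height: float, regions: List[BBox],
                       min_gap: float = 20.0) -> List[Tuple[float, float]]:
    """Return (top, bottom) spans of the page not covered by any region."""
    spans = sorted((y0, y1) for _, y0, _, y1 in regions)
    gaps, cursor = [], 0.0
    for y0, y1 in spans:
        if y0 - cursor >= min_gap:
            gaps.append((cursor, y0))   # uncovered span before this region
        cursor = max(cursor, y1)        # extend covered range downward
    if page_height - cursor >= min_gap:
        gaps.append((cursor, page_height))
    return gaps
```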
backend/tests/archived/test_ppstructure_params_e2e.py | 417 (new file)
1 file changed, 417 insertions(+)
@@ -0,0 +1,417 @@
"""
End-to-End tests for PP-StructureV3 parameter customization
Tests full workflow: Upload → Set params → Process → Verify results
"""

import pytest
import requests
import time

from pathlib import Path
from typing import Optional, Dict

# Test configuration
API_BASE_URL = "http://localhost:8000/api/v2"
TEST_USER_EMAIL = "ymirliu@panjit.com.tw"
TEST_USER_PASSWORD = "4RFV5tgb6yhn"

# Test documents (assuming these exist in demo_docs/)
TEST_DOCUMENTS = {
    'simple_text': 'demo_docs/simple_text.pdf',
    'complex_diagram': 'demo_docs/complex_diagram.pdf',
    'small_text': 'demo_docs/small_text.pdf',
}

class TestClient:
    """Helper class for API testing with authentication"""

    def __init__(self, base_url: str = API_BASE_URL):
        self.base_url = base_url
        self.session = requests.Session()
        self.access_token: Optional[str] = None

    def login(self, email: str, password: str) -> bool:
        """Login and get access token"""
        try:
            response = self.session.post(
                f"{self.base_url}/auth/login",
                json={"email": email, "password": password}
            )
            response.raise_for_status()
            data = response.json()
            self.access_token = data['access_token']
            self.session.headers.update({
                'Authorization': f'Bearer {self.access_token}'
            })
            return True
        except Exception as e:
            print(f"Login failed: {e}")
            return False

    def create_task(self, filename: str, file_type: str) -> Optional[str]:
        """Create a task and return task_id"""
        try:
            response = self.session.post(
                f"{self.base_url}/tasks",
                json={"filename": filename, "file_type": file_type}
            )
            response.raise_for_status()
            return response.json()['task_id']
        except Exception as e:
            print(f"Create task failed: {e}")
            return None

    def upload_file(self, task_id: str, file_path: Path) -> bool:
        """Upload file to task"""
        try:
            with open(file_path, 'rb') as f:
                files = {'file': (file_path.name, f, 'application/pdf')}
                response = self.session.post(
                    f"{self.base_url}/upload/{task_id}",
                    files=files
                )
            response.raise_for_status()
            return True
        except Exception as e:
            print(f"Upload failed: {e}")
            return False

    def start_task(self, task_id: str, pp_structure_params: Optional[Dict] = None) -> bool:
        """Start task processing with optional custom parameters"""
        try:
            body = {
                "use_dual_track": True,
                "language": "ch"
            }
            if pp_structure_params:
                body["pp_structure_params"] = pp_structure_params

            response = self.session.post(
                f"{self.base_url}/tasks/{task_id}/start",
                json=body
            )
            response.raise_for_status()
            return True
        except Exception as e:
            print(f"Start task failed: {e}")
            return False

    def get_task_status(self, task_id: str) -> Optional[Dict]:
        """Get task status"""
        try:
            response = self.session.get(f"{self.base_url}/tasks/{task_id}")
            response.raise_for_status()
            return response.json()
        except Exception as e:
            print(f"Get task status failed: {e}")
            return None

    def wait_for_completion(self, task_id: str, timeout: int = 300) -> Optional[Dict]:
        """Wait for task to complete (max timeout seconds)"""
        start_time = time.time()
        while time.time() - start_time < timeout:
            task = self.get_task_status(task_id)
            if task and task['status'] in ['completed', 'failed']:
                return task
            time.sleep(2)
        return None

    def download_result_json(self, task_id: str) -> Optional[Dict]:
        """Download and parse result JSON"""
        try:
            response = self.session.get(f"{self.base_url}/tasks/{task_id}/download/json")
            response.raise_for_status()
            return response.json()
        except Exception as e:
            print(f"Download result failed: {e}")
            return None


@pytest.fixture(scope="module")
def client():
    """Create authenticated test client"""
    client = TestClient()
    if not client.login(TEST_USER_EMAIL, TEST_USER_PASSWORD):
        pytest.skip("Authentication failed - check credentials or server")
    return client
@pytest.mark.e2e
class TestPPStructureParamsE2E:
    """End-to-end tests for PP-StructureV3 parameter customization"""

    def test_default_parameters_workflow(self, client: TestClient):
        """Test complete workflow with default parameters"""
        # Find a test document
        test_doc = None
        for doc_path in TEST_DOCUMENTS.values():
            if Path(doc_path).exists():
                test_doc = Path(doc_path)
                break

        if not test_doc:
            pytest.skip("No test documents found")

        # Step 1: Create task
        task_id = client.create_task(test_doc.name, "application/pdf")
        assert task_id is not None, "Failed to create task"
        print(f"✓ Created task: {task_id}")

        # Step 2: Upload file
        success = client.upload_file(task_id, test_doc)
        assert success, "Failed to upload file"
        print(f"✓ Uploaded file: {test_doc.name}")

        # Step 3: Start processing (no custom params)
        success = client.start_task(task_id, pp_structure_params=None)
        assert success, "Failed to start task"
        print("✓ Started processing with default parameters")

        # Step 4: Wait for completion
        result = client.wait_for_completion(task_id, timeout=180)
        assert result is not None, "Task did not complete in time"
        assert result['status'] == 'completed', f"Task failed: {result.get('error_message')}"
        print(f"✓ Task completed in {result.get('processing_time_ms', 0) / 1000:.2f}s")

        # Step 5: Verify results
        result_json = client.download_result_json(task_id)
        assert result_json is not None, "Failed to download results"
        assert 'text_regions' in result_json or 'elements' in result_json
        print("✓ Results verified (default parameters)")

    def test_high_quality_preset_workflow(self, client: TestClient):
        """Test workflow with high-quality preset parameters"""
        # Find a test document
        test_doc = None
        for doc_path in TEST_DOCUMENTS.values():
            if Path(doc_path).exists():
                test_doc = Path(doc_path)
                break

        if not test_doc:
            pytest.skip("No test documents found")

        # High-quality preset
        high_quality_params = {
            "layout_detection_threshold": 0.1,
            "layout_nms_threshold": 0.15,
            "text_det_thresh": 0.1,
            "text_det_box_thresh": 0.2,
            "layout_merge_bboxes_mode": "small"
        }

        # Create and process task
        task_id = client.create_task(test_doc.name, "application/pdf")
        assert task_id is not None
        print(f"✓ Created task: {task_id}")

        client.upload_file(task_id, test_doc)
        print(f"✓ Uploaded file: {test_doc.name}")

        # Start with custom parameters
        success = client.start_task(task_id, pp_structure_params=high_quality_params)
        assert success, "Failed to start task with custom params"
        print("✓ Started processing with HIGH-QUALITY preset")

        # Wait for completion
        result = client.wait_for_completion(task_id, timeout=180)
        assert result is not None, "Task did not complete in time"
        assert result['status'] == 'completed', f"Task failed: {result.get('error_message')}"
        print(f"✓ Task completed in {result.get('processing_time_ms', 0) / 1000:.2f}s")

        # Verify results
        result_json = client.download_result_json(task_id)
        assert result_json is not None
        print("✓ Results verified (high-quality preset)")

    def test_fast_preset_workflow(self, client: TestClient):
        """Test workflow with fast preset parameters"""
        test_doc = None
        for doc_path in TEST_DOCUMENTS.values():
            if Path(doc_path).exists():
                test_doc = Path(doc_path)
                break

        if not test_doc:
            pytest.skip("No test documents found")

        # Fast preset
        fast_params = {
            "layout_detection_threshold": 0.3,
            "layout_nms_threshold": 0.3,
            "text_det_thresh": 0.3,
            "text_det_box_thresh": 0.4,
            "layout_merge_bboxes_mode": "large"
        }

        # Create and process task
        task_id = client.create_task(test_doc.name, "application/pdf")
        assert task_id is not None
        print(f"✓ Created task: {task_id}")

        client.upload_file(task_id, test_doc)
        print(f"✓ Uploaded file: {test_doc.name}")

        # Start with fast parameters
        success = client.start_task(task_id, pp_structure_params=fast_params)
        assert success
        print("✓ Started processing with FAST preset")

        # Wait for completion
        result = client.wait_for_completion(task_id, timeout=180)
        assert result is not None
        assert result['status'] == 'completed'
        print(f"✓ Task completed in {result.get('processing_time_ms', 0) / 1000:.2f}s")

        # Verify results
        result_json = client.download_result_json(task_id)
        assert result_json is not None
        print("✓ Results verified (fast preset)")
    def test_compare_default_vs_custom_params(self, client: TestClient):
        """Compare results between default and custom parameters"""
        test_doc = None
        for doc_path in TEST_DOCUMENTS.values():
            if Path(doc_path).exists():
                test_doc = Path(doc_path)
                break

        if not test_doc:
            pytest.skip("No test documents found")

        print("\n=== Comparing Default vs Custom Parameters ===")
        print(f"Document: {test_doc.name}\n")

        # Test 1: Default parameters
        task_id_default = client.create_task(test_doc.name, "application/pdf")
        client.upload_file(task_id_default, test_doc)
        client.start_task(task_id_default, pp_structure_params=None)

        result_default = client.wait_for_completion(task_id_default, timeout=180)
        assert result_default and result_default['status'] == 'completed'

        result_json_default = client.download_result_json(task_id_default)
        time_default = result_default['processing_time_ms'] / 1000

        # Count elements
        elements_default = 0
        if 'text_regions' in result_json_default:
            elements_default = len(result_json_default['text_regions'])
        elif 'elements' in result_json_default:
            elements_default = len(result_json_default['elements'])

        print("DEFAULT PARAMS:")
        print(f"  Processing time: {time_default:.2f}s")
        print(f"  Elements detected: {elements_default}")

        # Test 2: High-quality parameters
        custom_params = {
            "layout_detection_threshold": 0.15,
            "text_det_thresh": 0.15
        }

        task_id_custom = client.create_task(test_doc.name, "application/pdf")
        client.upload_file(task_id_custom, test_doc)
        client.start_task(task_id_custom, pp_structure_params=custom_params)

        result_custom = client.wait_for_completion(task_id_custom, timeout=180)
        assert result_custom and result_custom['status'] == 'completed'

        result_json_custom = client.download_result_json(task_id_custom)
        time_custom = result_custom['processing_time_ms'] / 1000

        # Count elements
        elements_custom = 0
        if 'text_regions' in result_json_custom:
            elements_custom = len(result_json_custom['text_regions'])
        elif 'elements' in result_json_custom:
            elements_custom = len(result_json_custom['elements'])

        print("\nCUSTOM PARAMS (lower thresholds):")
        print(f"  Processing time: {time_custom:.2f}s")
        print(f"  Elements detected: {elements_custom}")

        print("\nDIFFERENCE:")
        print(f"  Time delta: {abs(time_custom - time_default):.2f}s")
        print(f"  Element delta: {abs(elements_custom - elements_default)} elements")
        print(f"  Custom detected {elements_custom - elements_default:+d} elements relative to default")

        # Both should complete successfully
        assert result_default['status'] == 'completed'
        assert result_custom['status'] == 'completed'

        # Lower thresholds are expected (though not guaranteed) to detect more elements
        print("\n✓ Comparison complete")
@pytest.mark.e2e
@pytest.mark.slow
class TestPPStructureParamsPerformance:
    """Performance tests for PP-StructureV3 parameters"""

    def test_parameter_initialization_overhead(self, client: TestClient):
        """Measure overhead of creating engine with custom parameters"""
        test_doc = None
        for doc_path in TEST_DOCUMENTS.values():
            if Path(doc_path).exists():
                test_doc = Path(doc_path)
                break

        if not test_doc:
            pytest.skip("No test documents found")

        print("\n=== Testing Parameter Initialization Overhead ===")

        # Measure default (cached engine)
        times_default = []
        for i in range(3):
            task_id = client.create_task(test_doc.name, "application/pdf")
            client.upload_file(task_id, test_doc)

            start = time.time()
            client.start_task(task_id, pp_structure_params=None)
            result = client.wait_for_completion(task_id, timeout=180)
            end = time.time()

            if result and result['status'] == 'completed':
                times_default.append(end - start)
                print(f"  Default run {i+1}: {end - start:.2f}s")

        avg_default = sum(times_default) / len(times_default) if times_default else 0

        # Measure custom params (no cache)
        times_custom = []
        custom_params = {"layout_detection_threshold": 0.15}

        for i in range(3):
            task_id = client.create_task(test_doc.name, "application/pdf")
            client.upload_file(task_id, test_doc)

            start = time.time()
            client.start_task(task_id, pp_structure_params=custom_params)
            result = client.wait_for_completion(task_id, timeout=180)
            end = time.time()

            if result and result['status'] == 'completed':
                times_custom.append(end - start)
                print(f"  Custom run {i+1}: {end - start:.2f}s")

        avg_custom = sum(times_custom) / len(times_custom) if times_custom else 0

        print("\nRESULTS:")
        print(f"  Average time (default): {avg_default:.2f}s")
        print(f"  Average time (custom): {avg_custom:.2f}s")

        # Overhead should stay under 50%; guard against division by zero
        # when no default runs completed.
        if avg_default > 0:
            overhead_percent = (avg_custom - avg_default) / avg_default * 100
            print(f"  Overhead: {avg_custom - avg_default:.2f}s ({overhead_percent:.1f}%)")
            assert overhead_percent < 50, f"Custom parameter overhead too high: {overhead_percent:.1f}%"
            print("✓ Overhead within acceptable range")
if __name__ == '__main__':
    # Run with: pytest backend/tests/archived/test_ppstructure_params_e2e.py -v -s -m e2e
    pytest.main([__file__, '-v', '-s', '-m', 'e2e'])