first
This commit is contained in:
286
backend/test_services.py
Normal file
286
backend/test_services.py
Normal file
@@ -0,0 +1,286 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Tool_OCR - Service Layer Integration Test
|
||||
Tests core services before API implementation
|
||||
"""
|
||||
|
||||
import sys
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
|
||||
# Add backend to path
|
||||
sys.path.insert(0, str(Path(__file__).parent))
|
||||
|
||||
from app.core.config import settings
|
||||
from app.core.database import engine, SessionLocal, Base
|
||||
from app.models.user import User
|
||||
from app.models.ocr import OCRBatch, OCRFile, OCRResult, FileStatus, BatchStatus
|
||||
from app.services.preprocessor import DocumentPreprocessor
|
||||
from app.services.ocr_service import OCRService
|
||||
from app.services.pdf_generator import PDFGenerator
|
||||
from app.services.file_manager import FileManager
|
||||
|
||||
|
||||
# Configure logging: INFO and above, each record stamped with time,
# logger name and level.
logging.basicConfig(
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)

# Module-level logger shared by every test in this script.
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ServiceTester:
    """Service layer integration tester.

    Holds one database session and one instance of each service, runs five
    smoke tests against them, and records a pass/fail flag per test in
    ``test_results``.
    """

    def __init__(self):
        """Open a database session, build the services, reset the results."""
        self.db = SessionLocal()
        self.preprocessor = DocumentPreprocessor()
        self.ocr_service = OCRService()
        self.pdf_generator = PDFGenerator()
        self.file_manager = FileManager()
        # One flag per test; only the matching test flips it to True.
        self.test_results = {
            "database": False,
            "preprocessor": False,
            "ocr_engine": False,
            "pdf_generator": False,
            "file_manager": False,
        }

    def cleanup(self):
        """Close the database session opened in ``__init__``."""
        self.db.close()

    def test_database_connection(self) -> bool:
        """Test 1: Database connection and models.

        Executes ``SELECT 1``, verifies that every required table exists and
        flushes a throwaway ``User`` row, which is rolled back afterwards.

        Returns:
            True if every step succeeded, False otherwise.
        """
        try:
            logger.info("=" * 80)
            logger.info("TEST 1: Database Connection")
            logger.info("=" * 80)

            # Test connection
            from sqlalchemy import text
            self.db.execute(text("SELECT 1"))
            logger.info("✓ Database connection successful")

            # Check if tables exist
            from sqlalchemy import inspect
            tables = inspect(engine).get_table_names()

            required_tables = [
                'paddle_ocr_users',
                'paddle_ocr_batches',
                'paddle_ocr_files',
                'paddle_ocr_results',
                'paddle_ocr_export_rules',
                'paddle_ocr_translation_configs'
            ]

            missing_tables = [t for t in required_tables if t not in tables]
            if missing_tables:
                logger.error(f"✗ Missing tables: {missing_tables}")
                return False

            logger.info(f"✓ All required tables exist: {', '.join(required_tables)}")

            # Test creating a test user (will rollback)
            test_user = User(
                username=f"test_user_{datetime.now().timestamp()}",
                email=f"test_{datetime.now().timestamp()}@example.com",
                password_hash="test_hash_123",
                is_active=True,
                is_admin=False
            )
            self.db.add(test_user)
            # flush() sends the INSERT so the row gets an ID, without committing.
            self.db.flush()
            logger.info(f"✓ Test user created with ID: {test_user.id}")

            self.db.rollback()  # Don't actually save test user
            logger.info("✓ Database test completed successfully\n")

            self.test_results["database"] = True
            return True

        except Exception as e:
            # Discard whatever the failed step left pending so the shared
            # session is not left holding a half-finished transaction.
            # Guarded so a broken connection cannot abort the whole run.
            try:
                self.db.rollback()
            except Exception as rollback_error:
                logger.error(f"✗ Rollback after failed database test also failed: {rollback_error}")
            logger.error(f"✗ Database test failed: {e}\n")
            return False

    def test_preprocessor(self) -> bool:
        """Test 2: Document preprocessor.

        Logs the supported formats and the configured maximum upload size.

        Returns:
            True if the values could be read and logged, False otherwise.
        """
        # NOTE(review): this test never calls self.preprocessor; it only logs
        # a hard-coded format list and settings.max_upload_size.
        try:
            logger.info("=" * 80)
            logger.info("TEST 2: Document Preprocessor")
            logger.info("=" * 80)

            # Check supported formats
            formats = ['.png', '.jpg', '.jpeg', '.pdf']
            logger.info(f"✓ Supported formats: {formats}")

            # Check max file size (the division by 1024 * 1024 presumably
            # converts bytes to MB - confirm the unit of max_upload_size)
            max_size_mb = settings.max_upload_size / (1024 * 1024)
            logger.info(f"✓ Max upload size: {max_size_mb} MB")

            logger.info("✓ Preprocessor initialized successfully\n")

            self.test_results["preprocessor"] = True
            return True

        except Exception as e:
            logger.error(f"✗ Preprocessor test failed: {e}\n")
            return False

    def test_ocr_engine(self) -> bool:
        """Test 3: OCR engine initialization.

        Requests the ``'ch'`` OCR engine and the structure engine from the
        OCR service and logs the service's confidence threshold.

        Returns:
            True if both engines were obtained without error, False otherwise.
        """
        try:
            logger.info("=" * 80)
            logger.info("TEST 3: OCR Engine (PaddleOCR)")
            logger.info("=" * 80)

            # Test OCR engine lazy loading; only success of the call matters,
            # so the returned engine is not kept.
            logger.info("Initializing PaddleOCR engine (this may take a moment)...")
            self.ocr_service.get_ocr_engine(lang='ch')
            logger.info("✓ PaddleOCR engine initialized for Chinese")

            # Test structure engine
            logger.info("Initializing PP-Structure engine...")
            self.ocr_service.get_structure_engine()
            logger.info("✓ PP-Structure engine initialized")

            # Check confidence threshold
            logger.info(f"✓ Confidence threshold: {self.ocr_service.confidence_threshold}")

            logger.info("✓ OCR engine test completed successfully\n")

            self.test_results["ocr_engine"] = True
            return True

        except Exception as e:
            logger.error(f"✗ OCR engine test failed: {e}")
            logger.error(" Make sure PaddleOCR models are downloaded:")
            logger.error(" - PaddleOCR will auto-download on first use (~900MB)")
            logger.error(" - Requires stable internet connection")
            logger.error("")
            return False

    def test_pdf_generator(self) -> bool:
        """Test 4: PDF generator.

        Logs whether Pandoc is available and lists the available templates.
        A missing Pandoc only produces a warning; it does not fail the test.

        Returns:
            True if both queries ran without error, False otherwise.
        """
        try:
            logger.info("=" * 80)
            logger.info("TEST 4: PDF Generator")
            logger.info("=" * 80)

            # Check Pandoc availability
            if self.pdf_generator.check_pandoc_available():
                logger.info("✓ Pandoc is installed and available")
            else:
                logger.warning("⚠ Pandoc not found - will use WeasyPrint fallback")

            # Check available templates
            templates = self.pdf_generator.get_available_templates()
            logger.info(f"✓ Available CSS templates: {', '.join(templates.keys())}")

            logger.info("✓ PDF generator test completed successfully\n")

            self.test_results["pdf_generator"] = True
            return True

        except Exception as e:
            logger.error(f"✗ PDF generator test failed: {e}\n")
            return False

    def test_file_manager(self) -> bool:
        """Test 5: File manager.

        Ensures the upload directory exists, creates a directory for a test
        batch, checks its expected subdirectories and removes the test batch
        directory again, whether or not the checks passed.

        Returns:
            True if every expected subdirectory exists, False otherwise.
        """
        try:
            logger.info("=" * 80)
            logger.info("TEST 5: File Manager")
            logger.info("=" * 80)

            # Check upload directory
            upload_dir = Path(settings.upload_dir)
            if upload_dir.exists():
                logger.info(f"✓ Upload directory exists: {upload_dir}")
            else:
                upload_dir.mkdir(parents=True, exist_ok=True)
                logger.info(f"✓ Created upload directory: {upload_dir}")

            import shutil

            # Test batch directory creation
            test_batch_id = 99999  # Use high number to avoid conflicts
            batch_dir = self.file_manager.create_batch_directory(test_batch_id)
            try:
                logger.info(f"✓ Created test batch directory: {batch_dir}")

                # Check subdirectories
                subdirs = ["inputs", "outputs/markdown", "outputs/json", "outputs/images", "exports"]
                for subdir in subdirs:
                    if (batch_dir / subdir).exists():
                        logger.info(f" ✓ {subdir}")
                    else:
                        logger.error(f" ✗ Missing: {subdir}")
                        return False
            finally:
                # Cleanup test directory. Runs on the early-return path too,
                # so a failed check does not leave the test batch behind.
                # Remove only the test batch's own directory: its parent may
                # hold other batches (assumes create_batch_directory returns
                # the batch root, as the checks above do).
                shutil.rmtree(batch_dir, ignore_errors=True)
                logger.info("✓ Cleaned up test batch directory")

            logger.info("✓ File manager test completed successfully\n")

            self.test_results["file_manager"] = True
            return True

        except Exception as e:
            logger.error(f"✗ File manager test failed: {e}\n")
            return False

    def run_all_tests(self):
        """Run all service tests and log a summary.

        The session is closed via ``cleanup()`` when the run ends, even if a
        test raises.

        Returns:
            0 if every test passed, 1 otherwise (used as process exit code).
        """
        logger.info("\n" + "=" * 80)
        logger.info("Tool_OCR Service Layer Integration Test")
        logger.info("=" * 80 + "\n")

        try:
            # Run tests in order; each records its own result flag.
            self.test_database_connection()
            self.test_preprocessor()
            self.test_ocr_engine()
            self.test_pdf_generator()
            self.test_file_manager()

            # Print summary
            logger.info("=" * 80)
            logger.info("TEST SUMMARY")
            logger.info("=" * 80)

            total_tests = len(self.test_results)
            passed_tests = sum(1 for result in self.test_results.values() if result)

            for test_name, result in self.test_results.items():
                status = "✓ PASS" if result else "✗ FAIL"
                logger.info(f"{status:8} - {test_name}")

            logger.info("-" * 80)
            logger.info(f"Total: {passed_tests}/{total_tests} tests passed")

            if passed_tests == total_tests:
                logger.info("\n🎉 All service layer tests passed! Ready to implement API endpoints.")
                return 0

            logger.error(f"\n❌ {total_tests - passed_tests} test(s) failed. Please fix issues before proceeding.")
            return 1

        finally:
            self.cleanup()
|
||||
|
||||
|
||||
def main():
    """Main test entry point.

    Builds a ``ServiceTester``, runs every test and exits the process with
    the value ``run_all_tests()`` returns.
    """
    sys.exit(ServiceTester().run_all_tests())


# Run the suite only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user