first
This commit is contained in:
520
backend/tests/test_file_manager.py
Normal file
520
backend/tests/test_file_manager.py
Normal file
@@ -0,0 +1,520 @@
|
||||
"""
|
||||
Tool_OCR - File Manager Unit Tests
|
||||
Tests for app/services/file_manager.py
|
||||
"""
|
||||
|
||||
import pytest
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
from unittest.mock import Mock, patch, MagicMock
|
||||
from datetime import datetime, timedelta
|
||||
from io import BytesIO
|
||||
|
||||
from fastapi import UploadFile
|
||||
|
||||
from app.services.file_manager import FileManager, FileManagementError
|
||||
from app.models.ocr import OCRBatch, OCRFile, FileStatus, BatchStatus
|
||||
|
||||
|
||||
@pytest.fixture
def file_manager(temp_dir):
    """Provide a FileManager configured against the temporary directory.

    ``app.services.file_manager.settings`` is replaced by a mock carrying the
    upload directory, the maximum upload size and the allowed extensions.
    The patch stays active for the whole test, not just while the manager is
    constructed, so the mocked values are still in place if FileManager reads
    them after construction.

    Args:
        temp_dir: Fixture supplying the directory used as the upload root.

    Yields:
        A freshly constructed ``FileManager``.
    """
    with patch('app.services.file_manager.settings') as mock_settings:
        mock_settings.upload_dir = str(temp_dir)
        mock_settings.max_upload_size = 20 * 1024 * 1024  # 20MB
        mock_settings.allowed_extensions_list = ['png', 'jpg', 'jpeg', 'pdf']
        # Yield instead of return: returning would leave the ``with`` block
        # and undo the patch before the requesting test starts running.
        yield FileManager()
|
||||
|
||||
|
||||
@pytest.fixture
def mock_upload_file():
    """Return a factory that builds in-memory ``UploadFile`` objects.

    The factory is ``create_file(filename="test.png", content=b"test content",
    size=None)``. ``size`` defaults to ``len(content)`` and is stored on the
    returned upload's ``size`` attribute, so a test may advertise a size that
    differs from the real payload.
    """
    def create_file(filename="test.png", content=b"test content", size=None):
        if size is None:
            size = len(content)

        upload_file = UploadFile(filename=filename, file=BytesIO(content))
        # Set as an attribute after construction rather than passed to the
        # constructor. NOTE(review): whether the constructor accepts ``size``
        # depends on the installed Starlette/FastAPI version - confirm before
        # changing this.
        upload_file.size = size
        return upload_file

    return create_file
|
||||
|
||||
|
||||
@pytest.fixture
def mock_db():
    """Stand-in database session backed by a plain ``Mock``."""
    session = Mock()
    return session
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestFileManagerInit:
    """Checks performed on a freshly constructed FileManager."""

    def test_init(self, file_manager, temp_dir):
        """The manager has a preprocessor and an existing upload root."""
        assert file_manager is not None
        assert file_manager.preprocessor is not None

        upload_root = file_manager.base_upload_dir
        assert upload_root == temp_dir
        assert upload_root.exists()
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestBatchDirectoryManagement:
    """Creation and lookup of per-batch directory trees."""

    def test_create_batch_directory(self, file_manager):
        """Creating a batch directory also creates its expected sub-folders."""
        created = file_manager.create_batch_directory(123)

        assert created.exists()
        expected_subdirs = (
            ("inputs",),
            ("outputs", "markdown"),
            ("outputs", "json"),
            ("outputs", "images"),
            ("exports",),
        )
        for parts in expected_subdirs:
            assert created.joinpath(*parts).exists()

    def test_create_batch_directory_multiple_times(self, file_manager):
        """Creating the same batch directory twice yields the same path."""
        first, second = (
            file_manager.create_batch_directory(123),
            file_manager.create_batch_directory(123),
        )

        assert first == second
        assert first.exists()

    def test_get_batch_directory(self, file_manager):
        """The batch path is ``<upload root>/batches/<batch id>``."""
        resolved = file_manager.get_batch_directory(456)

        assert resolved == file_manager.base_upload_dir / "batches" / "456"
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestUploadValidation:
    """Exercise ``FileManager.validate_upload`` with good and bad uploads."""

    def test_validate_upload_valid_file(self, file_manager, mock_upload_file):
        """A non-empty PNG upload is accepted and carries no error."""
        candidate = mock_upload_file("test.png", b"valid content")

        ok, message = file_manager.validate_upload(candidate)

        assert ok is True
        assert message is None

    def test_validate_upload_empty_filename(self, file_manager):
        """An upload whose filename is empty is rejected."""
        nameless = Mock(filename="")

        ok, message = file_manager.validate_upload(nameless)

        assert ok is False
        assert "文件名不能為空" in message

    def test_validate_upload_empty_file(self, file_manager, mock_upload_file):
        """An upload with zero bytes of content is rejected."""
        candidate = mock_upload_file("test.png", b"")

        ok, message = file_manager.validate_upload(candidate)

        assert ok is False
        assert "文件為空" in message

    @pytest.mark.skip(reason="File size mock is complex with UploadFile, covered by integration test")
    def test_validate_upload_file_too_large(self, file_manager):
        """Placeholder for the size-limit check (skipped)."""
        # Deliberately empty: the size-limit path is exercised by integration
        # tests that can create real files, because faking UploadFile's size
        # behaviour is awkward.
        pass

    def test_validate_upload_unsupported_format(self, file_manager, mock_upload_file):
        """A ``.txt`` upload is rejected as an unsupported format."""
        candidate = mock_upload_file("test.txt", b"text content")

        ok, message = file_manager.validate_upload(candidate)

        assert ok is False
        assert "不支持的文件格式" in message

    def test_validate_upload_supported_formats(self, file_manager, mock_upload_file):
        """Each of the png/jpg/jpeg/pdf filenames passes validation."""
        for filename in ("test.png", "test.jpg", "test.jpeg", "test.pdf"):
            candidate = mock_upload_file(filename, b"content")
            is_valid, _ = file_manager.validate_upload(candidate)
            assert is_valid is True, f"Failed for {filename}"
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestFileSaving:
    """Exercise ``FileManager.save_upload``."""

    def test_save_upload_success(self, file_manager, mock_upload_file):
        """A saved upload lands in ``inputs`` with its content intact."""
        file_manager.create_batch_directory(1)
        payload = b"test content"

        stored_path, reported_name = file_manager.save_upload(
            mock_upload_file("test.png", payload), 1
        )

        assert stored_path.exists()
        assert stored_path.read_bytes() == payload
        assert reported_name == "test.png"
        assert stored_path.parent.name == "inputs"

    def test_save_upload_unique_filename(self, file_manager, mock_upload_file):
        """Two uploads sharing a filename are stored at different paths."""
        file_manager.create_batch_directory(1)
        first_upload = mock_upload_file("test.png", b"content1")
        second_upload = mock_upload_file("test.png", b"content2")

        first_path, _ = file_manager.save_upload(first_upload, 1)
        second_path, _ = file_manager.save_upload(second_upload, 1)

        assert first_path != second_path
        assert first_path.exists() and second_path.exists()
        assert first_path.read_bytes() == b"content1"
        assert second_path.read_bytes() == b"content2"

    def test_save_upload_validation_failure(self, file_manager, mock_upload_file):
        """Saving an empty file with validation on raises FileManagementError."""
        file_manager.create_batch_directory(1)
        empty_upload = mock_upload_file("test.png", b"")  # fails validation

        with pytest.raises(FileManagementError) as exc_info:
            file_manager.save_upload(empty_upload, 1, validate=True)

        assert "文件為空" in str(exc_info.value)

    def test_save_upload_skip_validation(self, file_manager, mock_upload_file):
        """With ``validate=False`` an empty ``.txt`` upload is still written."""
        file_manager.create_batch_directory(1)
        empty_upload = mock_upload_file("test.txt", b"")

        stored_path, _ = file_manager.save_upload(empty_upload, 1, validate=False)

        assert stored_path.exists()

    def test_save_upload_preserves_extension(self, file_manager, mock_upload_file):
        """The stored file keeps the ``.pdf`` suffix of the upload."""
        file_manager.create_batch_directory(1)
        pdf_upload = mock_upload_file("document.pdf", b"pdf content")

        stored_path, _ = file_manager.save_upload(pdf_upload, 1)

        assert stored_path.suffix == ".pdf"
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestValidateSavedFile:
    """Exercise ``FileManager.validate_saved_file``."""

    @patch.object(FileManager, '__init__', lambda self: None)
    def test_validate_saved_file(self, sample_image_path):
        """The sample image validates as a PNG with no error."""
        from app.services.preprocessor import DocumentPreprocessor

        # __init__ is patched to a no-op above, so the preprocessor has to be
        # attached by hand.
        bare_manager = FileManager()
        bare_manager.preprocessor = DocumentPreprocessor()

        # The call returns a 3-tuple: (is_valid, file_format, error_message).
        result = bare_manager.validate_saved_file(sample_image_path)
        ok, detected_format, message = result

        assert ok is True
        assert detected_format == 'png'
        assert message is None
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestBatchCreation:
    """Tests for ``FileManager.create_batch`` using a mocked DB session."""

    def test_create_batch(self, file_manager, mock_db):
        """Creating a batch adds a record to the session and commits."""
        user_id = 1

        mock_db.add = Mock()
        mock_db.commit = Mock()
        # Emulate the database assigning a primary key on refresh.
        mock_db.refresh = Mock(side_effect=lambda x: setattr(x, 'id', 123))

        # Directory creation is patched out; only the session calls are checked.
        with patch.object(FileManager, 'create_batch_directory'):
            file_manager.create_batch(mock_db, user_id)

        assert mock_db.add.called
        assert mock_db.commit.called

    def test_create_batch_with_custom_name(self, file_manager, mock_db):
        """The object handed to ``db.add`` carries the requested batch name."""
        user_id = 1
        batch_name = "My Custom Batch"

        mock_db.add = Mock()
        mock_db.commit = Mock()
        mock_db.refresh = Mock(side_effect=lambda x: setattr(x, 'id', 123))

        with patch.object(FileManager, 'create_batch_directory'):
            file_manager.create_batch(mock_db, user_id, batch_name)

        # First positional argument of the recorded db.add(...) call.
        added_record = mock_db.add.call_args[0][0]
        # Compare the value itself; a bare hasattr() check would pass for any
        # name.
        assert added_record.batch_name == batch_name
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestGetFilePaths:
    """Exercise ``FileManager.get_file_paths``."""

    def test_get_file_paths(self, file_manager):
        """The returned mapping has every expected key; images_dir names the file."""
        file_id = 42

        paths = file_manager.get_file_paths(1, file_id)

        for key in (
            "input_dir",
            "output_dir",
            "markdown_dir",
            "json_dir",
            "images_dir",
            "export_dir",
        ):
            assert key in paths

        # The images directory path must contain the file id.
        assert str(file_id) in str(paths["images_dir"])
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestCleanupExpiredBatches:
    """Exercise ``FileManager.cleanup_expired_batches`` with a mocked session."""

    def test_cleanup_expired_batches(self, file_manager, mock_db, temp_dir):
        """An expired batch loses its directory and is deleted from the session."""
        stale = Mock(id=1, created_at=datetime.utcnow() - timedelta(hours=48))

        stale_dir = file_manager.create_batch_directory(1)
        assert stale_dir.exists()

        # The query chain hands back exactly one expired batch.
        mock_db.query.return_value.filter.return_value.all.return_value = [stale]
        mock_db.delete = Mock()
        mock_db.commit = Mock()

        removed = file_manager.cleanup_expired_batches(mock_db, retention_hours=24)

        assert removed == 1
        assert not stale_dir.exists()
        mock_db.delete.assert_called_once_with(stale)
        mock_db.commit.assert_called_once()

    def test_cleanup_no_expired_batches(self, file_manager, mock_db):
        """With an empty query result nothing is cleaned."""
        mock_db.query.return_value.filter.return_value.all.return_value = []

        removed = file_manager.cleanup_expired_batches(mock_db, retention_hours=24)

        assert removed == 0

    def test_cleanup_handles_missing_directory(self, file_manager, mock_db):
        """A batch whose directory was never created still counts as cleaned."""
        # No directory is created for id 999 in this test.
        orphan = Mock(id=999, created_at=datetime.utcnow() - timedelta(hours=48))

        mock_db.query.return_value.filter.return_value.all.return_value = [orphan]
        mock_db.delete = Mock()
        mock_db.commit = Mock()

        # Must complete without raising.
        removed = file_manager.cleanup_expired_batches(mock_db, retention_hours=24)

        assert removed == 1
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestFileOwnershipVerification:
    """Exercise ``FileManager.verify_file_ownership`` with a mocked session."""

    def test_verify_file_ownership_success(self, file_manager, mock_db):
        """When the query finds a batch, ownership is confirmed."""
        owned_batch = Mock()
        mock_db.query.return_value.filter.return_value.first.return_value = owned_batch

        verdict = file_manager.verify_file_ownership(mock_db, 1, 123)

        assert verdict is True

    def test_verify_file_ownership_failure(self, file_manager, mock_db):
        """When the query finds nothing, ownership is denied."""
        # ``None`` from the query stands for "no batch for this user".
        mock_db.query.return_value.filter.return_value.first.return_value = None

        verdict = file_manager.verify_file_ownership(mock_db, 1, 123)

        assert verdict is False
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestBatchStatistics:
    """Exercise ``FileManager.get_batch_statistics`` with mocked batches."""

    def test_get_batch_statistics(self, file_manager, mock_db):
        """Statistics of a completed two-file batch are reported correctly."""
        batch_id = 1

        # Two files totalling 3000 bytes.
        attached_files = [Mock(file_size=1000), Mock(file_size=2000)]

        finished_batch = Mock(
            id=batch_id,
            batch_name="Test Batch",
            status=BatchStatus.COMPLETED,
            total_files=2,
            completed_files=2,
            failed_files=0,
            progress_percentage=100.0,
            files=attached_files,
            created_at=datetime(2025, 1, 1, 10, 0, 0),
            started_at=datetime(2025, 1, 1, 10, 1, 0),
            completed_at=datetime(2025, 1, 1, 10, 5, 0),
        )
        mock_db.query.return_value.filter.return_value.first.return_value = finished_batch

        stats = file_manager.get_batch_statistics(mock_db, batch_id)

        assert stats['batch_id'] == batch_id
        assert stats['batch_name'] == "Test Batch"
        assert stats['total_files'] == 2
        assert stats['total_file_size'] == 3000
        assert stats['total_file_size_mb'] == 0.0  # Small files
        assert stats['processing_time'] == 240.0  # 4 minutes
        assert stats['pending_files'] == 0

    def test_get_batch_statistics_not_found(self, file_manager, mock_db):
        """An unknown batch id yields an empty dict."""
        mock_db.query.return_value.filter.return_value.first.return_value = None

        stats = file_manager.get_batch_statistics(mock_db, 999)

        assert stats == {}

    def test_get_batch_statistics_no_completion_time(self, file_manager, mock_db):
        """A batch with no ``completed_at`` has no processing time."""
        running_batch = Mock(
            id=1,
            batch_name="Pending Batch",
            status=BatchStatus.PROCESSING,
            total_files=5,
            completed_files=2,
            failed_files=0,
            progress_percentage=40.0,
            files=[],
            created_at=datetime(2025, 1, 1),
            started_at=datetime(2025, 1, 1),
            completed_at=None,
        )
        mock_db.query.return_value.filter.return_value.first.return_value = running_batch

        stats = file_manager.get_batch_statistics(mock_db, 1)

        assert stats['processing_time'] is None
        assert stats['pending_files'] == 3
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestEdgeCases:
    """Edge cases: missing directories, mixed cleanup, Unicode filenames."""

    def test_save_upload_creates_parent_directories(self, file_manager, mock_upload_file):
        """Saving into a batch with no directory yet still produces the file."""
        # No create_batch_directory() call for batch 999 before saving.
        stored_path, _ = file_manager.save_upload(
            mock_upload_file("test.png", b"content"), 999
        )

        assert stored_path.exists()
        assert stored_path.parent.exists()

    def test_cleanup_continues_on_error(self, file_manager, mock_db):
        """Cleanup of two expired batches, one lacking a directory, cleans some."""
        without_dir = Mock(id=1, created_at=datetime.utcnow() - timedelta(hours=48))
        with_dir = Mock(id=2, created_at=datetime.utcnow() - timedelta(hours=48))

        # Only the second batch gets a directory on disk.
        file_manager.create_batch_directory(2)

        mock_db.query.return_value.filter.return_value.all.return_value = [
            without_dir,
            with_dir,
        ]
        mock_db.delete = Mock()
        mock_db.commit = Mock()

        # The call must not raise, and at least one batch must be cleaned.
        removed = file_manager.cleanup_expired_batches(mock_db, retention_hours=24)

        assert removed > 0

    def test_validate_upload_with_unicode_filename(self, file_manager, mock_upload_file):
        """A PNG upload with a non-ASCII filename passes validation."""
        candidate = mock_upload_file("測試文件.png", b"content")

        ok, _ = file_manager.validate_upload(candidate)

        assert ok is True

    def test_save_upload_preserves_unicode_filename(self, file_manager, mock_upload_file):
        """The original non-ASCII filename is returned unchanged after saving."""
        file_manager.create_batch_directory(1)
        candidate = mock_upload_file("中文文檔.pdf", b"content")

        stored_path, reported_name = file_manager.save_upload(candidate, 1)

        assert reported_name == "中文文檔.pdf"
        assert stored_path.exists()
|
||||
Reference in New Issue
Block a user