""" Tool_OCR - File Manager Unit Tests Tests for app/services/file_manager.py """ import pytest import shutil from pathlib import Path from unittest.mock import Mock, patch, MagicMock from datetime import datetime, timedelta from io import BytesIO from fastapi import UploadFile from app.services.file_manager import FileManager, FileManagementError from app.models.ocr import OCRBatch, OCRFile, FileStatus, BatchStatus @pytest.fixture def file_manager(temp_dir): """Create a FileManager instance with temp directory""" with patch('app.services.file_manager.settings') as mock_settings: mock_settings.upload_dir = str(temp_dir) mock_settings.max_upload_size = 20 * 1024 * 1024 # 20MB mock_settings.allowed_extensions_list = ['png', 'jpg', 'jpeg', 'pdf'] manager = FileManager() return manager @pytest.fixture def mock_upload_file(): """Create a mock UploadFile""" def create_file(filename="test.png", content=b"test content", size=None): file_obj = BytesIO(content) if size is None: size = len(content) upload_file = UploadFile(filename=filename, file=file_obj) # Set file size manually upload_file.file.seek(0, 2) # Seek to end upload_file.file.seek(0) # Reset return upload_file return create_file @pytest.fixture def mock_db(): """Create a mock database session""" return Mock() @pytest.mark.unit class TestFileManagerInit: """Test FileManager initialization""" def test_init(self, file_manager, temp_dir): """Test file manager initialization""" assert file_manager is not None assert file_manager.preprocessor is not None assert file_manager.base_upload_dir == temp_dir assert file_manager.base_upload_dir.exists() @pytest.mark.unit class TestBatchDirectoryManagement: """Test batch directory creation and management""" def test_create_batch_directory(self, file_manager): """Test creating batch directory structure""" batch_id = 123 batch_dir = file_manager.create_batch_directory(batch_id) assert batch_dir.exists() assert (batch_dir / "inputs").exists() assert (batch_dir / "outputs" / "markdown").exists() assert (batch_dir / "outputs" / "json").exists() assert (batch_dir / "outputs" / "images").exists() assert (batch_dir / "exports").exists() def test_create_batch_directory_multiple_times(self, file_manager): """Test creating same batch directory multiple times (should not error)""" batch_id = 123 batch_dir1 = file_manager.create_batch_directory(batch_id) batch_dir2 = file_manager.create_batch_directory(batch_id) assert batch_dir1 == batch_dir2 assert batch_dir1.exists() def test_get_batch_directory(self, file_manager): """Test getting batch directory path""" batch_id = 456 batch_dir = file_manager.get_batch_directory(batch_id) expected_path = file_manager.base_upload_dir / "batches" / "456" assert batch_dir == expected_path @pytest.mark.unit class TestUploadValidation: """Test file upload validation""" def test_validate_upload_valid_file(self, file_manager, mock_upload_file): """Test validation of valid upload""" upload = mock_upload_file("test.png", b"valid content") is_valid, error = file_manager.validate_upload(upload) assert is_valid is True assert error is None def test_validate_upload_empty_filename(self, file_manager): """Test validation with empty filename""" upload = Mock() upload.filename = "" is_valid, error = file_manager.validate_upload(upload) assert is_valid is False assert "文件名不能為空" in error def test_validate_upload_empty_file(self, file_manager, mock_upload_file): """Test validation of empty file""" upload = mock_upload_file("test.png", b"") is_valid, error = file_manager.validate_upload(upload) assert is_valid is False assert "文件為空" in error @pytest.mark.skip(reason="File size mock is complex with UploadFile, covered by integration test") def test_validate_upload_file_too_large(self, file_manager): """Test validation of file exceeding size limit""" # Note: This functionality is tested in integration tests where actual # files can be created. Mocking UploadFile's size behavior is complex. pass def test_validate_upload_unsupported_format(self, file_manager, mock_upload_file): """Test validation of unsupported file format""" upload = mock_upload_file("test.txt", b"text content") is_valid, error = file_manager.validate_upload(upload) assert is_valid is False assert "不支持的文件格式" in error def test_validate_upload_supported_formats(self, file_manager, mock_upload_file): """Test validation of all supported formats""" supported_formats = ["test.png", "test.jpg", "test.jpeg", "test.pdf"] for filename in supported_formats: upload = mock_upload_file(filename, b"content") is_valid, error = file_manager.validate_upload(upload) assert is_valid is True, f"Failed for {filename}" @pytest.mark.unit class TestFileSaving: """Test file saving operations""" def test_save_upload_success(self, file_manager, mock_upload_file): """Test successful file saving""" batch_id = 1 file_manager.create_batch_directory(batch_id) upload = mock_upload_file("test.png", b"test content") file_path, original_filename = file_manager.save_upload(upload, batch_id) assert file_path.exists() assert file_path.read_bytes() == b"test content" assert original_filename == "test.png" assert file_path.parent.name == "inputs" def test_save_upload_unique_filename(self, file_manager, mock_upload_file): """Test that saved files get unique filenames""" batch_id = 1 file_manager.create_batch_directory(batch_id) upload1 = mock_upload_file("test.png", b"content1") upload2 = mock_upload_file("test.png", b"content2") path1, _ = file_manager.save_upload(upload1, batch_id) path2, _ = file_manager.save_upload(upload2, batch_id) assert path1 != path2 assert path1.exists() and path2.exists() assert path1.read_bytes() == b"content1" assert path2.read_bytes() == b"content2" def test_save_upload_validation_failure(self, file_manager, mock_upload_file): """Test save upload with validation failure""" batch_id = 1 file_manager.create_batch_directory(batch_id) # Empty file should fail validation upload = mock_upload_file("test.png", b"") with pytest.raises(FileManagementError) as exc_info: file_manager.save_upload(upload, batch_id, validate=True) assert "文件為空" in str(exc_info.value) def test_save_upload_skip_validation(self, file_manager, mock_upload_file): """Test saving with validation skipped""" batch_id = 1 file_manager.create_batch_directory(batch_id) # Empty file but validation skipped upload = mock_upload_file("test.txt", b"") # Should succeed when validation is disabled file_path, _ = file_manager.save_upload(upload, batch_id, validate=False) assert file_path.exists() def test_save_upload_preserves_extension(self, file_manager, mock_upload_file): """Test that file extension is preserved""" batch_id = 1 file_manager.create_batch_directory(batch_id) upload = mock_upload_file("document.pdf", b"pdf content") file_path, _ = file_manager.save_upload(upload, batch_id) assert file_path.suffix == ".pdf" @pytest.mark.unit class TestValidateSavedFile: """Test validation of saved files""" @patch.object(FileManager, '__init__', lambda self: None) def test_validate_saved_file(self, sample_image_path): """Test validating a saved file""" from app.services.preprocessor import DocumentPreprocessor manager = FileManager() manager.preprocessor = DocumentPreprocessor() # validate_file returns (is_valid, file_format, error_message) is_valid, file_format, error = manager.validate_saved_file(sample_image_path) assert is_valid is True assert file_format == 'png' assert error is None @pytest.mark.unit class TestBatchCreation: """Test batch creation""" def test_create_batch(self, file_manager, mock_db): """Test creating a new batch""" user_id = 1 # Mock database operations mock_batch = Mock() mock_batch.id = 123 mock_db.add = Mock() mock_db.commit = Mock() mock_db.refresh = Mock(side_effect=lambda x: setattr(x, 'id', 123)) with patch.object(FileManager, 'create_batch_directory'): batch = file_manager.create_batch(mock_db, user_id) assert mock_db.add.called assert mock_db.commit.called def test_create_batch_with_custom_name(self, file_manager, mock_db): """Test creating batch with custom name""" user_id = 1 batch_name = "My Custom Batch" mock_db.add = Mock() mock_db.commit = Mock() mock_db.refresh = Mock(side_effect=lambda x: setattr(x, 'id', 123)) with patch.object(FileManager, 'create_batch_directory'): batch = file_manager.create_batch(mock_db, user_id, batch_name) # Verify batch was created with correct name call_args = mock_db.add.call_args[0][0] assert hasattr(call_args, 'batch_name') @pytest.mark.unit class TestGetFilePaths: """Test file path retrieval""" def test_get_file_paths(self, file_manager): """Test getting file paths for a batch""" batch_id = 1 file_id = 42 paths = file_manager.get_file_paths(batch_id, file_id) assert "input_dir" in paths assert "output_dir" in paths assert "markdown_dir" in paths assert "json_dir" in paths assert "images_dir" in paths assert "export_dir" in paths # Verify images_dir includes file_id assert str(file_id) in str(paths["images_dir"]) @pytest.mark.unit class TestCleanupExpiredBatches: """Test cleanup of expired batches""" def test_cleanup_expired_batches(self, file_manager, mock_db, temp_dir): """Test cleaning up expired batches""" # Create mock expired batch expired_batch = Mock() expired_batch.id = 1 expired_batch.created_at = datetime.utcnow() - timedelta(hours=48) # Create batch directory batch_dir = file_manager.create_batch_directory(1) assert batch_dir.exists() # Mock database query mock_db.query.return_value.filter.return_value.all.return_value = [expired_batch] mock_db.delete = Mock() mock_db.commit = Mock() # Run cleanup cleaned = file_manager.cleanup_expired_batches(mock_db, retention_hours=24) assert cleaned == 1 assert not batch_dir.exists() mock_db.delete.assert_called_once_with(expired_batch) mock_db.commit.assert_called_once() def test_cleanup_no_expired_batches(self, file_manager, mock_db): """Test cleanup when no batches are expired""" # Mock database query returning empty list mock_db.query.return_value.filter.return_value.all.return_value = [] cleaned = file_manager.cleanup_expired_batches(mock_db, retention_hours=24) assert cleaned == 0 def test_cleanup_handles_missing_directory(self, file_manager, mock_db): """Test cleanup handles missing batch directory gracefully""" expired_batch = Mock() expired_batch.id = 999 # Directory doesn't exist expired_batch.created_at = datetime.utcnow() - timedelta(hours=48) mock_db.query.return_value.filter.return_value.all.return_value = [expired_batch] mock_db.delete = Mock() mock_db.commit = Mock() # Should not raise error cleaned = file_manager.cleanup_expired_batches(mock_db, retention_hours=24) assert cleaned == 1 @pytest.mark.unit class TestFileOwnershipVerification: """Test file ownership verification""" def test_verify_file_ownership_success(self, file_manager, mock_db): """Test successful ownership verification""" user_id = 1 batch_id = 123 # Mock batch owned by user mock_batch = Mock() mock_db.query.return_value.filter.return_value.first.return_value = mock_batch is_owner = file_manager.verify_file_ownership(mock_db, user_id, batch_id) assert is_owner is True def test_verify_file_ownership_failure(self, file_manager, mock_db): """Test ownership verification failure""" user_id = 1 batch_id = 123 # Mock no batch found (wrong owner) mock_db.query.return_value.filter.return_value.first.return_value = None is_owner = file_manager.verify_file_ownership(mock_db, user_id, batch_id) assert is_owner is False @pytest.mark.unit class TestBatchStatistics: """Test batch statistics retrieval""" def test_get_batch_statistics(self, file_manager, mock_db): """Test getting batch statistics""" batch_id = 1 # Create mock batch with files mock_file1 = Mock() mock_file1.file_size = 1000 mock_file2 = Mock() mock_file2.file_size = 2000 mock_batch = Mock() mock_batch.id = batch_id mock_batch.batch_name = "Test Batch" mock_batch.status = BatchStatus.COMPLETED mock_batch.total_files = 2 mock_batch.completed_files = 2 mock_batch.failed_files = 0 mock_batch.progress_percentage = 100.0 mock_batch.files = [mock_file1, mock_file2] mock_batch.created_at = datetime(2025, 1, 1, 10, 0, 0) mock_batch.started_at = datetime(2025, 1, 1, 10, 1, 0) mock_batch.completed_at = datetime(2025, 1, 1, 10, 5, 0) mock_db.query.return_value.filter.return_value.first.return_value = mock_batch stats = file_manager.get_batch_statistics(mock_db, batch_id) assert stats['batch_id'] == batch_id assert stats['batch_name'] == "Test Batch" assert stats['total_files'] == 2 assert stats['total_file_size'] == 3000 assert stats['total_file_size_mb'] == 0.0 # Small files assert stats['processing_time'] == 240.0 # 4 minutes assert stats['pending_files'] == 0 def test_get_batch_statistics_not_found(self, file_manager, mock_db): """Test getting statistics for non-existent batch""" batch_id = 999 mock_db.query.return_value.filter.return_value.first.return_value = None stats = file_manager.get_batch_statistics(mock_db, batch_id) assert stats == {} def test_get_batch_statistics_no_completion_time(self, file_manager, mock_db): """Test statistics for batch without completion time""" mock_batch = Mock() mock_batch.id = 1 mock_batch.batch_name = "Pending Batch" mock_batch.status = BatchStatus.PROCESSING mock_batch.total_files = 5 mock_batch.completed_files = 2 mock_batch.failed_files = 0 mock_batch.progress_percentage = 40.0 mock_batch.files = [] mock_batch.created_at = datetime(2025, 1, 1) mock_batch.started_at = datetime(2025, 1, 1) mock_batch.completed_at = None mock_db.query.return_value.filter.return_value.first.return_value = mock_batch stats = file_manager.get_batch_statistics(mock_db, 1) assert stats['processing_time'] is None assert stats['pending_files'] == 3 @pytest.mark.unit class TestEdgeCases: """Test edge cases and error handling""" def test_save_upload_creates_parent_directories(self, file_manager, mock_upload_file): """Test that save_upload creates necessary directories""" batch_id = 999 # Directory doesn't exist yet upload = mock_upload_file("test.png", b"content") file_path, _ = file_manager.save_upload(upload, batch_id) assert file_path.exists() assert file_path.parent.exists() def test_cleanup_continues_on_error(self, file_manager, mock_db): """Test that cleanup continues even if one batch fails""" batch1 = Mock() batch1.id = 1 batch1.created_at = datetime.utcnow() - timedelta(hours=48) batch2 = Mock() batch2.id = 2 batch2.created_at = datetime.utcnow() - timedelta(hours=48) # Create only batch2 directory file_manager.create_batch_directory(2) mock_db.query.return_value.filter.return_value.all.return_value = [batch1, batch2] mock_db.delete = Mock() mock_db.commit = Mock() # Should not fail, should clean batch2 even if batch1 fails cleaned = file_manager.cleanup_expired_batches(mock_db, retention_hours=24) assert cleaned > 0 def test_validate_upload_with_unicode_filename(self, file_manager, mock_upload_file): """Test validation with Unicode filename""" upload = mock_upload_file("測試文件.png", b"content") is_valid, error = file_manager.validate_upload(upload) assert is_valid is True def test_save_upload_preserves_unicode_filename(self, file_manager, mock_upload_file): """Test that Unicode filenames are handled correctly""" batch_id = 1 file_manager.create_batch_directory(batch_id) upload = mock_upload_file("中文文檔.pdf", b"content") file_path, original_filename = file_manager.save_upload(upload, batch_id) assert original_filename == "中文文檔.pdf" assert file_path.exists()