This commit is contained in:
beabigegg
2025-11-12 22:53:17 +08:00
commit da700721fa
130 changed files with 23393 additions and 0 deletions

View File

@@ -0,0 +1,520 @@
"""
Tool_OCR - File Manager Unit Tests
Tests for app/services/file_manager.py
"""
import pytest
import shutil
from pathlib import Path
from unittest.mock import Mock, patch, MagicMock
from datetime import datetime, timedelta
from io import BytesIO
from fastapi import UploadFile
from app.services.file_manager import FileManager, FileManagementError
from app.models.ocr import OCRBatch, OCRFile, FileStatus, BatchStatus
@pytest.fixture
def file_manager(temp_dir):
    """Provide a FileManager configured against the temporary directory.

    The ``settings`` object in ``app.services.file_manager`` is replaced by a
    mock carrying ``upload_dir``, ``max_upload_size`` (20 MB) and
    ``allowed_extensions_list``.

    The manager is yielded from inside the ``patch`` context so the mocked
    settings remain in effect for the whole test.  Returning from inside the
    ``with`` block would undo the patch before the test body runs, leaving
    only the constructor covered by the mock.
    """
    with patch('app.services.file_manager.settings') as mock_settings:
        mock_settings.upload_dir = str(temp_dir)
        mock_settings.max_upload_size = 20 * 1024 * 1024  # 20MB
        mock_settings.allowed_extensions_list = ['png', 'jpg', 'jpeg', 'pdf']
        # Yield (not return): the patch is reverted at fixture teardown.
        yield FileManager()
@pytest.fixture
def mock_upload_file():
    """Return a factory that builds in-memory ``UploadFile`` objects.

    Factory signature: ``create_file(filename, content, size)``.

    * ``filename`` - name given to the upload (default ``"test.png"``).
    * ``content`` - bytes backing the upload (default ``b"test content"``).
    * ``size`` - kept so existing call sites remain valid, but it is not
      applied to the upload; the stream always holds exactly ``content``.
    """
    def create_file(filename="test.png", content=b"test content", size=None):
        # A fresh BytesIO is already positioned at offset 0, so the upload
        # can be built directly without seeking to the end and rewinding.
        return UploadFile(filename=filename, file=BytesIO(content))

    return create_file
@pytest.fixture
def mock_db():
    """Provide a ``Mock`` object standing in for a database session."""
    session = Mock()
    return session
@pytest.mark.unit
class TestFileManagerInit:
    """Construction of a FileManager instance."""

    def test_init(self, file_manager, temp_dir):
        """A new manager exposes a preprocessor and an existing upload root."""
        manager = file_manager
        assert manager is not None
        assert manager.preprocessor is not None

        upload_root = manager.base_upload_dir
        assert upload_root == temp_dir
        assert upload_root.exists()
@pytest.mark.unit
class TestBatchDirectoryManagement:
    """Creating and locating per-batch directories."""

    def test_create_batch_directory(self, file_manager):
        """Creating a batch directory also creates the expected sub-folders."""
        root = file_manager.create_batch_directory(123)

        expected_dirs = (
            root,
            root / "inputs",
            root / "outputs" / "markdown",
            root / "outputs" / "json",
            root / "outputs" / "images",
            root / "exports",
        )
        for directory in expected_dirs:
            assert directory.exists()

    def test_create_batch_directory_multiple_times(self, file_manager):
        """Creating the same batch directory twice yields the same path."""
        created = [file_manager.create_batch_directory(123) for _ in range(2)]

        assert created[0] == created[1]
        assert created[0].exists()

    def test_get_batch_directory(self, file_manager):
        """The batch directory lives under ``<upload root>/batches/<id>``."""
        located = file_manager.get_batch_directory(456)

        assert located == file_manager.base_upload_dir / "batches" / "456"
@pytest.mark.unit
class TestUploadValidation:
    """Checks performed by ``FileManager.validate_upload``."""

    def test_validate_upload_valid_file(self, file_manager, mock_upload_file):
        """A non-empty PNG upload is accepted and reports no error."""
        candidate = mock_upload_file("test.png", b"valid content")

        ok, message = file_manager.validate_upload(candidate)

        assert ok is True
        assert message is None

    def test_validate_upload_empty_filename(self, file_manager):
        """An upload with an empty filename is rejected."""
        nameless = Mock(filename="")

        ok, message = file_manager.validate_upload(nameless)

        assert ok is False
        assert "文件名不能為空" in message

    def test_validate_upload_empty_file(self, file_manager, mock_upload_file):
        """An upload with zero bytes of content is rejected."""
        candidate = mock_upload_file("test.png", b"")

        ok, message = file_manager.validate_upload(candidate)

        assert ok is False
        assert "文件為空" in message

    @pytest.mark.skip(reason="File size mock is complex with UploadFile, covered by integration test")
    def test_validate_upload_file_too_large(self, file_manager):
        """Placeholder for the size-limit check; intentionally skipped."""
        # The size limit is exercised by integration tests, where real files
        # can be created; faking UploadFile's size here is not attempted.

    def test_validate_upload_unsupported_format(self, file_manager, mock_upload_file):
        """A ``.txt`` upload is rejected as an unsupported format."""
        candidate = mock_upload_file("test.txt", b"text content")

        ok, message = file_manager.validate_upload(candidate)

        assert ok is False
        assert "不支持的文件格式" in message

    def test_validate_upload_supported_formats(self, file_manager, mock_upload_file):
        """Each of the png/jpg/jpeg/pdf extensions is accepted."""
        for name in ("test.png", "test.jpg", "test.jpeg", "test.pdf"):
            candidate = mock_upload_file(name, b"content")
            ok, _message = file_manager.validate_upload(candidate)
            assert ok is True, f"Failed for {name}"
@pytest.mark.unit
class TestFileSaving:
    """Persisting uploads via ``FileManager.save_upload``."""

    def test_save_upload_success(self, file_manager, mock_upload_file):
        """A saved upload lands in ``inputs`` with its content intact."""
        file_manager.create_batch_directory(1)
        payload = b"test content"
        incoming = mock_upload_file("test.png", payload)

        saved_path, reported_name = file_manager.save_upload(incoming, 1)

        assert saved_path.exists()
        assert saved_path.read_bytes() == payload
        assert reported_name == "test.png"
        assert saved_path.parent.name == "inputs"

    def test_save_upload_unique_filename(self, file_manager, mock_upload_file):
        """Two uploads with the same name are stored at distinct paths."""
        file_manager.create_batch_directory(1)
        first_upload = mock_upload_file("test.png", b"content1")
        second_upload = mock_upload_file("test.png", b"content2")

        first_path, _ = file_manager.save_upload(first_upload, 1)
        second_path, _ = file_manager.save_upload(second_upload, 1)

        assert first_path != second_path
        assert first_path.exists() and second_path.exists()
        assert first_path.read_bytes() == b"content1"
        assert second_path.read_bytes() == b"content2"

    def test_save_upload_validation_failure(self, file_manager, mock_upload_file):
        """Saving an empty file with validation on raises FileManagementError."""
        file_manager.create_batch_directory(1)
        empty_upload = mock_upload_file("test.png", b"")

        with pytest.raises(FileManagementError) as raised:
            file_manager.save_upload(empty_upload, 1, validate=True)

        assert "文件為空" in str(raised.value)

    def test_save_upload_skip_validation(self, file_manager, mock_upload_file):
        """With ``validate=False`` an empty ``.txt`` upload is still written."""
        file_manager.create_batch_directory(1)
        empty_text = mock_upload_file("test.txt", b"")

        saved_path, _ = file_manager.save_upload(empty_text, 1, validate=False)

        assert saved_path.exists()

    def test_save_upload_preserves_extension(self, file_manager, mock_upload_file):
        """The stored file keeps the ``.pdf`` suffix of the upload."""
        file_manager.create_batch_directory(1)
        incoming = mock_upload_file("document.pdf", b"pdf content")

        saved_path, _ = file_manager.save_upload(incoming, 1)

        assert saved_path.suffix == ".pdf"
@pytest.mark.unit
class TestValidateSavedFile:
    """Validation of files that are already on disk."""

    def test_validate_saved_file(self, sample_image_path):
        """The sample image validates as a PNG with no error."""
        # FileManager.__init__ is replaced with a no-op for the duration of
        # the test, so the manager is built without running its constructor.
        with patch.object(FileManager, '__init__', lambda self: None):
            from app.services.preprocessor import DocumentPreprocessor

            manager = FileManager()
            manager.preprocessor = DocumentPreprocessor()

            # Result is unpacked as (is_valid, file_format, error_message).
            outcome = manager.validate_saved_file(sample_image_path)
            is_valid, file_format, error = outcome

            assert is_valid is True
            assert file_format == 'png'
            assert error is None
@pytest.mark.unit
class TestBatchCreation:
    """Test batch creation"""

    def test_create_batch(self, file_manager, mock_db):
        """Creating a batch adds a record to the session and commits it."""
        user_id = 1
        mock_db.add = Mock()
        mock_db.commit = Mock()
        # refresh() assigns an id to the object it is given, standing in for
        # the primary key a real database would populate.
        mock_db.refresh = Mock(side_effect=lambda obj: setattr(obj, 'id', 123))

        # Directory creation is patched out; only the session calls are checked.
        with patch.object(FileManager, 'create_batch_directory'):
            file_manager.create_batch(mock_db, user_id)

        assert mock_db.add.called
        assert mock_db.commit.called

    def test_create_batch_with_custom_name(self, file_manager, mock_db):
        """A caller-supplied batch name ends up on the record that is added."""
        user_id = 1
        batch_name = "My Custom Batch"
        mock_db.add = Mock()
        mock_db.commit = Mock()
        mock_db.refresh = Mock(side_effect=lambda obj: setattr(obj, 'id', 123))

        with patch.object(FileManager, 'create_batch_directory'):
            file_manager.create_batch(mock_db, user_id, batch_name)

        # Inspect the object handed to session.add() and check the actual
        # value; merely checking that the attribute exists would pass for any
        # name.
        added_batch = mock_db.add.call_args[0][0]
        assert added_batch.batch_name == batch_name
@pytest.mark.unit
class TestGetFilePaths:
    """Lookup of the directory paths associated with a batch and file."""

    def test_get_file_paths(self, file_manager):
        """The result exposes every expected key; images_dir mentions the file id."""
        file_id = 42
        paths = file_manager.get_file_paths(1, file_id)

        expected_keys = (
            "input_dir",
            "output_dir",
            "markdown_dir",
            "json_dir",
            "images_dir",
            "export_dir",
        )
        for key in expected_keys:
            assert key in paths

        # The images directory path should contain the file id.
        assert str(file_id) in str(paths["images_dir"])
@pytest.mark.unit
class TestCleanupExpiredBatches:
    """Removal of batches via ``cleanup_expired_batches``."""

    def test_cleanup_expired_batches(self, file_manager, mock_db, temp_dir):
        """An expired batch has its directory removed and its row deleted."""
        stale = Mock(id=1, created_at=datetime.utcnow() - timedelta(hours=48))

        stale_dir = file_manager.create_batch_directory(1)
        assert stale_dir.exists()

        # The query chain is mocked to return the stale batch.
        query_chain = mock_db.query.return_value.filter.return_value
        query_chain.all.return_value = [stale]
        mock_db.delete = Mock()
        mock_db.commit = Mock()

        removed = file_manager.cleanup_expired_batches(mock_db, retention_hours=24)

        assert removed == 1
        assert not stale_dir.exists()
        mock_db.delete.assert_called_once_with(stale)
        mock_db.commit.assert_called_once()

    def test_cleanup_no_expired_batches(self, file_manager, mock_db):
        """With no batches returned by the query, nothing is cleaned."""
        query_chain = mock_db.query.return_value.filter.return_value
        query_chain.all.return_value = []

        removed = file_manager.cleanup_expired_batches(mock_db, retention_hours=24)

        assert removed == 0

    def test_cleanup_handles_missing_directory(self, file_manager, mock_db):
        """A batch with no directory on disk is still counted as cleaned."""
        # No directory is created for batch 999 in this test.
        orphan = Mock(id=999, created_at=datetime.utcnow() - timedelta(hours=48))

        query_chain = mock_db.query.return_value.filter.return_value
        query_chain.all.return_value = [orphan]
        mock_db.delete = Mock()
        mock_db.commit = Mock()

        # The call must complete without raising.
        removed = file_manager.cleanup_expired_batches(mock_db, retention_hours=24)

        assert removed == 1
@pytest.mark.unit
class TestFileOwnershipVerification:
    """Behaviour of ``verify_file_ownership`` against a mocked session."""

    def test_verify_file_ownership_success(self, file_manager, mock_db):
        """When the query finds a batch, ownership is confirmed."""
        query_chain = mock_db.query.return_value.filter.return_value
        query_chain.first.return_value = Mock()

        owns = file_manager.verify_file_ownership(mock_db, 1, 123)

        assert owns is True

    def test_verify_file_ownership_failure(self, file_manager, mock_db):
        """When the query finds nothing, ownership is denied."""
        query_chain = mock_db.query.return_value.filter.return_value
        query_chain.first.return_value = None

        owns = file_manager.verify_file_ownership(mock_db, 1, 123)

        assert owns is False
@pytest.mark.unit
class TestBatchStatistics:
    """Behaviour of ``get_batch_statistics`` against a mocked session."""

    def test_get_batch_statistics(self, file_manager, mock_db):
        """Statistics for a completed two-file batch match the mocked record."""
        batch_id = 1
        batch = Mock(
            id=batch_id,
            batch_name="Test Batch",
            status=BatchStatus.COMPLETED,
            total_files=2,
            completed_files=2,
            failed_files=0,
            progress_percentage=100.0,
            files=[Mock(file_size=1000), Mock(file_size=2000)],
            created_at=datetime(2025, 1, 1, 10, 0, 0),
            started_at=datetime(2025, 1, 1, 10, 1, 0),
            completed_at=datetime(2025, 1, 1, 10, 5, 0),
        )
        query_chain = mock_db.query.return_value.filter.return_value
        query_chain.first.return_value = batch

        stats = file_manager.get_batch_statistics(mock_db, batch_id)

        expected = {
            'batch_id': batch_id,
            'batch_name': "Test Batch",
            'total_files': 2,
            'total_file_size': 3000,       # 1000 + 2000 bytes
            'total_file_size_mb': 0.0,     # small files: expected to report 0.0
            'processing_time': 240.0,      # 10:01 -> 10:05 is four minutes
            'pending_files': 0,
        }
        for key, value in expected.items():
            assert stats[key] == value

    def test_get_batch_statistics_not_found(self, file_manager, mock_db):
        """A batch the query cannot find produces an empty dict."""
        query_chain = mock_db.query.return_value.filter.return_value
        query_chain.first.return_value = None

        assert file_manager.get_batch_statistics(mock_db, 999) == {}

    def test_get_batch_statistics_no_completion_time(self, file_manager, mock_db):
        """Without a completion time, processing_time is None."""
        batch = Mock(
            id=1,
            batch_name="Pending Batch",
            status=BatchStatus.PROCESSING,
            total_files=5,
            completed_files=2,
            failed_files=0,
            progress_percentage=40.0,
            files=[],
            created_at=datetime(2025, 1, 1),
            started_at=datetime(2025, 1, 1),
            completed_at=None,
        )
        query_chain = mock_db.query.return_value.filter.return_value
        query_chain.first.return_value = batch

        stats = file_manager.get_batch_statistics(mock_db, 1)

        assert stats['processing_time'] is None
        # 5 total, 2 completed, 0 failed -> 3 expected to be pending.
        assert stats['pending_files'] == 3
@pytest.mark.unit
class TestEdgeCases:
    """Edge cases: missing directories, multi-batch cleanup, Unicode names."""

    def test_save_upload_creates_parent_directories(self, file_manager, mock_upload_file):
        """Saving into a batch with no pre-created directory still succeeds."""
        # No create_batch_directory() call is made for batch 999 beforehand.
        incoming = mock_upload_file("test.png", b"content")

        saved_path, _ = file_manager.save_upload(incoming, 999)

        assert saved_path.exists()
        assert saved_path.parent.exists()

    def test_cleanup_continues_on_error(self, file_manager, mock_db):
        """Cleanup over two batches, only one of which has a directory."""
        stale_batches = []
        for batch_id in (1, 2):
            stale = Mock()
            stale.id = batch_id
            stale.created_at = datetime.utcnow() - timedelta(hours=48)
            stale_batches.append(stale)

        # Only batch 2 gets a directory on disk.
        file_manager.create_batch_directory(2)

        query_chain = mock_db.query.return_value.filter.return_value
        query_chain.all.return_value = stale_batches
        mock_db.delete = Mock()
        mock_db.commit = Mock()

        # The call must not raise, and at least one batch must be cleaned.
        removed = file_manager.cleanup_expired_batches(mock_db, retention_hours=24)

        assert removed > 0

    def test_validate_upload_with_unicode_filename(self, file_manager, mock_upload_file):
        """A PNG upload with a non-ASCII filename passes validation."""
        candidate = mock_upload_file("測試文件.png", b"content")

        ok, _message = file_manager.validate_upload(candidate)

        assert ok is True

    def test_save_upload_preserves_unicode_filename(self, file_manager, mock_upload_file):
        """The original non-ASCII filename is reported back unchanged."""
        file_manager.create_batch_directory(1)
        incoming = mock_upload_file("中文文檔.pdf", b"content")

        saved_path, reported_name = file_manager.save_upload(incoming, 1)

        assert reported_name == "中文文檔.pdf"
        assert saved_path.exists()