""" Tool_OCR - Export Service Unit Tests Tests for app/services/export_service.py """ import pytest import json import zipfile from pathlib import Path from unittest.mock import Mock, patch, MagicMock from datetime import datetime import pandas as pd from app.services.export_service import ExportService, ExportError from app.models.ocr import FileStatus @pytest.fixture def export_service(): """Create an ExportService instance""" return ExportService() @pytest.fixture def mock_ocr_result(temp_dir): """Create a mock OCRResult with markdown file""" # Create mock markdown file md_file = temp_dir / "test_result.md" md_file.write_text("# Test Document\n\nThis is test content.", encoding="utf-8") # Create mock result result = Mock() result.id = 1 result.markdown_path = str(md_file) result.json_path = None result.detected_language = "zh" result.total_text_regions = 10 result.average_confidence = 0.95 result.layout_data = {"elements": [{"type": "text"}]} result.images_metadata = [] # Mock file result.file = Mock() result.file.id = 1 result.file.original_filename = "test.png" result.file.file_format = "png" result.file.file_size = 1024 result.file.processing_time = 2.5 return result @pytest.fixture def mock_db(): """Create a mock database session""" return Mock() @pytest.mark.unit class TestExportServiceInit: """Test ExportService initialization""" def test_init(self, export_service): """Test export service initialization""" assert export_service is not None assert export_service.pdf_generator is not None @pytest.mark.unit class TestApplyFilters: """Test filter application""" def test_apply_filters_confidence_threshold(self, export_service): """Test confidence threshold filter""" result1 = Mock() result1.average_confidence = 0.95 result1.file = Mock() result1.file.original_filename = "test1.png" result2 = Mock() result2.average_confidence = 0.75 result2.file = Mock() result2.file.original_filename = "test2.png" result3 = Mock() result3.average_confidence = 0.85 result3.file = Mock() result3.file.original_filename = "test3.png" results = [result1, result2, result3] filters = {"confidence_threshold": 0.80} filtered = export_service.apply_filters(results, filters) assert len(filtered) == 2 assert result1 in filtered assert result3 in filtered assert result2 not in filtered def test_apply_filters_filename_pattern(self, export_service): """Test filename pattern filter""" result1 = Mock() result1.average_confidence = 0.95 result1.file = Mock() result1.file.original_filename = "invoice_2024.png" result2 = Mock() result2.average_confidence = 0.95 result2.file = Mock() result2.file.original_filename = "receipt.png" results = [result1, result2] filters = {"filename_pattern": "invoice"} filtered = export_service.apply_filters(results, filters) assert len(filtered) == 1 assert result1 in filtered def test_apply_filters_language(self, export_service): """Test language filter""" result1 = Mock() result1.detected_language = "zh" result1.average_confidence = 0.95 result1.file = Mock() result1.file.original_filename = "chinese.png" result2 = Mock() result2.detected_language = "en" result2.average_confidence = 0.95 result2.file = Mock() result2.file.original_filename = "english.png" results = [result1, result2] filters = {"language": "zh"} filtered = export_service.apply_filters(results, filters) assert len(filtered) == 1 assert result1 in filtered def test_apply_filters_combined(self, export_service): """Test multiple filters combined""" result1 = Mock() result1.detected_language = "zh" result1.average_confidence = 0.95 result1.file = Mock() result1.file.original_filename = "invoice_chinese.png" result2 = Mock() result2.detected_language = "zh" result2.average_confidence = 0.75 result2.file = Mock() result2.file.original_filename = "invoice_low.png" result3 = Mock() result3.detected_language = "en" result3.average_confidence = 0.95 result3.file = Mock() result3.file.original_filename = "invoice_english.png" results = [result1, result2, result3] filters = { "confidence_threshold": 0.80, "language": "zh", "filename_pattern": "invoice" } filtered = export_service.apply_filters(results, filters) assert len(filtered) == 1 assert result1 in filtered def test_apply_filters_no_filters(self, export_service): """Test with no filters applied""" results = [Mock(), Mock(), Mock()] filtered = export_service.apply_filters(results, {}) assert len(filtered) == len(results) @pytest.mark.unit class TestExportToTXT: """Test TXT export""" def test_export_to_txt_basic(self, export_service, mock_ocr_result, temp_dir): """Test basic TXT export""" output_path = temp_dir / "output.txt" result_path = export_service.export_to_txt([mock_ocr_result], output_path) assert result_path.exists() content = result_path.read_text(encoding="utf-8") assert "Test Document" in content assert "test content" in content def test_export_to_txt_with_line_numbers(self, export_service, mock_ocr_result, temp_dir): """Test TXT export with line numbers""" output_path = temp_dir / "output.txt" formatting = {"add_line_numbers": True} result_path = export_service.export_to_txt( [mock_ocr_result], output_path, formatting=formatting ) content = result_path.read_text(encoding="utf-8") assert "|" in content # Line number separator def test_export_to_txt_with_metadata(self, export_service, mock_ocr_result, temp_dir): """Test TXT export with metadata headers""" output_path = temp_dir / "output.txt" formatting = {"include_metadata": True} result_path = export_service.export_to_txt( [mock_ocr_result], output_path, formatting=formatting ) content = result_path.read_text(encoding="utf-8") assert "文件:" in content assert "test.png" in content assert "信心度:" in content def test_export_to_txt_with_grouping(self, export_service, mock_ocr_result, temp_dir): """Test TXT export with file grouping""" output_path = temp_dir / "output.txt" formatting = {"group_by_filename": True} result_path = export_service.export_to_txt( [mock_ocr_result, mock_ocr_result], output_path, formatting=formatting ) content = result_path.read_text(encoding="utf-8") assert "-" * 80 in content # Separator def test_export_to_txt_missing_markdown(self, export_service, temp_dir): """Test TXT export with missing markdown file""" result = Mock() result.id = 1 result.markdown_path = "/nonexistent/path.md" result.file = Mock() result.file.original_filename = "test.png" output_path = temp_dir / "output.txt" # Should not fail, just skip the file result_path = export_service.export_to_txt([result], output_path) assert result_path.exists() def test_export_to_txt_creates_parent_directories(self, export_service, mock_ocr_result, temp_dir): """Test that export creates necessary parent directories""" output_path = temp_dir / "subdir" / "output.txt" result_path = export_service.export_to_txt([mock_ocr_result], output_path) assert result_path.exists() assert result_path.parent.exists() @pytest.mark.unit class TestExportToJSON: """Test JSON export""" def test_export_to_json_basic(self, export_service, mock_ocr_result, temp_dir): """Test basic JSON export""" output_path = temp_dir / "output.json" result_path = export_service.export_to_json([mock_ocr_result], output_path) assert result_path.exists() data = json.loads(result_path.read_text(encoding="utf-8")) assert "export_time" in data assert data["total_files"] == 1 assert len(data["results"]) == 1 assert data["results"][0]["filename"] == "test.png" assert data["results"][0]["average_confidence"] == 0.95 def test_export_to_json_with_layout(self, export_service, mock_ocr_result, temp_dir): """Test JSON export with layout data""" output_path = temp_dir / "output.json" result_path = export_service.export_to_json( [mock_ocr_result], output_path, include_layout=True ) data = json.loads(result_path.read_text(encoding="utf-8")) assert "layout_data" in data["results"][0] def test_export_to_json_without_layout(self, export_service, mock_ocr_result, temp_dir): """Test JSON export without layout data""" output_path = temp_dir / "output.json" result_path = export_service.export_to_json( [mock_ocr_result], output_path, include_layout=False ) data = json.loads(result_path.read_text(encoding="utf-8")) assert "layout_data" not in data["results"][0] def test_export_to_json_multiple_results(self, export_service, mock_ocr_result, temp_dir): """Test JSON export with multiple results""" output_path = temp_dir / "output.json" result_path = export_service.export_to_json( [mock_ocr_result, mock_ocr_result], output_path ) data = json.loads(result_path.read_text(encoding="utf-8")) assert data["total_files"] == 2 assert len(data["results"]) == 2 @pytest.mark.unit class TestExportToExcel: """Test Excel export""" def test_export_to_excel_basic(self, export_service, mock_ocr_result, temp_dir): """Test basic Excel export""" output_path = temp_dir / "output.xlsx" result_path = export_service.export_to_excel([mock_ocr_result], output_path) assert result_path.exists() df = pd.read_excel(result_path) assert len(df) == 1 assert "文件名" in df.columns assert df.iloc[0]["文件名"] == "test.png" def test_export_to_excel_with_confidence(self, export_service, mock_ocr_result, temp_dir): """Test Excel export with confidence scores""" output_path = temp_dir / "output.xlsx" result_path = export_service.export_to_excel( [mock_ocr_result], output_path, include_confidence=True ) df = pd.read_excel(result_path) assert "平均信心度" in df.columns def test_export_to_excel_without_processing_time(self, export_service, mock_ocr_result, temp_dir): """Test Excel export without processing time""" output_path = temp_dir / "output.xlsx" result_path = export_service.export_to_excel( [mock_ocr_result], output_path, include_processing_time=False ) df = pd.read_excel(result_path) assert "處理時間(秒)" not in df.columns def test_export_to_excel_long_content_truncation(self, export_service, temp_dir): """Test that long content is truncated in Excel""" # Create result with long content md_file = temp_dir / "long.md" md_file.write_text("x" * 2000, encoding="utf-8") result = Mock() result.id = 1 result.markdown_path = str(md_file) result.detected_language = "zh" result.total_text_regions = 10 result.average_confidence = 0.95 result.file = Mock() result.file.original_filename = "long.png" result.file.file_format = "png" result.file.file_size = 1024 result.file.processing_time = 1.0 output_path = temp_dir / "output.xlsx" result_path = export_service.export_to_excel([result], output_path) df = pd.read_excel(result_path) content = df.iloc[0]["提取內容"] assert "..." in content assert len(content) <= 1004 # 1000 + "..." @pytest.mark.unit class TestExportToMarkdown: """Test Markdown export""" def test_export_to_markdown_combined(self, export_service, mock_ocr_result, temp_dir): """Test combined Markdown export""" output_path = temp_dir / "combined.md" result_path = export_service.export_to_markdown( [mock_ocr_result], output_path, combine=True ) assert result_path.exists() assert result_path.is_file() content = result_path.read_text(encoding="utf-8") assert "test.png" in content assert "Test Document" in content def test_export_to_markdown_separate(self, export_service, mock_ocr_result, temp_dir): """Test separate Markdown export""" output_dir = temp_dir / "markdown_files" result_path = export_service.export_to_markdown( [mock_ocr_result], output_dir, combine=False ) assert result_path.exists() assert result_path.is_dir() files = list(result_path.glob("*.md")) assert len(files) == 1 def test_export_to_markdown_multiple_files(self, export_service, mock_ocr_result, temp_dir): """Test Markdown export with multiple files""" output_path = temp_dir / "combined.md" result_path = export_service.export_to_markdown( [mock_ocr_result, mock_ocr_result], output_path, combine=True ) content = result_path.read_text(encoding="utf-8") assert content.count("---") >= 1 # Separators @pytest.mark.unit class TestExportToPDF: """Test PDF export""" @patch.object(ExportService, '__init__', lambda self: None) def test_export_to_pdf_success(self, mock_ocr_result, temp_dir): """Test successful PDF export""" from app.services.pdf_generator import PDFGenerator service = ExportService() service.pdf_generator = Mock(spec=PDFGenerator) service.pdf_generator.generate_pdf = Mock(return_value=temp_dir / "output.pdf") output_path = temp_dir / "output.pdf" result_path = service.export_to_pdf(mock_ocr_result, output_path) service.pdf_generator.generate_pdf.assert_called_once() call_kwargs = service.pdf_generator.generate_pdf.call_args[1] assert call_kwargs["css_template"] == "default" @patch.object(ExportService, '__init__', lambda self: None) def test_export_to_pdf_with_custom_template(self, mock_ocr_result, temp_dir): """Test PDF export with custom CSS template""" from app.services.pdf_generator import PDFGenerator service = ExportService() service.pdf_generator = Mock(spec=PDFGenerator) service.pdf_generator.generate_pdf = Mock(return_value=temp_dir / "output.pdf") output_path = temp_dir / "output.pdf" service.export_to_pdf(mock_ocr_result, output_path, css_template="academic") call_kwargs = service.pdf_generator.generate_pdf.call_args[1] assert call_kwargs["css_template"] == "academic" @patch.object(ExportService, '__init__', lambda self: None) def test_export_to_pdf_missing_markdown(self, temp_dir): """Test PDF export with missing markdown file""" from app.services.pdf_generator import PDFGenerator result = Mock() result.id = 1 result.markdown_path = None result.file = Mock() service = ExportService() service.pdf_generator = Mock(spec=PDFGenerator) output_path = temp_dir / "output.pdf" with pytest.raises(ExportError) as exc_info: service.export_to_pdf(result, output_path) assert "not found" in str(exc_info.value).lower() @pytest.mark.unit class TestGetExportFormats: """Test getting available export formats""" def test_get_export_formats(self, export_service): """Test getting export formats""" formats = export_service.get_export_formats() assert isinstance(formats, dict) assert "txt" in formats assert "json" in formats assert "excel" in formats assert "markdown" in formats assert "pdf" in formats assert "zip" in formats # Check descriptions are in Chinese for desc in formats.values(): assert isinstance(desc, str) assert len(desc) > 0 @pytest.mark.unit class TestApplyExportRule: """Test export rule application""" def test_apply_export_rule_success(self, export_service, mock_db): """Test applying export rule""" # Create mock rule rule = Mock() rule.id = 1 rule.config_json = { "filters": { "confidence_threshold": 0.80 } } mock_db.query.return_value.filter.return_value.first.return_value = rule # Create mock results result1 = Mock() result1.average_confidence = 0.95 result1.file = Mock() result1.file.original_filename = "test1.png" result2 = Mock() result2.average_confidence = 0.70 result2.file = Mock() result2.file.original_filename = "test2.png" results = [result1, result2] filtered = export_service.apply_export_rule(mock_db, results, rule_id=1) assert len(filtered) == 1 assert result1 in filtered def test_apply_export_rule_not_found(self, export_service, mock_db): """Test applying non-existent rule""" mock_db.query.return_value.filter.return_value.first.return_value = None with pytest.raises(ExportError) as exc_info: export_service.apply_export_rule(mock_db, [], rule_id=999) assert "not found" in str(exc_info.value).lower() @pytest.mark.unit class TestEdgeCases: """Test edge cases and error handling""" def test_export_to_txt_empty_results(self, export_service, temp_dir): """Test TXT export with empty results list""" output_path = temp_dir / "output.txt" result_path = export_service.export_to_txt([], output_path) assert result_path.exists() content = result_path.read_text(encoding="utf-8") assert content == "" def test_export_to_json_empty_results(self, export_service, temp_dir): """Test JSON export with empty results list""" output_path = temp_dir / "output.json" result_path = export_service.export_to_json([], output_path) data = json.loads(result_path.read_text(encoding="utf-8")) assert data["total_files"] == 0 assert len(data["results"]) == 0 def test_export_with_unicode_content(self, export_service, temp_dir): """Test export with Unicode/Chinese content""" md_file = temp_dir / "chinese.md" md_file.write_text("# 測試文檔\n\n這是中文內容。", encoding="utf-8") result = Mock() result.id = 1 result.markdown_path = str(md_file) result.json_path = None result.detected_language = "zh" result.total_text_regions = 10 result.average_confidence = 0.95 result.layout_data = None # Use None instead of Mock for JSON serialization result.images_metadata = None # Use None instead of Mock result.file = Mock() result.file.id = 1 result.file.original_filename = "中文測試.png" result.file.file_format = "png" result.file.file_size = 1024 result.file.processing_time = 1.0 # Test TXT export txt_path = temp_dir / "output.txt" export_service.export_to_txt([result], txt_path) assert "測試文檔" in txt_path.read_text(encoding="utf-8") # Test JSON export json_path = temp_dir / "output.json" export_service.export_to_json([result], json_path) data = json.loads(json_path.read_text(encoding="utf-8")) assert data["results"][0]["filename"] == "中文測試.png" def test_apply_filters_with_none_values(self, export_service): """Test filters with None values in results""" result = Mock() result.average_confidence = None result.detected_language = None result.file = Mock() result.file.original_filename = "test.png" filters = {"confidence_threshold": 0.80} filtered = export_service.apply_filters([result], filters) # Should filter out result with None confidence assert len(filtered) == 0