"""
Tool_OCR - PDF Generator Unit Tests
Tests for app/services/pdf_generator.py
"""

import subprocess
from pathlib import Path
from unittest.mock import MagicMock, Mock, patch

import pytest

from app.services.pdf_generator import PDFGenerationError, PDFGenerator


def _stub_html_writer(mock_html, output_path):
    """Make the patched ``HTML`` class return a document whose ``write_pdf`` creates *output_path*."""
    document = Mock()
    document.write_pdf = Mock(side_effect=lambda *args, **kwargs: output_path.touch())
    mock_html.return_value = document
    return document


def _capture_html_strings(mock_html, output_path):
    """Record every ``string`` passed to the patched ``HTML`` class; return the recording list."""
    captured = []

    def fake_html(string, **kwargs):
        captured.append(string)
        document = Mock()
        document.write_pdf = Mock(side_effect=lambda *args, **kwargs: output_path.touch())
        return document

    mock_html.side_effect = fake_html
    return captured


@pytest.mark.unit
class TestPDFGeneratorInit:
    """Test PDF generator initialization"""

    def test_init(self):
        """A fresh generator exposes exactly the three built-in CSS templates."""
        generator = PDFGenerator()

        assert generator is not None
        assert hasattr(generator, "css_templates")
        assert len(generator.css_templates) == 3
        assert "default" in generator.css_templates
        assert "academic" in generator.css_templates
        assert "business" in generator.css_templates

    def test_css_templates_have_content(self):
        """Every built-in CSS template is a non-trivial string with page and body rules."""
        generator = PDFGenerator()

        for template_name, css_content in generator.css_templates.items():
            assert isinstance(css_content, str), template_name
            assert len(css_content) > 100, template_name
            assert "@page" in css_content, template_name
            assert "body" in css_content, template_name


@pytest.mark.unit
class TestPandocAvailability:
    """Test Pandoc availability checking"""

    @patch("subprocess.run")
    def test_check_pandoc_available_success(self, mock_run):
        """Availability check returns True when ``pandoc --version`` exits with 0."""
        mock_run.return_value = Mock(returncode=0, stdout="pandoc 2.x")

        generator = PDFGenerator()
        is_available = generator.check_pandoc_available()

        assert is_available is True
        mock_run.assert_called_once()
        assert mock_run.call_args[0][0] == ["pandoc", "--version"]

    @patch("subprocess.run")
    def test_check_pandoc_available_not_found(self, mock_run):
        """Availability check returns False when the pandoc executable is missing."""
        mock_run.side_effect = FileNotFoundError()

        generator = PDFGenerator()
        is_available = generator.check_pandoc_available()

        assert is_available is False

    @patch("subprocess.run")
    def test_check_pandoc_available_timeout(self, mock_run):
        """Availability check returns False when the version command times out."""
        mock_run.side_effect = subprocess.TimeoutExpired("pandoc", 5)

        generator = PDFGenerator()
        is_available = generator.check_pandoc_available()

        assert is_available is False


@pytest.mark.unit
class TestPandocPDFGeneration:
    """Test PDF generation using Pandoc"""

    @pytest.fixture
    def sample_markdown(self, temp_dir):
        """Create a sample Markdown file"""
        md_file = temp_dir / "sample.md"
        md_file.write_text("# Test Document\n\nThis is a test.", encoding="utf-8")
        return md_file

    @patch("subprocess.run")
    def test_generate_pdf_pandoc_success(self, mock_run, sample_markdown, temp_dir):
        """Successful run returns the output path and invokes pandoc with the expected arguments."""
        output_path = temp_dir / "output.pdf"
        mock_run.return_value = Mock(returncode=0, stderr="")
        # subprocess.run is mocked, so create the output file ourselves to
        # simulate a successful pandoc run.
        output_path.touch()

        generator = PDFGenerator()
        result = generator.generate_pdf_pandoc(sample_markdown, output_path)

        assert result == output_path
        assert output_path.exists()
        mock_run.assert_called_once()

        # Verify pandoc command structure
        cmd_args = mock_run.call_args[0][0]
        assert "pandoc" in cmd_args
        assert str(sample_markdown) in cmd_args
        assert str(output_path) in cmd_args
        assert "--pdf-engine=weasyprint" in cmd_args

    @patch("subprocess.run")
    def test_generate_pdf_pandoc_with_metadata(self, mock_run, sample_markdown, temp_dir):
        """Metadata entries are forwarded to pandoc as ``--metadata key=value`` arguments."""
        output_path = temp_dir / "output.pdf"
        mock_run.return_value = Mock(returncode=0, stderr="")
        output_path.touch()

        metadata = {
            "title": "Test Title",
            "author": "Test Author",
            "date": "2025-01-01",
        }

        generator = PDFGenerator()
        result = generator.generate_pdf_pandoc(
            sample_markdown,
            output_path,
            metadata=metadata,
        )

        assert result == output_path

        # Verify metadata in command
        cmd_args = mock_run.call_args[0][0]
        assert "--metadata" in cmd_args
        assert "title=Test Title" in cmd_args
        assert "author=Test Author" in cmd_args
        assert "date=2025-01-01" in cmd_args

    @patch("subprocess.run")
    def test_generate_pdf_pandoc_with_custom_css(self, mock_run, sample_markdown, temp_dir):
        """Generation with a named CSS template still results in a single pandoc call."""
        output_path = temp_dir / "output.pdf"
        mock_run.return_value = Mock(returncode=0, stderr="")
        output_path.touch()

        generator = PDFGenerator()
        result = generator.generate_pdf_pandoc(
            sample_markdown,
            output_path,
            css_template="academic",
        )

        assert result == output_path
        mock_run.assert_called_once()

    @patch("subprocess.run")
    def test_generate_pdf_pandoc_command_failed(self, mock_run, sample_markdown, temp_dir):
        """A non-zero pandoc exit code raises PDFGenerationError carrying pandoc's stderr."""
        output_path = temp_dir / "output.pdf"
        mock_run.return_value = Mock(returncode=1, stderr="Pandoc error message")

        generator = PDFGenerator()
        with pytest.raises(PDFGenerationError) as exc_info:
            generator.generate_pdf_pandoc(sample_markdown, output_path)

        message = str(exc_info.value)
        assert "Pandoc failed" in message
        assert "Pandoc error message" in message

    @patch("subprocess.run")
    def test_generate_pdf_pandoc_timeout(self, mock_run, sample_markdown, temp_dir):
        """A pandoc timeout is reported as PDFGenerationError mentioning the timeout."""
        output_path = temp_dir / "output.pdf"
        mock_run.side_effect = subprocess.TimeoutExpired("pandoc", 60)

        generator = PDFGenerator()
        with pytest.raises(PDFGenerationError) as exc_info:
            generator.generate_pdf_pandoc(sample_markdown, output_path)

        assert "timed out" in str(exc_info.value).lower()

    @patch("subprocess.run")
    def test_generate_pdf_pandoc_output_not_created(self, mock_run, sample_markdown, temp_dir):
        """Exit code 0 without an output file on disk is still treated as a failure."""
        output_path = temp_dir / "output.pdf"
        mock_run.return_value = Mock(returncode=0, stderr="")
        # Deliberately do not create the output file.

        generator = PDFGenerator()
        with pytest.raises(PDFGenerationError) as exc_info:
            generator.generate_pdf_pandoc(sample_markdown, output_path)

        assert "PDF file not created" in str(exc_info.value)


@pytest.mark.unit
class TestWeasyPrintPDFGeneration:
    """Test PDF generation using WeasyPrint directly"""

    @pytest.fixture
    def sample_markdown(self, temp_dir):
        """Create a sample Markdown file"""
        md_file = temp_dir / "sample.md"
        md_file.write_text("# Test Document\n\nThis is a test.", encoding="utf-8")
        return md_file

    @patch("app.services.pdf_generator.HTML")
    @patch("app.services.pdf_generator.CSS")
    def test_generate_pdf_weasyprint_success(self, mock_css, mock_html, sample_markdown, temp_dir):
        """Successful run builds one HTML and one CSS object and writes the PDF once."""
        output_path = temp_dir / "output.pdf"
        mock_html_instance = _stub_html_writer(mock_html, output_path)

        generator = PDFGenerator()
        result = generator.generate_pdf_weasyprint(sample_markdown, output_path)

        assert result == output_path
        assert output_path.exists()
        mock_html.assert_called_once()
        mock_css.assert_called_once()
        mock_html_instance.write_pdf.assert_called_once()

    @patch("app.services.pdf_generator.HTML")
    @patch("app.services.pdf_generator.CSS")
    def test_generate_pdf_weasyprint_with_metadata(
        self, mock_css, mock_html, sample_markdown, temp_dir
    ):
        """The metadata title appears in the HTML string handed to WeasyPrint."""
        output_path = temp_dir / "output.pdf"
        _stub_html_writer(mock_html, output_path)

        metadata = {
            "title": "Test Title",
            "author": "Test Author",
        }

        generator = PDFGenerator()
        result = generator.generate_pdf_weasyprint(
            sample_markdown,
            output_path,
            metadata=metadata,
        )

        assert result == output_path

        # Check that HTML string includes title
        html_kwargs = mock_html.call_args[1]
        assert html_kwargs["string"] is not None
        assert "Test Title" in html_kwargs["string"]

    @patch("app.services.pdf_generator.HTML")
    def test_generate_pdf_weasyprint_markdown_conversion(
        self, mock_html, sample_markdown, temp_dir
    ):
        """Markdown headings and paragraphs reach WeasyPrint as HTML elements."""
        output_path = temp_dir / "output.pdf"
        captured = _capture_html_strings(mock_html, output_path)

        generator = PDFGenerator()
        generator.generate_pdf_weasyprint(sample_markdown, output_path)

        assert captured, "HTML() was never called"
        captured_html = captured[-1]

        # NOTE(review): the tag literals in these assertions had been stripped
        # (the first check had degraded to `"" in captured_html`, which is
        # always true). The checks below are a reconstruction that tolerates
        # attributes on <html>/<h1> -- confirm against PDFGenerator's output.
        assert "<html" in captured_html
        assert "<h1" in captured_html
        assert "Test Document</h1>" in captured_html
        assert "<p>This is a test.</p>" in captured_html

    @patch("app.services.pdf_generator.HTML")
    @patch("app.services.pdf_generator.CSS")
    def test_generate_pdf_weasyprint_with_template(
        self, mock_css, mock_html, sample_markdown, temp_dir
    ):
        """Selecting the academic template passes that template's CSS text to ``CSS``."""
        output_path = temp_dir / "output.pdf"
        _stub_html_writer(mock_html, output_path)

        generator = PDFGenerator()
        generator.generate_pdf_weasyprint(
            sample_markdown,
            output_path,
            css_template="academic",
        )

        # Verify CSS was called with academic template content
        css_kwargs = mock_css.call_args[1]
        assert css_kwargs["string"] is not None
        assert "Times New Roman" in css_kwargs["string"]

    @patch("app.services.pdf_generator.HTML")
    def test_generate_pdf_weasyprint_error_handling(self, mock_html, sample_markdown, temp_dir):
        """Any exception raised while rendering is wrapped in PDFGenerationError."""
        output_path = temp_dir / "output.pdf"
        mock_html.side_effect = Exception("WeasyPrint rendering error")

        generator = PDFGenerator()
        with pytest.raises(PDFGenerationError) as exc_info:
            generator.generate_pdf_weasyprint(sample_markdown, output_path)

        assert "WeasyPrint PDF generation failed" in str(exc_info.value)


@pytest.mark.unit
class TestUnifiedPDFGeneration:
    """Test unified PDF generation with automatic fallback"""

    @pytest.fixture
    def sample_markdown(self, temp_dir):
        """Create a sample Markdown file"""
        md_file = temp_dir / "sample.md"
        md_file.write_text("# Test Document\n\nTest content.", encoding="utf-8")
        return md_file

    def test_generate_pdf_nonexistent_markdown(self, temp_dir):
        """A missing Markdown input raises PDFGenerationError mentioning "not found"."""
        nonexistent = temp_dir / "nonexistent.md"
        output_path = temp_dir / "output.pdf"

        generator = PDFGenerator()
        with pytest.raises(PDFGenerationError) as exc_info:
            generator.generate_pdf(nonexistent, output_path)

        assert "not found" in str(exc_info.value).lower()

    @patch.object(PDFGenerator, "check_pandoc_available")
    @patch.object(PDFGenerator, "generate_pdf_pandoc")
    def test_generate_pdf_prefers_pandoc(
        self, mock_pandoc_gen, mock_check, sample_markdown, temp_dir
    ):
        """With pandoc available and preferred, the pandoc backend is used."""
        output_path = temp_dir / "output.pdf"
        output_path.touch()
        mock_check.return_value = True
        mock_pandoc_gen.return_value = output_path

        generator = PDFGenerator()
        result = generator.generate_pdf(sample_markdown, output_path, prefer_pandoc=True)

        assert result == output_path
        mock_check.assert_called_once()
        mock_pandoc_gen.assert_called_once()

    @patch.object(PDFGenerator, "check_pandoc_available")
    @patch.object(PDFGenerator, "generate_pdf_weasyprint")
    def test_generate_pdf_uses_weasyprint_when_pandoc_unavailable(
        self, mock_weasy_gen, mock_check, sample_markdown, temp_dir
    ):
        """With pandoc unavailable, generation goes to the WeasyPrint backend."""
        output_path = temp_dir / "output.pdf"
        output_path.touch()
        mock_check.return_value = False
        mock_weasy_gen.return_value = output_path

        generator = PDFGenerator()
        result = generator.generate_pdf(sample_markdown, output_path, prefer_pandoc=True)

        assert result == output_path
        mock_check.assert_called_once()
        mock_weasy_gen.assert_called_once()

    @patch.object(PDFGenerator, "check_pandoc_available")
    @patch.object(PDFGenerator, "generate_pdf_pandoc")
    @patch.object(PDFGenerator, "generate_pdf_weasyprint")
    def test_generate_pdf_fallback_on_pandoc_failure(
        self, mock_weasy_gen, mock_pandoc_gen, mock_check, sample_markdown, temp_dir
    ):
        """A PDFGenerationError from pandoc triggers a retry through WeasyPrint."""
        output_path = temp_dir / "output.pdf"
        output_path.touch()
        mock_check.return_value = True
        mock_pandoc_gen.side_effect = PDFGenerationError("Pandoc failed")
        mock_weasy_gen.return_value = output_path

        generator = PDFGenerator()
        result = generator.generate_pdf(sample_markdown, output_path, prefer_pandoc=True)

        assert result == output_path
        mock_pandoc_gen.assert_called_once()
        mock_weasy_gen.assert_called_once()

    @patch.object(PDFGenerator, "check_pandoc_available")
    @patch.object(PDFGenerator, "generate_pdf_weasyprint")
    def test_generate_pdf_creates_output_directory(
        self, mock_weasy_gen, mock_check, sample_markdown, temp_dir
    ):
        """Generation into a nested output directory returns the requested path."""
        output_dir = temp_dir / "subdir" / "outputs"
        output_path = output_dir / "output.pdf"
        # NOTE(review): the directory is created here, before generate_pdf
        # runs, so the `output_dir.exists()` assertion below cannot fail and
        # does not prove that generate_pdf creates it. Consider dropping this
        # setup once generate_pdf's directory handling is confirmed.
        output_path.parent.mkdir(parents=True, exist_ok=True)
        output_path.touch()
        mock_check.return_value = False
        mock_weasy_gen.return_value = output_path

        generator = PDFGenerator()
        result = generator.generate_pdf(sample_markdown, output_path)

        assert output_dir.exists()
        assert result == output_path


@pytest.mark.unit
class TestTemplateManagement:
    """Test CSS template management"""

    def test_get_available_templates(self):
        """The template listing maps the three built-in names to non-empty descriptions."""
        generator = PDFGenerator()
        templates = generator.get_available_templates()

        assert isinstance(templates, dict)
        assert len(templates) == 3
        assert "default" in templates
        assert "academic" in templates
        assert "business" in templates

        # Only the type and non-emptiness of each description is checked;
        # the description language is not verified here.
        for desc in templates.values():
            assert isinstance(desc, str)
            assert len(desc) > 0

    def test_save_custom_template(self):
        """Saving under a new name stores the CSS text verbatim."""
        generator = PDFGenerator()
        custom_css = "@page { size: A4; }"

        generator.save_custom_template("custom", custom_css)

        assert "custom" in generator.css_templates
        assert generator.css_templates["custom"] == custom_css

    def test_save_custom_template_overwrites_existing(self):
        """Saving under an existing name replaces that template's CSS."""
        generator = PDFGenerator()
        new_css = "@page { size: Letter; }"

        generator.save_custom_template("default", new_css)

        assert generator.css_templates["default"] == new_css


@pytest.mark.unit
class TestEdgeCases:
    """Test edge cases and error handling"""

    @pytest.fixture
    def sample_markdown(self, temp_dir):
        """Create a sample Markdown file"""
        md_file = temp_dir / "sample.md"
        md_file.write_text("# Test", encoding="utf-8")
        return md_file

    @patch("app.services.pdf_generator.HTML")
    @patch("app.services.pdf_generator.CSS")
    def test_generate_with_unicode_content(self, mock_css, mock_html, temp_dir):
        """Chinese text in the Markdown source survives into the generated HTML."""
        md_file = temp_dir / "unicode.md"
        md_file.write_text("# 測試文檔\n\n這是中文內容。", encoding="utf-8")
        output_path = temp_dir / "output.pdf"
        captured = _capture_html_strings(mock_html, output_path)

        generator = PDFGenerator()
        result = generator.generate_pdf_weasyprint(md_file, output_path)

        assert result == output_path
        # Guard first so a missing HTML() call fails with a clear message
        # instead of an error on the membership checks below.
        assert captured, "HTML() was never called"
        captured_html = captured[-1]
        assert "測試文檔" in captured_html
        assert "中文內容" in captured_html

    @patch("app.services.pdf_generator.HTML")
    @patch("app.services.pdf_generator.CSS")
    def test_generate_with_table_markdown(self, mock_css, mock_html, temp_dir):
        """A Markdown pipe table is rendered as an HTML table."""
        md_file = temp_dir / "table.md"
        md_content = """
# Document with Table

| Column 1 | Column 2 |
|----------|----------|
| Data 1   | Data 2   |
"""
        md_file.write_text(md_content, encoding="utf-8")
        output_path = temp_dir / "output.pdf"
        captured = _capture_html_strings(mock_html, output_path)

        generator = PDFGenerator()
        result = generator.generate_pdf_weasyprint(md_file, output_path)

        assert result == output_path
        assert captured, "HTML() was never called"
        captured_html = captured[-1]

        # Markdown tables should be converted to HTML tables.
        # NOTE(review): the original tag literals had been stripped, leaving
        # an always-true `"" in captured_html`; these tag-prefix checks are a
        # reconstruction -- confirm against PDFGenerator's actual output.
        assert "<table" in captured_html
        assert "<th" in captured_html or "<td" in captured_html

    def test_custom_css_string_not_in_templates(self, sample_markdown, temp_dir):
        """A raw CSS string is neither a registered template name nor a template body."""
        generator = PDFGenerator()

        # NOTE(review): this test only inspects the template registry; it does
        # not run a generation with the custom CSS, so it does not prove that
        # an unknown template value is used as-is.
        custom_css = "body { font-size: 20pt; }"

        # Template names are the dict keys, so check those as well as the values.
        assert custom_css not in generator.css_templates
        assert custom_css not in generator.css_templates.values()