# Commit: first
# File: backend/tests/test_pdf_generator.py (normal file, 559 lines added)
# Diff hunk: @@ -0,0 +1,559 @@
"""
Tool_OCR - PDF Generator Unit Tests
Tests for app/services/pdf_generator.py
"""
|
||||
|
||||
import pytest
|
||||
from pathlib import Path
|
||||
from unittest.mock import Mock, patch, MagicMock
|
||||
import subprocess
|
||||
|
||||
from app.services.pdf_generator import PDFGenerator, PDFGenerationError
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestPDFGeneratorInit:
    """Construction-time checks for ``PDFGenerator``."""

    def test_init(self):
        """A new generator exposes exactly the three expected CSS templates."""
        pdf_gen = PDFGenerator()

        assert pdf_gen is not None
        assert hasattr(pdf_gen, 'css_templates')

        templates = pdf_gen.css_templates
        assert len(templates) == 3
        for expected_name in ('default', 'academic', 'business'):
            assert expected_name in templates

    def test_css_templates_have_content(self):
        """Every registered template is a non-trivial stylesheet string."""
        stylesheets = PDFGenerator().css_templates.values()

        for stylesheet in stylesheets:
            assert isinstance(stylesheet, str)
            assert len(stylesheet) > 100
            # Each stylesheet must mention both a page rule and the body.
            assert '@page' in stylesheet
            assert 'body' in stylesheet
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestPandocAvailability:
    """Checks for ``check_pandoc_available`` with ``subprocess.run`` mocked out."""

    @patch('subprocess.run')
    def test_check_pandoc_available_success(self, mock_run):
        """A zero exit status from ``pandoc --version`` reports availability."""
        mock_run.return_value = Mock(returncode=0, stdout="pandoc 2.x")

        available = PDFGenerator().check_pandoc_available()

        assert available is True
        mock_run.assert_called_once()
        # The first positional argument of the single call is the command list.
        positional, _keywords = mock_run.call_args
        assert positional[0] == ["pandoc", "--version"]

    @patch('subprocess.run')
    def test_check_pandoc_available_not_found(self, mock_run):
        """A missing executable (``FileNotFoundError``) reports unavailability."""
        mock_run.side_effect = FileNotFoundError()

        available = PDFGenerator().check_pandoc_available()

        assert available is False

    @patch('subprocess.run')
    def test_check_pandoc_available_timeout(self, mock_run):
        """A timed-out version probe reports unavailability."""
        mock_run.side_effect = subprocess.TimeoutExpired("pandoc", 5)

        available = PDFGenerator().check_pandoc_available()

        assert available is False
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestPandocPDFGeneration:
    """Checks for ``generate_pdf_pandoc`` with ``subprocess.run`` mocked out."""

    @pytest.fixture
    def sample_markdown(self, temp_dir):
        """Write a small Markdown document into ``temp_dir`` and return its path."""
        source = temp_dir / "sample.md"
        source.write_text("# Test Document\n\nThis is a test.", encoding="utf-8")
        return source

    @patch('subprocess.run')
    def test_generate_pdf_pandoc_success(self, mock_run, sample_markdown, temp_dir):
        """A zero exit status plus an existing output file yields the output path."""
        pdf_path = temp_dir / "output.pdf"
        # subprocess.run is mocked, so the file pandoc would write is created by hand.
        pdf_path.touch()
        mock_run.return_value = Mock(returncode=0, stderr="")

        generated = PDFGenerator().generate_pdf_pandoc(sample_markdown, pdf_path)

        assert generated == pdf_path
        assert pdf_path.exists()
        mock_run.assert_called_once()

        # The command must name pandoc, both paths and the WeasyPrint engine.
        command = mock_run.call_args[0][0]
        expected_tokens = (
            "pandoc",
            str(sample_markdown),
            str(pdf_path),
            "--pdf-engine=weasyprint",
        )
        for token in expected_tokens:
            assert token in command

    @patch('subprocess.run')
    def test_generate_pdf_pandoc_with_metadata(self, mock_run, sample_markdown, temp_dir):
        """Each metadata entry shows up in the command as a ``key=value`` argument."""
        pdf_path = temp_dir / "output.pdf"
        pdf_path.touch()
        mock_run.return_value = Mock(returncode=0, stderr="")
        doc_info = {
            "title": "Test Title",
            "author": "Test Author",
            "date": "2025-01-01",
        }

        generated = PDFGenerator().generate_pdf_pandoc(
            sample_markdown,
            pdf_path,
            metadata=doc_info,
        )

        assert generated == pdf_path

        command = mock_run.call_args[0][0]
        assert "--metadata" in command
        for pair in ("title=Test Title", "author=Test Author", "date=2025-01-01"):
            assert pair in command

    @patch('subprocess.run')
    def test_generate_pdf_pandoc_with_custom_css(self, mock_run, sample_markdown, temp_dir):
        """Selecting the ``academic`` template still results in a single pandoc run."""
        pdf_path = temp_dir / "output.pdf"
        pdf_path.touch()
        mock_run.return_value = Mock(returncode=0, stderr="")

        generated = PDFGenerator().generate_pdf_pandoc(
            sample_markdown,
            pdf_path,
            css_template="academic",
        )

        assert generated == pdf_path
        mock_run.assert_called_once()

    @patch('subprocess.run')
    def test_generate_pdf_pandoc_command_failed(self, mock_run, sample_markdown, temp_dir):
        """A non-zero exit status raises ``PDFGenerationError`` carrying stderr."""
        pdf_path = temp_dir / "output.pdf"
        mock_run.return_value = Mock(returncode=1, stderr="Pandoc error message")

        with pytest.raises(PDFGenerationError) as raised:
            PDFGenerator().generate_pdf_pandoc(sample_markdown, pdf_path)

        message = str(raised.value)
        assert "Pandoc failed" in message
        assert "Pandoc error message" in message

    @patch('subprocess.run')
    def test_generate_pdf_pandoc_timeout(self, mock_run, sample_markdown, temp_dir):
        """A subprocess timeout is reported as a ``PDFGenerationError``."""
        pdf_path = temp_dir / "output.pdf"
        mock_run.side_effect = subprocess.TimeoutExpired("pandoc", 60)

        with pytest.raises(PDFGenerationError) as raised:
            PDFGenerator().generate_pdf_pandoc(sample_markdown, pdf_path)

        assert "timed out" in str(raised.value).lower()

    @patch('subprocess.run')
    def test_generate_pdf_pandoc_output_not_created(self, mock_run, sample_markdown, temp_dir):
        """A successful exit status without an output file is still an error."""
        # The output path is deliberately never created.
        pdf_path = temp_dir / "output.pdf"
        mock_run.return_value = Mock(returncode=0, stderr="")

        with pytest.raises(PDFGenerationError) as raised:
            PDFGenerator().generate_pdf_pandoc(sample_markdown, pdf_path)

        assert "PDF file not created" in str(raised.value)
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestWeasyPrintPDFGeneration:
    """Checks for ``generate_pdf_weasyprint`` with the HTML/CSS classes mocked."""

    @staticmethod
    def _html_stub_writing(pdf_path):
        """Return a stand-in HTML object whose ``write_pdf`` creates ``pdf_path``."""
        stub = Mock()
        stub.write_pdf = Mock(side_effect=lambda *args, **kwargs: pdf_path.touch())
        return stub

    @pytest.fixture
    def sample_markdown(self, temp_dir):
        """Write a small Markdown document into ``temp_dir`` and return its path."""
        source = temp_dir / "sample.md"
        source.write_text("# Test Document\n\nThis is a test.", encoding="utf-8")
        return source

    @patch('app.services.pdf_generator.HTML')
    @patch('app.services.pdf_generator.CSS')
    def test_generate_pdf_weasyprint_success(self, mock_css, mock_html, sample_markdown, temp_dir):
        """HTML and CSS are each built once and ``write_pdf`` is called once."""
        pdf_path = temp_dir / "output.pdf"
        html_stub = self._html_stub_writing(pdf_path)
        mock_html.return_value = html_stub

        generated = PDFGenerator().generate_pdf_weasyprint(sample_markdown, pdf_path)

        assert generated == pdf_path
        assert pdf_path.exists()
        mock_html.assert_called_once()
        mock_css.assert_called_once()
        html_stub.write_pdf.assert_called_once()

    @patch('app.services.pdf_generator.HTML')
    @patch('app.services.pdf_generator.CSS')
    def test_generate_pdf_weasyprint_with_metadata(self, mock_css, mock_html, sample_markdown, temp_dir):
        """The metadata title appears in the HTML string handed to ``HTML``."""
        pdf_path = temp_dir / "output.pdf"
        mock_html.return_value = self._html_stub_writing(pdf_path)
        doc_info = {
            "title": "Test Title",
            "author": "Test Author",
        }

        generated = PDFGenerator().generate_pdf_weasyprint(
            sample_markdown,
            pdf_path,
            metadata=doc_info,
        )

        assert generated == pdf_path

        # Index 1 of call_args holds the keyword arguments of the HTML(...) call.
        html_source = mock_html.call_args[1]['string']
        assert html_source is not None
        assert "Test Title" in html_source

    @patch('app.services.pdf_generator.HTML')
    def test_generate_pdf_weasyprint_markdown_conversion(self, mock_html, sample_markdown, temp_dir):
        """The Markdown heading and paragraph arrive at ``HTML`` as HTML tags."""
        pdf_path = temp_dir / "output.pdf"
        seen = {}

        def record_html(string, **kwargs):
            # Keep the markup for inspection, then behave like a working HTML object.
            seen['html'] = string
            return self._html_stub_writing(pdf_path)

        mock_html.side_effect = record_html

        PDFGenerator().generate_pdf_weasyprint(sample_markdown, pdf_path)

        html_source = seen.get('html')
        assert html_source is not None
        for fragment in ("<!DOCTYPE html>", "<h1>Test Document</h1>", "<p>This is a test.</p>"):
            assert fragment in html_source

    @patch('app.services.pdf_generator.HTML')
    @patch('app.services.pdf_generator.CSS')
    def test_generate_pdf_weasyprint_with_template(self, mock_css, mock_html, sample_markdown, temp_dir):
        """Choosing ``academic`` passes that template's stylesheet to ``CSS``."""
        pdf_path = temp_dir / "output.pdf"
        mock_html.return_value = self._html_stub_writing(pdf_path)

        PDFGenerator().generate_pdf_weasyprint(
            sample_markdown,
            pdf_path,
            css_template="academic",
        )

        # The stylesheet passed for the academic template is expected to mention this font.
        stylesheet = mock_css.call_args[1]['string']
        assert stylesheet is not None
        assert "Times New Roman" in stylesheet

    @patch('app.services.pdf_generator.HTML')
    def test_generate_pdf_weasyprint_error_handling(self, mock_html, sample_markdown, temp_dir):
        """An exception raised while building HTML becomes ``PDFGenerationError``."""
        pdf_path = temp_dir / "output.pdf"
        mock_html.side_effect = Exception("WeasyPrint rendering error")

        with pytest.raises(PDFGenerationError) as raised:
            PDFGenerator().generate_pdf_weasyprint(sample_markdown, pdf_path)

        assert "WeasyPrint PDF generation failed" in str(raised.value)
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestUnifiedPDFGeneration:
    """Test unified PDF generation with automatic fallback.

    The individual backends (``generate_pdf_pandoc`` / ``generate_pdf_weasyprint``)
    and the Pandoc availability probe are patched on the class, so only the
    dispatch logic of ``PDFGenerator.generate_pdf`` is exercised here.
    """

    @pytest.fixture
    def sample_markdown(self, temp_dir):
        """Create a sample Markdown file in ``temp_dir`` and return its path."""
        md_file = temp_dir / "sample.md"
        md_file.write_text("# Test Document\n\nTest content.", encoding="utf-8")
        return md_file

    def test_generate_pdf_nonexistent_markdown(self, temp_dir):
        """A missing Markdown input raises ``PDFGenerationError`` ("not found")."""
        nonexistent = temp_dir / "nonexistent.md"
        output_path = temp_dir / "output.pdf"

        generator = PDFGenerator()

        with pytest.raises(PDFGenerationError) as exc_info:
            generator.generate_pdf(nonexistent, output_path)

        assert "not found" in str(exc_info.value).lower()

    @patch.object(PDFGenerator, 'check_pandoc_available')
    @patch.object(PDFGenerator, 'generate_pdf_pandoc')
    def test_generate_pdf_prefers_pandoc(self, mock_pandoc_gen, mock_check, sample_markdown, temp_dir):
        """Pandoc is used when it is available and ``prefer_pandoc=True``."""
        output_path = temp_dir / "output.pdf"
        output_path.touch()

        mock_check.return_value = True
        mock_pandoc_gen.return_value = output_path

        generator = PDFGenerator()
        result = generator.generate_pdf(sample_markdown, output_path, prefer_pandoc=True)

        assert result == output_path
        mock_check.assert_called_once()
        mock_pandoc_gen.assert_called_once()

    @patch.object(PDFGenerator, 'check_pandoc_available')
    @patch.object(PDFGenerator, 'generate_pdf_weasyprint')
    def test_generate_pdf_uses_weasyprint_when_pandoc_unavailable(
        self, mock_weasy_gen, mock_check, sample_markdown, temp_dir
    ):
        """WeasyPrint is used when the Pandoc availability probe returns False."""
        output_path = temp_dir / "output.pdf"
        output_path.touch()

        mock_check.return_value = False
        mock_weasy_gen.return_value = output_path

        generator = PDFGenerator()
        result = generator.generate_pdf(sample_markdown, output_path, prefer_pandoc=True)

        assert result == output_path
        mock_check.assert_called_once()
        mock_weasy_gen.assert_called_once()

    @patch.object(PDFGenerator, 'check_pandoc_available')
    @patch.object(PDFGenerator, 'generate_pdf_pandoc')
    @patch.object(PDFGenerator, 'generate_pdf_weasyprint')
    def test_generate_pdf_fallback_on_pandoc_failure(
        self, mock_weasy_gen, mock_pandoc_gen, mock_check, sample_markdown, temp_dir
    ):
        """A ``PDFGenerationError`` from Pandoc triggers the WeasyPrint fallback."""
        output_path = temp_dir / "output.pdf"
        output_path.touch()

        mock_check.return_value = True
        mock_pandoc_gen.side_effect = PDFGenerationError("Pandoc failed")
        mock_weasy_gen.return_value = output_path

        generator = PDFGenerator()
        result = generator.generate_pdf(sample_markdown, output_path, prefer_pandoc=True)

        assert result == output_path
        # Both backends must have been tried exactly once, Pandoc first.
        mock_pandoc_gen.assert_called_once()
        mock_weasy_gen.assert_called_once()

    @patch.object(PDFGenerator, 'check_pandoc_available')
    @patch.object(PDFGenerator, 'generate_pdf_weasyprint')
    def test_generate_pdf_creates_output_directory(
        self, mock_weasy_gen, mock_check, sample_markdown, temp_dir
    ):
        """``generate_pdf`` creates a missing output directory before rendering.

        The directory is deliberately NOT created by the test. The mocked
        backend writes the output file itself, which can only succeed if
        ``generate_pdf`` has already created the parent directory.
        """
        output_dir = temp_dir / "subdir" / "outputs"
        output_path = output_dir / "output.pdf"

        # Precondition: the nested directory must not exist yet, otherwise the
        # final assertion would pass regardless of what generate_pdf does.
        assert not output_dir.exists()

        def write_fake_pdf(*args, **kwargs):
            # touch() raises FileNotFoundError when the parent directory is
            # missing, so this fails loudly if the directory was not created.
            output_path.touch()
            return output_path

        mock_check.return_value = False
        mock_weasy_gen.side_effect = write_fake_pdf

        generator = PDFGenerator()
        result = generator.generate_pdf(sample_markdown, output_path)

        assert output_dir.is_dir()
        assert result == output_path
        mock_weasy_gen.assert_called_once()
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestTemplateManagement:
    """Checks for listing and registering CSS templates."""

    def test_get_available_templates(self):
        """The listing is a dict of three named templates with text descriptions."""
        listing = PDFGenerator().get_available_templates()

        assert isinstance(listing, dict)
        assert len(listing) == 3
        for expected_name in ("default", "academic", "business"):
            assert expected_name in listing

        # Only the presence of a description is verified, not its language.
        for description in listing.values():
            assert isinstance(description, str)
            assert len(description) > 0

    def test_save_custom_template(self):
        """A template saved under a new name is stored verbatim."""
        pdf_gen = PDFGenerator()
        stylesheet = "@page { size: A4; }"

        pdf_gen.save_custom_template("custom", stylesheet)

        registry = pdf_gen.css_templates
        assert "custom" in registry
        assert registry["custom"] == stylesheet

    def test_save_custom_template_overwrites_existing(self):
        """Saving under an existing name replaces that template's stylesheet."""
        pdf_gen = PDFGenerator()
        replacement = "@page { size: Letter; }"

        pdf_gen.save_custom_template("default", replacement)

        assert pdf_gen.css_templates["default"] == replacement
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestEdgeCases:
    """Test edge cases and error handling.

    ``HTML`` and ``CSS`` are patched in ``app.services.pdf_generator`` so no
    real rendering happens; the tests inspect what the generator hands to them.
    """

    @pytest.fixture
    def sample_markdown(self, temp_dir):
        """Create a minimal Markdown file in ``temp_dir`` and return its path."""
        md_file = temp_dir / "sample.md"
        md_file.write_text("# Test", encoding="utf-8")
        return md_file

    @patch('app.services.pdf_generator.HTML')
    @patch('app.services.pdf_generator.CSS')
    def test_generate_with_unicode_content(self, mock_css, mock_html, temp_dir):
        """Chinese text in the Markdown source survives into the generated HTML."""
        md_file = temp_dir / "unicode.md"
        md_file.write_text("# 測試文檔\n\n這是中文內容。", encoding="utf-8")
        output_path = temp_dir / "output.pdf"

        captured_html = None

        def capture_html(string, **kwargs):
            # Record the markup, then act like an HTML object that writes the PDF.
            nonlocal captured_html
            captured_html = string
            mock_instance = Mock()
            mock_instance.write_pdf = Mock(side_effect=lambda *args, **kwargs: output_path.touch())
            return mock_instance

        mock_html.side_effect = capture_html

        generator = PDFGenerator()
        result = generator.generate_pdf_weasyprint(md_file, output_path)

        assert result == output_path
        # Guard first so a missing HTML call fails clearly instead of with a
        # TypeError from ``in None``.
        assert captured_html is not None
        assert "測試文檔" in captured_html
        assert "中文內容" in captured_html

    @patch('app.services.pdf_generator.HTML')
    @patch('app.services.pdf_generator.CSS')
    def test_generate_with_table_markdown(self, mock_css, mock_html, temp_dir):
        """A Markdown pipe table is rendered as an HTML ``<table>``."""
        md_file = temp_dir / "table.md"
        md_content = """
# Document with Table

| Column 1 | Column 2 |
|----------|----------|
| Data 1 | Data 2 |
"""
        md_file.write_text(md_content, encoding="utf-8")
        output_path = temp_dir / "output.pdf"

        captured_html = None

        def capture_html(string, **kwargs):
            # Record the markup, then act like an HTML object that writes the PDF.
            nonlocal captured_html
            captured_html = string
            mock_instance = Mock()
            mock_instance.write_pdf = Mock(side_effect=lambda *args, **kwargs: output_path.touch())
            return mock_instance

        mock_html.side_effect = capture_html

        generator = PDFGenerator()
        result = generator.generate_pdf_weasyprint(md_file, output_path)

        assert result == output_path
        assert captured_html is not None
        # Markdown tables should be converted to HTML tables
        assert "<table>" in captured_html
        assert "<th>" in captured_html or "<td>" in captured_html

    @patch('app.services.pdf_generator.HTML')
    @patch('app.services.pdf_generator.CSS')
    def test_custom_css_string_not_in_templates(self, mock_css, mock_html, sample_markdown, temp_dir):
        """A ``css_template`` value that is not a template name is used as raw CSS.

        NOTE(review): this encodes the intent stated in the original test
        ("when CSS template is not in templates dict, it should be used
        as-is"); confirm against ``generate_pdf_weasyprint`` if it fails.
        """
        output_path = temp_dir / "output.pdf"
        html_instance = Mock()
        html_instance.write_pdf = Mock(side_effect=lambda *args, **kwargs: output_path.touch())
        mock_html.return_value = html_instance

        generator = PDFGenerator()
        custom_css = "body { font-size: 20pt; }"

        # Precondition: the string is neither a template name nor template content.
        assert custom_css not in generator.css_templates
        assert custom_css not in generator.css_templates.values()

        result = generator.generate_pdf_weasyprint(
            sample_markdown,
            output_path,
            css_template=custom_css
        )

        assert result == output_path
        # The raw stylesheet must reach CSS(string=...); ``in`` tolerates the
        # generator wrapping it in additional rules.
        mock_css.assert_called_once()
        assert custom_css in mock_css.call_args[1]['string']
|
||||
Reference in New Issue
Block a user