"""
Pytest configuration and fixtures for Tool_OCR tests.
"""
import pytest
import tempfile
import os
from pathlib import Path
# Add project root to path
import sys
# Take the directory two levels above this file and place it at the front of
# sys.path so it is searched before any other import location.
# NOTE(review): presumably this lets tests import the project's packages
# without installing them — confirm against the repository layout.
project_root = Path(__file__).parent.parent
sys.path.insert(0, str(project_root))
@pytest.fixture
def temp_dir():
    """Yield a fresh temporary directory as a ``Path``.

    The directory and everything inside it are deleted once the test that
    requested the fixture has finished.
    """
    scratch = tempfile.TemporaryDirectory()
    try:
        yield Path(scratch.name)
    finally:
        # Runs on normal teardown and when the test raised.
        scratch.cleanup()
@pytest.fixture
def sample_text_file(temp_dir):
    """Write a ten-line plain-text file into ``temp_dir`` and return its path."""
    line = "This is a sample text file for testing purposes.\n"
    target = temp_dir / "sample.txt"
    target.write_text(line * 10)
    return target
@pytest.fixture
def sample_pdf_editable(temp_dir):
    """Create a three-page PDF whose pages contain inserted text.

    Each page receives twenty copies of a line naming its page number,
    written at point (50, 50) with a 12 pt font.

    Args:
        temp_dir: Directory (``Path``) in which ``editable.pdf`` is written.

    Returns:
        Path to the saved PDF file.
    """
    import fitz

    file_path = temp_dir / "editable.pdf"
    doc = fitz.open()
    try:
        for page_number in range(1, 4):
            page = doc.new_page()
            text = f"This is page {page_number} of an editable PDF document.\n" * 20
            page.insert_text((50, 50), text, fontsize=12)
        doc.save(str(file_path))
    finally:
        # Release the document even if building or saving a page fails.
        doc.close()
    return file_path
@pytest.fixture
def sample_pdf_scanned(temp_dir):
    """Create a three-page PDF whose pages hold only an image (no text).

    Every page shows the same 400x300 light-grey PNG stretched over the
    rectangle (50, 50)-(550, 750), simulating scanned pages.

    Args:
        temp_dir: Directory (``Path``) in which ``scanned.pdf`` is written.

    Returns:
        Path to the saved PDF file.
    """
    import io

    import fitz
    from PIL import Image

    file_path = temp_dir / "scanned.pdf"

    # The picture is identical on every page, so encode it once up front
    # instead of rebuilding and re-compressing it inside the loop.
    buffer = io.BytesIO()
    Image.new('RGB', (400, 300), color=(200, 200, 200)).save(buffer, format='PNG')
    png_bytes = buffer.getvalue()

    # Placement rectangle covering most of the page.
    rect = fitz.Rect(50, 50, 550, 750)

    doc = fitz.open()
    try:
        for _ in range(3):
            page = doc.new_page()
            page.insert_image(rect, stream=png_bytes)
        doc.save(str(file_path))
    finally:
        # Release the document even if building or saving a page fails.
        doc.close()
    return file_path
@pytest.fixture
def sample_pdf_mixed(temp_dir):
    """Create a three-page PDF mixing one text page with two image-only pages.

    Page 1 contains inserted text; pages 2 and 3 each contain a single grey
    PNG (grey levels 200 and 150) placed in the rectangle
    (50, 50)-(550, 750).

    Args:
        temp_dir: Directory (``Path``) in which ``mixed.pdf`` is written.

    Returns:
        Path to the saved PDF file.
    """
    import io

    import fitz
    from PIL import Image

    def _grey_png(level):
        """Return PNG bytes of a 400x300 image filled with one grey level."""
        buffer = io.BytesIO()
        Image.new('RGB', (400, 300), color=(level, level, level)).save(buffer, format='PNG')
        return buffer.getvalue()

    file_path = temp_dir / "mixed.pdf"
    rect = fitz.Rect(50, 50, 550, 750)

    doc = fitz.open()
    try:
        # Page 1: text
        page = doc.new_page()
        text = "This is a text page.\n" * 20
        page.insert_text((50, 50), text, fontsize=12)

        # Pages 2 and 3: image only, differing only in grey level
        for level in (200, 150):
            page = doc.new_page()
            page.insert_image(rect, stream=_grey_png(level))

        doc.save(str(file_path))
    finally:
        # Release the document even if building or saving a page fails.
        doc.close()
    return file_path
@pytest.fixture
def sample_image_png(temp_dir):
    """Save a 100x100 solid red image as ``sample.png`` and return its path."""
    from PIL import Image

    target = temp_dir / "sample.png"
    Image.new('RGB', (100, 100), color='red').save(str(target))
    return target
@pytest.fixture
def sample_image_jpg(temp_dir):
    """Save a 100x100 solid blue image as ``sample.jpg`` and return its path."""
    from PIL import Image

    target = temp_dir / "sample.jpg"
    Image.new('RGB', (100, 100), color='blue').save(str(target))
    return target
@pytest.fixture
def sample_docx(temp_dir):
    """Create a minimal DOCX file containing the paragraph "Test document".

    The archive holds the three parts a DOCX package needs: the content-type
    map, the root relationships pointing at the main document, and the main
    document part itself. Each part is well-formed XML that starts with the
    XML declaration (no leading whitespace).

    Args:
        temp_dir: Directory (``Path``) in which ``sample.docx`` is written.

    Returns:
        Path to the written DOCX file.
    """
    from zipfile import ZipFile

    file_path = temp_dir / "sample.docx"

    xml_decl = '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n'

    # [Content_Types].xml: maps part names/extensions to their media types.
    content_types = xml_decl + (
        '<Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">'
        '<Default Extension="rels" '
        'ContentType="application/vnd.openxmlformats-package.relationships+xml"/>'
        '<Default Extension="xml" ContentType="application/xml"/>'
        '<Override PartName="/word/document.xml" '
        'ContentType="application/vnd.openxmlformats-officedocument.'
        'wordprocessingml.document.main+xml"/>'
        '</Types>'
    )

    # _rels/.rels: package-level relationship naming the main document part.
    rels = xml_decl + (
        '<Relationships '
        'xmlns="http://schemas.openxmlformats.org/package/2006/relationships">'
        '<Relationship Id="rId1" '
        'Type="http://schemas.openxmlformats.org/officeDocument/2006/'
        'relationships/officeDocument" '
        'Target="word/document.xml"/>'
        '</Relationships>'
    )

    # word/document.xml: a body with a single paragraph and a single text run.
    document = xml_decl + (
        '<w:document '
        'xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">'
        '<w:body>'
        '<w:p><w:r><w:t>Test document</w:t></w:r></w:p>'
        '</w:body>'
        '</w:document>'
    )

    with ZipFile(file_path, 'w') as zf:
        zf.writestr('[Content_Types].xml', content_types)
        zf.writestr('_rels/.rels', rels)
        zf.writestr('word/document.xml', document)
    return file_path