""" Pytest configuration and fixtures for Tool_OCR tests. """ import pytest import tempfile import os from pathlib import Path # Add project root to path import sys project_root = Path(__file__).parent.parent sys.path.insert(0, str(project_root)) @pytest.fixture def temp_dir(): """Create a temporary directory for test files.""" with tempfile.TemporaryDirectory() as tmpdir: yield Path(tmpdir) @pytest.fixture def sample_text_file(temp_dir): """Create a sample text file.""" file_path = temp_dir / "sample.txt" file_path.write_text("This is a sample text file for testing purposes.\n" * 10) return file_path @pytest.fixture def sample_pdf_editable(temp_dir): """Create a sample editable PDF with text content.""" import fitz file_path = temp_dir / "editable.pdf" doc = fitz.open() # Create 3 pages with text for i in range(3): page = doc.new_page() text = f"This is page {i + 1} of an editable PDF document.\n" * 20 page.insert_text((50, 50), text, fontsize=12) doc.save(str(file_path)) doc.close() return file_path @pytest.fixture def sample_pdf_scanned(temp_dir): """Create a sample scanned PDF (image-only, no text).""" import fitz from PIL import Image import io file_path = temp_dir / "scanned.pdf" doc = fitz.open() # Create 3 pages with only images (simulating scanned pages) for i in range(3): page = doc.new_page() # Create a simple image img = Image.new('RGB', (400, 300), color=(200, 200, 200)) img_bytes = io.BytesIO() img.save(img_bytes, format='PNG') img_bytes.seek(0) # Insert image covering most of the page rect = fitz.Rect(50, 50, 550, 750) page.insert_image(rect, stream=img_bytes.read()) doc.save(str(file_path)) doc.close() return file_path @pytest.fixture def sample_pdf_mixed(temp_dir): """Create a sample mixed PDF (some pages with text, some with images).""" import fitz from PIL import Image import io file_path = temp_dir / "mixed.pdf" doc = fitz.open() # Page 1: Text page = doc.new_page() text = "This is a text page.\n" * 20 page.insert_text((50, 50), text, fontsize=12) # Page 2: Image only page = doc.new_page() img = Image.new('RGB', (400, 300), color=(200, 200, 200)) img_bytes = io.BytesIO() img.save(img_bytes, format='PNG') img_bytes.seek(0) rect = fitz.Rect(50, 50, 550, 750) page.insert_image(rect, stream=img_bytes.read()) # Page 3: Image only page = doc.new_page() img = Image.new('RGB', (400, 300), color=(150, 150, 150)) img_bytes = io.BytesIO() img.save(img_bytes, format='PNG') img_bytes.seek(0) page.insert_image(rect, stream=img_bytes.read()) doc.save(str(file_path)) doc.close() return file_path @pytest.fixture def sample_image_png(temp_dir): """Create a sample PNG image.""" from PIL import Image file_path = temp_dir / "sample.png" img = Image.new('RGB', (100, 100), color='red') img.save(str(file_path)) return file_path @pytest.fixture def sample_image_jpg(temp_dir): """Create a sample JPEG image.""" from PIL import Image file_path = temp_dir / "sample.jpg" img = Image.new('RGB', (100, 100), color='blue') img.save(str(file_path)) return file_path @pytest.fixture def sample_docx(temp_dir): """Create a sample DOCX file (minimal valid structure).""" from zipfile import ZipFile file_path = temp_dir / "sample.docx" # Create minimal DOCX structure with ZipFile(file_path, 'w') as zf: # [Content_Types].xml content_types = ''' ''' zf.writestr('[Content_Types].xml', content_types) # _rels/.rels rels = ''' ''' zf.writestr('_rels/.rels', rels) # word/document.xml document = ''' Test document ''' zf.writestr('word/document.xml', document) return file_path