# OCR/backend/test_pdf_scaling.py

#!/usr/bin/env python
"""
Test script for PDF generation with proper scaling
"""

import json
from pathlib import Path
from app.services.pdf_generator_service import pdf_generator_service

def test_pdf_generation():
    """Run the layout-PDF generator against synthetic OCR output.

    Writes a mock OCR result (two text regions, recorded OCR dimensions of
    500x700) and a blank 1000x1400 source image into ``test_output/``
    (relative to the current working directory), then hands both to
    ``pdf_generator_service.generate_layout_pdf``.

    Returns:
        Whatever ``generate_layout_pdf`` returned; a truthy value is
        reported as success.
    """
    # Every artefact of this run is written below ./test_output.
    out_dir = Path("test_output")
    out_dir.mkdir(exist_ok=True)

    # Two text regions, each described by a four-corner box plus confidence.
    regions = [
        {
            "text": "測試文字 Test Text",
            "bbox": [[50, 100], [250, 100], [250, 150], [50, 150]],
            "confidence": 0.95,
        },
        {
            "text": "第二行文字 Second line",
            "bbox": [[50, 200], [300, 200], [300, 250], [50, 250]],
            "confidence": 0.92,
        },
    ]

    # Mock OCR result; "ocr_dimensions" records a 500x700 OCR pass, i.e.
    # half the size of the source image created further down.
    ocr_payload = {
        "status": "success",
        "file_name": "test_image.jpg",
        "language": "ch",
        "ocr_dimensions": {"width": 500, "height": 700},
        "text_regions": regions,
        "total_text_regions": 2,
        "average_confidence": 0.935,
        "layout_data": None,
        "images_metadata": [],
        "markdown_content": "# Test Document\n\n測試文字 Test Text\n\n第二行文字 Second line",
        "processing_time": 1.5,
        "timestamp": "2025-11-17T00:00:00",
    }

    # Persist the payload as UTF-8 JSON, keeping non-ASCII text unescaped.
    json_path = out_dir / "test_ocr_result.json"
    json_path.write_text(
        json.dumps(ocr_payload, ensure_ascii=False, indent=2),
        encoding="utf-8",
    )
    print(f"Created test JSON at: {json_path}")

    pdf_path = out_dir / "test_output.pdf"

    # Blank white page at 1000x1400, passed to the generator as the source file.
    from PIL import Image
    source_image = out_dir / "test_source.jpg"
    Image.new('RGB', (1000, 1400), color='white').save(source_image)
    print(f"Created test source image: {source_image} (1000x1400)")

    print("\nGenerating PDF with scaling...")

    # INFO-level logging so the scale factors show up in the output.
    import logging
    logging.basicConfig(level=logging.INFO, format='%(message)s')

    ok = pdf_generator_service.generate_layout_pdf(
        json_path=json_path,
        output_path=pdf_path,
        source_file_path=source_image,
    )

    # Failure path first; the remainder is the success report.
    if not ok:
        print("✗ PDF generation failed")
        return ok

    print(f"✓ PDF generated successfully: {pdf_path}")
    print(f"  Expected scale factors: X={1000/500:.2f}, Y={1400/700:.2f}")
    print("  Text should now be properly scaled and positioned!")
    return ok

if __name__ == "__main__":
    # Script entry point: run the scaling test, print a banner-framed
    # summary, and signal failure through the process exit status.
    import sys

    # Put this file's directory at the front of the import path.
    # NOTE(review): the module-level import of the app package has already
    # run by the time this line executes, so the insertion cannot affect
    # it -- confirm whether it is still needed.
    sys.path.insert(0, str(Path(__file__).parent))

    print("Testing PDF generation with proper scaling...")
    print("=" * 60)

    success = test_pdf_generation()

    print("\n" + "=" * 60)
    if success:
        print("✓ Test completed successfully!")
        print("Check test_output/test_output.pdf to verify scaling")
    else:
        print("✗ Test failed")
        # Exit non-zero so shells and CI pipelines can detect the failure;
        # previously the script always exited with status 0.
        sys.exit(1)