Files
OCR/backend/test_pdf_scaling.py
egg d33f605bdb fix: add proper coordinate scaling from OCR space to PDF space
Problem:
- OCR processes images at a lower resolution than the target PDF, but the resulting coordinates were being used directly on the larger PDF canvas
- This caused all text/tables/images to be drawn at the wrong scale, clustered in the bottom-left corner of the page

Solution:
- Track OCR image dimensions in JSON output (ocr_dimensions)
- Calculate proper scale factors: scale_w = pdf_width/ocr_width, scale_h = pdf_height/ocr_height
- Apply scaling to all coordinates before drawing on PDF canvas
- Support per-page scaling for multi-page PDFs

Changes:
1. ocr_service.py:
   - Add OCR image dimensions capture using PIL
   - Include ocr_dimensions in JSON output for both single images and PDFs

2. pdf_generator_service.py:
   - Calculate scale factors from OCR dimensions vs target PDF dimensions
   - Update all drawing methods (text, table, image) to accept and apply scale factors
   - Apply scaling to bbox coordinates before coordinate transformation

3. test_pdf_scaling.py:
   - Add test script to verify scaling works correctly
   - Test with OCR at 500x700 scaled to PDF at 1000x1400 (2x scaling)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-17 20:45:36 +08:00

100 lines
3.1 KiB
Python

#!/usr/bin/env python
"""
Test script for PDF generation with proper scaling
"""
import json
from pathlib import Path
from app.services.pdf_generator_service import pdf_generator_service
def test_pdf_generation():
    """Run the PDF generator on mock OCR output and return its result.

    The mock OCR JSON declares ``ocr_dimensions`` of 500x700 while the dummy
    source image is created at 1000x1400, so the expected scale factor is
    2.00 on each axis. The JSON file, the source image and the PDF are all
    placed in ``test_output/`` relative to the current working directory.

    Returns:
        Whatever ``pdf_generator_service.generate_layout_pdf`` returns; it is
        used here as a truthy/falsy success flag.
    """
    import logging

    # Every artifact produced by this script lives in this directory.
    out_dir = Path("test_output")
    out_dir.mkdir(exist_ok=True)

    # Mock text regions; all bbox points lie inside the 500x700 OCR size
    # declared in the payload below.
    regions = [
        {
            "text": "測試文字 Test Text",
            "bbox": [[50, 100], [250, 100], [250, 150], [50, 150]],
            "confidence": 0.95,
        },
        {
            "text": "第二行文字 Second line",
            "bbox": [[50, 200], [300, 200], [300, 250], [50, 250]],
            "confidence": 0.92,
        },
    ]
    ocr_payload = {
        "status": "success",
        "file_name": "test_image.jpg",
        "language": "ch",
        "ocr_dimensions": {
            "width": 500,   # OCR processed at 500px wide
            "height": 700,  # OCR processed at 700px tall
        },
        "text_regions": regions,
        "total_text_regions": 2,
        "average_confidence": 0.935,
        "layout_data": None,
        "images_metadata": [],
        "markdown_content": "# Test Document\n\n測試文字 Test Text\n\n第二行文字 Second line",
        "processing_time": 1.5,
        "timestamp": "2025-11-17T00:00:00",
    }

    # Persist the mock OCR result so the generator can read it from disk.
    ocr_json = out_dir / "test_ocr_result.json"
    ocr_json.write_text(
        json.dumps(ocr_payload, ensure_ascii=False, indent=2),
        encoding="utf-8",
    )
    print(f"Created test JSON at: {ocr_json}")

    target_pdf = out_dir / "test_output.pdf"

    # Blank white image passed as the source file; it is twice the declared
    # OCR size on each axis (1000x1400 vs 500x700).
    from PIL import Image

    source_img = out_dir / "test_source.jpg"
    Image.new('RGB', (1000, 1400), color='white').save(source_img)
    print(f"Created test source image: {source_img} (1000x1400)")

    print("\nGenerating PDF with scaling...")

    # Enable INFO-level logging so the scale factors can be seen in the
    # output (assuming the generator logs them at INFO).
    logging.basicConfig(level=logging.INFO, format='%(message)s')
    ok = pdf_generator_service.generate_layout_pdf(
        json_path=ocr_json,
        output_path=target_pdf,
        source_file_path=source_img,
    )

    if not ok:
        print("✗ PDF generation failed")
        return ok

    print(f"✓ PDF generated successfully: {target_pdf}")
    print(f" Expected scale factors: X={1000/500:.2f}, Y={1400/700:.2f}")
    print(" Text should now be properly scaled and positioned!")
    return ok
if __name__ == "__main__":
    import sys

    # NOTE: the top-of-file import of pdf_generator_service has already been
    # executed by the time this line runs, so this path entry can only affect
    # imports performed afterwards.
    sys.path.insert(0, str(Path(__file__).parent))

    print("Testing PDF generation with proper scaling...")
    print("=" * 60)
    success = test_pdf_generation()
    print("\n" + "=" * 60)
    if success:
        print("✓ Test completed successfully!")
        print("Check test_output/test_output.pdf to verify scaling")
    else:
        print("✗ Test failed")

    # Report the outcome through the exit status as well, so shells and CI
    # jobs can detect a failed run instead of always seeing status 0.
    sys.exit(0 if success else 1)