Problem:
- OCR processes images at smaller resolutions, but the resulting coordinates were being used directly on larger PDF canvases.
- This caused all text/tables/images to be drawn at the wrong scale in the bottom-left corner.

Solution:
- Track OCR image dimensions in the JSON output (ocr_dimensions).
- Calculate proper scale factors: scale_w = pdf_width/ocr_width, scale_h = pdf_height/ocr_height.
- Apply scaling to all coordinates before drawing on the PDF canvas.
- Support per-page scaling for multi-page PDFs.

Changes:
1. ocr_service.py:
   - Add OCR image dimensions capture using PIL.
   - Include ocr_dimensions in the JSON output for both single images and PDFs.
2. pdf_generator_service.py:
   - Calculate scale factors from OCR dimensions vs. target PDF dimensions.
   - Update all drawing methods (text, table, image) to accept and apply scale factors.
   - Apply scaling to bbox coordinates before coordinate transformation.
3. test_pdf_scaling.py:
   - Add test script to verify scaling works correctly.
   - Test with OCR at 500x700 scaled to PDF at 1000x1400 (2x scaling).

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
100 lines
3.1 KiB
Python
100 lines
3.1 KiB
Python
#!/usr/bin/env python
|
|
"""
|
|
Test script for PDF generation with proper scaling
|
|
"""
|
|
|
|
import json
|
|
from pathlib import Path
|
|
from app.services.pdf_generator_service import pdf_generator_service
|
|
|
|
def test_pdf_generation():
    """Generate a PDF from mock OCR output whose size differs from the source image.

    Writes three files under ``test_output/``: a mock OCR result JSON that
    declares ``ocr_dimensions`` of 500x700, a blank white 1000x1400 JPEG used
    as the source file, and the PDF produced by
    ``pdf_generator_service.generate_layout_pdf``.

    Returns:
        The value returned by ``generate_layout_pdf`` (used as a success flag).

    NOTE(review): the result is returned rather than asserted, so a falsy
    result does not raise here; callers must check the return value.
    """
    import logging

    # Working directory for every artifact this test produces.
    out_dir = Path("test_output")
    out_dir.mkdir(exist_ok=True)

    # Two text regions, each described by a four-corner bounding box.
    regions = [
        {
            "text": "測試文字 Test Text",
            "bbox": [[50, 100], [250, 100], [250, 150], [50, 150]],
            "confidence": 0.95,
        },
        {
            "text": "第二行文字 Second line",
            "bbox": [[50, 200], [300, 200], [300, 250], [50, 250]],
            "confidence": 0.92,
        },
    ]

    # Mock OCR result; "ocr_dimensions" records the size OCR ran at.
    ocr_payload = {
        "status": "success",
        "file_name": "test_image.jpg",
        "language": "ch",
        "ocr_dimensions": {
            "width": 500,   # OCR processed at 500px wide
            "height": 700,  # OCR processed at 700px tall
        },
        "text_regions": regions,
        "total_text_regions": 2,
        "average_confidence": 0.935,
        "layout_data": None,
        "images_metadata": [],
        "markdown_content": "# Test Document\n\n測試文字 Test Text\n\n第二行文字 Second line",
        "processing_time": 1.5,
        "timestamp": "2025-11-17T00:00:00",
    }

    # Persist the mock result; ensure_ascii=False keeps the CJK text readable.
    ocr_json = out_dir / "test_ocr_result.json"
    ocr_json.write_text(
        json.dumps(ocr_payload, ensure_ascii=False, indent=2),
        encoding="utf-8",
    )
    print(f"Created test JSON at: {ocr_json}")

    pdf_target = out_dir / "test_output.pdf"

    # Blank source image at twice the OCR size in each dimension
    # (1000x1400 vs 500x700).
    from PIL import Image

    source_img = out_dir / "test_source.jpg"
    Image.new('RGB', (1000, 1400), color='white').save(source_img)
    print(f"Created test source image: {source_img} (1000x1400)")

    print("\nGenerating PDF with scaling...")

    # Show INFO-level log messages (the original intent is to surface the
    # scale factors logged during generation).
    logging.basicConfig(level=logging.INFO, format='%(message)s')

    ok = pdf_generator_service.generate_layout_pdf(
        json_path=ocr_json,
        output_path=pdf_target,
        source_file_path=source_img,
    )

    if not ok:
        print("✗ PDF generation failed")
        return ok

    print(f"✓ PDF generated successfully: {pdf_target}")
    print(f" Expected scale factors: X={1000/500:.2f}, Y={1400/700:.2f}")
    print(" Text should now be properly scaled and positioned!")
    return ok
|
|
|
|
if __name__ == "__main__":
    # Script entry point: run the scaling test, print a summary, and report
    # the outcome through the process exit status.
    import sys

    # NOTE(review): this executes after the module-level imports at the top of
    # the file, so it can only influence imports performed from here on.
    sys.path.insert(0, str(Path(__file__).parent))

    print("Testing PDF generation with proper scaling...")
    print("=" * 60)

    success = test_pdf_generation()

    print("\n" + "=" * 60)
    if success:
        print("✓ Test completed successfully!")
        print("Check test_output/test_output.pdf to verify scaling")
    else:
        print("✗ Test failed")

    # Exit non-zero on failure so shells and CI can detect it; previously the
    # script exited with status 0 even after printing the failure message.
    sys.exit(0 if success else 1)