Files
OCR/backend/app/services/pdf_generator_service.py
egg 00e0d1fd76 fix: ensure calculate_page_dimensions checks all bbox sources
Critical Fix for User-Reported Bug:
The function was only checking layout_data.elements but not the 'layout'
field or prioritizing 'text_regions', causing it to miss all bbox data
when layout=[] (empty list) even though text_regions contained valid data.

User's Scenario (ELER-8-100HFV Data Sheet):
- JSON structure: layout=[] (empty), text_regions=[...] (has data)
- Previous code only checked layout_data.elements
- Resulted in max_x=0, max_y=0
- Fell back to source file dimensions (595x842)
- Calculated scale=1.0 instead of ~0.3
- All text with X>595 rendered out of bounds

Root Cause Analysis:
1. Different OCR outputs use different field names
2. Some use 'layout', some use 'text_regions', some use 'layout_data.elements'
3. Previous code didn't check 'layout' field at all
4. Previous code checked layout_data.elements before text_regions
5. If both were empty/missing, fell back to source dims too early

Solution:
Check ALL possible bbox sources in order of priority:
1. text_regions - Most common, contains all text boxes
2. layout - Legacy field, may be empty list
3. layout_data.elements - PP-StructureV3 format

Only fall back to source file dimensions if ALL sources are empty.

Changes:
- backend/app/services/pdf_generator_service.py:
  - Rewrite calculate_page_dimensions to check all three fields
  - Use explicit extend() to combine all regions
  - Add type checks (isinstance) for safety
  - Update warning messages to be more specific

- backend/test_empty_layout.py:
  - Add test for layout=[] + text_regions=[...] scenario
  - Validates scale factors are correct (~0.3, not 1.0)

Test Results:
✓ OCR dimensions inferred from text_regions: 1850.0 x 2880.0
✓ Target PDF dimensions: 595.3 x 841.9
✓ Scale factors correct: X=0.322, Y=0.292 (NOT 1.0!)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-18 07:27:29 +08:00

719 lines
28 KiB
Python

"""
Layout-Preserving PDF Generation Service
Generates PDF files that preserve the original document layout using OCR JSON data
"""
import json
import logging
from pathlib import Path
from typing import Dict, List, Optional, Tuple
from datetime import datetime
from reportlab.lib.pagesizes import A4, letter
from reportlab.lib.units import mm
from reportlab.pdfgen import canvas
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont
from reportlab.platypus import Table, TableStyle
from reportlab.lib import colors
from reportlab.lib.enums import TA_CENTER, TA_LEFT, TA_RIGHT
from reportlab.platypus import Paragraph
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from PIL import Image
from html.parser import HTMLParser
from app.core.config import settings
logger = logging.getLogger(__name__)
class HTMLTableParser(HTMLParser):
"""Parse HTML table to extract structure and data"""
def __init__(self):
super().__init__()
self.tables = []
self.current_table = None
self.current_row = None
self.current_cell = None
self.in_table = False
def handle_starttag(self, tag, attrs):
attrs_dict = dict(attrs)
if tag == 'table':
self.in_table = True
self.current_table = {'rows': []}
elif tag == 'tr' and self.in_table:
self.current_row = {'cells': []}
elif tag in ('td', 'th') and self.in_table and self.current_row is not None:
colspan = int(attrs_dict.get('colspan', 1))
rowspan = int(attrs_dict.get('rowspan', 1))
self.current_cell = {
'text': '',
'is_header': tag == 'th',
'colspan': colspan,
'rowspan': rowspan
}
def handle_endtag(self, tag):
if tag == 'table' and self.in_table:
if self.current_table and self.current_table['rows']:
self.tables.append(self.current_table)
self.current_table = None
self.in_table = False
elif tag == 'tr' and self.current_row is not None:
if self.current_table is not None:
self.current_table['rows'].append(self.current_row)
self.current_row = None
elif tag in ('td', 'th') and self.current_cell is not None:
if self.current_row is not None:
self.current_row['cells'].append(self.current_cell)
self.current_cell = None
def handle_data(self, data):
if self.current_cell is not None:
self.current_cell['text'] += data.strip() + ' '
class PDFGeneratorService:
"""Service for generating layout-preserving PDFs from OCR JSON data"""
def __init__(self):
"""Initialize PDF generator with font configuration"""
self.font_name = 'NotoSansSC'
self.font_path = None
self.font_registered = False
self._register_chinese_font()
def _register_chinese_font(self):
"""Register Chinese font for PDF generation"""
try:
# Get font path from settings
font_path = Path(settings.chinese_font_path)
# Try relative path from project root
if not font_path.is_absolute():
# Adjust path - settings.chinese_font_path starts with ./backend/
project_root = Path(__file__).resolve().parent.parent.parent.parent
font_path = project_root / font_path
if not font_path.exists():
logger.error(f"Chinese font not found at {font_path}")
return
# Register font
pdfmetrics.registerFont(TTFont(self.font_name, str(font_path)))
self.font_path = font_path
self.font_registered = True
logger.info(f"Chinese font registered: {self.font_name} from {font_path}")
except Exception as e:
logger.error(f"Failed to register Chinese font: {e}")
self.font_registered = False
def load_ocr_json(self, json_path: Path) -> Optional[Dict]:
"""
Load and parse OCR JSON result file
Args:
json_path: Path to JSON file
Returns:
Parsed JSON data or None if failed
"""
try:
with open(json_path, 'r', encoding='utf-8') as f:
data = json.load(f)
logger.info(f"Loaded OCR JSON: {json_path.name}")
return data
except Exception as e:
logger.error(f"Failed to load JSON {json_path}: {e}")
return None
def calculate_page_dimensions(self, ocr_data: Dict, source_file_path: Optional[Path] = None) -> Tuple[float, float]:
"""
從 OCR JSON 數據中推斷 OCR 處理時的實際頁面尺寸。
這非常重要,因為 OCR 可能在高解析度影像上運行。
Args:
ocr_data: Complete OCR data dictionary with text_regions and layout
source_file_path: Optional path to source file (fallback only)
Returns:
Tuple of (width, height) in points
"""
max_x = 0
max_y = 0
# *** 關鍵修復:檢查所有可能包含 bbox 的字段 ***
# 不同版本的 OCR 輸出可能使用不同的字段名
all_regions = []
# 1. text_regions - 包含所有文字區域(最常見)
if 'text_regions' in ocr_data:
all_regions.extend(ocr_data['text_regions'])
# 2. layout - 可能包含布局信息
if 'layout' in ocr_data and isinstance(ocr_data['layout'], list):
all_regions.extend(ocr_data['layout'])
# 3. layout_data.elements - PP-StructureV3 格式
if 'layout_data' in ocr_data and isinstance(ocr_data['layout_data'], dict):
elements = ocr_data['layout_data'].get('elements', [])
if elements:
all_regions.extend(elements)
if not all_regions:
# 如果 JSON 為空,回退到原始檔案尺寸
logger.warning("JSON 中沒有找到任何包含 bbox 的區域,回退到原始檔案尺寸。")
if source_file_path:
dims = self.get_original_page_size(source_file_path)
if dims:
return dims
return A4
region_count = 0
for region in all_regions:
try:
bbox = region.get('bbox')
if not bbox:
continue
region_count += 1
# *** 關鍵修復:正確處理多邊形 [[x, y], ...] 格式 ***
if isinstance(bbox[0], (int, float)):
# 處理簡單的 [x1, y1, x2, y2] 格式
max_x = max(max_x, bbox[2])
max_y = max(max_y, bbox[3])
elif isinstance(bbox[0], (list, tuple)):
# 處理多邊形 [[x, y], ...] 格式
x_coords = [p[0] for p in bbox if isinstance(p, (list, tuple)) and len(p) >= 2]
y_coords = [p[1] for p in bbox if isinstance(p, (list, tuple)) and len(p) >= 2]
if x_coords and y_coords:
max_x = max(max_x, max(x_coords))
max_y = max(max_y, max(y_coords))
except Exception as e:
logger.warning(f"Error processing bbox {bbox}: {e}")
if max_x > 0 and max_y > 0:
logger.info(f"{region_count} 個區域中推斷出的 OCR 座標系尺寸: {max_x:.1f} x {max_y:.1f}")
return (max_x, max_y)
else:
# 如果所有 bbox 都解析失敗,才回退
logger.warning("無法從 bbox 推斷尺寸,回退到原始檔案尺寸。")
if source_file_path:
dims = self.get_original_page_size(source_file_path)
if dims:
return dims
return A4
def get_original_page_size(self, file_path: Path) -> Optional[Tuple[float, float]]:
"""
Extract page dimensions from original source file
Args:
file_path: Path to original file (image or PDF)
Returns:
Tuple of (width, height) in points or None
"""
try:
if not file_path.exists():
return None
# For images, get dimensions from PIL
if file_path.suffix.lower() in ['.png', '.jpg', '.jpeg', '.bmp', '.tiff']:
img = Image.open(file_path)
# Use pixel dimensions directly as points (1:1 mapping)
# This matches how PaddleOCR reports coordinates
width_pt = float(img.width)
height_pt = float(img.height)
logger.info(f"Extracted dimensions from image: {width_pt:.1f} x {height_pt:.1f} points (1:1 pixel mapping)")
return (width_pt, height_pt)
# For PDFs, extract dimensions using PyPDF2
if file_path.suffix.lower() == '.pdf':
try:
from PyPDF2 import PdfReader
reader = PdfReader(file_path)
if len(reader.pages) > 0:
page = reader.pages[0]
# MediaBox gives [x1, y1, x2, y2] in points
mediabox = page.mediabox
width_pt = float(mediabox.width)
height_pt = float(mediabox.height)
logger.info(f"Extracted dimensions from PDF: {width_pt:.1f} x {height_pt:.1f} points")
return (width_pt, height_pt)
except ImportError:
logger.warning("PyPDF2 not available, cannot extract PDF dimensions")
except Exception as e:
logger.warning(f"Failed to extract PDF dimensions: {e}")
except Exception as e:
logger.warning(f"Failed to get page size from {file_path}: {e}")
return None
def draw_text_region(
self,
pdf_canvas: canvas.Canvas,
region: Dict,
page_height: float,
scale_w: float = 1.0,
scale_h: float = 1.0
):
"""
Draw a text region at precise coordinates
Args:
pdf_canvas: ReportLab canvas object
region: Text region dict with text, bbox, confidence
page_height: Height of page (for coordinate transformation)
scale_w: Scale factor for X coordinates (PDF width / OCR width)
scale_h: Scale factor for Y coordinates (PDF height / OCR height)
"""
text = region.get('text', '')
bbox = region.get('bbox', [])
confidence = region.get('confidence', 1.0)
if not text or not bbox or len(bbox) < 4:
return
try:
# bbox from OCR: [[x1,y1], [x2,y2], [x3,y3], [x4,y4]]
# Points: top-left, top-right, bottom-right, bottom-left
# OCR coordinates: origin (0,0) at top-left, Y increases downward
ocr_x_left = bbox[0][0] # Left X
ocr_y_top = bbox[0][1] # Top Y in OCR coordinates
ocr_x_right = bbox[2][0] # Right X
ocr_y_bottom = bbox[2][1] # Bottom Y in OCR coordinates
# Apply scale factors to convert from OCR space to PDF space
ocr_x_left = ocr_x_left * scale_w
ocr_y_top = ocr_y_top * scale_h
ocr_x_right = ocr_x_right * scale_w
ocr_y_bottom = ocr_y_bottom * scale_h
# Calculate bbox dimensions (after scaling)
bbox_width = abs(ocr_x_right - ocr_x_left)
bbox_height = abs(ocr_y_bottom - ocr_y_top)
# Calculate font size using heuristics
# Font size is typically 70-90% of bbox height
# Testing shows 0.75 works well for most cases
font_size = bbox_height * 0.75
font_size = max(min(font_size, 72), 4) # Clamp between 4pt and 72pt
# Transform coordinates: OCR (top-left origin) → PDF (bottom-left origin)
# CRITICAL: Y-axis flip!
pdf_x = ocr_x_left
pdf_y = page_height - ocr_y_bottom # Flip Y-axis using bottom coordinate
# Set font
font_name = self.font_name if self.font_registered else 'Helvetica'
pdf_canvas.setFont(font_name, font_size)
# Calculate text width to prevent overflow
text_width = pdf_canvas.stringWidth(text, font_name, font_size)
# If text is too wide for bbox, scale down font
if text_width > bbox_width:
scale_factor = bbox_width / text_width
font_size = font_size * scale_factor * 0.95 # 95% to add small margin
font_size = max(font_size, 3) # Minimum 3pt
pdf_canvas.setFont(font_name, font_size)
# Draw text at calculated position
pdf_canvas.drawString(pdf_x, pdf_y, text)
# Debug: Draw bounding box (optional)
if settings.pdf_enable_bbox_debug:
pdf_canvas.setStrokeColorRGB(1, 0, 0, 0.3) # Red, semi-transparent
pdf_canvas.setLineWidth(0.5)
# Transform all bbox points to PDF coordinates (apply scaling first)
pdf_points = [(p[0] * scale_w, page_height - p[1] * scale_h) for p in bbox]
# Draw bbox rectangle
for i in range(4):
x1, y1 = pdf_points[i]
x2, y2 = pdf_points[(i + 1) % 4]
pdf_canvas.line(x1, y1, x2, y2)
except Exception as e:
logger.warning(f"Failed to draw text region '{text[:20]}...': {e}")
def draw_table_region(
self,
pdf_canvas: canvas.Canvas,
table_element: Dict,
images_metadata: List[Dict],
page_height: float,
scale_w: float = 1.0,
scale_h: float = 1.0
):
"""
Draw a table region by parsing HTML and rebuilding with ReportLab Table
Args:
pdf_canvas: ReportLab canvas object
table_element: Table element dict with HTML content
images_metadata: List of image metadata to find table bbox
page_height: Height of page
scale_w: Scale factor for X coordinates (PDF width / OCR width)
scale_h: Scale factor for Y coordinates (PDF height / OCR height)
"""
try:
html_content = table_element.get('content', '')
if not html_content:
return
# Parse HTML to extract table structure
parser = HTMLTableParser()
parser.feed(html_content)
if not parser.tables:
logger.warning("No tables found in HTML content")
return
# Get the first table (PP-StructureV3 usually provides one table per element)
table_data = parser.tables[0]
rows = table_data['rows']
if not rows:
return
# Find corresponding table image to get bbox
table_bbox = None
for img_meta in images_metadata:
img_path = img_meta.get('image_path', '')
if 'table' in img_path.lower():
bbox = img_meta.get('bbox', [])
if bbox and len(bbox) >= 4:
table_bbox = bbox
break
if not table_bbox:
logger.warning("No bbox found for table")
return
# Extract bbox coordinates and apply scaling
ocr_x_left = table_bbox[0][0] * scale_w
ocr_y_top = table_bbox[0][1] * scale_h
ocr_x_right = table_bbox[2][0] * scale_w
ocr_y_bottom = table_bbox[2][1] * scale_h
table_width = abs(ocr_x_right - ocr_x_left)
table_height = abs(ocr_y_bottom - ocr_y_top)
# Transform coordinates
pdf_x = ocr_x_left
pdf_y = page_height - ocr_y_bottom
# Build table data for ReportLab
# Convert parsed structure to simple 2D array
max_cols = max(len(row['cells']) for row in rows)
reportlab_data = []
for row in rows:
row_data = []
for cell in row['cells']:
text = cell['text'].strip()
row_data.append(text)
# Pad row if needed
while len(row_data) < max_cols:
row_data.append('')
reportlab_data.append(row_data)
# Calculate column widths (equal distribution)
col_widths = [table_width / max_cols] * max_cols
# Create ReportLab Table
# Use smaller font size to fit in bbox
font_size = min(table_height / len(rows) * 0.5, 10)
font_size = max(font_size, 6)
# Create table with font
table = Table(reportlab_data, colWidths=col_widths)
# Apply table style
style = TableStyle([
('FONT', (0, 0), (-1, -1), self.font_name if self.font_registered else 'Helvetica', font_size),
('GRID', (0, 0), (-1, -1), 0.5, colors.black),
('VALIGN', (0, 0), (-1, -1), 'MIDDLE'),
('ALIGN', (0, 0), (-1, -1), 'CENTER'),
('LEFTPADDING', (0, 0), (-1, -1), 2),
('RIGHTPADDING', (0, 0), (-1, -1), 2),
('TOPPADDING', (0, 0), (-1, -1), 2),
('BOTTOMPADDING', (0, 0), (-1, -1), 2),
])
# Add header style if first row has headers
if rows and rows[0]['cells'] and rows[0]['cells'][0].get('is_header'):
style.add('BACKGROUND', (0, 0), (-1, 0), colors.lightgrey)
style.add('FONT', (0, 0), (-1, 0), self.font_name if self.font_registered else 'Helvetica-Bold', font_size)
table.setStyle(style)
# Calculate table size
table.wrapOn(pdf_canvas, table_width, table_height)
# Draw table at position
table.drawOn(pdf_canvas, pdf_x, pdf_y)
logger.info(f"Drew table at ({pdf_x:.0f}, {pdf_y:.0f}) size {table_width:.0f}x{table_height:.0f} with {len(rows)} rows")
except Exception as e:
logger.warning(f"Failed to draw table region: {e}")
import traceback
traceback.print_exc()
def draw_image_region(
self,
pdf_canvas: canvas.Canvas,
region: Dict,
page_height: float,
result_dir: Path,
scale_w: float = 1.0,
scale_h: float = 1.0
):
"""
Draw an image region by embedding the extracted image
Handles images extracted by PP-StructureV3 (tables, figures, charts, etc.)
Args:
pdf_canvas: ReportLab canvas object
region: Image metadata dict with image_path and bbox
page_height: Height of page (for coordinate transformation)
result_dir: Directory containing result files
scale_w: Scale factor for X coordinates (PDF width / OCR width)
scale_h: Scale factor for Y coordinates (PDF height / OCR height)
"""
try:
image_path_str = region.get('image_path', '')
if not image_path_str:
return
# Construct full path to image
image_path = result_dir / image_path_str
if not image_path.exists():
logger.warning(f"Image not found: {image_path}")
return
# Get bbox for positioning
bbox = region.get('bbox', [])
if not bbox or len(bbox) < 4:
# If no bbox, skip for now
logger.warning(f"No bbox for image {image_path_str}")
return
# bbox from OCR: [[x1,y1], [x2,y2], [x3,y3], [x4,y4]]
# OCR coordinates: origin (0,0) at top-left, Y increases downward
ocr_x_left = bbox[0][0] * scale_w
ocr_y_top = bbox[0][1] * scale_h
ocr_x_right = bbox[2][0] * scale_w
ocr_y_bottom = bbox[2][1] * scale_h
# Calculate bbox dimensions (after scaling)
bbox_width = abs(ocr_x_right - ocr_x_left)
bbox_height = abs(ocr_y_bottom - ocr_y_top)
# Transform coordinates: OCR (top-left origin) → PDF (bottom-left origin)
# CRITICAL: Y-axis flip!
# For images, we position at bottom-left corner
pdf_x_left = ocr_x_left
pdf_y_bottom = page_height - ocr_y_bottom # Flip Y-axis
# Draw image using ReportLab
# drawImage expects: (path, x, y, width, height)
# where (x, y) is the bottom-left corner of the image
pdf_canvas.drawImage(
str(image_path),
pdf_x_left,
pdf_y_bottom,
width=bbox_width,
height=bbox_height,
preserveAspectRatio=True,
mask='auto' # Handle transparency
)
logger.info(f"Drew image: {image_path_str} at ({pdf_x_left:.0f}, {pdf_y_bottom:.0f}) size {bbox_width:.0f}x{bbox_height:.0f}")
except Exception as e:
logger.warning(f"Failed to draw image region: {e}")
def generate_layout_pdf(
self,
json_path: Path,
output_path: Path,
source_file_path: Optional[Path] = None
) -> bool:
"""
Generate layout-preserving PDF from OCR JSON data
Args:
json_path: Path to OCR JSON file
output_path: Path to save generated PDF
source_file_path: Optional path to original source file for dimension extraction
Returns:
True if successful, False otherwise
"""
try:
# Check if PDF already exists (caching)
if output_path.exists():
logger.info(f"PDF already exists: {output_path.name}")
return True
# Load JSON data
ocr_data = self.load_ocr_json(json_path)
if not ocr_data:
return False
# Get text regions
text_regions = ocr_data.get('text_regions', [])
if not text_regions:
logger.warning("No text regions found in JSON")
return False
# Get images metadata
images_metadata = ocr_data.get('images_metadata', [])
# Get layout data
layout_data = ocr_data.get('layout_data', {})
# Step 1: Get OCR processing dimensions (the large image OCR actually used)
# This comes from analyzing all bbox coordinates in the OCR data
ocr_width, ocr_height = self.calculate_page_dimensions(ocr_data, source_file_path=None)
logger.info(f"OCR 處理時使用的座標系尺寸: {ocr_width:.1f} x {ocr_height:.1f}")
# Step 2: Get target PDF dimensions (usually the original file size)
# This is what we want the final PDF size to be
if source_file_path:
target_dims = self.get_original_page_size(source_file_path)
if target_dims:
target_width, target_height = target_dims
logger.info(f"目標 PDF 尺寸(來自原始文件): {target_width:.1f} x {target_height:.1f}")
else:
# If we can't get original size, use OCR dimensions as target
target_width, target_height = ocr_width, ocr_height
logger.warning(f"無法獲取原始文件尺寸,使用 OCR 尺寸作為目標: {target_width:.1f} x {target_height:.1f}")
else:
# No source file, use OCR dimensions as target (1:1 mapping)
target_width, target_height = ocr_width, ocr_height
logger.info(f"無原始文件,使用 OCR 尺寸作為目標: {target_width:.1f} x {target_height:.1f}")
# Step 3: Calculate scale factors to convert OCR coordinates to PDF coordinates
scale_w = target_width / ocr_width
scale_h = target_height / ocr_height
logger.info(f"縮放因子: X={scale_w:.3f}, Y={scale_h:.3f} (OCR座標 → PDF座標)")
# Create PDF canvas with target dimensions
pdf_canvas = canvas.Canvas(str(output_path), pagesize=(target_width, target_height))
# Extract table bboxes to exclude text in those regions
table_bboxes = []
for img_meta in images_metadata:
img_path = img_meta.get('image_path', '')
if 'table' in img_path.lower():
bbox = img_meta.get('bbox', [])
if bbox and len(bbox) >= 4:
table_bboxes.append(bbox)
# Helper function to check if a point is inside a bbox
def point_in_bbox(x, y, bbox):
x1, y1 = bbox[0]
x2, y2 = bbox[2]
return min(x1, x2) <= x <= max(x1, x2) and min(y1, y2) <= y <= max(y1, y2)
# Filter text regions to exclude those inside tables
filtered_text_regions = []
for region in text_regions:
bbox = region.get('bbox', [])
if not bbox or len(bbox) < 4:
continue
# Check if text region center is inside any table bbox
center_x = (bbox[0][0] + bbox[2][0]) / 2
center_y = (bbox[0][1] + bbox[2][1]) / 2
is_in_table = any(point_in_bbox(center_x, center_y, table_bbox) for table_bbox in table_bboxes)
if not is_in_table:
filtered_text_regions.append(region)
else:
logger.debug(f"Excluded text '{region.get('text', '')[:20]}...' (inside table)")
logger.info(f"Filtered {len(text_regions) - len(filtered_text_regions)} text regions inside tables")
# Group regions by page
pages_data = {}
for region in filtered_text_regions:
page_num = region.get('page', 1)
if page_num not in pages_data:
pages_data[page_num] = []
pages_data[page_num].append(region)
# Get table elements from layout_data
table_elements = []
if layout_data and layout_data.get('elements'):
table_elements = [e for e in layout_data['elements'] if e.get('type') == 'table']
# Process each page
total_pages = ocr_data.get('total_pages', 1)
for page_num in range(1, total_pages + 1):
if page_num > 1:
pdf_canvas.showPage() # Start new page
# Draw text regions for this page (excluding table text)
page_regions = pages_data.get(page_num, [])
for region in page_regions:
self.draw_text_region(pdf_canvas, region, target_height, scale_w, scale_h)
# Draw tables for this page
for table_elem in table_elements:
if table_elem.get('page', 0) == page_num - 1: # page is 0-indexed
self.draw_table_region(pdf_canvas, table_elem, images_metadata, target_height, scale_w, scale_h)
# Draw non-table images for this page (figure, chart, seal, etc.)
for img_meta in images_metadata:
if img_meta.get('page') == page_num - 1: # page is 0-indexed
img_path = img_meta.get('image_path', '')
# Skip table images (they're now rendered as tables)
if 'table' not in img_path.lower():
self.draw_image_region(
pdf_canvas,
img_meta,
target_height,
json_path.parent,
scale_w,
scale_h
)
# Save PDF
pdf_canvas.save()
file_size = output_path.stat().st_size
logger.info(f"Generated layout-preserving PDF: {output_path.name} ({file_size} bytes)")
return True
except Exception as e:
logger.error(f"Failed to generate PDF: {e}")
import traceback
traceback.print_exc()
return False
# Singleton instance
pdf_generator_service = PDFGeneratorService()