feat: add OCR track alignment support and spacing_after analysis
Complete text alignment parity between OCR and Direct tracks:

**OCR Track Alignment Support (Task 5.3.5)**
- Extract alignment from region style (StyleInfo or dict)
- Support left/right/center/justify alignment in draw_text_region
- Calculate line_x position based on alignment setting:
  - Left: line_x = pdf_x (default)
  - Center: line_x = pdf_x + (bbox_width - text_width) / 2
  - Right: line_x = pdf_x + bbox_width - text_width
  - Justify: word spacing distribution (except last line)
- Lines 1179-1247 in pdf_generator_service.py
- OCR track now has feature parity with Direct track for alignment

**Enhanced spacing_after Handling (Task 5.2.4-5.2.5)**
- Calculate actual text height: len(lines) * line_height
- Compute bbox_bottom_margin to show implicit spacing
- Add detailed logging with actual_height and bbox_bottom_margin
- Document that spacing_after is inherent in bbox-based layout
- If text is shorter than bbox, remaining space acts as spacing
- Lines 1680-1689 in pdf_generator_service.py

**Technical Details**
- Both tracks now support identical alignment modes
- spacing_after is implicitly present in element positioning
- bbox_bottom_margin = bbox_height - actual_text_height - spacing_before
- This shows how much space remains below the text (implicit spacing_after)

**Modified Files**
- backend/app/services/pdf_generator_service.py
  - Lines 1179-1185: Alignment extraction for OCR track
  - Lines 1222-1247: OCR track alignment calculation and rendering
  - Lines 1680-1689: spacing_after analysis with bbox_bottom_margin
- openspec/changes/pdf-layout-restoration/tasks.md
  - Added 5.2.5: bbox_bottom_margin calculation
  - Added 5.3.5: OCR track alignment support

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -1176,6 +1176,14 @@ class PDFGeneratorService:
|
||||
is_direct_track = (self.current_processing_track == 'direct' or
|
||||
self.current_processing_track == ProcessingTrack.DIRECT)
|
||||
|
||||
# Extract alignment from style (if available)
|
||||
alignment = 'left' # Default alignment
|
||||
if style_info:
|
||||
if hasattr(style_info, 'alignment') and style_info.alignment:
|
||||
alignment = style_info.alignment
|
||||
elif isinstance(style_info, dict) and 'alignment' in style_info:
|
||||
alignment = style_info['alignment']
|
||||
|
||||
if style_info and is_direct_track:
|
||||
# Direct track: Apply rich styling from StyleInfo
|
||||
self._apply_text_style(pdf_canvas, style_info, default_size=font_size)
|
||||
@@ -1192,7 +1200,7 @@ class PDFGeneratorService:
|
||||
lines = text.split('\n')
|
||||
line_height = font_size * 1.2 # 120% of font size for line spacing
|
||||
|
||||
# Draw each line
|
||||
# Draw each line with alignment
|
||||
for i, line in enumerate(lines):
|
||||
if not line.strip():
|
||||
continue # Skip empty lines
|
||||
@@ -1209,9 +1217,37 @@ class PDFGeneratorService:
|
||||
current_font_size = font_size * scale_factor * 0.95 # 95% to add small margin
|
||||
current_font_size = max(current_font_size, 3) # Minimum 3pt
|
||||
pdf_canvas.setFont(font_name, current_font_size)
|
||||
text_width = pdf_canvas.stringWidth(line, font_name, current_font_size)
|
||||
|
||||
# Calculate X position based on alignment
|
||||
line_x = pdf_x
|
||||
if alignment == 'center':
|
||||
line_x = pdf_x + (bbox_width - text_width) / 2
|
||||
elif alignment == 'right':
|
||||
line_x = pdf_x + bbox_width - text_width
|
||||
elif alignment == 'justify' and i < len(lines) - 1:
|
||||
# Justify: distribute extra space between words (except last line)
|
||||
words = line.split()
|
||||
if len(words) > 1:
|
||||
total_word_width = sum(pdf_canvas.stringWidth(word, font_name, current_font_size) for word in words)
|
||||
extra_space = bbox_width - total_word_width
|
||||
word_spacing = extra_space / (len(words) - 1)
|
||||
|
||||
# Draw words with calculated spacing
|
||||
x_pos = pdf_x
|
||||
for word in words:
|
||||
pdf_canvas.drawString(x_pos, line_y, word)
|
||||
word_width = pdf_canvas.stringWidth(word, font_name, current_font_size)
|
||||
x_pos += word_width + word_spacing
|
||||
|
||||
# Reset font and skip normal drawString
|
||||
if text_width > bbox_width:
|
||||
pdf_canvas.setFont(font_name, font_size)
|
||||
continue
|
||||
# else: left alignment uses line_x = pdf_x
|
||||
|
||||
# Draw text at calculated position
|
||||
pdf_canvas.drawString(pdf_x, line_y, line)
|
||||
pdf_canvas.drawString(line_x, line_y, line)
|
||||
|
||||
# Reset font size for next line
|
||||
if text_width > bbox_width:
|
||||
@@ -1641,9 +1677,16 @@ class PDFGeneratorService:
|
||||
if text_width > available_width:
|
||||
pdf_canvas.setFont(font_name, font_size)
|
||||
|
||||
# Calculate actual text height used
|
||||
actual_text_height = len(lines) * line_height
|
||||
bbox_bottom_margin = bbox_height - actual_text_height - paragraph_spacing_before
|
||||
|
||||
# Note: spacing_after is inherent in element positioning (bbox-based layout)
|
||||
# If text is shorter than bbox, the remaining space acts as implicit spacing
|
||||
logger.debug(f"Drew text element: {text_content[:30]}... "
|
||||
f"({len(lines)} lines, align={alignment}, indent={indent}, "
|
||||
f"spacing_before={paragraph_spacing_before}, spacing_after={paragraph_spacing_after})")
|
||||
f"spacing_before={paragraph_spacing_before}, spacing_after={paragraph_spacing_after}, "
|
||||
f"actual_height={actual_text_height:.1f}, bbox_bottom_margin={bbox_bottom_margin:.1f})")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to draw text element {element.element_id}: {e}")
|
||||
|
||||
Reference in New Issue
Block a user