diff --git a/backend/app/services/pdf_generator_service.py b/backend/app/services/pdf_generator_service.py index 97d7697..0bb5220 100644 --- a/backend/app/services/pdf_generator_service.py +++ b/backend/app/services/pdf_generator_service.py @@ -1172,18 +1172,11 @@ class PDFGeneratorService: logger.info(f"[文字] '{text[:30]}' → PDF位置: ({pdf_x:.1f}, {pdf_y:.1f}), 字體:{font_size:.1f}pt, 寬x高:{bbox_width:.0f}x{bbox_height:.0f}") # Set font with track-specific styling + # Note: OCR track has no StyleInfo (extracted from images), so no advanced formatting style_info = region.get('style') is_direct_track = (self.current_processing_track == 'direct' or self.current_processing_track == ProcessingTrack.DIRECT) - # Extract alignment from style (if available) - alignment = 'left' # Default alignment - if style_info: - if hasattr(style_info, 'alignment') and style_info.alignment: - alignment = style_info.alignment - elif isinstance(style_info, dict) and 'alignment' in style_info: - alignment = style_info['alignment'] - if style_info and is_direct_track: # Direct track: Apply rich styling from StyleInfo self._apply_text_style(pdf_canvas, style_info, default_size=font_size) @@ -1197,10 +1190,11 @@ class PDFGeneratorService: pdf_canvas.setFont(font_name, font_size) # Handle line breaks (split text by newlines) + # OCR track: simple left-aligned rendering lines = text.split('\n') line_height = font_size * 1.2 # 120% of font size for line spacing - # Draw each line with alignment + # Draw each line (left-aligned for OCR track) for i, line in enumerate(lines): if not line.strip(): continue # Skip empty lines @@ -1217,37 +1211,9 @@ class PDFGeneratorService: current_font_size = font_size * scale_factor * 0.95 # 95% to add small margin current_font_size = max(current_font_size, 3) # Minimum 3pt pdf_canvas.setFont(font_name, current_font_size) - text_width = pdf_canvas.stringWidth(line, font_name, current_font_size) - # Calculate X position based on alignment - line_x = pdf_x - if alignment == 'center': - line_x = pdf_x + (bbox_width - text_width) / 2 - elif alignment == 'right': - line_x = pdf_x + bbox_width - text_width - elif alignment == 'justify' and i < len(lines) - 1: - # Justify: distribute extra space between words (except last line) - words = line.split() - if len(words) > 1: - total_word_width = sum(pdf_canvas.stringWidth(word, font_name, current_font_size) for word in words) - extra_space = bbox_width - total_word_width - word_spacing = extra_space / (len(words) - 1) - - # Draw words with calculated spacing - x_pos = pdf_x - for word in words: - pdf_canvas.drawString(x_pos, line_y, word) - word_width = pdf_canvas.stringWidth(word, font_name, current_font_size) - x_pos += word_width + word_spacing - - # Reset font and skip normal drawString - if text_width > bbox_width: - pdf_canvas.setFont(font_name, font_size) - continue - # else: left alignment uses line_x = pdf_x - - # Draw text at calculated position - pdf_canvas.drawString(line_x, line_y, line) + # Draw text at left-aligned position (OCR track uses simple left alignment) + pdf_canvas.drawString(pdf_x, line_y, line) # Reset font size for next line if text_width > bbox_width: diff --git a/openspec/changes/pdf-layout-restoration/tasks.md b/openspec/changes/pdf-layout-restoration/tasks.md index 7768a45..308c144 100644 --- a/openspec/changes/pdf-layout-restoration/tasks.md +++ b/openspec/changes/pdf-layout-restoration/tasks.md @@ -77,23 +77,25 @@ ## Phase 3: Advanced Layout (P2 - Week 2) ### 5. Enhanced Text Rendering -- [x] 5.1 Implement line-by-line rendering +- [x] 5.1 Implement line-by-line rendering (both tracks) - [x] 5.1.1 Split text content by newlines (text.split('\n')) - [x] 5.1.2 Calculate line height from font size (font_size * 1.2) - [x] 5.1.3 Render each line with proper spacing (line_y = pdf_y - i * line_height) - - [x] 5.1.4 Add OCR track support in draw_text_region (lines 1199-1254) -- [x] 5.2 Add paragraph handling + - [x] 5.1.4 Direct track: _draw_text_element_direct (lines 1549-1693) + - [x] 5.1.5 OCR track: draw_text_region (lines 1113-1270, simplified) +- [x] 5.2 Add paragraph handling (Direct track only) - [x] 5.2.1 Detect paragraph boundaries (via element.type PARAGRAPH) - - [x] 5.2.2 Apply paragraph spacing (spacing_before/spacing_after from metadata) - - [x] 5.2.3 Handle indentation (indent/first_line_indent from metadata) - - [x] 5.2.4 Record spacing_after with actual text height analysis (lines 1680-1689) - - [x] 5.2.5 Calculate bbox_bottom_margin to show implicit spacing -- [x] 5.3 Implement text alignment + - [x] 5.2.2 Apply spacing_before from metadata (line 1576, adjusts Y position) + - [x] 5.2.3 Handle indentation (indent/first_line_indent from metadata, lines 1564-1565) + - [x] 5.2.4 Record spacing_after for analysis (lines 1680-1689) + - [x] 5.2.5 Note: spacing_after is implicit in bbox-based layout (bbox_bottom_margin) + - [x] 5.2.6 OCR track: no paragraph handling (simple left-aligned rendering) +- [x] 5.3 Implement text alignment (Direct track only) - [x] 5.3.1 Support left/right/center/justify (from StyleInfo.alignment) - [x] 5.3.2 Calculate positioning based on alignment (line_x calculation) - [x] 5.3.3 Apply to each text block (per-line alignment in _draw_text_element_direct) - [x] 5.3.4 Justify alignment with word spacing distribution - - [x] 5.3.5 Add OCR track alignment support (extract from style, lines 1179-1247) + - [x] 5.3.5 OCR track: left-aligned only (no StyleInfo available) ### 6. List Formatting - [ ] 6.1 Detect list elements