feat: refactor dual-track architecture (Phase 1-5)

## Backend Changes

- **Service Layer Refactoring**:
  - Add ProcessingOrchestrator for unified document processing
  - Add PDFTableRenderer for table rendering extraction
  - Add PDFFontManager for font management with CJK support
  - Add MemoryPolicyEngine (73% code reduction from MemoryGuard)
- **Bug Fixes**:
  - Fix Direct Track table row span calculation
  - Fix OCR Track image path handling
  - Add cell_boxes coordinate validation
  - Filter out small decorative images
  - Add covering image detection

## Frontend Changes

- **State Management**:
  - Add TaskStore for centralized task state management
  - Add localStorage persistence for recent tasks
  - Add processing state tracking
- **Type Consolidation**:
  - Merge shared types from api.ts to apiV2.ts
  - Update imports in authStore, uploadStore, ResultsTable, SettingsPage
- **Page Integration**:
  - Integrate TaskStore in ProcessingPage and TaskDetailPage
  - Update useTaskValidation hook with cache sync

## Testing

- Direct Track: edit.pdf (3 pages, 1.281s), edit3.pdf (2 pages, 0.203s)
- Cell boxes validation: 43 valid, 0 invalid
- Table merging: 12 merged cells verified

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-12-07 07:18:27 +08:00
parent 8265be1741
commit eff9b0bcd5
19 changed files with 3637 additions and 173 deletions
--- a/backend/app/services/pdf_table_renderer.py
+++ b/backend/app/services/pdf_table_renderer.py
@@ -0,0 +1,917 @@
+"""
+PDF Table Renderer - Handles table rendering for PDF generation.
+
+This module provides unified table rendering capabilities extracted from
+PDFGeneratorService, supporting multiple input formats:
+- HTML tables
+- Cell boxes (layered approach)
+- Cells dictionary (Direct track)
+- TableData objects
+"""
+
+import logging
+from dataclasses import dataclass, field
+from html.parser import HTMLParser
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Tuple, Union
+
+from reportlab.lib import colors
+from reportlab.lib.enums import TA_CENTER, TA_LEFT, TA_RIGHT
+from reportlab.lib.styles import ParagraphStyle
+from reportlab.lib.utils import ImageReader
+from reportlab.platypus import Paragraph, Table, TableStyle
+
+logger = logging.getLogger(__name__)
+
+
+# ============================================================================
+# Configuration
+# ============================================================================
+
+@dataclass
+class TableRenderConfig:
+    """
+    Configuration for table rendering.
+
+    Groups the styling and tolerance settings that TableRenderer reads
+    through ``self.config``. Every field has a default, so
+    ``TableRenderConfig()`` is a usable configuration. Lengths are
+    presumably PDF points -- TODO confirm against callers.
+    """
+    # Font used for Paragraph cells and for directly drawn translated text.
+    font_name: str = "Helvetica"
+    # Font size of Paragraph-based cell text.
+    font_size: int = 8
+    # Inclusive range of sizes tried, largest first, by _fit_text_to_cell.
+    min_font_size: int = 6
+    max_font_size: int = 10
+
+    # Padding options (applied through TableStyle by _render_parsed_table)
+    left_padding: int = 2
+    right_padding: int = 2
+    top_padding: int = 2
+    bottom_padding: int = 2
+
+    # Border options (used for the table GRID and for canvas-drawn borders)
+    border_color: Any = colors.black
+    border_width: float = 0.5
+
+    # Alignment (passed to the TableStyle ALIGN / VALIGN commands)
+    horizontal_align: str = "CENTER"
+    vertical_align: str = "MIDDLE"
+
+    # Header styling
+    # NOTE(review): no renderer method in this file reads header_background.
+    header_background: Any = colors.lightgrey
+
+    # Grid normalization threshold: default clustering distance used by
+    # normalize_cell_boxes_to_grid when snapping box coordinates together.
+    grid_threshold: float = 10.0
+
+    # Merged cells threshold: boundaries closer than this are merged by
+    # compute_grid_from_cell_boxes.
+    merge_boundary_threshold: float = 5.0
+
+
+# ============================================================================
+# HTML Table Parser
+# ============================================================================
+
+class HTMLTableParser(HTMLParser):
+    """
+    Parse HTML table structure for rendering.
+
+    Every ``<table>`` fed to the parser is collected in ``self.tables``.
+    Each table is ``{'rows': [...]}``, each row is ``{'cells': [...]}`` and
+    each cell is a dict with ``text``, ``is_header``, ``colspan`` and
+    ``rowspan`` keys.
+
+    Span attributes that are missing, empty, valueless or not a positive
+    integer are treated as 1, so malformed markup cannot abort parsing.
+    """
+
+    def __init__(self):
+        super().__init__()
+        self.tables = []
+        self.current_table = None
+        self.current_row = None
+        self.current_cell = None
+        self.in_cell = False
+
+    @staticmethod
+    def _parse_span(raw: Optional[str]) -> int:
+        """Return a colspan/rowspan attribute as an int >= 1 (1 on bad input)."""
+        try:
+            value = int(raw)
+        except (TypeError, ValueError):
+            # None (attribute absent or valueless), "" or non-numeric text.
+            return 1
+        return max(value, 1)
+
+    def handle_starttag(self, tag: str, attrs: List[Tuple[str, Optional[str]]]):
+        """Open a table, row or cell; cells record their span attributes."""
+        if tag == 'table':
+            self.current_table = {'rows': []}
+        elif tag == 'tr':
+            self.current_row = {'cells': []}
+        elif tag in ('td', 'th'):
+            attrs_dict = dict(attrs)
+            self.current_cell = {
+                'text': '',
+                'is_header': tag == 'th',
+                'colspan': self._parse_span(attrs_dict.get('colspan')),
+                'rowspan': self._parse_span(attrs_dict.get('rowspan'))
+            }
+            self.in_cell = True
+
+    def handle_endtag(self, tag: str):
+        """Close the current cell, row or table and attach it to its parent."""
+        if tag == 'table' and self.current_table:
+            self.tables.append(self.current_table)
+            self.current_table = None
+        elif tag == 'tr' and self.current_row:
+            if self.current_table:
+                self.current_table['rows'].append(self.current_row)
+            self.current_row = None
+        elif tag in ('td', 'th') and self.current_cell:
+            if self.current_row:
+                self.current_row['cells'].append(self.current_cell)
+            self.current_cell = None
+            self.in_cell = False
+
+    def handle_data(self, data: str):
+        """Append text to the cell that is currently open, if any."""
+        if self.in_cell and self.current_cell is not None:
+            self.current_cell['text'] += data
+
+
+# ============================================================================
+# Table Renderer
+# ============================================================================
+
+class TableRenderer:
+    """
+    Unified table rendering engine for PDF generation.
+
+    Supports multiple input formats and rendering modes:
+    - HTML table parsing and rendering
+    - Cell boxes rendering (layered approach)
+    - Direct track cells dictionary
+    - Translated content with dynamic font sizing
+    """
+
+    def __init__(self, config: Optional[TableRenderConfig] = None):
+        """
+        Create a renderer bound to a rendering configuration.
+
+        Args:
+            config: Rendering options; a default TableRenderConfig is
+                created when this is None (or otherwise falsy)
+        """
+        if not config:
+            config = TableRenderConfig()
+        self.config = config
+
+    def render_from_html(
+        self,
+        pdf_canvas,
+        html_content: str,
+        table_bbox: Tuple[float, float, float, float],
+        page_height: float,
+        scale_w: float = 1.0,
+        scale_h: float = 1.0
+    ) -> bool:
+        """
+        Parse HTML and render table to PDF canvas.
+
+        Only the first table found in ``html_content`` is rendered. Any
+        exception is logged with its traceback through the module logger
+        and reported as a False return value; nothing is raised.
+
+        Args:
+            pdf_canvas: ReportLab canvas
+            html_content: HTML table string
+            table_bbox: (x0, y0, x1, y1) bounding box
+            page_height: PDF page height for Y coordinate flip
+            scale_w: Horizontal scale factor
+            scale_h: Vertical scale factor
+
+        Returns:
+            True if successful, False otherwise
+        """
+        try:
+            parser = HTMLTableParser()
+            parser.feed(html_content)
+
+            if not parser.tables:
+                logger.warning("No tables found in HTML content")
+                return False
+
+            return self._render_parsed_table(
+                pdf_canvas, parser.tables[0], table_bbox, page_height,
+                scale_w, scale_h
+            )
+
+        except Exception as e:
+            # logger.exception records the traceback via the logging system
+            # instead of printing it to stderr.
+            logger.exception("HTML table rendering failed: %s", e)
+            return False
+
+    def render_from_cells_dict(
+        self,
+        pdf_canvas,
+        cells_dict: Dict,
+        table_bbox: Tuple[float, float, float, float],
+        page_height: float,
+        cell_boxes: Optional[List] = None
+    ) -> bool:
+        """
+        Render table from Direct track cell structure.
+
+        Column widths and row heights come from ``cell_boxes`` when they can
+        be derived from them; otherwise the table bounding box is divided
+        evenly. Any exception is logged with its traceback and reported as
+        a False return value.
+
+        Args:
+            pdf_canvas: ReportLab canvas
+            cells_dict: Dict with 'rows', 'cols', 'cells' keys
+            table_bbox: (x0, y0, x1, y1) bounding box
+            page_height: PDF page height
+            cell_boxes: Optional precomputed cell boxes
+
+        Returns:
+            True if successful, False otherwise
+        """
+        try:
+            rows = self._build_rows_from_cells_dict(cells_dict)
+
+            if not rows:
+                logger.warning("No rows built from cells dict")
+                return False
+
+            table_data = {'rows': rows}
+
+            x0, y0, x1, y1 = table_bbox
+            table_width = x1 - x0
+            table_height = y1 - y0
+
+            # A missing, None or zero count must never become a divisor of 0.
+            num_rows = cells_dict.get('rows') or len(rows)
+            num_cols = (
+                cells_dict.get('cols')
+                or max(len(row['cells']) for row in rows)
+                or 1
+            )
+
+            col_widths = None
+            row_heights = None
+            if cell_boxes:
+                col_widths, row_heights = self.compute_grid_from_cell_boxes(
+                    cell_boxes, table_bbox, num_rows, num_cols
+                )
+
+            # compute_grid_from_cell_boxes returns None for a dimension it
+            # could not derive; fall back to an even split for that one.
+            if col_widths is None:
+                col_widths = [table_width / num_cols] * num_cols
+            if row_heights is None:
+                row_heights = [table_height / num_rows] * num_rows
+
+            return self._render_with_dimensions(
+                pdf_canvas, table_data, table_bbox, page_height,
+                col_widths, row_heights
+            )
+
+        except Exception as e:
+            logger.exception("Cells dict rendering failed: %s", e)
+            return False
+
+    def render_cell_borders(
+        self,
+        pdf_canvas,
+        cell_boxes: List[List[float]],
+        table_bbox: Tuple[float, float, float, float],
+        page_height: float,
+        embedded_images: Optional[List] = None,
+        output_dir: Optional[Path] = None
+    ) -> bool:
+        """
+        Render table cell borders only (layered approach).
+
+        This renders only the cell borders, not the text content. When
+        ``cell_boxes`` is empty only the outer table border is drawn.
+        Boxes that are None or have fewer than four coordinates are
+        skipped rather than aborting the whole table.
+
+        Args:
+            pdf_canvas: ReportLab canvas
+            cell_boxes: List of [x0, y0, x1, y1] for each cell
+            table_bbox: Table bounding box
+            page_height: PDF page height
+            embedded_images: Optional list of images within cells
+            output_dir: Directory for image files
+
+        Returns:
+            True if successful, False otherwise
+        """
+        try:
+            if not cell_boxes:
+                # Draw outer border only
+                return self._draw_table_border(
+                    pdf_canvas, table_bbox, page_height
+                )
+
+            normalized_boxes = self.normalize_cell_boxes_to_grid(cell_boxes)
+
+            pdf_canvas.saveState()
+            try:
+                pdf_canvas.setStrokeColor(self.config.border_color)
+                pdf_canvas.setLineWidth(self.config.border_width)
+
+                for box in normalized_boxes:
+                    # Malformed boxes are passed through unchanged by the
+                    # normalizer, so they must be filtered here.
+                    if box is None or len(box) < 4:
+                        continue
+
+                    x0, y0, x1, y1 = box[:4]
+                    # Convert to PDF coordinates (flip Y)
+                    pdf_y0 = page_height - y1
+                    pdf_y1 = page_height - y0
+
+                    pdf_canvas.rect(x0, pdf_y0, x1 - x0, pdf_y1 - pdf_y0)
+            finally:
+                # Keep saveState/restoreState balanced even if drawing fails.
+                pdf_canvas.restoreState()
+
+            if embedded_images and output_dir:
+                for img_info in embedded_images:
+                    self._draw_embedded_image(
+                        pdf_canvas, img_info, page_height, output_dir
+                    )
+
+            return True
+
+        except Exception as e:
+            logger.exception("Cell borders rendering failed: %s", e)
+            return False
+
+    def render_with_translated_text(
+        self,
+        pdf_canvas,
+        cells: List[Dict],
+        cell_boxes: List,
+        table_bbox: Tuple[float, float, float, float],
+        page_height: float
+    ) -> bool:
+        """
+        Render table with translated content and dynamic font sizing.
+
+        Draws the outer border, then one rectangle per cell box, then each
+        cell's ``translated_content`` as a single centered line. Cells and
+        boxes are paired by position. Boxes that are None or have fewer
+        than four coordinates are skipped. Canvas graphics state, including
+        the font, is restored before returning.
+
+        Args:
+            pdf_canvas: ReportLab canvas
+            cells: List of cell dicts with 'translated_content'
+            cell_boxes: List of cell bounding boxes
+            table_bbox: Table bounding box
+            page_height: PDF page height
+
+        Returns:
+            True if successful, False otherwise
+        """
+        try:
+            # Draw outer border
+            self._draw_table_border(pdf_canvas, table_bbox, page_height)
+
+            if not cell_boxes:
+                logger.warning("No cell boxes for translated table")
+                return False
+
+            normalized_boxes = self.normalize_cell_boxes_to_grid(cell_boxes)
+
+            pdf_canvas.saveState()
+            try:
+                pdf_canvas.setStrokeColor(self.config.border_color)
+                pdf_canvas.setLineWidth(self.config.border_width)
+
+                # Draw cell borders
+                for box in normalized_boxes:
+                    if box is None or len(box) < 4:
+                        continue
+                    x0, y0, x1, y1 = box[:4]
+                    pdf_canvas.rect(x0, page_height - y1, x1 - x0, y1 - y0)
+
+                # Render text inside the same saved state so the font set
+                # here does not leak into whatever the caller draws next.
+                for cell, box in zip(cells, normalized_boxes):
+                    if box is None or len(box) < 4:
+                        continue
+
+                    translated_text = cell.get('translated_content', '')
+                    if not translated_text:
+                        continue
+
+                    x0, y0, x1, y1 = box[:4]
+                    cell_width = x1 - x0
+                    cell_height = y1 - y0
+
+                    font_size = self._fit_text_to_cell(
+                        pdf_canvas, translated_text, cell_width, cell_height
+                    )
+                    pdf_canvas.setFont(self.config.font_name, font_size)
+
+                    # Center horizontally; font_size / 3 approximates the
+                    # baseline offset for vertical centering.
+                    text_width = pdf_canvas.stringWidth(
+                        translated_text, self.config.font_name, font_size
+                    )
+                    text_x = x0 + (cell_width - text_width) / 2
+                    text_y = page_height - y0 - cell_height / 2 - font_size / 3
+
+                    pdf_canvas.drawString(text_x, text_y, translated_text)
+            finally:
+                # Keep saveState/restoreState balanced even if drawing fails.
+                pdf_canvas.restoreState()
+
+            return True
+
+        except Exception as e:
+            logger.exception("Translated table rendering failed: %s", e)
+            return False
+
+    # =========================================================================
+    # Grid and Cell Box Helpers
+    # =========================================================================
+
+    def compute_grid_from_cell_boxes(
+        self,
+        cell_boxes: List,
+        table_bbox: Tuple[float, float, float, float],
+        num_rows: int,
+        num_cols: int
+    ) -> Tuple[Optional[List[float]], Optional[List[float]]]:
+        """
+        Derive column widths and row heights from cell bounding boxes.
+
+        Box edges are rounded to one decimal, de-duplicated, sorted and
+        merged when closer than ``config.merge_boundary_threshold``; the
+        gaps between the surviving edges are the widths and heights.
+        ``num_rows`` / ``num_cols`` are only used to log a debug message
+        when the derived counts differ a lot from the expected ones.
+
+        Args:
+            cell_boxes: List of [x0, y0, x1, y1] for each cell
+            table_bbox: Table bounding box (not used by the computation)
+            num_rows: Expected number of rows
+            num_cols: Expected number of columns
+
+        Returns:
+            Tuple of (col_widths, row_heights); an element is None when it
+            could not be derived, and both are None on failure
+        """
+        try:
+            usable = [
+                box for box in (cell_boxes or [])
+                if box is not None and len(box) >= 4
+            ]
+            if not usable:
+                return None, None
+
+            # Unique, rounded edge positions along each axis
+            x_edges = set()
+            y_edges = set()
+            for left, top, right, bottom in (box[:4] for box in usable):
+                x_edges.update((round(left, 1), round(right, 1)))
+                y_edges.update((round(top, 1), round(bottom, 1)))
+
+            limit = self.config.merge_boundary_threshold
+            col_edges = self._merge_boundaries(sorted(x_edges), limit)
+            row_edges = self._merge_boundaries(sorted(y_edges), limit)
+
+            # Distance between consecutive edges
+            col_widths = [hi - lo for lo, hi in zip(col_edges, col_edges[1:])]
+            row_heights = [hi - lo for lo, hi in zip(row_edges, row_edges[1:])]
+
+            # Merged cells legitimately change the counts, so only report
+            # larger deviations.
+            tolerance = max(num_cols, num_rows) // 2 + 1
+            if abs(len(col_widths) - num_cols) > tolerance:
+                logger.debug(f"Column count mismatch: {len(col_widths)} vs {num_cols}")
+            if abs(len(row_heights) - num_rows) > tolerance:
+                logger.debug(f"Row count mismatch: {len(row_heights)} vs {num_rows}")
+
+            return (col_widths or None), (row_heights or None)
+
+        except Exception as e:
+            logger.error(f"Grid computation failed: {e}")
+            return None, None
+
+    def normalize_cell_boxes_to_grid(
+        self,
+        cell_boxes: List,
+        threshold: Optional[float] = None
+    ) -> List:
+        """
+        Snap cell boxes to aligned grid to eliminate coordinate variations.
+
+        All X coordinates (and separately all Y coordinates) are clustered;
+        every coordinate is replaced by the mean of its cluster. Boxes that
+        are None or have fewer than four values are passed through
+        unchanged, so the result has the same length and order as the
+        input. On any error the original ``cell_boxes`` is returned.
+
+        Args:
+            cell_boxes: List of [x0, y0, x1, y1] for each cell
+            threshold: Clustering threshold (uses config default if None).
+                An explicit 0 is honoured and merges only equal values.
+
+        Returns:
+            Normalized cell boxes
+        """
+        # "is None" rather than "or": 0 is a valid, explicit threshold.
+        if threshold is None:
+            threshold = self.config.grid_threshold
+
+        if not cell_boxes:
+            return []
+
+        try:
+            all_x = []
+            all_y = []
+
+            for box in cell_boxes:
+                if box is None or len(box) < 4:
+                    continue
+                x0, y0, x1, y1 = box[:4]
+                all_x.extend([x0, x1])
+                all_y.extend([y0, y1])
+
+            if not all_x or not all_y:
+                return cell_boxes
+
+            x_clusters = self._cluster_values(sorted(all_x), threshold)
+            y_clusters = self._cluster_values(sorted(all_y), threshold)
+
+            # Map every original coordinate to its cluster average
+            x_map = {v: avg for avg, values in x_clusters for v in values}
+            y_map = {v: avg for avg, values in y_clusters for v in values}
+
+            normalized = []
+            for box in cell_boxes:
+                if box is None or len(box) < 4:
+                    normalized.append(box)
+                    continue
+
+                x0, y0, x1, y1 = box[:4]
+                normalized.append([
+                    x_map.get(x0, x0),
+                    y_map.get(y0, y0),
+                    x_map.get(x1, x1),
+                    y_map.get(y1, y1)
+                ])
+
+            return normalized
+
+        except Exception as e:
+            logger.error(f"Cell box normalization failed: {e}")
+            return cell_boxes
+
+    # =========================================================================
+    # Private Helper Methods
+    # =========================================================================
+
+    def _render_parsed_table(
+        self,
+        pdf_canvas,
+        table_data: Dict,
+        table_bbox: Tuple[float, float, float, float],
+        page_height: float,
+        scale_w: float = 1.0,
+        scale_h: float = 1.0
+    ) -> bool:
+        """
+        Render a parsed table structure.
+
+        The table is laid out on an evenly divided grid that fills the
+        scaled bounding box. The column count is the widest row measured in
+        grid columns (sum of colspans), so rows made of merged cells are
+        not truncated. Cell text is treated as plain text and escaped
+        before being wrapped in a Paragraph.
+
+        Args:
+            pdf_canvas: ReportLab canvas
+            table_data: Dict with a 'rows' list; each row has a 'cells'
+                list of dicts with 'text', 'colspan' and 'rowspan'
+            table_bbox: (x0, y0, x1, y1) bounding box
+            page_height: PDF page height for Y coordinate flip
+            scale_w: Horizontal scale factor applied to the table width
+            scale_h: Vertical scale factor applied to the table height
+
+        Returns:
+            True if the table was drawn, False if there was nothing to draw
+        """
+        from xml.sax.saxutils import escape
+
+        rows = table_data.get('rows', [])
+        if not rows:
+            return False
+
+        num_rows = len(rows)
+        # Count grid columns, not cell elements: a colspan=3 cell is 3 wide.
+        num_cols = max(
+            sum(max(cell.get('colspan', 1), 1) for cell in row.get('cells', []))
+            for row in rows
+        )
+        if num_cols == 0:
+            # Only empty rows: nothing to draw (and nothing to divide by).
+            return False
+
+        # Track occupied cells for rowspan handling
+        occupied = [[False] * num_cols for _ in range(num_rows)]
+
+        grid = []
+        span_commands = []
+
+        for row_idx, row in enumerate(rows):
+            grid_row = [''] * num_cols
+            col_idx = 0
+
+            for cell in row.get('cells', []):
+                # Skip slots already taken by a span from an earlier cell
+                while col_idx < num_cols and occupied[row_idx][col_idx]:
+                    col_idx += 1
+
+                if col_idx >= num_cols:
+                    break
+
+                text = cell.get('text', '').strip()
+                colspan = cell.get('colspan', 1)
+                rowspan = cell.get('rowspan', 1)
+
+                grid_row[col_idx] = text
+
+                if colspan > 1 or rowspan > 1:
+                    # Clip the span to the grid
+                    end_col = min(col_idx + colspan - 1, num_cols - 1)
+                    end_row = min(row_idx + rowspan - 1, num_rows - 1)
+                    span_commands.append(
+                        ('SPAN', (col_idx, row_idx), (end_col, end_row))
+                    )
+
+                    for r in range(row_idx, end_row + 1):
+                        for c in range(col_idx, end_col + 1):
+                            occupied[r][c] = True
+                else:
+                    occupied[row_idx][col_idx] = True
+
+                col_idx += colspan
+
+            grid.append(grid_row)
+
+        x0, y0, x1, y1 = table_bbox
+        table_width = (x1 - x0) * scale_w
+        table_height = (y1 - y0) * scale_h
+
+        col_widths = [table_width / num_cols] * num_cols
+        row_heights = [table_height / num_rows] * num_rows
+
+        style = ParagraphStyle(
+            'TableCell',
+            fontName=self.config.font_name,
+            fontSize=self.config.font_size,
+            alignment=TA_CENTER,
+            leading=self.config.font_size * 1.2
+        )
+
+        # Paragraph parses its text as markup, so literal '<', '>' and '&'
+        # in cell text must be escaped first.
+        para_grid = [
+            [Paragraph(escape(cell), style) if cell else '' for cell in grid_row]
+            for grid_row in grid
+        ]
+
+        table_style_commands = [
+            ('GRID', (0, 0), (-1, -1), self.config.border_width, self.config.border_color),
+            ('VALIGN', (0, 0), (-1, -1), self.config.vertical_align),
+            ('ALIGN', (0, 0), (-1, -1), self.config.horizontal_align),
+            ('LEFTPADDING', (0, 0), (-1, -1), self.config.left_padding),
+            ('RIGHTPADDING', (0, 0), (-1, -1), self.config.right_padding),
+            ('TOPPADDING', (0, 0), (-1, -1), self.config.top_padding),
+            ('BOTTOMPADDING', (0, 0), (-1, -1), self.config.bottom_padding),
+            ('FONTNAME', (0, 0), (-1, -1), self.config.font_name),
+            ('FONTSIZE', (0, 0), (-1, -1), self.config.font_size),
+        ]
+        table_style_commands.extend(span_commands)
+
+        table = Table(para_grid, colWidths=col_widths, rowHeights=row_heights)
+        table.setStyle(TableStyle(table_style_commands))
+
+        # Position with the Y axis flipped
+        pdf_x = x0
+        pdf_y = page_height - y1
+
+        table.wrapOn(pdf_canvas, table_width, table_height)
+        table.drawOn(pdf_canvas, pdf_x, pdf_y)
+
+        return True
+
+    def _render_with_dimensions(
+        self,
+        pdf_canvas,
+        table_data: Dict,
+        table_bbox: Tuple[float, float, float, float],
+        page_height: float,
+        col_widths: List[float],
+        row_heights: List[float]
+    ) -> bool:
+        """
+        Render table with specified dimensions.
+
+        Cells are placed at their explicit 'col' index. The column count
+        covers the right-most column reached by any cell (col + colspan),
+        so rows with merged cells do not shrink the grid. Widths or heights
+        that are None or do not match the grid are replaced by an even
+        split of the bounding box. Cell text is treated as plain text and
+        escaped before being wrapped in a Paragraph.
+
+        Args:
+            pdf_canvas: ReportLab canvas
+            table_data: Dict with a 'rows' list; each row has a 'cells'
+                list of dicts with 'text', 'col', 'colspan' and 'rowspan'
+            table_bbox: (x0, y0, x1, y1) bounding box
+            page_height: PDF page height for Y coordinate flip
+            col_widths: Width of each grid column
+            row_heights: Height of each grid row
+
+        Returns:
+            True if the table was drawn, False if there was nothing to draw
+        """
+        from xml.sax.saxutils import escape
+
+        rows = table_data.get('rows', [])
+        if not rows:
+            return False
+
+        num_rows = len(rows)
+        num_cols = 0
+        for row in rows:
+            row_cells = row.get('cells', [])
+            num_cols = max(num_cols, len(row_cells))
+            for cell in row_cells:
+                extent = cell.get('col', 0) + max(cell.get('colspan', 1), 1)
+                num_cols = max(num_cols, extent)
+        if num_cols == 0:
+            # Only empty rows: nothing to draw (and nothing to divide by).
+            return False
+
+        x0, y0, x1, y1 = table_bbox
+
+        # Fall back to an even split when the supplied sizes are unusable
+        if col_widths is None or len(col_widths) != num_cols:
+            col_widths = [(x1 - x0) / num_cols] * num_cols
+        if row_heights is None or len(row_heights) != num_rows:
+            row_heights = [(y1 - y0) / num_rows] * num_rows
+
+        grid = []
+        span_commands = []
+        occupied = [[False] * num_cols for _ in range(num_rows)]
+
+        for row_idx, row in enumerate(rows):
+            grid_row = [''] * num_cols
+
+            for cell in row.get('cells', []):
+                col_idx = cell.get('col', 0)
+
+                # Shift right past slots taken by earlier spans; drop the
+                # cell if that pushes it off the grid.
+                while col_idx < num_cols and occupied[row_idx][col_idx]:
+                    col_idx += 1
+                if col_idx >= num_cols:
+                    continue
+
+                text = cell.get('text', '').strip()
+                colspan = cell.get('colspan', 1)
+                rowspan = cell.get('rowspan', 1)
+
+                grid_row[col_idx] = text
+
+                if colspan > 1 or rowspan > 1:
+                    # Clip the span to the grid
+                    end_col = min(col_idx + colspan - 1, num_cols - 1)
+                    end_row = min(row_idx + rowspan - 1, num_rows - 1)
+                    span_commands.append(
+                        ('SPAN', (col_idx, row_idx), (end_col, end_row))
+                    )
+                    for r in range(row_idx, end_row + 1):
+                        for c in range(col_idx, end_col + 1):
+                            occupied[r][c] = True
+                else:
+                    occupied[row_idx][col_idx] = True
+
+            grid.append(grid_row)
+
+        style = ParagraphStyle(
+            'TableCell',
+            fontName=self.config.font_name,
+            fontSize=self.config.font_size,
+            alignment=TA_CENTER
+        )
+
+        # Paragraph parses its text as markup, so literal '<', '>' and '&'
+        # in cell text must be escaped first.
+        para_grid = [
+            [Paragraph(escape(cell), style) if cell else '' for cell in grid_row]
+            for grid_row in grid
+        ]
+
+        table_style_commands = [
+            ('GRID', (0, 0), (-1, -1), self.config.border_width, self.config.border_color),
+            ('VALIGN', (0, 0), (-1, -1), self.config.vertical_align),
+            ('LEFTPADDING', (0, 0), (-1, -1), 0),
+            ('RIGHTPADDING', (0, 0), (-1, -1), 0),
+            ('TOPPADDING', (0, 0), (-1, -1), 0),
+            ('BOTTOMPADDING', (0, 0), (-1, -1), 1),
+        ]
+        table_style_commands.extend(span_commands)
+
+        table = Table(para_grid, colWidths=col_widths, rowHeights=row_heights)
+        table.setStyle(TableStyle(table_style_commands))
+
+        # Position with the Y axis flipped
+        pdf_x = x0
+        pdf_y = page_height - y1
+
+        table.wrapOn(pdf_canvas, x1 - x0, y1 - y0)
+        table.drawOn(pdf_canvas, pdf_x, pdf_y)
+
+        return True
+
+    def _build_rows_from_cells_dict(self, cells_dict: Dict) -> List[Dict]:
+        """
+        Convert Direct track cell structure to row format.
+
+        Cells are grouped by their 'row' index and ordered by 'col'. One
+        row dict is produced for every index below the row count, including
+        rows that have no cells. The row count is ``cells_dict['rows']``
+        when that is a positive number; otherwise it is derived from the
+        cells (largest row index plus its row span).
+
+        Args:
+            cells_dict: Dict with a 'cells' list and optional 'rows' count.
+                Each cell may carry 'row', 'col', 'content', 'col_span',
+                'row_span' and 'is_header'.
+
+        Returns:
+            List of ``{'cells': [...]}`` dicts, or [] when there are no
+            cells. List content is joined with newlines; None becomes ''.
+        """
+        cells = cells_dict.get('cells', [])
+        if not cells:
+            return []
+
+        num_rows = cells_dict.get('rows') or 0
+        if num_rows <= 0:
+            # No usable declared count: cover every row a cell reaches.
+            num_rows = max(
+                cell.get('row', 0) + max(cell.get('row_span', 1), 1)
+                for cell in cells
+            )
+
+        # Group cells by row
+        rows_data = {}
+        for cell in cells:
+            rows_data.setdefault(cell.get('row', 0), []).append(cell)
+
+        rows = []
+        for row_idx in range(num_rows):
+            row_cells = sorted(
+                rows_data.get(row_idx, []), key=lambda c: c.get('col', 0)
+            )
+
+            formatted_cells = []
+            for cell in row_cells:
+                content = cell.get('content', '')
+                if isinstance(content, list):
+                    content = '\n'.join(str(c) for c in content)
+
+                formatted_cells.append({
+                    # Only None means "no text"; 0 and False are real values.
+                    'text': '' if content is None else str(content),
+                    'colspan': cell.get('col_span', 1),
+                    'rowspan': cell.get('row_span', 1),
+                    'col': cell.get('col', 0),
+                    'is_header': cell.get('is_header', False)
+                })
+
+            rows.append({'cells': formatted_cells})
+
+        return rows
+
+    def _draw_table_border(
+        self,
+        pdf_canvas,
+        table_bbox: Tuple[float, float, float, float],
+        page_height: float
+    ) -> bool:
+        """
+        Stroke a rectangle around the whole table.
+
+        The bounding box is given with Y growing downwards; it is flipped
+        against ``page_height`` before drawing. Stroke color and line width
+        come from the config and are set inside a saved canvas state.
+
+        Returns:
+            True when the rectangle was drawn, False if drawing raised
+            (the error is logged)
+        """
+        try:
+            left, top, right, bottom = table_bbox
+            lower_y = page_height - bottom
+            upper_y = page_height - top
+
+            pdf_canvas.saveState()
+            pdf_canvas.setStrokeColor(self.config.border_color)
+            pdf_canvas.setLineWidth(self.config.border_width)
+            pdf_canvas.rect(left, lower_y, right - left, upper_y - lower_y)
+            pdf_canvas.restoreState()
+        except Exception as e:
+            logger.error(f"Failed to draw table border: {e}")
+            return False
+
+        return True
+
+    def _draw_embedded_image(
+        self,
+        pdf_canvas,
+        img_info: Dict,
+        page_height: float,
+        output_dir: Path
+    ) -> bool:
+        """
+        Draw one image that sits inside a table cell.
+
+        ``img_info['path']`` is used as-is when absolute and is otherwise
+        resolved against ``output_dir``. Placement comes from
+        ``img_info['bbox']`` ('x0', 'y0', 'width', 'height'; defaults 0, 0,
+        100, 100), with Y flipped against ``page_height``.
+
+        Returns:
+            True when the image was drawn; False when the path is missing,
+            the file does not exist, or drawing raised (logged)
+        """
+        try:
+            img_path = img_info.get('path')
+            if not img_path:
+                return False
+
+            # Relative paths are relative to the output directory
+            target = (
+                img_path if Path(img_path).is_absolute()
+                else output_dir / img_path
+            )
+            if not Path(target).exists():
+                logger.warning(f"Embedded image not found: {target}")
+                return False
+
+            bbox = img_info.get('bbox', {})
+            x0, y0 = bbox.get('x0', 0), bbox.get('y0', 0)
+            width, height = bbox.get('width', 100), bbox.get('height', 100)
+
+            # Bottom edge of the image in flipped coordinates
+            pdf_y = page_height - y0 - height
+
+            pdf_canvas.drawImage(
+                ImageReader(str(target)), x0, pdf_y, width, height
+            )
+            return True
+
+        except Exception as e:
+            logger.error(f"Failed to draw embedded image: {e}")
+            return False
+
+    def _fit_text_to_cell(
+        self,
+        pdf_canvas,
+        text: str,
+        cell_width: float,
+        cell_height: float
+    ) -> int:
+        """
+        Pick the largest configured font size whose text width fits.
+
+        Sizes from ``config.max_font_size`` down to ``config.min_font_size``
+        are tried in order; the minimum is returned when none fits. Only
+        the width is checked; ``cell_height`` is not used.
+        """
+        smallest = self.config.min_font_size
+        candidates = range(self.config.max_font_size, smallest - 1, -1)
+        return next(
+            (
+                size for size in candidates
+                # 3pt padding each side
+                if pdf_canvas.stringWidth(text, self.config.font_name, size)
+                <= cell_width - 6
+            ),
+            smallest,
+        )
+
+    def _merge_boundaries(self, values: List[float], threshold: float) -> List[float]:
+        """
+        Drop boundaries that lie within ``threshold`` of the last kept one.
+
+        The first value is always kept; each later value is kept only when
+        it is more than ``threshold`` away from the most recently kept
+        value. Callers in this class pass ``values`` already sorted.
+        """
+        kept: List[float] = []
+        for position, value in enumerate(values):
+            if position == 0 or abs(value - kept[-1]) > threshold:
+                kept.append(value)
+        return kept
+
+    def _cluster_values(self, values: List[float], threshold: float) -> List[Tuple[float, List[float]]]:
+        """
+        Group neighbouring values and pair each group with its mean.
+
+        A value joins the current group when it is within ``threshold`` of
+        the previous value (the group's last member); otherwise it starts a
+        new group. Callers in this class pass ``values`` already sorted.
+
+        Returns:
+            List of (average, members) tuples in input order; [] for empty
+            input
+        """
+        groups: List[List[float]] = []
+        for value in values:
+            if groups and abs(value - groups[-1][-1]) <= threshold:
+                groups[-1].append(value)
+            else:
+                groups.append([value])
+
+        return [(sum(members) / len(members), members) for members in groups]