diff --git a/backend/app/services/direct_extraction_engine.py b/backend/app/services/direct_extraction_engine.py
index 7e09b41..0ed9405 100644
--- a/backend/app/services/direct_extraction_engine.py
+++ b/backend/app/services/direct_extraction_engine.py
@@ -190,6 +190,10 @@ class DirectExtractionEngine:
         elements = []
         element_counter = 0
 
+        # Get page-level metadata (for final Page metadata)
+        drawings = page.get_drawings()
+        links = page.get_links()
+
         # Get page dimensions
         rect = page.rect
         dimensions = Dimensions(
@@ -198,18 +202,8 @@ class DirectExtractionEngine:
             dpi=72  # PDF standard DPI
         )
 
-        # Extract text blocks with formatting (sort=True for reading order)
-        text_dict = page.get_text("dict", sort=True)
-        for block_idx, block in enumerate(text_dict.get("blocks", [])):
-            if block.get("type") == 0:  # Text block
-                element = self._process_text_block(
-                    block, page_num, element_counter
-                )
-                if element:
-                    elements.append(element)
-                    element_counter += 1
-
-        # Extract tables (if enabled)
+        # Extract tables first (if enabled) to get table regions
+        table_bboxes = []
         if self.enable_table_detection:
             try:
                 # Try native table detection (PyMuPDF 1.23.0+)
@@ -218,16 +212,32 @@ class DirectExtractionEngine:
                     element = self._process_native_table(
                         table, page_num, element_counter
                     )
-                    if element:
+                    if element and element.bbox:
                         elements.append(element)
+                        table_bboxes.append(element.bbox)
                         element_counter += 1
             except AttributeError:
                 # Fallback to positional table detection
                 logger.debug("Native table detection not available, using positional detection")
                 table_elements = self._detect_tables_by_position(page, page_num, element_counter)
+                for elem in table_elements:
+                    if elem.bbox:
+                        table_bboxes.append(elem.bbox)
                 elements.extend(table_elements)
                 element_counter += len(table_elements)
 
+        # Extract text blocks with formatting (sort=True for reading order)
+        # Filter out lines that overlap with table regions
+        text_dict = page.get_text("dict", sort=True)
+        for block_idx, block in enumerate(text_dict.get("blocks", [])):
+            if block.get("type") == 0:  # Text block
+                element = self._process_text_block(
+                    block, page_num, element_counter, table_bboxes
+                )
+                if element:
+                    elements.append(element)
+                    element_counter += 1
+
         # Extract images (if enabled)
         if self.enable_image_extraction:
             image_elements = self._extract_images(
@@ -236,6 +246,14 @@ class DirectExtractionEngine:
             elements.extend(image_elements)
             element_counter += len(image_elements)
 
+        # Extract vector graphics (charts, diagrams) from drawing commands
+        if self.enable_image_extraction:
+            vector_elements = self._extract_vector_graphics(
+                page, page_num, document_id, element_counter, output_dir
+            )
+            elements.extend(vector_elements)
+            element_counter += len(vector_elements)
+
         # Extract hyperlinks
         links = page.get_links()
         for link_idx, link in enumerate(links):
@@ -258,16 +276,15 @@ class DirectExtractionEngine:
                     elements.append(element)
                     element_counter += 1
 
-        # Extract vector graphics (as metadata)
-        drawings = page.get_drawings()
-        if drawings:
-            logger.debug(f"Page {page_num} contains {len(drawings)} vector drawing commands")
-
         # PyMuPDF's sort=True already provides good reading order for multi-column layouts
         # (top-to-bottom, left-to-right within each row). We don't need to re-sort.
         # NOTE: If sort=True is not used in get_text(), uncomment the line below:
         # elements = self._sort_elements_for_reading_order(elements, dimensions)
 
+        # Deduplicate overlapping TABLE/CHART elements: a TABLE with >=50% cell
+        # completeness wins and the CHART is dropped; otherwise the CHART is kept.
+        elements = self._deduplicate_table_chart_overlap(elements)
+
         # Post-process elements for header/footer detection and structure
         elements = self._detect_headers_footers(elements, dimensions)
         elements = self._build_section_hierarchy(elements)
@@ -519,24 +536,58 @@ class DirectExtractionEngine:
 
         return elements
 
-    def _process_text_block(self, block: Dict, page_num: int, counter: int) -> Optional[DocumentElement]:
-        """Process a text block into a DocumentElement"""
-        # Calculate block bounding box
-        bbox_data = block.get("bbox", [0, 0, 0, 0])
-        bbox = BoundingBox(
-            x0=bbox_data[0],
-            y0=bbox_data[1],
-            x1=bbox_data[2],
-            y1=bbox_data[3]
-        )
+    def _process_text_block(self, block: Dict, page_num: int, counter: int,
+                            table_bboxes: List[BoundingBox] = None) -> Optional[DocumentElement]:
+        """
+        Process a text block into a DocumentElement.
+
+        Args:
+            block: Text block from PyMuPDF
+            page_num: Page number
+            counter: Element counter
+            table_bboxes: List of table bounding boxes to filter overlapping lines
+
+        Returns:
+            DocumentElement, or None when no text remains (e.g. all lines overlap tables)
+        """
+        if table_bboxes is None:
+            table_bboxes = []
 
         # Extract text content and span information
+        # Filter out lines that significantly overlap with table regions
         text_parts = []
         styles = []
         span_children = []  # Store span-level children for inline styling
         span_counter = 0
+        valid_line_bboxes = []  # Track bboxes of valid lines for overall bbox calculation
 
         for line in block.get("lines", []):
+            line_bbox_data = line.get("bbox", [0, 0, 0, 0])
+
+            # Check if this line overlaps with any table region
+            line_overlaps_table = False
+            for table_bbox in table_bboxes:
+                overlap_x0 = max(line_bbox_data[0], table_bbox.x0)
+                overlap_y0 = max(line_bbox_data[1], table_bbox.y0)
+                overlap_x1 = min(line_bbox_data[2], table_bbox.x1)
+                overlap_y1 = min(line_bbox_data[3], table_bbox.y1)
+
+                if overlap_x0 < overlap_x1 and overlap_y0 < overlap_y1:
+                    # Calculate overlap ratio
+                    line_height = line_bbox_data[3] - line_bbox_data[1]
+                    overlap_height = overlap_y1 - overlap_y0
+                    if line_height > 0:
+                        overlap_ratio = overlap_height / line_height
+                        if overlap_ratio >= 0.5:  # Line significantly overlaps with table
+                            line_overlaps_table = True
+                            break
+
+            if line_overlaps_table:
+                continue  # Skip this line
+
+            # Process valid line
+            valid_line_bboxes.append(line_bbox_data)
+
             for span in line.get("spans", []):
                 text = span.get("text", "")
                 if text:
@@ -553,7 +604,7 @@ class DirectExtractionEngine:
                     styles.append(style)
 
                     # Create span child element for inline styling
-                    span_bbox_data = span.get("bbox", bbox_data)
+                    span_bbox_data = span.get("bbox", [0, 0, 0, 0])
                     span_bbox = BoundingBox(
                         x0=span_bbox_data[0],
                         y0=span_bbox_data[1],
@@ -574,10 +625,22 @@ class DirectExtractionEngine:
                     span_counter += 1
 
         if not text_parts:
-            return None
+            return None  # All lines overlapped with tables
 
         full_text = "".join(text_parts)
 
+        # Calculate bbox from valid lines only
+        if valid_line_bboxes:
+            min_x0 = min(b[0] for b in valid_line_bboxes)
+            min_y0 = min(b[1] for b in valid_line_bboxes)
+            max_x1 = max(b[2] for b in valid_line_bboxes)
+            max_y1 = max(b[3] for b in valid_line_bboxes)
+            bbox = BoundingBox(x0=min_x0, y0=min_y0, x1=max_x1, y1=max_y1)
+        else:
+            # Fallback to original bbox if no valid lines found
+            bbox_data = block.get("bbox", [0, 0, 0, 0])
+            bbox = BoundingBox(x0=bbox_data[0], y0=bbox_data[1], x1=bbox_data[2], y1=bbox_data[3])
+
         # Determine element type based on content and style
         element_type = self._infer_element_type(full_text, styles)
 
@@ -642,6 +705,30 @@ class DirectExtractionEngine:
                 y1=bbox_data[3]
             )
 
+            # Extract column widths from table cells
+            column_widths = []
+            if hasattr(table, 'cells') and table.cells:
+                # Group cells by column
+                cols_x = {}
+                for cell in table.cells:
+                    col_idx = None
+                    # Determine column index by x0 position
+                    for idx, x0 in enumerate(sorted(set(c[0] for c in table.cells))):
+                        if abs(cell[0] - x0) < 1.0:  # Within 1pt tolerance
+                            col_idx = idx
+                            break
+
+                    if col_idx is not None:
+                        if col_idx not in cols_x:
+                            cols_x[col_idx] = {'x0': cell[0], 'x1': cell[2]}
+                        else:
+                            cols_x[col_idx]['x1'] = max(cols_x[col_idx]['x1'], cell[2])
+
+                # Calculate width for each column
+                for col_idx in sorted(cols_x.keys()):
+                    width = cols_x[col_idx]['x1'] - cols_x[col_idx]['x0']
+                    column_widths.append(width)
+
             # Create table cells
             cells = []
             for row_idx, row in enumerate(data):
@@ -661,12 +748,16 @@ class DirectExtractionEngine:
                 headers=data[0] if data else None  # Assume first row is header
             )
 
+            # Store column widths in metadata
+            metadata = {"column_widths": column_widths} if column_widths else None
+
             return DocumentElement(
                 element_id=f"table_{page_num}_{counter}",
                 type=ElementType.TABLE,
                 content=table_data,
                 bbox=bbox,
-                confidence=1.0
+                confidence=1.0,
+                metadata=metadata
             )
 
         except Exception as e:
@@ -908,4 +999,298 @@ class DirectExtractionEngine:
             except Exception as e:
                 logger.error(f"Error extracting image {img_idx}: {e}")
 
-        return elements
\ No newline at end of file
+        return elements
+
+    def _extract_vector_graphics(self,
+                                 page: fitz.Page,
+                                 page_num: int,
+                                 document_id: str,
+                                 counter: int,
+                                 output_dir: Optional[Path]) -> List[DocumentElement]:
+        """
+        Render clustered vector drawings on a page to PNG files as CHART elements.
+
+        Drawing commands from page.get_drawings() are grouped into rectangles via
+        page.cluster_drawings() (or _cluster_drawings_fallback when that raises
+        AttributeError/TypeError); clusters narrower or shorter than 50 are skipped.
+
+        Args:
+            page: PyMuPDF page object
+            page_num: Page number, used in element IDs and output filenames
+            document_id: Document identifier, used as the output filename prefix
+            counter: Base value added to the cluster index to form element IDs
+            output_dir: Directory for rendered PNGs; when falsy, nothing is saved
+                and no elements are produced (elements are only built after a save)
+        Returns:
+            List of CHART DocumentElements; exceptions are logged, not raised
+        """
+        elements = []
+
+        try:
+            # Get all drawing commands
+            drawings = page.get_drawings()
+            if not drawings:
+                return elements
+
+            logger.debug(f"Page {page_num} contains {len(drawings)} vector drawing commands")
+
+            # Cluster drawings into groups (charts, diagrams, etc.)
+            try:
+                # PyMuPDF's cluster_drawings() groups nearby drawings automatically
+                drawing_clusters = page.cluster_drawings()
+                logger.debug(f"Clustered into {len(drawing_clusters)} groups")
+            except (AttributeError, TypeError) as e:
+                # cluster_drawings not available or has different signature
+                # Fallback: merge the drawings' own rects into clusters by proximity
+                logger.warning(f"cluster_drawings() failed ({e}), using fallback method")
+                drawing_clusters = self._cluster_drawings_fallback(page, drawings)
+
+            for cluster_idx, bbox in enumerate(drawing_clusters):
+                # Ignore small regions (likely noise or separator lines)
+                if bbox.width < 50 or bbox.height < 50:
+                    logger.debug(f"Skipping small cluster {cluster_idx}: {bbox.width:.1f}x{bbox.height:.1f}")
+                    continue
+
+                # Render the region to a raster image
+                # matrix=fitz.Matrix(2, 2) renders at 2x zoom (144 DPI from the 72 DPI PDF base)
+                try:
+                    pix = page.get_pixmap(clip=bbox, matrix=fitz.Matrix(2, 2))
+
+                    # Save image if output directory provided
+                    if output_dir:  # without an output_dir the cluster is rendered but no element is created
+                        output_dir.mkdir(parents=True, exist_ok=True)
+                        filename = f"{document_id}_p{page_num}_chart{cluster_idx}.png"
+                        filepath = output_dir / filename
+                        pix.save(str(filepath))
+
+                        # Create DocumentElement
+                        image_data = {
+                            "saved_path": str(filepath),
+                            "width": pix.width,
+                            "height": pix.height,
+                            "colorspace": pix.colorspace.name if pix.colorspace else "unknown",
+                            "source": "vector_graphics"
+                        }
+
+                        element = DocumentElement(
+                            element_id=f"chart_{page_num}_{counter + cluster_idx}",  # cluster_idx also counts skipped clusters, so IDs may have gaps
+                            type=ElementType.CHART,  # Use CHART type for vector graphics
+                            content=image_data,
+                            bbox=BoundingBox(
+                                x0=bbox.x0,
+                                y0=bbox.y0,
+                                x1=bbox.x1,
+                                y1=bbox.y1
+                            ),
+                            confidence=0.85,  # Slightly lower confidence than raster images
+                            metadata={
+                                "cluster_index": cluster_idx,
+                                "drawing_count": len(drawings)  # NOTE(review): page-wide total, not the count inside this cluster
+                            }
+                        )
+                        elements.append(element)
+                        logger.debug(f"Extracted chart {cluster_idx}: {bbox.width:.1f}x{bbox.height:.1f} -> {filepath}")
+
+                    pix = None  # Free memory
+
+                except Exception as e:
+                    logger.error(f"Error rendering vector graphic cluster {cluster_idx}: {e}")
+                    continue
+
+        except Exception as e:
+            logger.error(f"Error extracting vector graphics: {e}")
+
+        return elements
+
+    def _cluster_drawings_fallback(self, page: fitz.Page, drawings: list) -> list:
+        """
+        Greedily merge drawing rects into clusters when cluster_drawings() is unavailable.
+
+        One pass with a 20-unit margin; clusters under 50x50 are dropped. `page` is unused.
+        """
+        if not drawings:
+            return []
+
+        # Collect all drawing bounding boxes
+        bboxes = []
+        for drawing in drawings:
+            rect = drawing.get('rect')  # drawings without a truthy 'rect' are ignored
+            if rect:
+                bboxes.append(fitz.Rect(rect))
+
+        if not bboxes:
+            return []
+
+        # Simple clustering: each rect joins the first nearby cluster (clusters are never re-merged)
+        clusters = []
+        tolerance = 20  # margin added on every side of a cluster before the intersection test
+
+        for bbox in bboxes:
+            # Try to merge with existing cluster
+            merged = False
+            for i, cluster in enumerate(clusters):
+                # Check if bbox is close to this cluster
+                expanded_cluster = cluster + (-tolerance, -tolerance, tolerance, tolerance)
+                if expanded_cluster.intersects(bbox):
+                    # Merge bbox into cluster
+                    clusters[i] = cluster | bbox  # Union of rectangles
+                    merged = True
+                    break
+
+            if not merged:
+                # Create new cluster
+                clusters.append(bbox)
+
+        # Filter out very small clusters
+        filtered_clusters = [c for c in clusters if c.width >= 50 and c.height >= 50]
+
+        logger.debug(f"Fallback clustering: {len(bboxes)} drawings -> {len(clusters)} clusters -> {len(filtered_clusters)} filtered")
+
+        return filtered_clusters
+
+    def _deduplicate_table_chart_overlap(self, elements: List[DocumentElement]) -> List[DocumentElement]:
+        """
+        Resolve TABLE/CHART overlaps using the table's cell completeness.
+
+        completeness = len(cells) / (rows * cols), or 0.0 if content lacks those attributes.
+        - TABLE dropped: >=80% of its area lies in a CHART and completeness < 50%
+        - CHART dropped: >=80% of its area lies in a TABLE with completeness >= 50%
+        - Elements without a bbox, and all other element types, are always kept
+
+        Args:
+            elements: List of extracted elements
+        Returns:
+            The input list unchanged when it has no tables or no charts; otherwise a
+            new list ordered as kept tables, then kept charts, then all other elements
+        """
+        # Collect all tables and charts
+        tables = [elem for elem in elements if elem.type == ElementType.TABLE]
+        charts = [elem for elem in elements if elem.type == ElementType.CHART]
+
+        if not tables or not charts:
+            return elements  # No potential conflicts
+
+        # Analyze TABLE structure completeness
+        table_completeness = {}
+        for table in tables:
+            if hasattr(table.content, 'rows') and hasattr(table.content, 'cols') and hasattr(table.content, 'cells'):
+                expected_cells = table.content.rows * table.content.cols
+                actual_cells = len(table.content.cells)
+
+                if expected_cells > 0:
+                    completeness = actual_cells / expected_cells
+                    table_completeness[table.element_id] = completeness
+                else:
+                    table_completeness[table.element_id] = 0.0
+            else:
+                table_completeness[table.element_id] = 0.0
+
+        # Check overlaps and decide what to keep
+        filtered_elements = []
+        removed_charts = 0
+        removed_tables = 0
+
+        # Process TABLEs (NOTE(review): appended first, so the original cross-type order is not preserved)
+        for table in tables:
+            if not table.bbox:
+                filtered_elements.append(table)
+                continue
+
+            # Check if this TABLE overlaps with any CHART
+            overlaps_chart = False
+            for chart in charts:
+                if not chart.bbox:
+                    continue
+
+                # Calculate overlap
+                overlap_x0 = max(table.bbox.x0, chart.bbox.x0)
+                overlap_y0 = max(table.bbox.y0, chart.bbox.y0)
+                overlap_x1 = min(table.bbox.x1, chart.bbox.x1)
+                overlap_y1 = min(table.bbox.y1, chart.bbox.y1)
+
+                if overlap_x0 < overlap_x1 and overlap_y0 < overlap_y1:
+                    overlap_area = (overlap_x1 - overlap_x0) * (overlap_y1 - overlap_y0)
+                    table_area = (table.bbox.x1 - table.bbox.x0) * (table.bbox.y1 - table.bbox.y0)
+
+                    if table_area > 0:
+                        overlap_ratio = overlap_area / table_area  # fraction of the TABLE covered by this CHART
+
+                        if overlap_ratio >= 0.8:
+                            overlaps_chart = True
+                            completeness = table_completeness.get(table.element_id, 0.0)
+
+                            logger.debug(
+                                f"TABLE-CHART overlap: {table.element_id} vs {chart.element_id}: "
+                                f"{overlap_ratio*100:.1f}% overlap, TABLE cell completeness: {completeness*100:.1f}%"
+                            )
+
+                            # Decision: Keep TABLE only if structure is complete
+                            if completeness < 0.5:  # <50% cell completeness
+                                logger.info(
+                                    f"Removing incomplete TABLE {table.element_id} "
+                                    f"({completeness*100:.1f}% completeness, overlaps with CHART {chart.element_id})"
+                                )
+                                removed_tables += 1
+                                break
+                            else:
+                                logger.info(
+                                    f"Keeping TABLE {table.element_id} with {completeness*100:.1f}% completeness "
+                                    f"(will remove overlapping CHART {chart.element_id})"
+                                )
+
+            if not overlaps_chart or table_completeness.get(table.element_id, 0.0) >= 0.5:
+                filtered_elements.append(table)
+
+        # Process CHARTs
+        for chart in charts:
+            if not chart.bbox:
+                filtered_elements.append(chart)
+                continue
+
+            # Check if this CHART should be removed due to overlap with high-quality TABLE
+            should_remove = False
+            for table in tables:
+                if not table.bbox:
+                    continue
+
+                # Calculate overlap
+                overlap_x0 = max(chart.bbox.x0, table.bbox.x0)
+                overlap_y0 = max(chart.bbox.y0, table.bbox.y0)
+                overlap_x1 = min(chart.bbox.x1, table.bbox.x1)
+                overlap_y1 = min(chart.bbox.y1, table.bbox.y1)
+
+                if overlap_x0 < overlap_x1 and overlap_y0 < overlap_y1:
+                    overlap_area = (overlap_x1 - overlap_x0) * (overlap_y1 - overlap_y0)
+                    chart_area = (chart.bbox.x1 - chart.bbox.x0) * (chart.bbox.y1 - chart.bbox.y0)
+
+                    if chart_area > 0:
+                        overlap_ratio = overlap_area / chart_area  # fraction of the CHART covered by this TABLE
+
+                        if overlap_ratio >= 0.8:
+                            completeness = table_completeness.get(table.element_id, 0.0)
+
+                            # Remove CHART only if TABLE structure is complete
+                            if completeness >= 0.5:
+                                should_remove = True
+                                logger.info(
+                                    f"Removing CHART {chart.element_id} "
+                                    f"({overlap_ratio*100:.1f}% overlap with TABLE {table.element_id} having {completeness*100:.1f}% completeness)"
+                                )
+                                removed_charts += 1
+                                break
+
+            if not should_remove:
+                filtered_elements.append(chart)
+
+        # Process all other elements (kept unconditionally, in their original relative order)
+        for elem in elements:
+            if elem.type not in [ElementType.TABLE, ElementType.CHART]:
+                filtered_elements.append(elem)
+
+        if removed_charts > 0 or removed_tables > 0:
+            logger.info(
+                f"Deduplication complete: removed {removed_tables} incomplete TABLE(s), "
+                f"{removed_charts} overlapping CHART(s)"
+            )
+
+        return filtered_elements
\ No newline at end of file
diff --git a/backend/app/services/pdf_generator_service.py b/backend/app/services/pdf_generator_service.py
index 4556e50..bcffbb1 100644
--- a/backend/app/services/pdf_generator_service.py
+++ b/backend/app/services/pdf_generator_service.py
@@ -744,7 +744,15 @@ class PDFGeneratorService:
                         all_elements.append(('text', elem))
 
                 logger.debug(f"Drawing {len(all_elements)} elements in extraction order (preserves multi-column reading order)")
-                logger.debug(f"Exclusion regions: {len(regions_to_avoid)} tables/images")
+                logger.debug(f"Exclusion regions: {len(regions_to_avoid)} (tables/images/charts)")
+
+                # Debug: Log exclusion region types
+                region_types = {}
+                for region in regions_to_avoid:
+                    region_type = region.type.name
+                    region_types[region_type] = region_types.get(region_type, 0) + 1
+                if region_types:
+                    logger.debug(f"  Exclusion region breakdown: {region_types}")
 
                 # Draw elements in document order
                 for elem_type, elem in all_elements:
@@ -2133,7 +2141,8 @@ class PDFGeneratorService:
 
             # Transform coordinates
             pdf_x = bbox.x0
-            pdf_y = page_height - bbox.y1  # Bottom of table
+            # Use exact bbox position (no buffer) - scaling will ensure table fits
+            pdf_y = page_height - bbox.y1  # Bottom of table (ReportLab Y coordinate)
 
             table_width = bbox.x1 - bbox.x0
             table_height = bbox.y1 - bbox.y0
@@ -2148,20 +2157,53 @@ class PDFGeneratorService:
             from reportlab.platypus import Table, TableStyle
             from reportlab.lib import colors
 
-            t = Table(table_content, colWidths=[table_width / len(table_content[0])] * len(table_content[0]))
+            # Use original column widths from extraction if available
+            # Otherwise let ReportLab auto-calculate
+            col_widths = None
+            if element.metadata and 'column_widths' in element.metadata:
+                col_widths = element.metadata['column_widths']
+                logger.debug(f"Using extracted column widths: {col_widths}")
 
-            # Apply style
+            # Create table without rowHeights (will use canvas scaling instead)
+            t = Table(table_content, colWidths=col_widths)
+
+            # Apply style with minimal padding to reduce table extension
             style = TableStyle([
                 ('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
                 ('FONTSIZE', (0, 0), (-1, -1), 8),
                 ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
                 ('VALIGN', (0, 0), (-1, -1), 'TOP'),
+                # Set minimal padding to prevent table from extending beyond bbox
+                # User reported padding=1 was still insufficient
+                ('TOPPADDING', (0, 0), (-1, -1), 0),
+                ('BOTTOMPADDING', (0, 0), (-1, -1), 0),
+                ('LEFTPADDING', (0, 0), (-1, -1), 1),
+                ('RIGHTPADDING', (0, 0), (-1, -1), 1),
             ])
             t.setStyle(style)
 
-            # Draw table
-            t.wrapOn(pdf_canvas, table_width, table_height)
-            t.drawOn(pdf_canvas, pdf_x, pdf_y)
+            # CRITICAL: Use canvas scaling to fit table within bbox
+            # This is more reliable than rowHeights which doesn't always work
+
+            # Step 1: Wrap to get actual rendered size
+            actual_width, actual_height = t.wrapOn(pdf_canvas, table_width * 10, table_height * 10)
+            logger.info(f"Table natural size: {actual_width:.1f} × {actual_height:.1f}pt, bbox: {table_width:.1f} × {table_height:.1f}pt")
+
+            # Step 2: Calculate scale factor to fit within bbox
+            scale_x = table_width / actual_width if actual_width > table_width else 1.0
+            scale_y = table_height / actual_height if actual_height > table_height else 1.0
+            scale = min(scale_x, scale_y, 1.0)  # Never scale up, only down
+
+            logger.info(f"Scale factor: {scale:.3f} (x={scale_x:.3f}, y={scale_y:.3f})")
+
+            # Step 3: Draw with scaling using canvas transform
+            pdf_canvas.saveState()
+            pdf_canvas.translate(pdf_x, pdf_y)
+            pdf_canvas.scale(scale, scale)
+            t.drawOn(pdf_canvas, 0, 0)
+            pdf_canvas.restoreState()
+
+            logger.info(f"Drew table at ({pdf_x:.1f}, {pdf_y:.1f}) with scale {scale:.3f}, final size: {actual_width * scale:.1f} × {actual_height * scale:.1f}pt")
 
             logger.debug(f"Drew table element: {len(rows)} rows")
 
diff --git a/backend/check_tables.py b/backend/check_tables.py
deleted file mode 100644
index a1ea620..0000000
--- a/backend/check_tables.py
+++ /dev/null
@@ -1,32 +0,0 @@
-#!/usr/bin/env python3
-"""Check existing tables"""
-
-from sqlalchemy import create_engine, text
-from app.core.config import settings
-
-engine = create_engine(settings.database_url)
-
-with engine.connect() as conn:
-    # Get all tables
-    result = conn.execute(text("SHOW TABLES"))
-    tables = [row[0] for row in result.fetchall()]
-
-    print("Existing tables:")
-    for table in sorted(tables):
-        print(f"  - {table}")
-
-    # Check which V2 tables exist
-    v2_tables = ['tool_ocr_users', 'tool_ocr_sessions', 'tool_ocr_tasks',
-                 'tool_ocr_task_files', 'tool_ocr_audit_logs']
-    print("\nV2 Tables status:")
-    for table in v2_tables:
-        exists = table in tables
-        print(f"  {'✓' if exists else '✗'} {table}")
-
-    # Check which old tables exist
-    old_tables = ['paddle_ocr_users', 'paddle_ocr_batches', 'paddle_ocr_files',
-                  'paddle_ocr_results', 'paddle_ocr_export_rules', 'paddle_ocr_translation_configs']
-    print("\nOld Tables status:")
-    for table in old_tables:
-        exists = table in tables
-        print(f"  {'✓' if exists else '✗'} {table}")
diff --git a/backend/verify_chart_recognition.py b/backend/verify_chart_recognition.py
deleted file mode 100755
index 4e21fd2..0000000
--- a/backend/verify_chart_recognition.py
+++ /dev/null
@@ -1,61 +0,0 @@
-#!/usr/bin/env python3
-"""
-Verify if chart recognition can be enabled in the current PaddlePaddle version
-Run this in the conda environment: conda activate tool_ocr && python verify_chart_recognition.py
-"""
-
-import sys
-
-def check_paddle_api():
-    """Check if fused_rms_norm_ext API is available"""
-    try:
-        import paddle
-        print(f"✅ PaddlePaddle version: {paddle.__version__}")
-
-        # Check if the API exists
-        import paddle.incubate.nn.functional as F
-
-        has_base = hasattr(F, 'fused_rms_norm')
-        has_ext = hasattr(F, 'fused_rms_norm_ext')
-
-        print(f"\n📊 API Availability:")
-        print(f"   - fused_rms_norm:     {'✅ Available' if has_base else '❌ Not found'}")
-        print(f"   - fused_rms_norm_ext: {'✅ Available' if has_ext else '❌ Not found'}")
-
-        if has_ext:
-            print(f"\n🎉 Chart recognition CAN be enabled!")
-            print(f"\n📝 Action required:")
-            print(f"   1. Edit backend/app/services/ocr_service.py")
-            print(f"   2. Change line 217: use_chart_recognition=False → True")
-            print(f"   3. Restart the backend service")
-            print(f"\n⚠️  Note: This will enable deep chart analysis (may increase processing time)")
-            return True
-        else:
-            print(f"\n❌ Chart recognition CANNOT be enabled yet")
-            print(f"\n📝 Current PaddlePaddle version ({paddle.__version__}) does not support fused_rms_norm_ext")
-            print(f"\n💡 Options:")
-            print(f"   1. Upgrade PaddlePaddle: pip install --upgrade paddlepaddle>=3.2.0")
-            print(f"   2. Check for newer versions: pip search paddlepaddle")
-            print(f"   3. Wait for official PaddlePaddle update")
-            return False
-
-    except ImportError as e:
-        print(f"❌ PaddlePaddle not installed: {e}")
-        print(f"\n💡 Install PaddlePaddle:")
-        print(f"   pip install paddlepaddle>=3.2.0")
-        return False
-    except Exception as e:
-        print(f"❌ Error: {e}")
-        return False
-
-if __name__ == "__main__":
-    print("=" * 70)
-    print("Chart Recognition Availability Checker")
-    print("=" * 70)
-    print()
-
-    can_enable = check_paddle_api()
-
-    print()
-    print("=" * 70)
-    sys.exit(0 if can_enable else 1)
diff --git a/demo_docs/basic/chinese_simple.png b/demo_docs/basic/chinese_simple.png
deleted file mode 100644
index 287284d..0000000
Binary files a/demo_docs/basic/chinese_simple.png and /dev/null differ
diff --git a/demo_docs/basic/chinese_traditional.png b/demo_docs/basic/chinese_traditional.png
deleted file mode 100644
index d604cf1..0000000
Binary files a/demo_docs/basic/chinese_traditional.png and /dev/null differ
diff --git a/demo_docs/basic/english.png b/demo_docs/basic/english.png
deleted file mode 100644
index f273a66..0000000
Binary files a/demo_docs/basic/english.png and /dev/null differ
diff --git a/demo_docs/layout/document.png b/demo_docs/layout/document.png
deleted file mode 100644
index e934939..0000000
Binary files a/demo_docs/layout/document.png and /dev/null differ
diff --git a/demo_docs/mixed/4. (附件二)具體事蹟簡報格式(最佳創新獎).pdf b/demo_docs/mixed/4. (附件二)具體事蹟簡報格式(最佳創新獎).pdf
deleted file mode 100644
index 7ff1d4c..0000000
Binary files a/demo_docs/mixed/4. (附件二)具體事蹟簡報格式(最佳創新獎).pdf and /dev/null differ
diff --git a/demo_docs/mixed/Workflow使用分析.pdf b/demo_docs/mixed/Workflow使用分析.pdf
deleted file mode 100644
index ab6f6e7..0000000
Binary files a/demo_docs/mixed/Workflow使用分析.pdf and /dev/null differ
diff --git a/demo_docs/tables/simple_table.png b/demo_docs/tables/simple_table.png
deleted file mode 100644
index b619e8a..0000000
Binary files a/demo_docs/tables/simple_table.png and /dev/null differ
diff --git a/demo_docs/tables/截圖 2025-11-12 上午10.33.12.png b/demo_docs/tables/截圖 2025-11-12 上午10.33.12.png
deleted file mode 100644
index 2e7166a..0000000
Binary files a/demo_docs/tables/截圖 2025-11-12 上午10.33.12.png and /dev/null differ
diff --git a/demo_docs/tables/截圖 2025-11-12 上午10.34.33.png b/demo_docs/tables/截圖 2025-11-12 上午10.34.33.png
deleted file mode 100644
index 8e4d7f3..0000000
Binary files a/demo_docs/tables/截圖 2025-11-12 上午10.34.33.png and /dev/null differ