diff --git a/backend/app/services/pdf_generator_service.py b/backend/app/services/pdf_generator_service.py
index 8b5291b..61b2230 100644
--- a/backend/app/services/pdf_generator_service.py
+++ b/backend/app/services/pdf_generator_service.py
@@ -885,10 +885,8 @@ class PDFGeneratorService:
             True if successful, False otherwise
         """
         try:
-            # Check if PDF already exists (caching)
-            if output_path.exists():
-                logger.info(f"PDF already exists: {output_path.name}")
-                return True
+            # No caching here: an existing file at output_path is not reused, so the PDF is always regenerated
+            # If caching is needed, implement it at a higher level with proper cache invalidation
 
             # Get text regions
             text_regions = ocr_data.get('text_regions', [])
@@ -1223,6 +1221,21 @@ class PDFGeneratorService:
                         mediabox = page.mediabox
                         width_pt = float(mediabox.width)
                         height_pt = float(mediabox.height)
+
+                        # IMPORTANT: Consider page rotation!
+                        # PDF pages can have /Rotate attribute (0, 90, 180, 270)
+                        # When rotation is 90 or 270 degrees, width and height should be swapped
+                        # because pdf2image and PDF viewers apply this rotation when rendering
+                        rotation = page.get('/Rotate', 0)
+                        if rotation is None:
+                            rotation = 0
+                        rotation = int(rotation) % 360
+
+                        if rotation in (90, 270):
+                            # Swap width and height for 90/270 degree rotation
+                            width_pt, height_pt = height_pt, width_pt
+                            logger.info(f"Page {page_idx}: Rotation={rotation}°, swapped dimensions to {width_pt:.1f} x {height_pt:.1f}")
+
                         page_sizes[page_idx] = (width_pt, height_pt)
 
                     logger.info(f"Extracted dimensions from PDF: {total_pages} pages")
@@ -1256,9 +1269,23 @@ class PDFGeneratorService:
             return page_sizes[0]
         return None
 
-    def _get_bbox_coords(self, bbox: Union[List[List[float]], List[float]]) -> Optional[Tuple[float, float, float, float]]:
-        """將任何 bbox 格式 (多邊形或 [x1,y1,x2,y2]) 轉換為 [x_min, y_min, x_max, y_max]"""
+    def _get_bbox_coords(self, bbox: Union[Dict, List[List[float]], List[float]]) -> Optional[Tuple[float, float, float, float]]:
+        """將任何 bbox 格式 (dict, 多邊形或 [x1,y1,x2,y2]) 轉換為 [x_min, y_min, x_max, y_max]"""
         try:
+            if bbox is None:
+                return None
+
+            # Dict format from UnifiedDocument: {"x0": ..., "y0": ..., "x1": ..., "y1": ...}
+            if isinstance(bbox, dict):
+                if 'x0' in bbox and 'y0' in bbox and 'x1' in bbox and 'y1' in bbox:
+                    return float(bbox['x0']), float(bbox['y0']), float(bbox['x1']), float(bbox['y1'])
+                else:
+                    logger.warning(f"Dict bbox 缺少必要欄位: {bbox}")
+                    return None
+
+            if not isinstance(bbox, (list, tuple)) or len(bbox) < 4:
+                return None
+
             if isinstance(bbox[0], (list, tuple)):
                 # 處理多邊形 [[x, y], ...]
                 x_coords = [p[0] for p in bbox if isinstance(p, (list, tuple)) and len(p) >= 2]
@@ -1268,7 +1295,7 @@ class PDFGeneratorService:
                 return min(x_coords), min(y_coords), max(x_coords), max(y_coords)
             elif isinstance(bbox[0], (int, float)) and len(bbox) == 4:
                 # 處理 [x1, y1, x2, y2]
-                return bbox[0], bbox[1], bbox[2], bbox[3]
+                return float(bbox[0]), float(bbox[1]), float(bbox[2]), float(bbox[3])
             else:
                 logger.warning(f"未知的 bbox 格式: {bbox}")
                 return None
@@ -1337,14 +1364,56 @@ class PDFGeneratorService:
 
         return not no_overlap
 
-    def _filter_text_in_regions(self, text_regions: List[Dict], regions_to_avoid: List[Dict], tolerance: float = 10.0) -> List[Dict]:
+    def _calculate_overlap_ratio(self, text_bbox_data: Dict, avoid_bbox_data: Dict) -> float:
         """
-        過濾掉與 'regions_to_avoid'（例如表格、圖片）重疊的文字區域。
+        計算文字區域與避免區域的重疊比例。
+
+        Args:
+            text_bbox_data: 文字區域 bbox 數據
+            avoid_bbox_data: 避免區域 bbox 數據
+
+        Returns:
+            重疊面積佔文字區域面積的比例 (0.0 - 1.0)
+        """
+        text_coords = self._get_bbox_coords(text_bbox_data.get('bbox'))
+        avoid_coords = self._get_bbox_coords(avoid_bbox_data.get('bbox'))
+
+        if not text_coords or not avoid_coords:
+            return 0.0
+
+        tx0, ty0, tx1, ty1 = text_coords
+        ax0, ay0, ax1, ay1 = avoid_coords
+
+        # Calculate text area
+        text_area = (tx1 - tx0) * (ty1 - ty0)
+        if text_area <= 0:
+            return 0.0
+
+        # Calculate intersection
+        inter_x0 = max(tx0, ax0)
+        inter_y0 = max(ty0, ay0)
+        inter_x1 = min(tx1, ax1)
+        inter_y1 = min(ty1, ay1)
+
+        # Check if there's actual intersection
+        if inter_x1 <= inter_x0 or inter_y1 <= inter_y0:
+            return 0.0
+
+        inter_area = (inter_x1 - inter_x0) * (inter_y1 - inter_y0)
+        return inter_area / text_area
+
+    def _filter_text_in_regions(self, text_regions: List[Dict], regions_to_avoid: List[Dict], overlap_threshold: float = 0.5) -> List[Dict]:
+        """
+        過濾掉與 'regions_to_avoid'（例如表格、圖片）顯著重疊的文字區域。
+
+        使用重疊比例閾值來判斷是否過濾，避免過濾掉僅相鄰但不重疊的文字。
 
         Args:
             text_regions: 文字區域列表
             regions_to_avoid: 需要避免的區域列表（表格、圖片）
-            tolerance: 容錯值（像素），增加到 10.0 以更好地處理邊界情況
+            overlap_threshold: 重疊比例閾值 (0.0-1.0)，只有當文字區域
+                              與避免區域的重疊比例超過此閾值時才會被過濾
+                              預設 0.5 表示超過 50% 重疊才過濾
 
         Returns:
             過濾後的文字區域列表
@@ -1354,17 +1423,24 @@ class PDFGeneratorService:
 
         for text_region in text_regions:
             should_filter = False
+            max_overlap = 0.0
 
             for avoid_region in regions_to_avoid:
-                # 使用重疊檢測：只要有任何重疊就過濾掉
-                if self._bbox_overlaps(text_region, avoid_region, tolerance=tolerance):
+                # 計算重疊比例
+                overlap_ratio = self._calculate_overlap_ratio(text_region, avoid_region)
+                max_overlap = max(max_overlap, overlap_ratio)
+
+                # 只有當重疊比例超過閾值時才過濾
+                if overlap_ratio > overlap_threshold:
                     should_filter = True
                     filtered_count += 1
-                    logger.debug(f"過濾掉重疊文字: {text_region.get('text', '')[:20]}...")
-                    break  # 找到一個重疊區域就足夠了
+                    logger.debug(f"過濾掉重疊文字 (重疊比例: {overlap_ratio:.1%}): {text_region.get('text', '')[:30]}...")
+                    break
 
             if not should_filter:
                 filtered_text.append(text_region)
+                if max_overlap > 0:
+                    logger.debug(f"保留文字 (最大重疊比例: {max_overlap:.1%}): {text_region.get('text', '')[:30]}...")
 
         logger.info(f"原始文字區域: {len(text_regions)}, 過濾後: {len(filtered_text)}, 移除: {filtered_count}")
         return filtered_text
@@ -1391,17 +1467,42 @@ class PDFGeneratorService:
         bbox = region.get('bbox', [])
         confidence = region.get('confidence', 1.0)
 
-        if not text or not bbox or len(bbox) < 4:
+        if not text or not bbox:
             return
 
         try:
-            # bbox from OCR: [[x1,y1], [x2,y2], [x3,y3], [x4,y4]]
-            # Points: top-left, top-right, bottom-right, bottom-left
-            # OCR coordinates: origin (0,0) at top-left, Y increases downward
-            ocr_x_left = bbox[0][0]    # Left X
-            ocr_y_top = bbox[0][1]     # Top Y in OCR coordinates
-            ocr_x_right = bbox[2][0]   # Right X
-            ocr_y_bottom = bbox[2][1]  # Bottom Y in OCR coordinates
+            # Handle different bbox formats
+            if isinstance(bbox, dict):
+                # Dict format from UnifiedDocument: {"x0": ..., "y0": ..., "x1": ..., "y1": ...}
+                if 'x0' in bbox and 'y0' in bbox and 'x1' in bbox and 'y1' in bbox:
+                    ocr_x_left = float(bbox['x0'])
+                    ocr_y_top = float(bbox['y0'])
+                    ocr_x_right = float(bbox['x1'])
+                    ocr_y_bottom = float(bbox['y1'])
+                else:
+                    logger.warning(f"Dict bbox missing required keys: {bbox}")
+                    return
+            elif isinstance(bbox, list):
+                if len(bbox) < 4:
+                    return
+                # Polygon format [[x,y], [x,y], [x,y], [x,y]] (4 points)
+                if isinstance(bbox[0], list):
+                    ocr_x_left = bbox[0][0]    # Left X
+                    ocr_y_top = bbox[0][1]     # Top Y in OCR coordinates
+                    ocr_x_right = bbox[2][0]   # Right X
+                    ocr_y_bottom = bbox[2][1]  # Bottom Y in OCR coordinates
+                # Simple list format [x0, y0, x1, y1]
+                elif isinstance(bbox[0], (int, float)):
+                    ocr_x_left = bbox[0]
+                    ocr_y_top = bbox[1]
+                    ocr_x_right = bbox[2]
+                    ocr_y_bottom = bbox[3]
+                else:
+                    logger.warning(f"Unexpected bbox list format: {bbox}")
+                    return
+            else:
+                logger.warning(f"Invalid bbox format: {bbox}")
+                return
 
             logger.info(f"[文字] '{text[:20]}...' OCR原始座標: L={ocr_x_left:.0f}, T={ocr_y_top:.0f}, R={ocr_x_right:.0f}, B={ocr_y_bottom:.0f}")
 
@@ -1489,13 +1590,17 @@ class PDFGeneratorService:
             if settings.pdf_enable_bbox_debug:
                 pdf_canvas.setStrokeColorRGB(1, 0, 0, 0.3)  # Red, semi-transparent
                 pdf_canvas.setLineWidth(0.5)
-                # Transform all bbox points to PDF coordinates (apply scaling first)
-                pdf_points = [(p[0] * scale_w, page_height - p[1] * scale_h) for p in bbox]
+                # Use already-extracted coordinates (works for all bbox formats)
+                # Draw rectangle using the scaled coordinates
+                pdf_x1 = ocr_x_left * scale_w
+                pdf_y1 = page_height - ocr_y_top * scale_h
+                pdf_x2 = ocr_x_right * scale_w
+                pdf_y2 = page_height - ocr_y_bottom * scale_h
                 # Draw bbox rectangle
-                for i in range(4):
-                    x1, y1 = pdf_points[i]
-                    x2, y2 = pdf_points[(i + 1) % 4]
-                    pdf_canvas.line(x1, y1, x2, y2)
+                pdf_canvas.line(pdf_x1, pdf_y1, pdf_x2, pdf_y1)  # top
+                pdf_canvas.line(pdf_x2, pdf_y1, pdf_x2, pdf_y2)  # right
+                pdf_canvas.line(pdf_x2, pdf_y2, pdf_x1, pdf_y2)  # bottom
+                pdf_canvas.line(pdf_x1, pdf_y2, pdf_x1, pdf_y1)  # left
 
         except Exception as e:
             logger.warning(f"Failed to draw text region '{text[:20]}...': {e}")
@@ -1560,7 +1665,17 @@ class PDFGeneratorService:
                 return
 
             # Handle different bbox formats
-            if isinstance(table_bbox, list) and len(table_bbox) == 4:
+            if isinstance(table_bbox, dict):
+                # Dict format from UnifiedDocument: {"x0": ..., "y0": ..., "x1": ..., "y1": ...}
+                if 'x0' in table_bbox and 'y0' in table_bbox and 'x1' in table_bbox and 'y1' in table_bbox:
+                    ocr_x_left_raw = float(table_bbox['x0'])
+                    ocr_y_top_raw = float(table_bbox['y0'])
+                    ocr_x_right_raw = float(table_bbox['x1'])
+                    ocr_y_bottom_raw = float(table_bbox['y1'])
+                else:
+                    logger.error(f"Dict bbox missing required keys (x0, y0, x1, y1): {table_bbox}")
+                    return
+            elif isinstance(table_bbox, list) and len(table_bbox) == 4:
                 # Simple bbox format [x0, y0, x1, y1]
                 if isinstance(table_bbox[0], (int, float)):
                     ocr_x_left_raw = table_bbox[0]
@@ -1595,32 +1710,87 @@ class PDFGeneratorService:
             pdf_x = ocr_x_left
             pdf_y = page_height - ocr_y_bottom
 
-            # Build table data for ReportLab
-            # Convert parsed structure to simple 2D array
-            max_cols = max(len(row['cells']) for row in rows)
-
-            logger.info(f"[表格] {len(rows)}行x{max_cols}列 → PDF位置: ({pdf_x:.1f}, {pdf_y:.1f}), 寬x高: {table_width:.0f}x{table_height:.0f}")
-            reportlab_data = []
+            # Build table data for ReportLab with proper colspan/rowspan handling
+            # First pass: determine the actual grid size by accounting for spans
+            num_rows = len(rows)
 
+            # Calculate actual number of columns: the largest total colspan across all rows
+            max_cols = 0
             for row in rows:
-                row_data = []
+                row_cols = sum(cell.get('colspan', 1) for cell in row['cells'])
+                max_cols = max(max_cols, row_cols)
+
+            logger.info(f"[表格] {num_rows}行x{max_cols}列 → PDF位置: ({pdf_x:.1f}, {pdf_y:.1f}), 寬x高: {table_width:.0f}x{table_height:.0f}")
+
+            # Create a grid to track cells covered by a merged cell (colspan/rowspan handling)
+            # occupied[row][col] = True if the cell is covered by a span that starts above or to its left
+            occupied = [[False] * max_cols for _ in range(num_rows)]
+
+            # Build the 2D data array and collect span commands
+            reportlab_data = []
+            span_commands = []
+
+            for row_idx, row in enumerate(rows):
+                row_data = [''] * max_cols
+                col_idx = 0
+
                 for cell in row['cells']:
+                    # Skip occupied cells (from rowspan above)
+                    while col_idx < max_cols and occupied[row_idx][col_idx]:
+                        col_idx += 1
+
+                    if col_idx >= max_cols:
+                        break
+
                     text = cell['text'].strip()
-                    row_data.append(text)
-                # Pad row if needed
-                while len(row_data) < max_cols:
-                    row_data.append('')
+                    colspan = cell.get('colspan', 1)
+                    rowspan = cell.get('rowspan', 1)
+
+                    # Place text in the top-left cell of the span
+                    row_data[col_idx] = text
+
+                    # Mark every cell covered by this span (except its top-left anchor) as occupied
+                    for r in range(row_idx, min(row_idx + rowspan, num_rows)):
+                        for c in range(col_idx, min(col_idx + colspan, max_cols)):
+                            if r > row_idx or c > col_idx:
+                                occupied[r][c] = True
+
+                    # Add SPAN command if cell spans multiple rows/cols
+                    if colspan > 1 or rowspan > 1:
+                        span_end_col = min(col_idx + colspan - 1, max_cols - 1)
+                        span_end_row = min(row_idx + rowspan - 1, num_rows - 1)
+                        span_commands.append(('SPAN', (col_idx, row_idx), (span_end_col, span_end_row)))
+
+                    col_idx += colspan
+
                 reportlab_data.append(row_data)
 
             # Calculate column widths (equal distribution)
             col_widths = [table_width / max_cols] * max_cols
 
             # Create ReportLab Table
-            # Use smaller font size to fit in bbox
-            font_size = min(table_height / len(rows) * 0.5, 10)
-            font_size = max(font_size, 6)
+            # Use smaller font to fit content with auto-wrap
+            font_size = 8  # Fixed reasonable font size for table content
 
-            # Create table with font
+            # Create paragraph style for text wrapping in cells
+            cell_style = ParagraphStyle(
+                'CellStyle',
+                fontName=self.font_name if self.font_registered else 'Helvetica',
+                fontSize=font_size,
+                leading=font_size * 1.2,
+                alignment=TA_CENTER,
+                wordWrap='CJK',  # Better wrapping for Chinese text
+            )
+
+            # Convert text to Paragraph objects for auto-wrapping
+            for row_idx, row_data in enumerate(reportlab_data):
+                for col_idx, cell_text in enumerate(row_data):
+                    if cell_text:
+                        # Escape HTML special characters and create Paragraph
+                        escaped_text = cell_text.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
+                        reportlab_data[row_idx][col_idx] = Paragraph(escaped_text, cell_style)
+
+            # Create table WITHOUT fixed row heights - let it auto-size based on content
             table = Table(reportlab_data, colWidths=col_widths)
 
             # Apply table style
@@ -1640,13 +1810,36 @@ class PDFGeneratorService:
                 style.add('BACKGROUND', (0, 0), (-1, 0), colors.lightgrey)
                 style.add('FONT', (0, 0), (-1, 0), self.font_name if self.font_registered else 'Helvetica-Bold', font_size)
 
+            # Add span commands for merged cells
+            for span_cmd in span_commands:
+                style.add(*span_cmd)
+
             table.setStyle(style)
 
-            # Calculate table size
-            table.wrapOn(pdf_canvas, table_width, table_height)
+            logger.info(f"[表格] 套用 {len(span_commands)} 個合併儲存格 (SPAN)")
 
-            # Draw table at position
-            table.drawOn(pdf_canvas, pdf_x, pdf_y)
+            # Calculate actual table size after wrapping
+            actual_width, actual_height = table.wrapOn(pdf_canvas, table_width, table_height)
+
+            logger.info(f"[表格] 目標尺寸: {table_width:.0f}x{table_height:.0f}, 實際尺寸: {actual_width:.0f}x{actual_height:.0f}")
+
+            # Scale table to fit bbox if it exceeds the target size
+            scale_x = table_width / actual_width if actual_width > table_width else 1.0
+            scale_y = table_height / actual_height if actual_height > table_height else 1.0
+            scale_factor = min(scale_x, scale_y)  # Use smaller scale to fit both dimensions
+
+            if scale_factor < 1.0:
+                logger.info(f"[表格] 縮放比例: {scale_factor:.2f} (需要縮小以適應 bbox)")
+                # Apply scaling transformation
+                pdf_canvas.saveState()
+                pdf_canvas.translate(pdf_x, pdf_y)
+                pdf_canvas.scale(scale_factor, scale_factor)
+                # Draw at origin since we've already translated
+                table.drawOn(pdf_canvas, 0, 0)
+                pdf_canvas.restoreState()
+            else:
+                # Draw table at position without scaling
+                table.drawOn(pdf_canvas, pdf_x, pdf_y)
 
             logger.info(f"Drew table at ({pdf_x:.0f}, {pdf_y:.0f}) size {table_width:.0f}x{table_height:.0f} with {len(rows)} rows")
 
@@ -1696,17 +1889,43 @@ class PDFGeneratorService:
 
             # Get bbox for positioning
             bbox = region.get('bbox', [])
-            if not bbox or len(bbox) < 4:
-                # If no bbox, skip for now
+            if not bbox:
                 logger.warning(f"No bbox for image {image_path_str}")
                 return
 
-            # bbox from OCR: [[x1,y1], [x2,y2], [x3,y3], [x4,y4]]
-            # OCR coordinates: origin (0,0) at top-left, Y increases downward
-            ocr_x_left_raw = bbox[0][0]
-            ocr_y_top_raw = bbox[0][1]
-            ocr_x_right_raw = bbox[2][0]
-            ocr_y_bottom_raw = bbox[2][1]
+            # Handle different bbox formats
+            if isinstance(bbox, dict):
+                # Dict format from UnifiedDocument: {"x0": ..., "y0": ..., "x1": ..., "y1": ...}
+                if 'x0' in bbox and 'y0' in bbox and 'x1' in bbox and 'y1' in bbox:
+                    ocr_x_left_raw = float(bbox['x0'])
+                    ocr_y_top_raw = float(bbox['y0'])
+                    ocr_x_right_raw = float(bbox['x1'])
+                    ocr_y_bottom_raw = float(bbox['y1'])
+                else:
+                    logger.warning(f"Dict bbox missing required keys for image: {bbox}")
+                    return
+            elif isinstance(bbox, list):
+                if len(bbox) < 4:
+                    logger.warning(f"List bbox too short for image: {bbox}")
+                    return
+                # Polygon format [[x,y], [x,y], [x,y], [x,y]]
+                if isinstance(bbox[0], list):
+                    ocr_x_left_raw = bbox[0][0]
+                    ocr_y_top_raw = bbox[0][1]
+                    ocr_x_right_raw = bbox[2][0]
+                    ocr_y_bottom_raw = bbox[2][1]
+                # Simple list format [x0, y0, x1, y1]
+                elif isinstance(bbox[0], (int, float)):
+                    ocr_x_left_raw = bbox[0]
+                    ocr_y_top_raw = bbox[1]
+                    ocr_x_right_raw = bbox[2]
+                    ocr_y_bottom_raw = bbox[3]
+                else:
+                    logger.warning(f"Unexpected bbox list format for image: {bbox}")
+                    return
+            else:
+                logger.warning(f"Invalid bbox format for image: {bbox}")
+                return
 
             logger.info(f"[圖片] '{image_path_str}' OCR原始座標: L={ocr_x_left_raw:.0f}, T={ocr_y_top_raw:.0f}, R={ocr_x_right_raw:.0f}, B={ocr_y_bottom_raw:.0f}")