diff --git a/backend/app/services/pdf_generator_service.py b/backend/app/services/pdf_generator_service.py index 8fe1137..611114e 100644 --- a/backend/app/services/pdf_generator_service.py +++ b/backend/app/services/pdf_generator_service.py @@ -1064,17 +1064,39 @@ class PDFGeneratorService: # Step 4: Detect content orientation mismatch # This handles rotated scans where content bbox exceeds page dimensions - needs_rotation, adjusted_width, adjusted_height = self._detect_content_orientation( - target_width, target_height, ocr_data + # IMPORTANT: Use OCR dimensions (pixels) for detection, not PDF points + # because content bboxes are in the same coordinate system as OCR dimensions + needs_rotation, adjusted_ocr_width, adjusted_ocr_height = self._detect_content_orientation( + ocr_width, ocr_height, ocr_data ) + # If orientation change detected, calculate the adjusted target dimensions + if needs_rotation: + # Swap target dimensions to match the detected orientation + adjusted_width = target_height + adjusted_height = target_width + elif adjusted_ocr_width != ocr_width or adjusted_ocr_height != ocr_height: + # Content extends beyond OCR dimensions, scale target proportionally + scale_w = adjusted_ocr_width / ocr_width if ocr_width > 0 else 1.0 + scale_h = adjusted_ocr_height / ocr_height if ocr_height > 0 else 1.0 + adjusted_width = target_width * scale_w + adjusted_height = target_height * scale_h + else: + adjusted_width = target_width + adjusted_height = target_height + if needs_rotation or (adjusted_width != target_width or adjusted_height != target_height): logger.info(f"頁面尺寸調整: {target_width:.1f}x{target_height:.1f} -> " f"{adjusted_width:.1f}x{adjusted_height:.1f} (旋轉={needs_rotation})") target_width, target_height = adjusted_width, adjusted_height - # Also update page_dimensions for consistency in per-page processing + # Update BOTH page_dimensions AND original_page_sizes for consistency + # This ensures per-page processing uses the adjusted dimensions if 0 in page_dimensions: page_dimensions[0] = {'width': target_width, 'height': target_height} + if 0 in original_page_sizes: + # Override original file dimensions with content-based dimensions + original_page_sizes[0] = (target_width, target_height) + logger.info(f"覆蓋原始文件尺寸以適應內容方向") # Create PDF canvas with initial page size (will be updated per page) pdf_canvas = canvas.Canvas(str(output_path), pagesize=(target_width, target_height))