From 6252be6c6f9617d654876c09c9029f426bd7c4c8 Mon Sep 17 00:00:00 2001 From: egg Date: Sun, 30 Nov 2025 13:42:48 +0800 Subject: [PATCH] fix: correct scale factor calculation for rotated documents MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When rotation is detected, the OCR coordinate system needs to be swapped: - Original OCR dimensions: 1242 x 1755 (portrait image) - Content coordinates: up to x=1593 (exceeds image width, indicates rotation) - Rotated OCR dimensions: 1755 x 1242 (matching content coordinate system) Previously, page_dimensions was incorrectly set to target PDF dimensions, causing scale factors to be ~1.0 instead of ~0.48. Now correctly: - original_page_sizes[0] = target PDF dimensions (842.4 x 595.68) - page_dimensions[0] = swapped OCR dimensions (1755 x 1242) - Scale = 842.4/1755 ≈ 0.48 for both x and y 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- backend/app/services/pdf_generator_service.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/backend/app/services/pdf_generator_service.py b/backend/app/services/pdf_generator_service.py index 611114e..9e39643 100644 --- a/backend/app/services/pdf_generator_service.py +++ b/backend/app/services/pdf_generator_service.py @@ -1089,15 +1089,23 @@ class PDFGeneratorService: logger.info(f"頁面尺寸調整: {target_width:.1f}x{target_height:.1f} -> " f"{adjusted_width:.1f}x{adjusted_height:.1f} (旋轉={needs_rotation})") target_width, target_height = adjusted_width, adjusted_height - # Update BOTH page_dimensions AND original_page_sizes for consistency - # This ensures per-page processing uses the adjusted dimensions - if 0 in page_dimensions: - page_dimensions[0] = {'width': target_width, 'height': target_height} + + # Update original_page_sizes with the new TARGET dimensions if 0 in original_page_sizes: - # Override original file dimensions with content-based dimensions original_page_sizes[0] = (target_width, target_height) logger.info(f"覆蓋原始文件尺寸以適應內容方向") + # CRITICAL: Update page_dimensions with SWAPPED OCR dimensions + # This is the coordinate system that the content bboxes are in + # When content is rotated, width and height are effectively swapped + if needs_rotation and 0 in page_dimensions: + # Swap the OCR dimensions to match the rotated content coordinate system + original_ocr_w = page_dimensions[0]['width'] + original_ocr_h = page_dimensions[0]['height'] + page_dimensions[0] = {'width': original_ocr_h, 'height': original_ocr_w} + logger.info(f"旋轉 OCR 座標系: {original_ocr_w:.1f}x{original_ocr_h:.1f} -> " + f"{original_ocr_h:.1f}x{original_ocr_w:.1f}") + # Create PDF canvas with initial page size (will be updated per page) pdf_canvas = canvas.Canvas(str(output_path), pagesize=(target_width, target_height))