From 6252be6c6f9617d654876c09c9029f426bd7c4c8 Mon Sep 17 00:00:00 2001
From: egg <lin4637lin4637@gmail.com>
Date: Sun, 30 Nov 2025 13:42:48 +0800
Subject: [PATCH] fix: correct scale factor calculation for rotated documents
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When rotation is detected, the OCR page dimensions (width and height)
must be swapped so they match the coordinate system of the content:
- Original OCR dimensions: 1242 x 1755 (portrait image)
- Content coordinates: up to x=1593 (exceeds image width, indicates rotation)
- Rotated OCR dimensions: 1755 x 1242 (matching content coordinate system)

Previously, page_dimensions[0] was overwritten with the target PDF
dimensions, so the scale factor (target size / OCR size) came out as
~1.0 instead of ~0.48.

Now correctly:
- original_page_sizes[0] = target PDF dimensions (842.4 x 595.68)
- page_dimensions[0] = swapped OCR dimensions (1755 x 1242)
- Scale x = 842.4/1755 ≈ 0.48, scale y = 595.68/1242 ≈ 0.48

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
---
 backend/app/services/pdf_generator_service.py | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/backend/app/services/pdf_generator_service.py b/backend/app/services/pdf_generator_service.py
index 611114e..9e39643 100644
--- a/backend/app/services/pdf_generator_service.py
+++ b/backend/app/services/pdf_generator_service.py
@@ -1089,15 +1089,23 @@ class PDFGeneratorService:
                 logger.info(f"頁面尺寸調整: {target_width:.1f}x{target_height:.1f} -> "
                            f"{adjusted_width:.1f}x{adjusted_height:.1f} (旋轉={needs_rotation})")
                 target_width, target_height = adjusted_width, adjusted_height
-                # Update BOTH page_dimensions AND original_page_sizes for consistency
-                # This ensures per-page processing uses the adjusted dimensions
-                if 0 in page_dimensions:
-                    page_dimensions[0] = {'width': target_width, 'height': target_height}
+
+                # Update original_page_sizes with the new TARGET dimensions
                 if 0 in original_page_sizes:
-                    # Override original file dimensions with content-based dimensions
                     original_page_sizes[0] = (target_width, target_height)
                     logger.info(f"覆蓋原始文件尺寸以適應內容方向")
 
+                # CRITICAL: Update page_dimensions with SWAPPED OCR dimensions
+                # This is the coordinate system that the content bboxes are in
+                # When content is rotated, width and height are effectively swapped
+                if needs_rotation and 0 in page_dimensions:
+                    # Swap the OCR dimensions to match the rotated content coordinate system
+                    original_ocr_w = page_dimensions[0]['width']
+                    original_ocr_h = page_dimensions[0]['height']
+                    page_dimensions[0] = {'width': original_ocr_h, 'height': original_ocr_w}
+                    logger.info(f"旋轉 OCR 座標系: {original_ocr_w:.1f}x{original_ocr_h:.1f} -> "
+                               f"{original_ocr_h:.1f}x{original_ocr_w:.1f}")
+
             # Create PDF canvas with initial page size (will be updated per page)
             pdf_canvas = canvas.Canvas(str(output_path), pagesize=(target_width, target_height))