fix: correct orientation detection to use OCR pixel coordinates
Fixed two issues in PDF orientation detection:

1. Unit mismatch: orientation detection was comparing content bboxes (in pixels) against PDF page dimensions (in points). It now uses the OCR dimensions (pixels), which are in the same coordinate system as the content bboxes.
2. Priority override: when an orientation adjustment is needed, the original_page_sizes dict is now updated as well, so that per-page processing uses the adjusted dimensions instead of the original PDF dimensions.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -1064,17 +1064,39 @@ class PDFGeneratorService:
|
|||||||
|
|
||||||
# Step 4: Detect content orientation mismatch
|
# Step 4: Detect content orientation mismatch
|
||||||
# This handles rotated scans where content bbox exceeds page dimensions
|
# This handles rotated scans where content bbox exceeds page dimensions
|
||||||
needs_rotation, adjusted_width, adjusted_height = self._detect_content_orientation(
|
# IMPORTANT: Use OCR dimensions (pixels) for detection, not PDF points
|
||||||
target_width, target_height, ocr_data
|
# because content bboxes are in the same coordinate system as OCR dimensions
|
||||||
|
needs_rotation, adjusted_ocr_width, adjusted_ocr_height = self._detect_content_orientation(
|
||||||
|
ocr_width, ocr_height, ocr_data
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# If orientation change detected, calculate the adjusted target dimensions
|
||||||
|
if needs_rotation:
|
||||||
|
# Swap target dimensions to match the detected orientation
|
||||||
|
adjusted_width = target_height
|
||||||
|
adjusted_height = target_width
|
||||||
|
elif adjusted_ocr_width != ocr_width or adjusted_ocr_height != ocr_height:
|
||||||
|
# Content extends beyond OCR dimensions, scale target proportionally
|
||||||
|
scale_w = adjusted_ocr_width / ocr_width if ocr_width > 0 else 1.0
|
||||||
|
scale_h = adjusted_ocr_height / ocr_height if ocr_height > 0 else 1.0
|
||||||
|
adjusted_width = target_width * scale_w
|
||||||
|
adjusted_height = target_height * scale_h
|
||||||
|
else:
|
||||||
|
adjusted_width = target_width
|
||||||
|
adjusted_height = target_height
|
||||||
|
|
||||||
if needs_rotation or (adjusted_width != target_width or adjusted_height != target_height):
|
if needs_rotation or (adjusted_width != target_width or adjusted_height != target_height):
|
||||||
logger.info(f"頁面尺寸調整: {target_width:.1f}x{target_height:.1f} -> "
|
logger.info(f"頁面尺寸調整: {target_width:.1f}x{target_height:.1f} -> "
|
||||||
f"{adjusted_width:.1f}x{adjusted_height:.1f} (旋轉={needs_rotation})")
|
f"{adjusted_width:.1f}x{adjusted_height:.1f} (旋轉={needs_rotation})")
|
||||||
target_width, target_height = adjusted_width, adjusted_height
|
target_width, target_height = adjusted_width, adjusted_height
|
||||||
# Also update page_dimensions for consistency in per-page processing
|
# Update BOTH page_dimensions AND original_page_sizes for consistency
|
||||||
|
# This ensures per-page processing uses the adjusted dimensions
|
||||||
if 0 in page_dimensions:
|
if 0 in page_dimensions:
|
||||||
page_dimensions[0] = {'width': target_width, 'height': target_height}
|
page_dimensions[0] = {'width': target_width, 'height': target_height}
|
||||||
|
if 0 in original_page_sizes:
|
||||||
|
# Override original file dimensions with content-based dimensions
|
||||||
|
original_page_sizes[0] = (target_width, target_height)
|
||||||
|
logger.info(f"覆蓋原始文件尺寸以適應內容方向")
|
||||||
|
|
||||||
# Create PDF canvas with initial page size (will be updated per page)
|
# Create PDF canvas with initial page size (will be updated per page)
|
||||||
pdf_canvas = canvas.Canvas(str(output_path), pagesize=(target_width, target_height))
|
pdf_canvas = canvas.Canvas(str(output_path), pagesize=(target_width, target_height))
|
||||||
|
|||||||
Reference in New Issue
Block a user