feat: enable document orientation detection for scanned PDFs
- Enable PP-StructureV3's use_doc_orientation_classify feature
- Detect rotation angle from doc_preprocessor_res.angle
- Swap page dimensions (width <-> height) for 90°/270° rotations
- Output PDF now correctly displays landscape-scanned content

Also includes:
- Archive completed openspec proposals
- Add simplify-frontend-ocr-config proposal (pending)
- Code cleanup and frontend simplification

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
5
backend/app/utils/__init__.py
Normal file
5
backend/app/utils/__init__.py
Normal file
@@ -0,0 +1,5 @@
|
||||
"""Utility modules for the OCR application."""
|
||||
|
||||
from .bbox_utils import normalize_bbox, get_bbox_center, calculate_ioa
|
||||
|
||||
# Names exported on star-import of this package; only the helpers imported
# from bbox_utils above are re-exported here.
__all__ = ['normalize_bbox', 'get_bbox_center', 'calculate_ioa']
|
||||
265
backend/app/utils/bbox_utils.py
Normal file
265
backend/app/utils/bbox_utils.py
Normal file
@@ -0,0 +1,265 @@
|
||||
"""
|
||||
Unified bounding box utilities for consistent bbox handling across services.
|
||||
|
||||
Supports multiple bbox formats:
|
||||
- Nested polygon: [[x1,y1], [x2,y2], [x3,y3], [x4,y4]]
|
||||
- Flat rectangle: [x0, y0, x1, y1]
|
||||
- Flat polygon: [x1, y1, x2, y2, x3, y3, x4, y4]
|
||||
- Dict format: {"x0": ..., "y0": ..., "x1": ..., "y1": ...}
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import Any, Dict, List, Optional, Tuple, Union
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Canonical bbox representation used as the return type of normalize_bbox.
BboxCoords = Tuple[float, float, float, float]  # (x0, y0, x1, y1)
|
||||
|
||||
|
||||
def normalize_bbox(
    bbox: Union[Dict, List, Tuple, None]
) -> "Optional[BboxCoords]":
    """
    Normalize any supported bbox format to an (x0, y0, x1, y1) tuple of floats.

    Handles:
    - Nested polygon: [[x1,y1], [x2,y2], [x3,y3], [x4,y4]]
    - Flat rectangle: [x0, y0, x1, y1]
    - Flat polygon: [x1, y1, x2, y2, x3, y3, x4, y4]
    - Dict format: {"x0": ..., "y0": ..., "x1": ..., "y1": ...}
      (or the alternative "x_min"/"y_min"/"x_max"/"y_max" keys)

    Polygons are reduced to their axis-aligned bounding rectangle. Flat
    rectangles and dicts are passed through as given (no reordering of
    x0/x1 or y0/y1).

    Args:
        bbox: Bounding box in any supported format

    Returns:
        Normalized (x0, y0, x1, y1) tuple, or None if the input is None,
        empty, in an unknown format, or contains values that cannot be
        converted to float. Unknown formats and conversion failures are
        logged as warnings; this function does not raise for them.
    """
    if bbox is None:
        return None

    try:
        # Dict format: {"x0": ..., "y0": ..., "x1": ..., "y1": ...}
        if isinstance(bbox, dict):
            if all(key in bbox for key in ('x0', 'y0', 'x1', 'y1')):
                return (
                    float(bbox['x0']),
                    float(bbox['y0']),
                    float(bbox['x1']),
                    float(bbox['y1'])
                )
            # Alternative dict keys. A coordinate missing under its *_min /
            # *_max name falls back to the x0/y0/x1/y1 name and finally to 0.
            if 'x_min' in bbox or 'y_min' in bbox:
                return (
                    float(bbox.get('x_min', bbox.get('x0', 0))),
                    float(bbox.get('y_min', bbox.get('y0', 0))),
                    float(bbox.get('x_max', bbox.get('x1', 0))),
                    float(bbox.get('y_max', bbox.get('y1', 0)))
                )
            logger.warning(f"Dict bbox missing required fields: {bbox}")
            return None

        # List/tuple formats
        if isinstance(bbox, (list, tuple)):
            if len(bbox) == 0:
                return None

            # Nested polygon format: [[x1,y1], [x2,y2], ...]
            if isinstance(bbox[0], (list, tuple)):
                # Points with fewer than two components are skipped.
                points = [pt for pt in bbox if len(pt) >= 2]
                if not points:
                    return None
                # Convert BEFORE min/max so numeric strings are compared by
                # value ("10" < "9" lexicographically) and mixed int/str
                # input does not fail inside min()/max().
                xs = [float(pt[0]) for pt in points]
                ys = [float(pt[1]) for pt in points]
                return (min(xs), min(ys), max(xs), max(ys))

            # Flat rectangle: [x0, y0, x1, y1]
            if len(bbox) == 4:
                x0, y0, x1, y1 = bbox
                return (float(x0), float(y0), float(x1), float(y1))

            # Flat polygon: [x1, y1, x2, y2, x3, y3, x4, y4, ...]
            if len(bbox) >= 8 and len(bbox) % 2 == 0:
                # Same float-first conversion as the nested polygon branch.
                xs = [float(value) for value in bbox[0::2]]
                ys = [float(value) for value in bbox[1::2]]
                return (min(xs), min(ys), max(xs), max(ys))

        logger.warning(f"Unknown bbox format: {type(bbox).__name__}, value: {bbox}")
        return None

    except (TypeError, ValueError, IndexError) as e:
        logger.warning(f"Failed to normalize bbox {bbox}: {e}")
        return None
|
||||
|
||||
|
||||
def get_bbox_center(bbox: Union[Dict, List, Tuple, BboxCoords, None]) -> Optional[Tuple[float, float]]:
    """
    Get the center point of a bounding box.

    Args:
        bbox: Bounding box in any supported format or already normalized

    Returns:
        (center_x, center_y) tuple or None if invalid
    """
    # Always normalize: a tuple of length 4 is not necessarily an
    # (x0, y0, x1, y1) rectangle -- it can also be a 4-point nested polygon
    # such as ((x, y), (x, y), (x, y), (x, y)), which must not be unpacked
    # as four scalars. normalize_bbox handles both shapes.
    coords = normalize_bbox(bbox)
    if coords is None:
        return None

    x0, y0, x1, y1 = coords
    return ((x0 + x1) / 2, (y0 + y1) / 2)
|
||||
|
||||
|
||||
def get_bbox_area(bbox: Union[Dict, List, Tuple, BboxCoords, None]) -> float:
    """
    Calculate the area of a bounding box.

    Args:
        bbox: Bounding box in any supported format

    Returns:
        Area in square pixels, 0 if invalid. Boxes with x1 < x0 or y1 < y0
        have their negative extent clamped to 0, so the result is never
        negative.
    """
    # Always normalize: a tuple of length 4 may be a 4-point nested polygon
    # rather than an (x0, y0, x1, y1) rectangle, so it cannot be unpacked
    # directly. normalize_bbox handles both shapes and yields floats.
    coords = normalize_bbox(bbox)
    if coords is None:
        return 0.0

    x0, y0, x1, y1 = coords
    return max(0.0, x1 - x0) * max(0.0, y1 - y0)
|
||||
|
||||
|
||||
def calculate_ioa(
    inner_bbox: Union[Dict, List, Tuple, BboxCoords, None],
    outer_bbox: Union[Dict, List, Tuple, BboxCoords, None]
) -> float:
    """
    Calculate Intersection over Area (IoA) of inner bbox with respect to outer bbox.

    IoA = intersection_area / inner_area

    Args:
        inner_bbox: The bbox whose area is the denominator
        outer_bbox: The reference bbox

    Returns:
        IoA ratio (0.0 to 1.0); 0.0 if either bbox is invalid, the boxes do
        not overlap, or the inner bbox has no positive area
    """
    # Always normalize: a tuple of length 4 may be a 4-point nested polygon
    # rather than an (x0, y0, x1, y1) rectangle, so it cannot be unpacked
    # directly. normalize_bbox handles both shapes.
    inner_coords = normalize_bbox(inner_bbox)
    outer_coords = normalize_bbox(outer_bbox)

    if inner_coords is None or outer_coords is None:
        return 0.0

    inner_x0, inner_y0, inner_x1, inner_y1 = inner_coords
    outer_x0, outer_y0, outer_x1, outer_y1 = outer_coords

    # Calculate intersection
    inter_x0 = max(inner_x0, outer_x0)
    inter_y0 = max(inner_y0, outer_y0)
    inter_x1 = min(inner_x1, outer_x1)
    inter_y1 = min(inner_y1, outer_y1)

    # Touching edges or disjoint boxes give no overlap.
    if inter_x1 <= inter_x0 or inter_y1 <= inter_y0:
        return 0.0

    intersection_area = (inter_x1 - inter_x0) * (inter_y1 - inter_y0)
    inner_area = (inner_x1 - inner_x0) * (inner_y1 - inner_y0)

    # Guard against division by zero for degenerate inner boxes.
    if inner_area <= 0:
        return 0.0

    return intersection_area / inner_area
|
||||
|
||||
|
||||
def calculate_iou(
    bbox1: Union[Dict, List, Tuple, BboxCoords, None],
    bbox2: Union[Dict, List, Tuple, BboxCoords, None]
) -> float:
    """
    Calculate Intersection over Union (IoU) of two bounding boxes.

    Args:
        bbox1: First bounding box
        bbox2: Second bounding box

    Returns:
        IoU ratio (0.0 to 1.0); 0.0 if either bbox is invalid or the boxes
        do not overlap
    """
    # Always normalize: a tuple of length 4 may be a 4-point nested polygon
    # rather than an (x0, y0, x1, y1) rectangle, so it cannot be unpacked
    # directly. normalize_bbox handles both shapes.
    coords1 = normalize_bbox(bbox1)
    coords2 = normalize_bbox(bbox2)

    if coords1 is None or coords2 is None:
        return 0.0

    x0_1, y0_1, x1_1, y1_1 = coords1
    x0_2, y0_2, x1_2, y1_2 = coords2

    # Calculate intersection
    inter_x0 = max(x0_1, x0_2)
    inter_y0 = max(y0_1, y0_2)
    inter_x1 = min(x1_1, x1_2)
    inter_y1 = min(y1_1, y1_2)

    # Touching edges or disjoint boxes give no overlap.
    if inter_x1 <= inter_x0 or inter_y1 <= inter_y0:
        return 0.0

    intersection_area = (inter_x1 - inter_x0) * (inter_y1 - inter_y0)
    area1 = (x1_1 - x0_1) * (y1_1 - y0_1)
    area2 = (x1_2 - x0_2) * (y1_2 - y0_2)
    union_area = area1 + area2 - intersection_area

    # Guard against division by zero.
    if union_area <= 0:
        return 0.0

    return intersection_area / union_area
|
||||
|
||||
|
||||
def is_bbox_inside(
    inner_bbox: Union[Dict, List, Tuple, BboxCoords, None],
    outer_bbox: Union[Dict, List, Tuple, BboxCoords, None],
    tolerance: float = 0.0
) -> bool:
    """
    Check if inner_bbox is completely inside outer_bbox (with optional tolerance).

    Args:
        inner_bbox: The bbox to check
        outer_bbox: The containing bbox
        tolerance: Allowed overflow in pixels on each side

    Returns:
        True if inner is inside outer (within tolerance); False otherwise,
        including when either bbox is invalid
    """
    # Always normalize: a tuple of length 4 may be a 4-point nested polygon
    # rather than an (x0, y0, x1, y1) rectangle, so it cannot be unpacked
    # directly. normalize_bbox handles both shapes.
    inner_coords = normalize_bbox(inner_bbox)
    outer_coords = normalize_bbox(outer_bbox)

    if inner_coords is None or outer_coords is None:
        return False

    inner_x0, inner_y0, inner_x1, inner_y1 = inner_coords
    outer_x0, outer_y0, outer_x1, outer_y1 = outer_coords

    return (
        inner_x0 >= outer_x0 - tolerance and
        inner_y0 >= outer_y0 - tolerance and
        inner_x1 <= outer_x1 + tolerance and
        inner_y1 <= outer_y1 + tolerance
    )
|
||||
Reference in New Issue
Block a user