feat: enable document orientation detection for scanned PDFs

- Enable PP-StructureV3's use_doc_orientation_classify feature
- Detect rotation angle from doc_preprocessor_res.angle
- Swap page dimensions (width <-> height) for 90°/270° rotations
- Output PDF now correctly displays landscape-scanned content

Also includes:
- Archive completed openspec proposals
- Add simplify-frontend-ocr-config proposal (pending)
- Code cleanup and frontend simplification

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
egg
2025-12-11 17:13:46 +08:00
parent 57070af307
commit cfe65158a3
58 changed files with 1271 additions and 3048 deletions

View File

@@ -0,0 +1,5 @@
"""Utility modules for the OCR application."""
# Re-export selected bbox helpers so callers can import them from the package itself.
from .bbox_utils import normalize_bbox, get_bbox_center, calculate_ioa
# Only these three names are exposed by `from <package> import *`.
__all__ = ['normalize_bbox', 'get_bbox_center', 'calculate_ioa']

View File

@@ -0,0 +1,265 @@
"""
Unified bounding box utilities for consistent bbox handling across services.
Supports multiple bbox formats:
- Nested polygon: [[x1,y1], [x2,y2], [x3,y3], [x4,y4]]
- Flat rectangle: [x0, y0, x1, y1]
- Flat polygon: [x1, y1, x2, y2, x3, y3, x4, y4]
- Dict format: {"x0": ..., "y0": ..., "x1": ..., "y1": ...}
"""
import logging
from typing import Any, Dict, List, Optional, Tuple, Union
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
# Type alias for a normalized bounding box: four floats in corner order.
BboxCoords = Tuple[float, float, float, float] # (x0, y0, x1, y1)
def _bbox_extent(xs, ys):
    """Return the (min_x, min_y, max_x, max_y) extent of the coordinates, or None if empty."""
    # Convert BEFORE comparing: min()/max() on numeric strings would order
    # them lexicographically ("10" < "9"), and mixed str/number input would
    # raise TypeError even though every value is individually convertible.
    x_vals = [float(x) for x in xs]
    y_vals = [float(y) for y in ys]
    if not x_vals or not y_vals:
        return None
    return (min(x_vals), min(y_vals), max(x_vals), max(y_vals))


def normalize_bbox(
    bbox: Union[Dict, List, Tuple, None]
) -> Optional[BboxCoords]:
    """
    Normalize any supported bbox format to an (x0, y0, x1, y1) tuple of floats.

    Handles:
    - Nested polygon: [[x1,y1], [x2,y2], [x3,y3], [x4,y4]]
    - Flat rectangle: [x0, y0, x1, y1]
    - Flat polygon: [x1, y1, x2, y2, x3, y3, x4, y4]
    - Dict format: {"x0": ..., "y0": ..., "x1": ..., "y1": ...}
      (also "x_min"/"y_min"/"x_max"/"y_max")

    Polygons are reduced to their axis-aligned extent. Flat rectangles and
    dicts are converted to float but returned in the order given; the
    corners are not re-sorted.

    Args:
        bbox: Bounding box in any supported format

    Returns:
        Normalized (x0, y0, x1, y1) tuple, or None if the input is None,
        empty, of an unknown shape, or contains values that cannot be
        converted to float. Failures other than None/empty input are logged
        as warnings; this function does not raise for malformed input.
    """
    if bbox is None:
        return None
    try:
        # Dict format: {"x0": ..., "y0": ..., "x1": ..., "y1": ...}
        if isinstance(bbox, dict):
            if all(key in bbox for key in ('x0', 'y0', 'x1', 'y1')):
                return (
                    float(bbox['x0']),
                    float(bbox['y0']),
                    float(bbox['x1']),
                    float(bbox['y1']),
                )
            # Alternative dict keys. Each coordinate falls back to its
            # x0/y0/x1/y1 spelling and finally to 0.
            # NOTE(review): a dict carrying only some of these keys yields a
            # zero-filled box rather than None -- confirm this is intended.
            if 'x_min' in bbox or 'y_min' in bbox:
                return (
                    float(bbox.get('x_min', bbox.get('x0', 0))),
                    float(bbox.get('y_min', bbox.get('y0', 0))),
                    float(bbox.get('x_max', bbox.get('x1', 0))),
                    float(bbox.get('y_max', bbox.get('y1', 0))),
                )
            logger.warning(f"Dict bbox missing required fields: {bbox}")
            return None
        # List/tuple formats
        if isinstance(bbox, (list, tuple)):
            if not bbox:
                return None
            # Nested polygon format: [[x1,y1], [x2,y2], ...]
            if isinstance(bbox[0], (list, tuple)):
                # Points with fewer than two components are skipped.
                points = [pt for pt in bbox if len(pt) >= 2]
                return _bbox_extent(
                    [pt[0] for pt in points],
                    [pt[1] for pt in points],
                )
            # Flat rectangle: [x0, y0, x1, y1]
            if len(bbox) == 4:
                return (
                    float(bbox[0]),
                    float(bbox[1]),
                    float(bbox[2]),
                    float(bbox[3]),
                )
            # Flat polygon: [x1, y1, x2, y2, x3, y3, x4, y4, ...]
            if len(bbox) >= 8 and len(bbox) % 2 == 0:
                # Even positions are x values, odd positions are y values.
                return _bbox_extent(bbox[0::2], bbox[1::2])
        logger.warning(f"Unknown bbox format: {type(bbox).__name__}, value: {bbox}")
        return None
    except (TypeError, ValueError, IndexError) as e:
        logger.warning(f"Failed to normalize bbox {bbox}: {e}")
        return None
def get_bbox_center(bbox: Union[Dict, List, Tuple, BboxCoords, None]) -> Optional[Tuple[float, float]]:
    """
    Get the center point of a bounding box.

    Args:
        bbox: Bounding box in any supported format or already normalized

    Returns:
        (center_x, center_y) tuple or None if invalid
    """
    # Only a 4-tuple of plain numbers can be trusted as already normalized.
    # A nested polygon given as a tuple of four points is also a 4-tuple and
    # must go through normalize_bbox; unpacking it directly would raise a
    # TypeError on the arithmetic below.
    already_normalized = (
        isinstance(bbox, tuple)
        and len(bbox) == 4
        and all(isinstance(v, (int, float)) for v in bbox)
    )
    coords = bbox if already_normalized else normalize_bbox(bbox)
    if coords is None:
        return None
    x0, y0, x1, y1 = coords
    return ((x0 + x1) / 2, (y0 + y1) / 2)
def get_bbox_area(bbox: Union[Dict, List, Tuple, BboxCoords, None]) -> float:
    """
    Calculate the area of a bounding box.

    Args:
        bbox: Bounding box in any supported format

    Returns:
        Area as a float, 0.0 if the bbox is invalid or has an inverted or
        empty extent on either axis
    """
    # Only a 4-tuple of plain numbers can be trusted as already normalized.
    # A nested polygon given as a tuple of four points is also a 4-tuple and
    # must go through normalize_bbox instead of being unpacked directly.
    already_normalized = (
        isinstance(bbox, tuple)
        and len(bbox) == 4
        and all(isinstance(v, (int, float)) for v in bbox)
    )
    coords = bbox if already_normalized else normalize_bbox(bbox)
    if coords is None:
        return 0.0
    x0, y0, x1, y1 = coords
    # Clamp each side at zero so inverted boxes yield 0 rather than a
    # negative (or spuriously positive) product.
    width = max(0, x1 - x0)
    height = max(0, y1 - y0)
    # float() keeps the declared return type even for integer coordinates.
    return float(width * height)
def calculate_ioa(
    inner_bbox: Union[Dict, List, Tuple, BboxCoords, None],
    outer_bbox: Union[Dict, List, Tuple, BboxCoords, None]
) -> float:
    """
    Calculate Intersection over Area (IoA) of inner bbox with respect to outer bbox.

    IoA = intersection_area / inner_area

    Args:
        inner_bbox: The bbox to check (its area is the denominator)
        outer_bbox: The reference bbox

    Returns:
        IoA ratio (0.0 to 1.0), 0.0 if either bbox is invalid, the boxes do
        not overlap, or the inner bbox has no area
    """
    resolved = []
    for box in (inner_bbox, outer_bbox):
        # Only a 4-tuple of plain numbers can be trusted as already
        # normalized; a tuple of four polygon points is also a 4-tuple and
        # must be reduced by normalize_bbox first.
        already_normalized = (
            isinstance(box, tuple)
            and len(box) == 4
            and all(isinstance(v, (int, float)) for v in box)
        )
        resolved.append(box if already_normalized else normalize_bbox(box))
    inner_coords, outer_coords = resolved
    if inner_coords is None or outer_coords is None:
        return 0.0
    inner_x0, inner_y0, inner_x1, inner_y1 = inner_coords
    outer_x0, outer_y0, outer_x1, outer_y1 = outer_coords
    # Calculate intersection
    inter_x0 = max(inner_x0, outer_x0)
    inter_y0 = max(inner_y0, outer_y0)
    inter_x1 = min(inner_x1, outer_x1)
    inter_y1 = min(inner_y1, outer_y1)
    # Boxes that merely touch along an edge count as non-overlapping.
    if inter_x1 <= inter_x0 or inter_y1 <= inter_y0:
        return 0.0
    intersection_area = (inter_x1 - inter_x0) * (inter_y1 - inter_y0)
    inner_area = (inner_x1 - inner_x0) * (inner_y1 - inner_y0)
    if inner_area <= 0:
        return 0.0
    return intersection_area / inner_area
def calculate_iou(
    bbox1: Union[Dict, List, Tuple, BboxCoords, None],
    bbox2: Union[Dict, List, Tuple, BboxCoords, None]
) -> float:
    """
    Calculate Intersection over Union (IoU) of two bounding boxes.

    Args:
        bbox1: First bounding box
        bbox2: Second bounding box

    Returns:
        IoU ratio (0.0 to 1.0), 0.0 if either bbox is invalid or the boxes
        do not overlap
    """
    resolved = []
    for box in (bbox1, bbox2):
        # Only a 4-tuple of plain numbers can be trusted as already
        # normalized; a tuple of four polygon points is also a 4-tuple and
        # must be reduced by normalize_bbox first.
        already_normalized = (
            isinstance(box, tuple)
            and len(box) == 4
            and all(isinstance(v, (int, float)) for v in box)
        )
        resolved.append(box if already_normalized else normalize_bbox(box))
    coords1, coords2 = resolved
    if coords1 is None or coords2 is None:
        return 0.0
    x0_1, y0_1, x1_1, y1_1 = coords1
    x0_2, y0_2, x1_2, y1_2 = coords2
    # Calculate intersection
    inter_x0 = max(x0_1, x0_2)
    inter_y0 = max(y0_1, y0_2)
    inter_x1 = min(x1_1, x1_2)
    inter_y1 = min(y1_1, y1_2)
    # Boxes that merely touch along an edge count as non-overlapping.
    if inter_x1 <= inter_x0 or inter_y1 <= inter_y0:
        return 0.0
    intersection_area = (inter_x1 - inter_x0) * (inter_y1 - inter_y0)
    area1 = (x1_1 - x0_1) * (y1_1 - y0_1)
    area2 = (x1_2 - x0_2) * (y1_2 - y0_2)
    # The overlap is contained in both areas, so subtract it once.
    union_area = area1 + area2 - intersection_area
    if union_area <= 0:
        return 0.0
    return intersection_area / union_area
def is_bbox_inside(
    inner_bbox: Union[Dict, List, Tuple, BboxCoords, None],
    outer_bbox: Union[Dict, List, Tuple, BboxCoords, None],
    tolerance: float = 0.0
) -> bool:
    """
    Check if inner_bbox is completely inside outer_bbox (with optional tolerance).

    Args:
        inner_bbox: The bbox to check
        outer_bbox: The containing bbox
        tolerance: Allowed overflow on each side, in the same units as the
            bbox coordinates

    Returns:
        True if inner is inside outer (within tolerance), False otherwise
        or if either bbox is invalid
    """
    resolved = []
    for box in (inner_bbox, outer_bbox):
        # Only a 4-tuple of plain numbers can be trusted as already
        # normalized; a tuple of four polygon points is also a 4-tuple and
        # must be reduced by normalize_bbox first.
        already_normalized = (
            isinstance(box, tuple)
            and len(box) == 4
            and all(isinstance(v, (int, float)) for v in box)
        )
        resolved.append(box if already_normalized else normalize_bbox(box))
    inner_coords, outer_coords = resolved
    if inner_coords is None or outer_coords is None:
        return False
    inner_x0, inner_y0, inner_x1, inner_y1 = inner_coords
    outer_x0, outer_y0, outer_x1, outer_y1 = outer_coords
    # The outer box is grown by `tolerance` on every side before comparing.
    return (
        inner_x0 >= outer_x0 - tolerance and
        inner_y0 >= outer_y0 - tolerance and
        inner_x1 <= outer_x1 + tolerance and
        inner_y1 <= outer_y1 + tolerance
    )