feat: enable document orientation detection for scanned PDFs

- Enable PP-StructureV3's use_doc_orientation_classify feature
- Detect rotation angle from doc_preprocessor_res.angle
- Swap page dimensions (width <-> height) for 90°/270° rotations
- Output PDF now correctly displays landscape-scanned content

Also includes:
- Archive completed openspec proposals
- Add simplify-frontend-ocr-config proposal (pending)
- Code cleanup and frontend simplification

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
egg
2025-12-11 17:13:46 +08:00
parent 57070af307
commit cfe65158a3
58 changed files with 1271 additions and 3048 deletions

View File

@@ -16,6 +16,7 @@ from app.models.unified_document import (
DocumentElement, BoundingBox, ElementType, Dimensions
)
from app.core.config import settings
+from app.utils.bbox_utils import normalize_bbox as _normalize_bbox
logger = logging.getLogger(__name__)
@@ -49,32 +50,9 @@ class TextRegion:
@property
def normalized_bbox(self) -> Tuple[float, float, float, float]:
-"""Get normalized bbox as (x0, y0, x1, y1)."""
-if not self.bbox:
-return (0, 0, 0, 0)
-# Check if bbox is nested list format [[x1,y1], [x2,y2], [x3,y3], [x4,y4]]
-# This is common PaddleOCR polygon format
-if len(self.bbox) >= 1 and isinstance(self.bbox[0], (list, tuple)):
-# Nested format: extract all x and y coordinates
-xs = [pt[0] for pt in self.bbox if len(pt) >= 2]
-ys = [pt[1] for pt in self.bbox if len(pt) >= 2]
-if xs and ys:
-return (min(xs), min(ys), max(xs), max(ys))
-return (0, 0, 0, 0)
-# Flat format
-if len(self.bbox) == 4:
-# Simple [x0, y0, x1, y1] format
-return (float(self.bbox[0]), float(self.bbox[1]),
-float(self.bbox[2]), float(self.bbox[3]))
-elif len(self.bbox) >= 8:
-# Flat polygon format: [x1, y1, x2, y2, x3, y3, x4, y4]
-xs = [self.bbox[i] for i in range(0, len(self.bbox), 2)]
-ys = [self.bbox[i] for i in range(1, len(self.bbox), 2)]
-return (min(xs), min(ys), max(xs), max(ys))
-return (0, 0, 0, 0)
+"""Get normalized bbox as (x0, y0, x1, y1). Uses shared bbox utility."""
+result = _normalize_bbox(self.bbox)
+return result if result else (0, 0, 0, 0)
@property
def center(self) -> Tuple[float, float]:
@@ -171,10 +149,6 @@ class GapFillingService:
settings, 'gap_filling_enabled', True
)
-# Legacy compatibility
-self.iou_threshold = getattr(settings, 'gap_filling_iou_threshold', 0.15)
-self.dedup_iou_threshold = getattr(settings, 'gap_filling_dedup_iou_threshold', 0.5)
def should_activate(
self,
raw_ocr_regions: List[TextRegion],