feat: enable document orientation detection for scanned PDFs
- Enable PP-StructureV3's use_doc_orientation_classify feature
- Detect rotation angle from doc_preprocessor_res.angle
- Swap page dimensions (width <-> height) for 90°/270° rotations
- Output PDF now correctly displays landscape-scanned content

Also includes:
- Archive completed openspec proposals
- Add simplify-frontend-ocr-config proposal (pending)
- Code cleanup and frontend simplification

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -15,6 +15,8 @@ from typing import Dict, List, Optional, Set, Tuple
|
||||
from reportlab.pdfgen import canvas
|
||||
from reportlab.lib.colors import black
|
||||
|
||||
from app.utils.bbox_utils import normalize_bbox
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@@ -162,6 +164,7 @@ class TextRegionRenderer:
|
||||
def get_bbox_as_rect(self, bbox: List[List[float]]) -> Tuple[float, float, float, float]:
|
||||
"""
|
||||
Convert quadrilateral bbox to axis-aligned rectangle (x0, y0, x1, y1).
|
||||
Uses shared bbox utility.
|
||||
|
||||
Args:
|
||||
bbox: List of 4 [x, y] coordinate pairs
|
||||
@@ -169,12 +172,8 @@ class TextRegionRenderer:
|
||||
Returns:
|
||||
Tuple of (x0, y0, x1, y1) - min/max coordinates
|
||||
"""
|
||||
if len(bbox) < 4:
|
||||
return (0.0, 0.0, 0.0, 0.0)
|
||||
|
||||
x_coords = [p[0] for p in bbox]
|
||||
y_coords = [p[1] for p in bbox]
|
||||
return (min(x_coords), min(y_coords), max(x_coords), max(y_coords))
|
||||
result = normalize_bbox(bbox)
|
||||
return result if result else (0.0, 0.0, 0.0, 0.0)
|
||||
|
||||
def get_bbox_left_baseline(
|
||||
self,
|
||||
@@ -646,19 +645,26 @@ def load_raw_ocr_regions(result_dir: str, task_id: str, page_num: int) -> List[D
|
||||
from pathlib import Path
|
||||
import json
|
||||
|
||||
# Construct filename pattern
|
||||
filename = f"{task_id}_edit_page_{page_num}_raw_ocr_regions.json"
|
||||
file_path = Path(result_dir) / filename
|
||||
result_path = Path(result_dir)
|
||||
|
||||
if not file_path.exists():
|
||||
logger.warning(f"Raw OCR regions file not found: {file_path}")
|
||||
return []
|
||||
# Use glob pattern to find raw OCR regions file
|
||||
# Filename format: {task_id}_{original_filename}_page_{page_num}_raw_ocr_regions.json
|
||||
# The original_filename varies based on uploaded file (e.g., scan, document, etc.)
|
||||
glob_pattern = f"{task_id}_*_page_{page_num}_raw_ocr_regions.json"
|
||||
matching_files = list(result_path.glob(glob_pattern))
|
||||
|
||||
try:
|
||||
with open(file_path, 'r', encoding='utf-8') as f:
|
||||
regions = json.load(f)
|
||||
logger.info(f"Loaded {len(regions)} raw OCR regions from {filename}")
|
||||
return regions
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to load raw OCR regions: {e}")
|
||||
return []
|
||||
if matching_files:
|
||||
# Use the first matching file (there should only be one per page)
|
||||
file_path = matching_files[0]
|
||||
try:
|
||||
with open(file_path, 'r', encoding='utf-8') as f:
|
||||
regions = json.load(f)
|
||||
logger.info(f"Loaded {len(regions)} raw OCR regions from {file_path.name}")
|
||||
return regions
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to load raw OCR regions from {file_path}: {e}")
|
||||
return []
|
||||
|
||||
logger.warning(f"Raw OCR regions file not found for task {task_id} page {page_num}. "
|
||||
f"Glob pattern: {glob_pattern}")
|
||||
return []
|
||||
|
||||
Reference in New Issue
Block a user