feat: enable document orientation detection for scanned PDFs
- Enable PP-StructureV3's use_doc_orientation_classify feature
- Detect rotation angle from doc_preprocessor_res.angle
- Swap page dimensions (width <-> height) for 90°/270° rotations
- Output PDF now correctly displays landscape-scanned content

Also includes:
- Archive completed openspec proposals
- Add simplify-frontend-ocr-config proposal (pending)
- Code cleanup and frontend simplification

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -10,6 +10,7 @@ from typing import Any, Dict, List, Optional, Tuple, Union
|
||||
from datetime import datetime
|
||||
import uuid
|
||||
import gc # For garbage collection
|
||||
import warnings # For suppressing PaddleX deprecation warnings
|
||||
|
||||
from paddleocr import PaddleOCR, PPStructureV3
|
||||
from PIL import Image
|
||||
@@ -34,7 +35,21 @@ from app.services.layout_preprocessing_service import (
|
||||
get_layout_preprocessing_service,
|
||||
LayoutPreprocessingService,
|
||||
)
|
||||
from app.schemas.task import PreprocessingModeEnum, PreprocessingConfig, TableDetectionConfig
|
||||
from app.schemas.task import PreprocessingModeEnum, PreprocessingConfig
|
||||
from dataclasses import dataclass
|
||||
|
||||
|
||||
@dataclass
class TableDetectionConfig:
    """Internal table detection configuration for the OCR service.

    Note:
        This class previously lived in ``app.schemas.task``. It is now
        internal to the OCR service because the frontend no longer
        configures these options.

    All flags default to ``True``.
    """

    # NOTE(review): presumably toggles recognition of tables with visible
    # ruling lines -- confirm against how the OCR service consumes it.
    enable_wired_table: bool = True
    # NOTE(review): presumably toggles recognition of borderless tables --
    # confirm against how the OCR service consumes it.
    enable_wireless_table: bool = True
    # NOTE(review): presumably toggles a region-detection stage -- confirm
    # against how the OCR service consumes it.
    enable_region_detection: bool = True
|
||||
|
||||
|
||||
# Import dual-track components
|
||||
try:
|
||||
@@ -798,7 +813,12 @@ class OCRService:
|
||||
if textline_ori_model:
|
||||
pp_kwargs['textline_orientation_model_name'] = textline_ori_model
|
||||
|
||||
self.structure_engine = PPStructureV3(**pp_kwargs)
|
||||
# Suppress DeprecationWarning during PPStructureV3 initialization
|
||||
# Workaround for PaddleX bug: it incorrectly treats Python's datetime.utcnow()
|
||||
# deprecation warning as a model loading error in PP-Chart2Table
|
||||
with warnings.catch_warnings():
|
||||
warnings.filterwarnings('ignore', category=DeprecationWarning)
|
||||
self.structure_engine = PPStructureV3(**pp_kwargs)
|
||||
|
||||
# Track model loading for cache management
|
||||
self._model_last_used['structure'] = datetime.now()
|
||||
@@ -881,7 +901,10 @@ class OCRService:
|
||||
if settings.textline_orientation_model_name:
|
||||
cpu_kwargs['textline_orientation_model_name'] = settings.textline_orientation_model_name
|
||||
|
||||
self.structure_engine = PPStructureV3(**cpu_kwargs)
|
||||
# Suppress DeprecationWarning during PPStructureV3 initialization (CPU fallback)
|
||||
with warnings.catch_warnings():
|
||||
warnings.filterwarnings('ignore', category=DeprecationWarning)
|
||||
self.structure_engine = PPStructureV3(**cpu_kwargs)
|
||||
self._current_layout_model = layout_model # Track current model for recreation check
|
||||
# Track table detection config for recreation check
|
||||
if table_detection_config:
|
||||
@@ -1429,6 +1452,22 @@ class OCRService:
|
||||
raw_ocr_regions=text_regions # For table content rebuilding
|
||||
)
|
||||
|
||||
# Get detected rotation from layout analysis (default: "0" = no rotation)
|
||||
detected_rotation = "0"
|
||||
if layout_data:
|
||||
detected_rotation = layout_data.get('detected_rotation', '0')
|
||||
|
||||
# Adjust page dimensions based on detected rotation
|
||||
# When rotation is 90° or 270°, the page orientation changes (portrait <-> landscape)
|
||||
# PP-StructureV3 returns coordinates based on the rotated image, so we need to swap dimensions
|
||||
if detected_rotation in ['90', '270']:
|
||||
original_width, original_height = ocr_width, ocr_height
|
||||
ocr_width, ocr_height = original_height, original_width
|
||||
logger.info(
|
||||
f"Page dimensions adjusted for {detected_rotation}° rotation: "
|
||||
f"{original_width}x{original_height} -> {ocr_width}x{ocr_height}"
|
||||
)
|
||||
|
||||
# Generate Markdown
|
||||
markdown_content = self.generate_markdown(text_regions, layout_data)
|
||||
|
||||
@@ -1450,7 +1489,8 @@ class OCRService:
|
||||
'ocr_dimensions': {
|
||||
'width': ocr_width,
|
||||
'height': ocr_height
|
||||
}
|
||||
},
|
||||
'detected_rotation': detected_rotation # Document orientation: "0", "90", "180", "270"
|
||||
}
|
||||
|
||||
# If layout data is enhanced, add enhanced results for converter
|
||||
@@ -1705,7 +1745,8 @@ class OCRService:
|
||||
'total_elements': result['total_elements'],
|
||||
'reading_order': result['reading_order'],
|
||||
'element_types': result.get('element_types', {}),
|
||||
'enhanced': True
|
||||
'enhanced': True,
|
||||
'detected_rotation': result.get('detected_rotation', '0') # Document orientation
|
||||
}
|
||||
|
||||
# Extract images metadata
|
||||
|
||||
Reference in New Issue
Block a user