feat: enable document orientation detection for scanned PDFs

- Enable PP-StructureV3's use_doc_orientation_classify feature
- Detect rotation angle from doc_preprocessor_res.angle
- Swap page dimensions (width <-> height) for 90°/270° rotations
- Output PDF now correctly displays landscape-scanned content

Also includes:
- Archive completed openspec proposals
- Add simplify-frontend-ocr-config proposal (pending)
- Code cleanup and frontend simplification

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-11 17:13:46 +08:00
parent 57070af307
commit cfe65158a3
58 changed files with 1271 additions and 3048 deletions
--- a/backend/app/services/ocr_service.py
+++ b/backend/app/services/ocr_service.py
@@ -10,6 +10,7 @@ from typing import Any, Dict, List, Optional, Tuple, Union
 from datetime import datetime
 import uuid
 import gc  # For garbage collection
+import warnings  # For suppressing PaddleX deprecation warnings

 from paddleocr import PaddleOCR, PPStructureV3
 from PIL import Image
@@ -34,7 +35,21 @@ from app.services.layout_preprocessing_service import (
    get_layout_preprocessing_service,
    LayoutPreprocessingService,
 )
-from app.schemas.task import PreprocessingModeEnum, PreprocessingConfig, TableDetectionConfig
+from app.schemas.task import PreprocessingModeEnum, PreprocessingConfig
+from dataclasses import dataclass
+
+
+@dataclass
+class TableDetectionConfig:
+    """Internal table detection configuration for OCR service.
+
+    Note: This was previously in app.schemas.task but is now internal to OCR service
+    as frontend no longer configures these options.
+    """
+    enable_wired_table: bool = True
+    enable_wireless_table: bool = True
+    enable_region_detection: bool = True
+

 # Import dual-track components
 try:
@@ -798,7 +813,12 @@ class OCRService:
                if textline_ori_model:
                    pp_kwargs['textline_orientation_model_name'] = textline_ori_model

-                self.structure_engine = PPStructureV3(**pp_kwargs)
+                # Suppress DeprecationWarning during PPStructureV3 initialization
+                # Workaround for PaddleX bug: it incorrectly treats Python's datetime.utcnow()
+                # deprecation warning as a model loading error in PP-Chart2Table
+                with warnings.catch_warnings():
+                    warnings.filterwarnings('ignore', category=DeprecationWarning)
+                    self.structure_engine = PPStructureV3(**pp_kwargs)

                # Track model loading for cache management
                self._model_last_used['structure'] = datetime.now()
@@ -881,7 +901,10 @@ class OCRService:
                    if settings.textline_orientation_model_name:
                        cpu_kwargs['textline_orientation_model_name'] = settings.textline_orientation_model_name

-                    self.structure_engine = PPStructureV3(**cpu_kwargs)
+                    # Suppress DeprecationWarning during PPStructureV3 initialization (CPU fallback)
+                    with warnings.catch_warnings():
+                        warnings.filterwarnings('ignore', category=DeprecationWarning)
+                        self.structure_engine = PPStructureV3(**cpu_kwargs)
                    self._current_layout_model = layout_model  # Track current model for recreation check
                    # Track table detection config for recreation check
                    if table_detection_config:
@@ -1429,6 +1452,22 @@ class OCRService:
                    raw_ocr_regions=text_regions  # For table content rebuilding
                )

+            # Get detected rotation from layout analysis (default: "0" = no rotation)
+            detected_rotation = "0"
+            if layout_data:
+                detected_rotation = layout_data.get('detected_rotation', '0')
+
+            # Adjust page dimensions based on detected rotation
+            # When rotation is 90° or 270°, the page orientation changes (portrait <-> landscape)
+            # PP-StructureV3 returns coordinates based on the rotated image, so we need to swap dimensions
+            if detected_rotation in ['90', '270']:
+                original_width, original_height = ocr_width, ocr_height
+                ocr_width, ocr_height = original_height, original_width
+                logger.info(
+                    f"Page dimensions adjusted for {detected_rotation}° rotation: "
+                    f"{original_width}x{original_height} -> {ocr_width}x{ocr_height}"
+                )
+
            # Generate Markdown
            markdown_content = self.generate_markdown(text_regions, layout_data)

@@ -1450,7 +1489,8 @@ class OCRService:
                'ocr_dimensions': {
                    'width': ocr_width,
                    'height': ocr_height
-                }
+                },
+                'detected_rotation': detected_rotation  # Document orientation: "0", "90", "180", "270"
            }

            # If layout data is enhanced, add enhanced results for converter
@@ -1705,7 +1745,8 @@ class OCRService:
                        'total_elements': result['total_elements'],
                        'reading_order': result['reading_order'],
                        'element_types': result.get('element_types', {}),
-                        'enhanced': True
+                        'enhanced': True,
+                        'detected_rotation': result.get('detected_rotation', '0')  # Document orientation
                    }

                    # Extract images metadata