feat: upgrade PP-StructureV3 models to latest versions

- Layout: PP-DocLayout-S → PP-DocLayout_plus-L (83.2% mAP)
- Table: Single model → Dual SLANeXt (wired/wireless)
- Formula: PP-FormulaNet_plus-L for enhanced recognition
- Add preprocessing flags support (orientation, unwarping)
- Update frontend i18n descriptions

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-27 14:21:24 +08:00
parent 59206a6ab8
commit 6235280c45
9 changed files with 504 additions and 25 deletions
--- a/backend/app/core/config.py
+++ b/backend/app/core/config.py
@@ -91,6 +91,11 @@ class Settings(BaseSettings):
    enable_table_recognition: bool = Field(default=True)  # Table structure recognition
    enable_seal_recognition: bool = Field(default=True)  # Seal/stamp recognition
    enable_text_recognition: bool = Field(default=True)  # General text recognition
+
+    # PP-StructureV3 Preprocessing (Stage 1)
+    use_doc_orientation_classify: bool = Field(default=True)  # Auto-detect and correct document rotation
+    use_doc_unwarping: bool = Field(default=True)  # Correct document warping from photos
+    use_textline_orientation: bool = Field(default=True)  # Detect textline orientation
    layout_detection_threshold: float = Field(default=0.2)  # Lower threshold for more sensitive detection
    layout_nms_threshold: float = Field(default=0.2)  # Lower NMS to preserve more individual elements
    layout_merge_mode: str = Field(default="small")  # Use 'small' to minimize bbox merging
@@ -99,20 +104,48 @@ class Settings(BaseSettings):
    text_det_box_thresh: float = Field(default=0.3)  # Lower box threshold for better detection
    text_det_unclip_ratio: float = Field(default=1.2)  # Smaller unclip for tighter text boxes

-    # Layout Detection Model Configuration
+    # Layout Detection Model Configuration (Stage 3)
    # Available models:
    # - None: Use PP-StructureV3's built-in model (PubLayNet-based)
-    # - "PP-DocLayout-S": Better for Chinese docs, papers, contracts, exams (23 categories)
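+    # - "PP-DocLayout_plus-L": Recommended for Chinese docs with complex layouts (83.2% mAP, 20 categories)
+    # - "PP-DocLayout-L": High accuracy, general purpose (90.4% mAP, 23 categories); NOTE(review): the two mAP figures presumably come from different benchmarks and are not directly comparable - confirm against the PaddleOCR model list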
    # - "picodet_lcnet_x1_0_fgd_layout_cdla": CDLA-based model for Chinese document layout
    layout_detection_model_name: Optional[str] = Field(
-        default="PP-DocLayout-S",
-        description="Layout detection model name. Set to 'PP-DocLayout-S' for better Chinese document support."
+        default="PP-DocLayout_plus-L",
+        description="Layout detection model name. PP-DocLayout_plus-L recommended for complex Chinese documents."
    )
    layout_detection_model_dir: Optional[str] = Field(
        default=None,
        description="Custom layout detection model directory. If None, downloads official model."
    )

+    # Table Structure Recognition Model Configuration (Stage 4)
+    # PP-StructureV3 uses separate models for wired (bordered) and wireless (borderless) tables
+    # Both models should be configured for comprehensive table detection
+    # Available models:
+    # - "SLANeXt_wired": Best for wired/bordered tables (69.65% accuracy, 351MB)
+    # - "SLANeXt_wireless": Best for wireless/borderless tables (69.65% accuracy, 351MB)
+    # - "SLANet": Legacy model (59.52% accuracy, 6.9MB)
+    # - "SLANet_plus": Improved legacy (63.69% accuracy, 6.9MB)
+    wired_table_model_name: Optional[str] = Field(
+        default="SLANeXt_wired",
+        description="Table structure model for bordered tables. SLANeXt_wired recommended."
+    )
+    wireless_table_model_name: Optional[str] = Field(
+        default="SLANeXt_wireless",
+        description="Table structure model for borderless tables. SLANeXt_wireless recommended."
+    )
+
+    # Formula Recognition Model Configuration (Stage 4)
+    # Available models:
+    # - "PP-FormulaNet_plus-L": Best for Chinese formulas (90.64% Chinese, 92.22% English BLEU)
+    # - "PP-FormulaNet-L": Good for English formulas (90.36% English BLEU)
+    # - "PP-FormulaNet-S": Fast inference (87% English BLEU)
+    formula_recognition_model_name: Optional[str] = Field(
+        default="PP-FormulaNet_plus-L",
+        description="Formula recognition model. PP-FormulaNet_plus-L recommended for Chinese formula support."
+    )
+
    # ===== Gap Filling Configuration =====
    # Supplements PP-StructureV3 output with raw OCR regions when detection is incomplete
    gap_filling_enabled: bool = Field(default=True)  # Enable gap filling for OCR track