feat: simplify layout model selection and archive proposals

Changes:
- Replace PP-Structure 7-slider parameter UI with simple 3-option layout model selector
- Add layout model mapping: chinese (PP-DocLayout-S), default (PubLayNet), cdla
- Add LayoutModelSelector component and zh-TW translations
- Fix "default" model behavior with sentinel value for PubLayNet
- Add gap filling service for OCR track coverage improvement
- Add PP-Structure debug utilities
- Archive completed/incomplete proposals:
  - add-ocr-track-gap-filling (complete)
  - fix-ocr-track-table-rendering (incomplete)
  - simplify-ppstructure-model-selection (22/25 tasks)
- Add new layout model tests, archive old PP-Structure param tests
- Update OpenSpec ocr-processing spec with layout model requirements

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-27 13:27:00 +08:00
parent c65df754cf
commit 59206a6ab8
35 changed files with 3621 additions and 658 deletions
--- a/backend/app/core/config.py
+++ b/backend/app/core/config.py
@@ -3,7 +3,7 @@ Tool_OCR - Configuration Management
 Loads environment variables and provides centralized configuration
 """

-from typing import List
+from typing import List, Optional
 from pydantic_settings import BaseSettings
 from pydantic import Field
 from pathlib import Path
@@ -99,6 +99,33 @@ class Settings(BaseSettings):
    text_det_box_thresh: float = Field(default=0.3)  # Lower box threshold for better detection
    text_det_unclip_ratio: float = Field(default=1.2)  # Smaller unclip for tighter text boxes

+    # Layout Detection Model Configuration
+    # Available models:
+    # - None: Use PP-StructureV3's built-in model (PubLayNet-based)
+    # - "PP-DocLayout-S" (default): Better for Chinese docs, papers, contracts, exams (23 categories)
+    # - "picodet_lcnet_x1_0_fgd_layout_cdla": CDLA-based model for Chinese document layout
+    layout_detection_model_name: Optional[str] = Field(
+        default="PP-DocLayout-S",
+        description="Layout detection model name. Set to 'PP-DocLayout-S' for better Chinese document support."
+    )
+    layout_detection_model_dir: Optional[str] = Field(
+        default=None,
+        description="Custom layout detection model directory. If None, downloads official model."
+    )
+
+    # ===== Gap Filling Configuration =====
+    # Supplements PP-StructureV3 output with raw OCR regions when detection is incomplete
+    gap_filling_enabled: bool = Field(default=True)  # Enable gap filling for OCR track
+    gap_filling_coverage_threshold: float = Field(default=0.7)  # Activate when coverage < 70%
+    gap_filling_iou_threshold: float = Field(default=0.15)  # IoU threshold for coverage detection
+    gap_filling_confidence_threshold: float = Field(default=0.3)  # Min confidence for raw OCR regions
+    gap_filling_dedup_iou_threshold: float = Field(default=0.5)  # IoU threshold for deduplication
+
+    # ===== Debug Configuration =====
+    # Enable debug outputs for PP-StructureV3 analysis
+    pp_structure_debug_enabled: bool = Field(default=True)  # Save debug files for PP-StructureV3
+    pp_structure_debug_visualization: bool = Field(default=True)  # Generate visualization images
+
    # Performance tuning
    use_fp16_inference: bool = Field(default=False)  # Half-precision (if supported)
    enable_cudnn_benchmark: bool = Field(default=True)  # Optimize convolution algorithms