feat: simplify layout model selection and archive proposals
Changes:
- Replace PP-Structure 7-slider parameter UI with simple 3-option layout model selector
- Add layout model mapping: chinese (PP-DocLayout-S), default (PubLayNet), cdla
- Add LayoutModelSelector component and zh-TW translations
- Fix "default" model behavior with sentinel value for PubLayNet
- Add gap filling service for OCR track coverage improvement
- Add PP-Structure debug utilities
- Archive completed/incomplete proposals:
  - add-ocr-track-gap-filling (complete)
  - fix-ocr-track-table-rendering (incomplete)
  - simplify-ppstructure-model-selection (22/25 tasks)
- Add new layout model tests, archive old PP-Structure param tests
- Update OpenSpec ocr-processing spec with layout model requirements

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -3,7 +3,7 @@ Tool_OCR - Configuration Management
|
||||
Loads environment variables and provides centralized configuration
|
||||
"""
|
||||
|
||||
from typing import List
|
||||
from typing import List, Optional
|
||||
from pydantic_settings import BaseSettings
|
||||
from pydantic import Field
|
||||
from pathlib import Path
|
||||
@@ -99,6 +99,33 @@ class Settings(BaseSettings):
|
||||
text_det_box_thresh: float = Field(default=0.3) # Lower box threshold for better detection
|
||||
text_det_unclip_ratio: float = Field(default=1.2) # Smaller unclip for tighter text boxes
|
||||
|
||||
# Layout Detection Model Configuration
|
||||
# Available models:
|
||||
# - None: Use PP-StructureV3's built-in model (PubLayNet-based)
|
||||
# - "PP-DocLayout-S" (default): Better for Chinese docs, papers, contracts, exams (23 categories)
|
||||
# - "picodet_lcnet_x1_0_fgd_layout_cdla": CDLA-based model for Chinese document layout
|
||||
layout_detection_model_name: Optional[str] = Field(
|
||||
default="PP-DocLayout-S",
|
||||
description="Layout detection model name. Set to 'PP-DocLayout-S' for better Chinese document support."
|
||||
)
|
||||
layout_detection_model_dir: Optional[str] = Field(
|
||||
default=None,
|
||||
description="Custom layout detection model directory. If None, downloads official model."
|
||||
)
|
||||
|
||||
# ===== Gap Filling Configuration =====
|
||||
# Supplements PP-StructureV3 output with raw OCR regions when detection is incomplete
|
||||
gap_filling_enabled: bool = Field(default=True) # Enable gap filling for OCR track
|
||||
gap_filling_coverage_threshold: float = Field(default=0.7) # Activate when coverage < 70%
|
||||
gap_filling_iou_threshold: float = Field(default=0.15) # IoU threshold for coverage detection
|
||||
gap_filling_confidence_threshold: float = Field(default=0.3) # Min confidence for raw OCR regions
|
||||
gap_filling_dedup_iou_threshold: float = Field(default=0.5) # IoU threshold for deduplication
|
||||
|
||||
# ===== Debug Configuration =====
|
||||
# Enable debug outputs for PP-StructureV3 analysis
|
||||
pp_structure_debug_enabled: bool = Field(default=True) # Save debug files for PP-StructureV3
|
||||
pp_structure_debug_visualization: bool = Field(default=True) # Generate visualization images
|
||||
|
||||
# Performance tuning
|
||||
use_fp16_inference: bool = Field(default=False) # Half-precision (if supported)
|
||||
enable_cudnn_benchmark: bool = Field(default=True) # Optimize convolution algorithms
|
||||
|
||||
Reference in New Issue
Block a user