feat: simplify layout model selection and archive proposals
Changes:
- Replace PP-Structure 7-slider parameter UI with simple 3-option layout model selector
- Add layout model mapping: chinese (PP-DocLayout-S), default (PubLayNet), cdla
- Add LayoutModelSelector component and zh-TW translations
- Fix "default" model behavior with sentinel value for PubLayNet
- Add gap filling service for OCR track coverage improvement
- Add PP-Structure debug utilities
- Archive completed/incomplete proposals:
  - add-ocr-track-gap-filling (complete)
  - fix-ocr-track-table-rendering (incomplete)
  - simplify-ppstructure-model-selection (22/25 tasks)
- Add new layout model tests, archive old PP-Structure param tests
- Update OpenSpec ocr-processing spec with layout model requirements

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -24,6 +24,19 @@ class ProcessingTrackEnum(str, Enum):
|
||||
AUTO = "auto" # Auto-detect best track
|
||||
|
||||
|
||||
class LayoutModelEnum(str, Enum):
    """Layout detection model selection for the OCR track.

    Different models are optimized for different document types:
    - CHINESE: PP-DocLayout-S, optimized for Chinese documents (forms, contracts, invoices)
    - DEFAULT: PubLayNet-based, optimized for English academic papers
    - CDLA: CDLA model, specialized Chinese document layout analysis

    Members also subclass ``str``, so each member compares equal to its
    plain string value (e.g. ``LayoutModelEnum.CDLA == "cdla"``).
    """

    CHINESE = "chinese"  # PP-DocLayout-S - Best for Chinese documents (recommended)
    DEFAULT = "default"  # PubLayNet-based - Best for English documents
    CDLA = "cdla"  # CDLA model - Alternative for Chinese layout
|
||||
|
||||
|
||||
class TaskCreate(BaseModel):
|
||||
"""Task creation request"""
|
||||
filename: Optional[str] = Field(None, description="Original filename")
|
||||
@@ -132,7 +145,11 @@ class UploadResponse(BaseModel):
|
||||
# ===== Dual-Track Processing Schemas =====
|
||||
|
||||
class PPStructureV3Params(BaseModel):
|
||||
"""PP-StructureV3 fine-tuning parameters for OCR track"""
|
||||
"""PP-StructureV3 fine-tuning parameters for OCR track.
|
||||
|
||||
DEPRECATED: This class is deprecated and will be removed in a future version.
|
||||
Use `layout_model` parameter in ProcessingOptions instead.
|
||||
"""
|
||||
layout_detection_threshold: Optional[float] = Field(
|
||||
None, ge=0, le=1,
|
||||
description="Layout block detection score threshold (lower=more blocks, higher=high confidence only)"
|
||||
@@ -172,10 +189,10 @@ class ProcessingOptions(BaseModel):
|
||||
include_images: bool = Field(default=True, description="Extract and save images")
|
||||
confidence_threshold: Optional[float] = Field(None, ge=0, le=1, description="OCR confidence threshold")
|
||||
|
||||
# PP-StructureV3 fine-tuning parameters (OCR track only)
|
||||
pp_structure_params: Optional[PPStructureV3Params] = Field(
|
||||
None,
|
||||
description="Fine-tuning parameters for PP-StructureV3 (OCR track only)"
|
||||
# Layout model selection (OCR track only)
|
||||
layout_model: Optional[LayoutModelEnum] = Field(
|
||||
default=LayoutModelEnum.CHINESE,
|
||||
description="Layout detection model: 'chinese' (recommended for Chinese docs), 'default' (English docs), 'cdla' (Chinese layout)"
|
||||
)
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user