feat: simplify layout model selection and archive proposals

Changes:
- Replace PP-Structure 7-slider parameter UI with simple 3-option layout model selector
- Add layout model mapping: chinese (PP-DocLayout-S), default (PubLayNet), cdla
- Add LayoutModelSelector component and zh-TW translations
- Fix "default" model behavior with sentinel value for PubLayNet
- Add gap filling service for OCR track coverage improvement
- Add PP-Structure debug utilities
- Archive completed/incomplete proposals:
  - add-ocr-track-gap-filling (complete)
  - fix-ocr-track-table-rendering (incomplete)
  - simplify-ppstructure-model-selection (22/25 tasks)
- Add new layout model tests, archive old PP-Structure param tests
- Update OpenSpec ocr-processing spec with layout model requirements

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
egg
2025-11-27 13:27:00 +08:00
parent c65df754cf
commit 59206a6ab8
35 changed files with 3621 additions and 658 deletions

View File

@@ -24,6 +24,19 @@ class ProcessingTrackEnum(str, Enum):
AUTO = "auto" # Auto-detect best track
class LayoutModelEnum(str, Enum):
    """Layout detection model selection for the OCR track.

    Members mix in ``str``, so each one compares equal to its lowercase
    string value (``"chinese"``, ``"default"``, ``"cdla"``).

    Different models are optimized for different document types:

    - CHINESE: PP-DocLayout-S, optimized for Chinese documents
      (forms, contracts, invoices).
    - DEFAULT: PubLayNet-based, optimized for English academic papers.
    - CDLA: CDLA model, specialized Chinese document layout analysis.
    """

    # PP-DocLayout-S - best for Chinese documents (recommended)
    CHINESE = "chinese"
    # PubLayNet-based - best for English documents
    DEFAULT = "default"
    # CDLA model - alternative for Chinese layout
    CDLA = "cdla"
class TaskCreate(BaseModel):
"""Task creation request"""
filename: Optional[str] = Field(None, description="Original filename")
@@ -132,7 +145,11 @@ class UploadResponse(BaseModel):
# ===== Dual-Track Processing Schemas =====
class PPStructureV3Params(BaseModel):
"""PP-StructureV3 fine-tuning parameters for OCR track"""
"""PP-StructureV3 fine-tuning parameters for OCR track.
DEPRECATED: This class is deprecated and will be removed in a future version.
Use `layout_model` parameter in ProcessingOptions instead.
"""
layout_detection_threshold: Optional[float] = Field(
None, ge=0, le=1,
description="Layout block detection score threshold (lower=more blocks, higher=high confidence only)"
@@ -172,10 +189,10 @@ class ProcessingOptions(BaseModel):
include_images: bool = Field(default=True, description="Extract and save images")
confidence_threshold: Optional[float] = Field(None, ge=0, le=1, description="OCR confidence threshold")
# PP-StructureV3 fine-tuning parameters (OCR track only)
pp_structure_params: Optional[PPStructureV3Params] = Field(
None,
description="Fine-tuning parameters for PP-StructureV3 (OCR track only)"
# Layout model selection (OCR track only)
layout_model: Optional[LayoutModelEnum] = Field(
default=LayoutModelEnum.CHINESE,
description="Layout detection model: 'chinese' (recommended for Chinese docs), 'default' (English docs), 'cdla' (Chinese layout)"
)