feat: simplify layout model selection and archive proposals

Changes:
- Replace PP-Structure 7-slider parameter UI with simple 3-option layout model selector
- Add layout model mapping: chinese (PP-DocLayout-S), default (PubLayNet), cdla
- Add LayoutModelSelector component and zh-TW translations
- Fix "default" model behavior with sentinel value for PubLayNet
- Add gap filling service for OCR track coverage improvement
- Add PP-Structure debug utilities
- Archive completed/incomplete proposals:
  - add-ocr-track-gap-filling (complete)
  - fix-ocr-track-table-rendering (incomplete)
  - simplify-ppstructure-model-selection (22/25 tasks)
- Add new layout model tests, archive old PP-Structure param tests
- Update OpenSpec ocr-processing spec with layout model requirements

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-27 13:27:00 +08:00
parent c65df754cf
commit 59206a6ab8
35 changed files with 3621 additions and 658 deletions
--- a/backend/app/schemas/task.py
+++ b/backend/app/schemas/task.py
@@ -24,6 +24,19 @@ class ProcessingTrackEnum(str, Enum):
    AUTO = "auto"         # Auto-detect best track


+class LayoutModelEnum(str, Enum):
+    """Layout detection model selection for the OCR track.
+
+    Members mix in str, so each one is also a plain string equal to its value:
+    - CHINESE: PP-DocLayout-S, optimized for Chinese documents (forms, contracts, invoices)
+    - DEFAULT: PubLayNet-based, optimized for English academic papers
+    - CDLA: CDLA model, specialized Chinese document layout analysis
+    """
+    CHINESE = "chinese"   # PP-DocLayout-S - best for Chinese documents (recommended)
+    DEFAULT = "default"   # PubLayNet-based - best for English documents
+    CDLA = "cdla"         # CDLA model - alternative for Chinese layout analysis
+
+
 class TaskCreate(BaseModel):
    """Task creation request"""
    filename: Optional[str] = Field(None, description="Original filename")
@@ -132,7 +145,11 @@ class UploadResponse(BaseModel):
 # ===== Dual-Track Processing Schemas =====

 class PPStructureV3Params(BaseModel):
-    """PP-StructureV3 fine-tuning parameters for OCR track"""
+    """PP-StructureV3 fine-tuning parameters for OCR track.
+
+    DEPRECATED: This class is deprecated and will be removed in a future version.
+    Use `layout_model` parameter in ProcessingOptions instead.
+    """
    layout_detection_threshold: Optional[float] = Field(
        None, ge=0, le=1,
        description="Layout block detection score threshold (lower=more blocks, higher=high confidence only)"
@@ -172,10 +189,10 @@ class ProcessingOptions(BaseModel):
    include_images: bool = Field(default=True, description="Extract and save images")
    confidence_threshold: Optional[float] = Field(None, ge=0, le=1, description="OCR confidence threshold")

-    # PP-StructureV3 fine-tuning parameters (OCR track only)
-    pp_structure_params: Optional[PPStructureV3Params] = Field(
-        None,
-        description="Fine-tuning parameters for PP-StructureV3 (OCR track only)"
+    # Layout model selection (OCR track only)
+    layout_model: Optional[LayoutModelEnum] = Field(
+        default=LayoutModelEnum.CHINESE,
+        description="Layout detection model: 'chinese' (recommended for Chinese docs), 'default' (English docs), 'cdla' (Chinese layout)"
    )