feat: upgrade PP-StructureV3 models to latest versions
- Layout: PP-DocLayout-S → PP-DocLayout_plus-L (83.2% mAP)
- Table: Single model → Dual SLANeXt (wired/wireless)
- Formula: PP-FormulaNet_plus-L for enhanced recognition
- Add preprocessing flags support (orientation, unwarping)
- Update frontend i18n descriptions

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -50,11 +50,11 @@ logger = logging.getLogger(__name__)
|
||||
_USE_PUBLAYNET_DEFAULT = "__USE_PUBLAYNET_DEFAULT__"
|
||||
|
||||
# Layout model mapping: user-friendly names to actual model names
|
||||
# - "chinese": PP-DocLayout-S - Best for Chinese documents (forms, contracts, invoices)
|
||||
# - "chinese": PP-DocLayout_plus-L - Best for Chinese documents (83.2% mAP, complex layouts)
|
||||
# - "default": PubLayNet-based default model - Best for English documents
|
||||
# - "cdla": picodet_lcnet_x1_0_fgd_layout_cdla - Alternative for Chinese layout
|
||||
LAYOUT_MODEL_MAPPING = {
|
||||
"chinese": "PP-DocLayout-S",
|
||||
"chinese": "PP-DocLayout_plus-L",
|
||||
"default": _USE_PUBLAYNET_DEFAULT, # Uses default PubLayNet-based model (no custom model)
|
||||
"cdla": "picodet_lcnet_x1_0_fgd_layout_cdla",
|
||||
}
|
||||
@@ -517,34 +517,63 @@ class OCRService:
|
||||
layout_model_name = settings.layout_detection_model_name
|
||||
layout_model_dir = settings.layout_detection_model_dir
|
||||
|
||||
# Preprocessing configuration (Stage 1)
|
||||
use_orientation = settings.use_doc_orientation_classify
|
||||
use_unwarping = settings.use_doc_unwarping
|
||||
use_textline = settings.use_textline_orientation
|
||||
|
||||
# Table and formula model configuration (Stage 4)
|
||||
wired_table_model = settings.wired_table_model_name
|
||||
wireless_table_model = settings.wireless_table_model_name
|
||||
formula_model = settings.formula_recognition_model_name
|
||||
|
||||
logger.info(f"PP-StructureV3 config: table={use_table}, formula={use_formula}, chart={use_chart}")
|
||||
logger.info(f"Preprocessing: orientation={use_orientation}, unwarping={use_unwarping}, textline={use_textline}")
|
||||
logger.info(f"Layout model: name={layout_model_name}, dir={layout_model_dir}")
|
||||
logger.info(f"Table models: wired={wired_table_model}, wireless={wireless_table_model}")
|
||||
logger.info(f"Formula model: {formula_model}")
|
||||
logger.info(f"Layout config: threshold={layout_threshold}, nms={layout_nms}, merge={layout_merge}, unclip={layout_unclip}")
|
||||
logger.info(f"Text detection: thresh={text_thresh}, box_thresh={text_box_thresh}, unclip={text_unclip}")
|
||||
|
||||
# Build PPStructureV3 kwargs
|
||||
pp_kwargs = {
|
||||
'use_doc_orientation_classify': False,
|
||||
'use_doc_unwarping': False,
|
||||
'use_textline_orientation': False,
|
||||
# Preprocessing (Stage 1)
|
||||
'use_doc_orientation_classify': use_orientation,
|
||||
'use_doc_unwarping': use_unwarping,
|
||||
'use_textline_orientation': use_textline,
|
||||
# Element recognition (Stage 4)
|
||||
'use_table_recognition': use_table,
|
||||
'use_formula_recognition': use_formula,
|
||||
'use_chart_recognition': use_chart,
|
||||
# Layout detection parameters
|
||||
'layout_threshold': layout_threshold,
|
||||
'layout_nms': layout_nms,
|
||||
'layout_unclip_ratio': layout_unclip,
|
||||
'layout_merge_bboxes_mode': layout_merge,
|
||||
# Text detection parameters
|
||||
'text_det_thresh': text_thresh,
|
||||
'text_det_box_thresh': text_box_thresh,
|
||||
'text_det_unclip_ratio': text_unclip,
|
||||
}
|
||||
|
||||
# Add layout model configuration if specified
|
||||
# Add layout model configuration if specified (Stage 3)
|
||||
if layout_model_name:
|
||||
pp_kwargs['layout_detection_model_name'] = layout_model_name
|
||||
if layout_model_dir:
|
||||
pp_kwargs['layout_detection_model_dir'] = layout_model_dir
|
||||
|
||||
# Add table structure model configuration (Stage 4)
|
||||
# PPStructureV3 uses separate models for wired (bordered) and wireless (borderless) tables
|
||||
# Both models should be configured for comprehensive table detection
|
||||
if wired_table_model:
|
||||
pp_kwargs['wired_table_structure_recognition_model_name'] = wired_table_model
|
||||
if wireless_table_model:
|
||||
pp_kwargs['wireless_table_structure_recognition_model_name'] = wireless_table_model
|
||||
|
||||
# Add formula recognition model configuration (Stage 4)
|
||||
if formula_model:
|
||||
pp_kwargs['formula_recognition_model_name'] = formula_model
|
||||
|
||||
self.structure_engine = PPStructureV3(**pp_kwargs)
|
||||
|
||||
# Track model loading for cache management
|
||||
@@ -571,12 +600,15 @@ class OCRService:
|
||||
layout_threshold = settings.layout_detection_threshold
|
||||
layout_model_name = settings.layout_detection_model_name
|
||||
layout_model_dir = settings.layout_detection_model_dir
|
||||
wired_table_model = settings.wired_table_model_name
|
||||
wireless_table_model = settings.wireless_table_model_name
|
||||
formula_model = settings.formula_recognition_model_name
|
||||
|
||||
# Build CPU fallback kwargs
|
||||
cpu_kwargs = {
|
||||
'use_doc_orientation_classify': False,
|
||||
'use_doc_unwarping': False,
|
||||
'use_textline_orientation': False,
|
||||
'use_doc_orientation_classify': settings.use_doc_orientation_classify,
|
||||
'use_doc_unwarping': settings.use_doc_unwarping,
|
||||
'use_textline_orientation': settings.use_textline_orientation,
|
||||
'use_table_recognition': use_table,
|
||||
'use_formula_recognition': use_formula,
|
||||
'use_chart_recognition': use_chart,
|
||||
@@ -586,6 +618,12 @@ class OCRService:
|
||||
cpu_kwargs['layout_detection_model_name'] = layout_model_name
|
||||
if layout_model_dir:
|
||||
cpu_kwargs['layout_detection_model_dir'] = layout_model_dir
|
||||
if wired_table_model:
|
||||
cpu_kwargs['wired_table_structure_recognition_model_name'] = wired_table_model
|
||||
if wireless_table_model:
|
||||
cpu_kwargs['wireless_table_structure_recognition_model_name'] = wireless_table_model
|
||||
if formula_model:
|
||||
cpu_kwargs['formula_recognition_model_name'] = formula_model
|
||||
|
||||
self.structure_engine = PPStructureV3(**cpu_kwargs)
|
||||
self._current_layout_model = layout_model # Track current model for recreation check
|
||||
|
||||
Reference in New Issue
Block a user