chore: backup before code cleanup
Backup commit before executing remove-unused-code proposal. This includes all pending changes and new features. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -6,7 +6,7 @@ Supports both PaddleOCR (for scanned documents) and direct extraction (for edita
|
||||
import json
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional, Tuple, Union
|
||||
from typing import Any, Dict, List, Optional, Tuple, Union
|
||||
from datetime import datetime
|
||||
import uuid
|
||||
import gc # For garbage collection
|
||||
@@ -446,6 +446,47 @@ class OCRService:
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to clear GPU cache: {e}")
|
||||
|
||||
def _apply_ocr_config(self, ocr_config: 'OCRConfig'):
    """
    Copy an OCR configuration onto the shared runtime ``settings`` object.

    The config is remembered on the instance, its options are written to
    ``settings`` one by one, and an already-loaded structure engine is
    unloaded to force re-initialization with the new settings.

    Args:
        ocr_config: OCRConfig object (from a preset or custom settings)
            carrying the processing options to apply.
    """
    logger.info(f"Applying OCR config: {ocr_config.model_dump()}")

    # Keep the config around for use in PP-Structure initialization
    self._runtime_ocr_config = ocr_config

    # The parsing mode may carry a ``.value`` (enum-like) or already be a
    # plain value; either way the plain value is what gets stored.
    parsing_mode = ocr_config.table_parsing_mode
    settings.table_parsing_mode = getattr(parsing_mode, 'value', parsing_mode)

    # Preprocessing switches first, then recognition-module switches;
    # all of them are copied over unconditionally under the same name.
    for flag_name in (
        'use_doc_orientation_classify',
        'use_doc_unwarping',
        'use_textline_orientation',
        'enable_chart_recognition',
        'enable_formula_recognition',
        'enable_seal_recognition',
        'enable_region_detection',
    ):
        setattr(settings, flag_name, getattr(ocr_config, flag_name))

    # Layout thresholds only override the current setting when a value
    # was explicitly supplied (None leaves the setting untouched).
    for config_attr, settings_attr in (
        ('layout_threshold', 'layout_detection_threshold'),
        ('layout_nms_threshold', 'layout_nms_threshold'),
    ):
        threshold = getattr(ocr_config, config_attr)
        if threshold is not None:
            setattr(settings, settings_attr, threshold)

    # Drop any existing structure engine so the next initialization
    # picks up the settings written above.
    if self.structure_engine is not None:
        logger.info("Invalidating PP-StructureV3 engine to apply new OCR config")
        self._unload_structure_engine()

    logger.info(f"OCR config applied: table_parsing_mode={settings.table_parsing_mode}")
|
||||
|
||||
def get_ocr_engine(self, lang: str = 'ch') -> PaddleOCR:
|
||||
"""
|
||||
Get or create OCR engine for specified language with GPU support
|
||||
@@ -615,6 +656,39 @@ class OCRService:
|
||||
formula_model = settings.formula_recognition_model_name
|
||||
chart_model = settings.chart_recognition_model_name
|
||||
|
||||
# Apply table_parsing_mode settings
|
||||
# This is the KEY configuration to prevent "cell explosion" on datasheet-type documents
|
||||
table_parsing_mode = settings.table_parsing_mode
|
||||
logger.info(f"Table parsing mode: {table_parsing_mode}")
|
||||
|
||||
if table_parsing_mode == "disabled":
|
||||
# 方案A: 完全關閉 table recognition
|
||||
use_table = False
|
||||
wired_table_model = None
|
||||
wireless_table_model = None
|
||||
wired_cell_det_model = None
|
||||
wireless_cell_det_model = None
|
||||
logger.info("Table parsing DISABLED - no cell segmentation")
|
||||
|
||||
elif table_parsing_mode == "classification_only":
|
||||
# 方案C: 只做 table classification,不做 cell segmentation
|
||||
use_table = False # Don't parse table structure
|
||||
wired_table_model = None
|
||||
wireless_table_model = None
|
||||
wired_cell_det_model = None
|
||||
wireless_cell_det_model = None
|
||||
# Keep table_cls_model to identify table regions
|
||||
logger.info("Table parsing CLASSIFICATION_ONLY - regions identified but no cell parsing")
|
||||
|
||||
elif table_parsing_mode == "conservative":
|
||||
# 方案B: 保守模式 - 只禁用 wireless tables (aggressive)
|
||||
# 注意:不要修改 layout_threshold,它會影響所有元素偵測,不只是表格
|
||||
wireless_table_model = None
|
||||
wireless_cell_det_model = None
|
||||
logger.info(f"Table parsing CONSERVATIVE - wireless disabled (layout_threshold unchanged)")
|
||||
|
||||
# else: "full" mode - use all default settings (aggressive)
|
||||
|
||||
# Apply table detection config overrides for individual table types
|
||||
if table_detection_config:
|
||||
if not table_detection_config.enable_wired_table:
|
||||
@@ -1343,6 +1417,7 @@ class OCRService:
|
||||
|
||||
if detect_layout:
|
||||
# Pass current_page to analyze_layout for correct page numbering
|
||||
# Also pass text_regions for table content rebuilding
|
||||
layout_data, images_metadata = self.analyze_layout(
|
||||
image_path,
|
||||
output_dir=output_dir,
|
||||
@@ -1350,7 +1425,8 @@ class OCRService:
|
||||
layout_model=layout_model,
|
||||
preprocessing_mode=preprocessing_mode,
|
||||
preprocessing_config=preprocessing_config,
|
||||
table_detection_config=table_detection_config
|
||||
table_detection_config=table_detection_config,
|
||||
raw_ocr_regions=text_regions # For table content rebuilding
|
||||
)
|
||||
|
||||
# Generate Markdown
|
||||
@@ -1379,6 +1455,12 @@ class OCRService:
|
||||
|
||||
# If layout data is enhanced, add enhanced results for converter
|
||||
if layout_data and layout_data.get('enhanced'):
|
||||
# Debug: check if table elements have rebuild_stats
|
||||
for elem in layout_data.get('elements', []):
|
||||
if elem.get('type') == 'table':
|
||||
has_rebuild = 'rebuild_stats' in elem
|
||||
logger.info(f"[OCR_SERVICE] Table {elem.get('element_id')}: has rebuild_stats={has_rebuild}, keys={list(elem.keys())[:10]}")
|
||||
|
||||
result['enhanced_results'] = [{
|
||||
'elements': layout_data.get('elements', []),
|
||||
'reading_order': layout_data.get('reading_order', []),
|
||||
@@ -1509,7 +1591,8 @@ class OCRService:
|
||||
layout_model: Optional[str] = None,
|
||||
preprocessing_mode: Optional[PreprocessingModeEnum] = None,
|
||||
preprocessing_config: Optional[PreprocessingConfig] = None,
|
||||
table_detection_config: Optional[TableDetectionConfig] = None
|
||||
table_detection_config: Optional[TableDetectionConfig] = None,
|
||||
raw_ocr_regions: Optional[List[Dict[str, Any]]] = None
|
||||
) -> Tuple[Optional[Dict], List[Dict]]:
|
||||
"""
|
||||
Analyze document layout using PP-StructureV3 with enhanced element extraction
|
||||
@@ -1522,6 +1605,7 @@ class OCRService:
|
||||
preprocessing_mode: Preprocessing mode ('auto', 'manual', 'disabled')
|
||||
preprocessing_config: Manual preprocessing config (used when mode='manual')
|
||||
table_detection_config: Table detection config (wired/wireless/region options)
|
||||
raw_ocr_regions: Optional list of raw OCR text regions for table content rebuilding
|
||||
|
||||
Returns:
|
||||
Tuple of (layout_data, images_metadata)
|
||||
@@ -1607,7 +1691,8 @@ class OCRService:
|
||||
preprocessed_image=preprocessed_image,
|
||||
scaling_info=scaling_info,
|
||||
save_visualization=True, # Save layout detection visualization images
|
||||
use_cv_table_detection=use_cv_table_detection
|
||||
use_cv_table_detection=use_cv_table_detection,
|
||||
raw_ocr_regions=raw_ocr_regions # For table content rebuilding
|
||||
)
|
||||
|
||||
if result.get('has_parsing_res_list'):
|
||||
@@ -2225,7 +2310,8 @@ class OCRService:
|
||||
layout_model: Optional[str] = None,
|
||||
preprocessing_mode: Optional[PreprocessingModeEnum] = None,
|
||||
preprocessing_config: Optional[PreprocessingConfig] = None,
|
||||
table_detection_config: Optional[TableDetectionConfig] = None
|
||||
table_detection_config: Optional[TableDetectionConfig] = None,
|
||||
ocr_config: Optional['OCRConfig'] = None
|
||||
) -> Union[UnifiedDocument, Dict]:
|
||||
"""
|
||||
Main processing method with dual-track support.
|
||||
@@ -2242,11 +2328,16 @@ class OCRService:
|
||||
preprocessing_mode: Layout preprocessing mode ('auto', 'manual', 'disabled')
|
||||
preprocessing_config: Manual preprocessing config (used when mode='manual')
|
||||
table_detection_config: Table detection config (wired/wireless/region options)
|
||||
ocr_config: OCR processing config from preset or custom settings
|
||||
|
||||
Returns:
|
||||
UnifiedDocument if dual-track is enabled and use_dual_track=True,
|
||||
Dict with legacy format otherwise
|
||||
"""
|
||||
# Apply OCR config to settings if provided
|
||||
if ocr_config:
|
||||
self._apply_ocr_config(ocr_config)
|
||||
|
||||
# Use dual-track processing if:
|
||||
# 1. use_dual_track is True (auto-detection), OR
|
||||
# 2. force_track is specified (explicit track selection)
|
||||
|
||||
Reference in New Issue
Block a user