feat: refactor dual-track architecture (Phase 1-5)
## Backend Changes

- **Service Layer Refactoring**:
  - Add ProcessingOrchestrator for unified document processing
  - Add PDFTableRenderer for table rendering extraction
  - Add PDFFontManager for font management with CJK support
  - Add MemoryPolicyEngine (73% code reduction from MemoryGuard)
- **Bug Fixes**:
  - Fix Direct Track table row span calculation
  - Fix OCR Track image path handling
  - Add cell_boxes coordinate validation
  - Filter out small decorative images
  - Add covering image detection

## Frontend Changes

- **State Management**:
  - Add TaskStore for centralized task state management
  - Add localStorage persistence for recent tasks
  - Add processing state tracking
- **Type Consolidation**:
  - Merge shared types from api.ts to apiV2.ts
  - Update imports in authStore, uploadStore, ResultsTable, SettingsPage
- **Page Integration**:
  - Integrate TaskStore in ProcessingPage and TaskDetailPage
  - Update useTaskValidation hook with cache sync

## Testing

- Direct Track: edit.pdf (3 pages, 1.281s), edit3.pdf (2 pages, 0.203s)
- Cell boxes validation: 43 valid, 0 invalid
- Table merging: 12 merged cells verified

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -26,6 +26,10 @@ except ImportError:
|
||||
from app.core.config import settings
|
||||
from app.services.office_converter import OfficeConverter, OfficeConverterError
|
||||
from app.services.memory_manager import get_model_manager, MemoryConfig, MemoryGuard, prediction_context
|
||||
from app.services.memory_policy_engine import (
|
||||
MemoryPolicyEngine, MemoryPolicyConfig, get_memory_policy_engine,
|
||||
prediction_context as new_prediction_context
|
||||
)
|
||||
from app.services.layout_preprocessing_service import (
|
||||
get_layout_preprocessing_service,
|
||||
LayoutPreprocessingService,
|
||||
@@ -38,6 +42,9 @@ try:
|
||||
from app.services.direct_extraction_engine import DirectExtractionEngine
|
||||
from app.services.ocr_to_unified_converter import OCRToUnifiedConverter
|
||||
from app.services.unified_document_exporter import UnifiedDocumentExporter
|
||||
from app.services.processing_orchestrator import (
|
||||
ProcessingOrchestrator, ProcessingConfig, ProcessingResult
|
||||
)
|
||||
from app.models.unified_document import (
|
||||
UnifiedDocument, DocumentMetadata,
|
||||
ProcessingTrack, ElementType, DocumentElement, Page, Dimensions,
|
||||
@@ -48,6 +55,7 @@ except ImportError as e:
|
||||
logging.getLogger(__name__).warning(f"Dual-track components not available: {e}")
|
||||
DUAL_TRACK_AVAILABLE = False
|
||||
UnifiedDocumentExporter = None
|
||||
ProcessingOrchestrator = None
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -98,11 +106,16 @@ class OCRService:
|
||||
)
|
||||
self.ocr_to_unified_converter = OCRToUnifiedConverter()
|
||||
self.dual_track_enabled = True
|
||||
logger.info("Dual-track processing enabled")
|
||||
|
||||
# Initialize ProcessingOrchestrator for cleaner flow control
|
||||
self._orchestrator = ProcessingOrchestrator()
|
||||
self._orchestrator.set_ocr_service(self) # Dependency injection
|
||||
logger.info("Dual-track processing enabled (with ProcessingOrchestrator)")
|
||||
else:
|
||||
self.document_detector = None
|
||||
self.direct_extraction_engine = None
|
||||
self.ocr_to_unified_converter = None
|
||||
self._orchestrator = None
|
||||
self.dual_track_enabled = False
|
||||
logger.info("Dual-track processing not available, using OCR-only mode")
|
||||
|
||||
@@ -115,22 +128,39 @@ class OCRService:
|
||||
self._model_last_used = {} # Track last usage time for each model
|
||||
self._memory_warning_logged = False
|
||||
|
||||
# Initialize MemoryGuard for enhanced memory monitoring
|
||||
# Initialize memory management (use new MemoryPolicyEngine)
|
||||
self._memory_guard = None
|
||||
self._memory_policy_engine = None
|
||||
if settings.enable_model_lifecycle_management:
|
||||
try:
|
||||
memory_config = MemoryConfig(
|
||||
# Use new MemoryPolicyEngine (simplified, consolidated)
|
||||
policy_config = MemoryPolicyConfig(
|
||||
warning_threshold=settings.memory_warning_threshold,
|
||||
critical_threshold=settings.memory_critical_threshold,
|
||||
emergency_threshold=settings.memory_emergency_threshold,
|
||||
model_idle_timeout_seconds=settings.pp_structure_idle_timeout_seconds,
|
||||
gpu_memory_limit_mb=settings.gpu_memory_limit_mb,
|
||||
enable_cpu_fallback=settings.enable_cpu_fallback,
|
||||
max_concurrent_predictions=2,
|
||||
prediction_timeout_seconds=settings.service_acquire_timeout_seconds,
|
||||
)
|
||||
self._memory_guard = MemoryGuard(memory_config)
|
||||
logger.debug("MemoryGuard initialized for OCRService")
|
||||
self._memory_policy_engine = get_memory_policy_engine(policy_config)
|
||||
logger.info("MemoryPolicyEngine initialized for OCRService")
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to initialize MemoryGuard: {e}")
|
||||
logger.warning(f"Failed to initialize MemoryPolicyEngine: {e}")
|
||||
# Fallback to legacy MemoryGuard
|
||||
try:
|
||||
memory_config = MemoryConfig(
|
||||
warning_threshold=settings.memory_warning_threshold,
|
||||
critical_threshold=settings.memory_critical_threshold,
|
||||
emergency_threshold=settings.memory_emergency_threshold,
|
||||
model_idle_timeout_seconds=settings.pp_structure_idle_timeout_seconds,
|
||||
gpu_memory_limit_mb=settings.gpu_memory_limit_mb,
|
||||
enable_cpu_fallback=settings.enable_cpu_fallback,
|
||||
)
|
||||
self._memory_guard = MemoryGuard(memory_config)
|
||||
logger.debug("Fallback: MemoryGuard initialized for OCRService")
|
||||
except Exception as e2:
|
||||
logger.warning(f"Failed to initialize MemoryGuard fallback: {e2}")
|
||||
|
||||
# Track if CPU fallback was activated
|
||||
self._cpu_fallback_active = False
|
||||
@@ -262,9 +292,9 @@ class OCRService:
|
||||
return
|
||||
|
||||
try:
|
||||
# Use MemoryGuard if available for better monitoring
|
||||
if self._memory_guard:
|
||||
stats = self._memory_guard.get_memory_stats()
|
||||
# Use MemoryPolicyEngine (preferred) or MemoryGuard for monitoring
|
||||
if self._memory_policy_engine:
|
||||
stats = self._memory_policy_engine.get_memory_stats()
|
||||
|
||||
# Log based on usage ratio
|
||||
if stats.gpu_used_ratio > 0.90 and not self._memory_warning_logged:
|
||||
@@ -278,15 +308,33 @@ class OCRService:
|
||||
# Trigger emergency cleanup if enabled
|
||||
if settings.enable_emergency_cleanup:
|
||||
self._cleanup_unused_models()
|
||||
self._memory_guard.clear_gpu_cache()
|
||||
self._memory_policy_engine.clear_cache()
|
||||
|
||||
elif stats.gpu_used_ratio > 0.75:
|
||||
logger.info(
|
||||
f"GPU memory: {stats.gpu_used_mb:.0f}MB / {stats.gpu_total_mb:.0f}MB "
|
||||
f"({stats.gpu_used_ratio*100:.1f}%)"
|
||||
)
|
||||
elif self._memory_guard:
|
||||
# Fallback to legacy MemoryGuard
|
||||
stats = self._memory_guard.get_memory_stats()
|
||||
|
||||
if stats.gpu_used_ratio > 0.90 and not self._memory_warning_logged:
|
||||
logger.warning(
|
||||
f"GPU memory usage critical: {stats.gpu_used_mb:.0f}MB / {stats.gpu_total_mb:.0f}MB "
|
||||
f"({stats.gpu_used_ratio*100:.1f}%)"
|
||||
)
|
||||
self._memory_warning_logged = True
|
||||
if settings.enable_emergency_cleanup:
|
||||
self._cleanup_unused_models()
|
||||
self._memory_guard.clear_gpu_cache()
|
||||
elif stats.gpu_used_ratio > 0.75:
|
||||
logger.info(
|
||||
f"GPU memory: {stats.gpu_used_mb:.0f}MB / {stats.gpu_total_mb:.0f}MB "
|
||||
f"({stats.gpu_used_ratio*100:.1f}%)"
|
||||
)
|
||||
else:
|
||||
# Fallback to original implementation
|
||||
# No memory monitoring available - use direct paddle query
|
||||
device_id = self.gpu_info.get('device_id', 0)
|
||||
memory_allocated = paddle.device.cuda.memory_allocated(device_id)
|
||||
memory_allocated_mb = memory_allocated / (1024**2)
|
||||
@@ -296,7 +344,6 @@ class OCRService:
|
||||
|
||||
if utilization > 90 and not self._memory_warning_logged:
|
||||
logger.warning(f"GPU memory usage high: {memory_allocated_mb:.0f}MB / {memory_limit_mb}MB ({utilization:.1f}%)")
|
||||
logger.warning("Consider enabling auto_unload_unused_models or reducing batch size")
|
||||
self._memory_warning_logged = True
|
||||
elif utilization > 75:
|
||||
logger.info(f"GPU memory: {memory_allocated_mb:.0f}MB / {memory_limit_mb}MB ({utilization:.1f}%)")
|
||||
@@ -830,8 +877,50 @@ class OCRService:
|
||||
return True
|
||||
|
||||
try:
|
||||
# Use MemoryGuard if available for accurate multi-backend memory queries
|
||||
if self._memory_guard:
|
||||
# Use MemoryPolicyEngine (preferred) or MemoryGuard for memory checks
|
||||
if self._memory_policy_engine:
|
||||
is_available, msg = self._memory_policy_engine.check_memory(required_mb)
|
||||
|
||||
if not is_available:
|
||||
stats = self._memory_policy_engine.get_memory_stats()
|
||||
logger.warning(
|
||||
f"GPU memory check failed: {stats.gpu_free_mb:.0f}MB free, "
|
||||
f"{required_mb}MB required ({stats.gpu_used_ratio*100:.1f}% used)"
|
||||
)
|
||||
|
||||
# Try to free memory
|
||||
logger.info("Attempting memory cleanup before retry...")
|
||||
self._cleanup_unused_models()
|
||||
self._memory_policy_engine.clear_cache()
|
||||
|
||||
# Check again
|
||||
is_available, msg = self._memory_policy_engine.check_memory(required_mb)
|
||||
|
||||
if not is_available:
|
||||
stats = self._memory_policy_engine.get_memory_stats()
|
||||
if enable_fallback and settings.enable_cpu_fallback:
|
||||
logger.warning(
|
||||
f"Insufficient GPU memory ({stats.gpu_free_mb:.0f}MB) after cleanup. "
|
||||
f"Activating CPU fallback mode."
|
||||
)
|
||||
self._activate_cpu_fallback()
|
||||
return True
|
||||
else:
|
||||
logger.error(
|
||||
f"Insufficient GPU memory: {stats.gpu_free_mb:.0f}MB available, "
|
||||
f"{required_mb}MB required"
|
||||
)
|
||||
return False
|
||||
|
||||
stats = self._memory_policy_engine.get_memory_stats()
|
||||
logger.debug(
|
||||
f"GPU memory check passed: {stats.gpu_free_mb:.0f}MB free "
|
||||
f"({stats.gpu_used_ratio*100:.1f}% used)"
|
||||
)
|
||||
return True
|
||||
|
||||
elif self._memory_guard:
|
||||
# Fallback to legacy MemoryGuard
|
||||
is_available, stats = self._memory_guard.check_memory(
|
||||
required_mb=required_mb,
|
||||
device_id=self.gpu_info.get('device_id', 0)
|
||||
@@ -843,23 +932,20 @@ class OCRService:
|
||||
f"{required_mb}MB required ({stats.gpu_used_ratio*100:.1f}% used)"
|
||||
)
|
||||
|
||||
# Try to free memory
|
||||
logger.info("Attempting memory cleanup before retry...")
|
||||
self._cleanup_unused_models()
|
||||
self._memory_guard.clear_gpu_cache()
|
||||
|
||||
# Check again
|
||||
is_available, stats = self._memory_guard.check_memory(required_mb=required_mb)
|
||||
|
||||
if not is_available:
|
||||
# Memory still insufficient after cleanup
|
||||
if enable_fallback and settings.enable_cpu_fallback:
|
||||
logger.warning(
|
||||
f"Insufficient GPU memory ({stats.gpu_free_mb:.0f}MB) after cleanup. "
|
||||
f"Activating CPU fallback mode."
|
||||
)
|
||||
self._activate_cpu_fallback()
|
||||
return True # Continue with CPU
|
||||
return True
|
||||
else:
|
||||
logger.error(
|
||||
f"Insufficient GPU memory: {stats.gpu_free_mb:.0f}MB available, "
|
||||
@@ -937,7 +1023,9 @@ class OCRService:
|
||||
self.gpu_info['fallback_reason'] = 'GPU memory insufficient'
|
||||
|
||||
# Clear GPU cache to free memory
|
||||
if self._memory_guard:
|
||||
if self._memory_policy_engine:
|
||||
self._memory_policy_engine.clear_cache()
|
||||
elif self._memory_guard:
|
||||
self._memory_guard.clear_gpu_cache()
|
||||
|
||||
def _restore_gpu_mode(self):
|
||||
@@ -952,7 +1040,17 @@ class OCRService:
|
||||
return
|
||||
|
||||
# Check if GPU memory is now available
|
||||
if self._memory_guard:
|
||||
if self._memory_policy_engine:
|
||||
is_available, msg = self._memory_policy_engine.check_memory(
|
||||
settings.structure_model_memory_mb
|
||||
)
|
||||
if is_available:
|
||||
logger.info("GPU memory available, restoring GPU mode")
|
||||
self._cpu_fallback_active = False
|
||||
self.use_gpu = True
|
||||
self.gpu_info.pop('cpu_fallback', None)
|
||||
self.gpu_info.pop('fallback_reason', None)
|
||||
elif self._memory_guard:
|
||||
is_available, stats = self._memory_guard.check_memory(
|
||||
required_mb=settings.structure_model_memory_mb
|
||||
)
|
||||
@@ -2204,6 +2302,81 @@ class OCRService:
|
||||
file_path, lang, detect_layout, confidence_threshold, output_dir
|
||||
)
|
||||
|
||||
@property
def orchestrator(self) -> Optional['ProcessingOrchestrator']:
    """
    Expose the ProcessingOrchestrator held by this service.

    Returns:
        The value stored in ``self._orchestrator``; this is None when no
        orchestrator was set up for the service.
    """
    return self._orchestrator
|
||||
|
||||
def process_with_orchestrator(
    self,
    file_path: Path,
    lang: str = 'ch',
    detect_layout: bool = True,
    confidence_threshold: Optional[float] = None,
    output_dir: Optional[Path] = None,
    force_track: Optional[str] = None,
    layout_model: Optional[str] = None,
    preprocessing_mode: Optional[PreprocessingModeEnum] = None,
    preprocessing_config: Optional[PreprocessingConfig] = None,
    table_detection_config: Optional[TableDetectionConfig] = None
) -> Union[UnifiedDocument, Dict]:
    """
    Process a document through the ProcessingOrchestrator.

    The call arguments are packed into a ProcessingConfig and handed to
    ``self._orchestrator.process``. The method falls back to
    ``process_with_dual_track`` (with the original, untouched arguments)
    when no orchestrator is configured, or when the orchestrator result
    carries neither a document nor a legacy result.

    Args:
        file_path: Path to document file
        lang: Language for OCR (if needed)
        detect_layout: Whether to perform layout analysis
        confidence_threshold: Minimum confidence threshold. ``None`` means
            "use ``self.confidence_threshold``"; an explicit ``0.0`` is
            passed through as given.
        output_dir: Optional output directory
        force_track: Force specific track ("ocr" or "direct")
        layout_model: Layout detection model ("default" is used when falsy)
        preprocessing_mode: Layout preprocessing mode ("auto" is used when falsy)
        preprocessing_config: Manual preprocessing config
        table_detection_config: Table detection config

    Returns:
        ``result.document`` when the orchestrator reports success and a
        document is present; otherwise ``result.legacy_result`` if it is
        set; otherwise the return value of ``process_with_dual_track``.
    """
    # Arguments forwarded positionally, and unmodified, to the legacy pipeline.
    # Both fallback paths below must pass exactly the same values.
    legacy_args = (
        file_path, lang, detect_layout, confidence_threshold, output_dir,
        force_track, layout_model, preprocessing_mode, preprocessing_config, table_detection_config
    )

    if not self._orchestrator:
        logger.warning("ProcessingOrchestrator not available, falling back to legacy processing")
        return self.process_with_dual_track(*legacy_args)

    # Only substitute the service default when the caller supplied nothing.
    # A plain `or` would also discard a deliberate threshold of 0.0.
    if confidence_threshold is None:
        effective_threshold = self.confidence_threshold
    else:
        effective_threshold = confidence_threshold

    # Build ProcessingConfig
    config = ProcessingConfig(
        detect_layout=detect_layout,
        confidence_threshold=effective_threshold,
        output_dir=Path(output_dir) if output_dir else None,
        lang=lang,
        layout_model=layout_model or "default",
        preprocessing_mode=preprocessing_mode.value if preprocessing_mode else "auto",
        preprocessing_config=preprocessing_config.dict() if preprocessing_config else None,
        table_detection_config=table_detection_config.dict() if table_detection_config else None,
        force_track=force_track,
        use_dual_track=True
    )

    # Process using orchestrator
    result = self._orchestrator.process(Path(file_path), config)

    if result.success and result.document:
        return result.document
    if result.legacy_result:
        return result.legacy_result

    logger.error(f"Orchestrator processing failed: {result.error}")
    # Fallback to legacy processing
    return self.process_with_dual_track(*legacy_args)
|
||||
|
||||
def get_track_recommendation(self, file_path: Path) -> Optional[ProcessingTrackRecommendation]:
|
||||
"""
|
||||
Get processing track recommendation for a file.
|
||||
|
||||
Reference in New Issue
Block a user