feat: refactor dual-track architecture (Phases 1-5)

## Backend Changes
- **Service Layer Refactoring**:
  - Add ProcessingOrchestrator for unified document processing
  - Add PDFTableRenderer to extract table-rendering logic
  - Add PDFFontManager for font management with CJK support
  - Add MemoryPolicyEngine, superseding MemoryGuard (73% code reduction; the legacy guard is retained as a fallback; see the sketch after this list)
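
The MemoryPolicyEngine folds MemoryGuard's check/cleanup/fallback behavior into one component. As a minimal sketch of that ladder: `check_memory()` and `clear_cache()` mirror calls visible in the diff below, while `ensure_memory()` itself is a hypothetical helper, not the engine's actual API.

```python
# Sketch of the check -> cleanup -> re-check -> fallback ladder.
# ensure_memory() is hypothetical; check_memory()/clear_cache() mirror
# MemoryPolicyEngine calls that appear in the diff below.
def ensure_memory(engine, required_mb: int, allow_cpu_fallback: bool) -> bool:
    is_available, _ = engine.check_memory(required_mb)
    if is_available:
        return True
    # Not enough GPU memory: release caches, then re-check once.
    engine.clear_cache()
    is_available, _ = engine.check_memory(required_mb)
    if is_available:
        return True
    # Still short: continue on CPU if permitted, otherwise hard-fail.
    return allow_cpu_fallback
```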

- **Bug Fixes**:
  - Fix Direct Track table row span calculation
  - Fix OCR Track image path handling
  - Add cell_boxes coordinate validation (see the sketch after this list)
  - Filter out small decorative images
  - Add covering image detection
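
The cell_boxes validation is roughly of the following shape; a sketch under assumed rules (the function name and exact checks are illustrative, not taken from this commit):

```python
from typing import List, Tuple

Box = Tuple[float, float, float, float]  # (x1, y1, x2, y2) in page coordinates

def validate_cell_boxes(boxes: List[Box], page_w: float, page_h: float) -> List[Box]:
    """Keep boxes with positive area that lie within the page bounds."""
    valid = []
    for x1, y1, x2, y2 in boxes:
        if 0 <= x1 < x2 <= page_w and 0 <= y1 < y2 <= page_h:
            valid.append((x1, y1, x2, y2))
    return valid
```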

## Frontend Changes
- **State Management**:
  - Add TaskStore for centralized task state management
  - Add localStorage persistence for recent tasks
  - Add processing state tracking

- **Type Consolidation**:
  - Merge shared types from api.ts into apiV2.ts
  - Update imports in authStore, uploadStore, ResultsTable, SettingsPage

- **Page Integration**:
  - Integrate TaskStore in ProcessingPage and TaskDetailPage
  - Update useTaskValidation hook with cache sync

## Testing
- Direct Track: edit.pdf (3 pages, 1.281s), edit3.pdf (2 pages, 0.203s)
- Cell boxes validation: 43 valid, 0 invalid
- Table merging: 12 merged cells verified
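
For reference, a hypothetical invocation of the new entry point (the `OCRService` constructor call and module path are assumptions; `process_with_orchestrator` and its parameters match the diff below):

```python
from pathlib import Path

from app.services.ocr_service import OCRService  # module path assumed

service = OCRService()  # constructor arguments omitted; defaults assumed
document = service.process_with_orchestrator(
    Path("edit.pdf"),
    lang="ch",             # OCR language, used only if the OCR track runs
    detect_layout=True,
    force_track="direct",  # pin the Direct Track; omit to auto-detect
)
# Returns a UnifiedDocument on success and falls back to the legacy
# dual-track path if the orchestrator is unavailable or fails.
```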

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

@@ -26,6 +26,10 @@ except ImportError:
 from app.core.config import settings
 from app.services.office_converter import OfficeConverter, OfficeConverterError
 from app.services.memory_manager import get_model_manager, MemoryConfig, MemoryGuard, prediction_context
+from app.services.memory_policy_engine import (
+    MemoryPolicyEngine, MemoryPolicyConfig, get_memory_policy_engine,
+    prediction_context as new_prediction_context
+)
 from app.services.layout_preprocessing_service import (
     get_layout_preprocessing_service,
     LayoutPreprocessingService,
@@ -38,6 +42,9 @@ try:
     from app.services.direct_extraction_engine import DirectExtractionEngine
     from app.services.ocr_to_unified_converter import OCRToUnifiedConverter
     from app.services.unified_document_exporter import UnifiedDocumentExporter
+    from app.services.processing_orchestrator import (
+        ProcessingOrchestrator, ProcessingConfig, ProcessingResult
+    )
     from app.models.unified_document import (
         UnifiedDocument, DocumentMetadata,
         ProcessingTrack, ElementType, DocumentElement, Page, Dimensions,
@@ -48,6 +55,7 @@ except ImportError as e:
     logging.getLogger(__name__).warning(f"Dual-track components not available: {e}")
     DUAL_TRACK_AVAILABLE = False
     UnifiedDocumentExporter = None
+    ProcessingOrchestrator = None

 logger = logging.getLogger(__name__)
@@ -98,11 +106,16 @@ class OCRService:
             )
             self.ocr_to_unified_converter = OCRToUnifiedConverter()
             self.dual_track_enabled = True
-            logger.info("Dual-track processing enabled")
+            # Initialize ProcessingOrchestrator for cleaner flow control
+            self._orchestrator = ProcessingOrchestrator()
+            self._orchestrator.set_ocr_service(self)  # Dependency injection
+            logger.info("Dual-track processing enabled (with ProcessingOrchestrator)")
         else:
             self.document_detector = None
             self.direct_extraction_engine = None
             self.ocr_to_unified_converter = None
+            self._orchestrator = None
             self.dual_track_enabled = False
             logger.info("Dual-track processing not available, using OCR-only mode")
@@ -115,22 +128,39 @@ class OCRService:
         self._model_last_used = {}  # Track last usage time for each model
         self._memory_warning_logged = False

-        # Initialize MemoryGuard for enhanced memory monitoring
+        # Initialize memory management (use new MemoryPolicyEngine)
         self._memory_guard = None
+        self._memory_policy_engine = None
         if settings.enable_model_lifecycle_management:
             try:
-                memory_config = MemoryConfig(
+                # Use new MemoryPolicyEngine (simplified, consolidated)
+                policy_config = MemoryPolicyConfig(
                     warning_threshold=settings.memory_warning_threshold,
                     critical_threshold=settings.memory_critical_threshold,
                     emergency_threshold=settings.memory_emergency_threshold,
                     model_idle_timeout_seconds=settings.pp_structure_idle_timeout_seconds,
                     gpu_memory_limit_mb=settings.gpu_memory_limit_mb,
                     enable_cpu_fallback=settings.enable_cpu_fallback,
+                    max_concurrent_predictions=2,
+                    prediction_timeout_seconds=settings.service_acquire_timeout_seconds,
                 )
-                self._memory_guard = MemoryGuard(memory_config)
-                logger.debug("MemoryGuard initialized for OCRService")
+                self._memory_policy_engine = get_memory_policy_engine(policy_config)
+                logger.info("MemoryPolicyEngine initialized for OCRService")
             except Exception as e:
-                logger.warning(f"Failed to initialize MemoryGuard: {e}")
+                logger.warning(f"Failed to initialize MemoryPolicyEngine: {e}")
+                # Fallback to legacy MemoryGuard
+                try:
+                    memory_config = MemoryConfig(
+                        warning_threshold=settings.memory_warning_threshold,
+                        critical_threshold=settings.memory_critical_threshold,
+                        emergency_threshold=settings.memory_emergency_threshold,
+                        model_idle_timeout_seconds=settings.pp_structure_idle_timeout_seconds,
+                        gpu_memory_limit_mb=settings.gpu_memory_limit_mb,
+                        enable_cpu_fallback=settings.enable_cpu_fallback,
+                    )
+                    self._memory_guard = MemoryGuard(memory_config)
+                    logger.debug("Fallback: MemoryGuard initialized for OCRService")
+                except Exception as e2:
+                    logger.warning(f"Failed to initialize MemoryGuard fallback: {e2}")

         # Track if CPU fallback was activated
         self._cpu_fallback_active = False
@@ -262,9 +292,9 @@ class OCRService:
             return

         try:
-            # Use MemoryGuard if available for better monitoring
-            if self._memory_guard:
-                stats = self._memory_guard.get_memory_stats()
+            # Use MemoryPolicyEngine (preferred) or MemoryGuard for monitoring
+            if self._memory_policy_engine:
+                stats = self._memory_policy_engine.get_memory_stats()

                 # Log based on usage ratio
                 if stats.gpu_used_ratio > 0.90 and not self._memory_warning_logged:
@@ -278,15 +308,33 @@
                     # Trigger emergency cleanup if enabled
                     if settings.enable_emergency_cleanup:
                         self._cleanup_unused_models()
-                        self._memory_guard.clear_gpu_cache()
+                        self._memory_policy_engine.clear_cache()
                 elif stats.gpu_used_ratio > 0.75:
                     logger.info(
                         f"GPU memory: {stats.gpu_used_mb:.0f}MB / {stats.gpu_total_mb:.0f}MB "
                         f"({stats.gpu_used_ratio*100:.1f}%)"
                     )
+            elif self._memory_guard:
+                # Fallback to legacy MemoryGuard
+                stats = self._memory_guard.get_memory_stats()
+                if stats.gpu_used_ratio > 0.90 and not self._memory_warning_logged:
+                    logger.warning(
+                        f"GPU memory usage critical: {stats.gpu_used_mb:.0f}MB / {stats.gpu_total_mb:.0f}MB "
+                        f"({stats.gpu_used_ratio*100:.1f}%)"
+                    )
+                    self._memory_warning_logged = True
+                    if settings.enable_emergency_cleanup:
+                        self._cleanup_unused_models()
+                        self._memory_guard.clear_gpu_cache()
+                elif stats.gpu_used_ratio > 0.75:
+                    logger.info(
+                        f"GPU memory: {stats.gpu_used_mb:.0f}MB / {stats.gpu_total_mb:.0f}MB "
+                        f"({stats.gpu_used_ratio*100:.1f}%)"
+                    )
             else:
-                # Fallback to original implementation
+                # No memory monitoring available - use direct paddle query
                 device_id = self.gpu_info.get('device_id', 0)
                 memory_allocated = paddle.device.cuda.memory_allocated(device_id)
                 memory_allocated_mb = memory_allocated / (1024**2)
@@ -296,7 +344,6 @@
                 if utilization > 90 and not self._memory_warning_logged:
                     logger.warning(f"GPU memory usage high: {memory_allocated_mb:.0f}MB / {memory_limit_mb}MB ({utilization:.1f}%)")
-                    logger.warning("Consider enabling auto_unload_unused_models or reducing batch size")
                     self._memory_warning_logged = True
                 elif utilization > 75:
                     logger.info(f"GPU memory: {memory_allocated_mb:.0f}MB / {memory_limit_mb}MB ({utilization:.1f}%)")
@@ -830,8 +877,50 @@
             return True

         try:
-            # Use MemoryGuard if available for accurate multi-backend memory queries
-            if self._memory_guard:
+            # Use MemoryPolicyEngine (preferred) or MemoryGuard for memory checks
+            if self._memory_policy_engine:
+                is_available, msg = self._memory_policy_engine.check_memory(required_mb)
+                if not is_available:
+                    stats = self._memory_policy_engine.get_memory_stats()
+                    logger.warning(
+                        f"GPU memory check failed: {stats.gpu_free_mb:.0f}MB free, "
+                        f"{required_mb}MB required ({stats.gpu_used_ratio*100:.1f}% used)"
+                    )
+
+                    # Try to free memory
+                    logger.info("Attempting memory cleanup before retry...")
+                    self._cleanup_unused_models()
+                    self._memory_policy_engine.clear_cache()
+
+                    # Check again
+                    is_available, msg = self._memory_policy_engine.check_memory(required_mb)
+                    if not is_available:
+                        stats = self._memory_policy_engine.get_memory_stats()
+                        if enable_fallback and settings.enable_cpu_fallback:
+                            logger.warning(
+                                f"Insufficient GPU memory ({stats.gpu_free_mb:.0f}MB) after cleanup. "
+                                f"Activating CPU fallback mode."
+                            )
+                            self._activate_cpu_fallback()
+                            return True
+                        else:
+                            logger.error(
+                                f"Insufficient GPU memory: {stats.gpu_free_mb:.0f}MB available, "
+                                f"{required_mb}MB required"
+                            )
+                            return False
+
+                stats = self._memory_policy_engine.get_memory_stats()
+                logger.debug(
+                    f"GPU memory check passed: {stats.gpu_free_mb:.0f}MB free "
+                    f"({stats.gpu_used_ratio*100:.1f}% used)"
+                )
+                return True
+            elif self._memory_guard:
+                # Fallback to legacy MemoryGuard
                 is_available, stats = self._memory_guard.check_memory(
                     required_mb=required_mb,
                     device_id=self.gpu_info.get('device_id', 0)
@@ -843,23 +932,20 @@
                         f"{required_mb}MB required ({stats.gpu_used_ratio*100:.1f}% used)"
                     )

                     # Try to free memory
                     logger.info("Attempting memory cleanup before retry...")
                     self._cleanup_unused_models()
                     self._memory_guard.clear_gpu_cache()

                     # Check again
                     is_available, stats = self._memory_guard.check_memory(required_mb=required_mb)
                     if not is_available:
-                        # Memory still insufficient after cleanup
                         if enable_fallback and settings.enable_cpu_fallback:
                             logger.warning(
                                 f"Insufficient GPU memory ({stats.gpu_free_mb:.0f}MB) after cleanup. "
                                 f"Activating CPU fallback mode."
                             )
                             self._activate_cpu_fallback()
-                            return True  # Continue with CPU
+                            return True
                         else:
                             logger.error(
                                 f"Insufficient GPU memory: {stats.gpu_free_mb:.0f}MB available, "
@@ -937,7 +1023,9 @@
         self.gpu_info['fallback_reason'] = 'GPU memory insufficient'

         # Clear GPU cache to free memory
-        if self._memory_guard:
+        if self._memory_policy_engine:
+            self._memory_policy_engine.clear_cache()
+        elif self._memory_guard:
             self._memory_guard.clear_gpu_cache()

     def _restore_gpu_mode(self):
@@ -952,7 +1040,17 @@
             return

         # Check if GPU memory is now available
-        if self._memory_guard:
+        if self._memory_policy_engine:
+            is_available, msg = self._memory_policy_engine.check_memory(
+                settings.structure_model_memory_mb
+            )
+            if is_available:
+                logger.info("GPU memory available, restoring GPU mode")
+                self._cpu_fallback_active = False
+                self.use_gpu = True
+                self.gpu_info.pop('cpu_fallback', None)
+                self.gpu_info.pop('fallback_reason', None)
+        elif self._memory_guard:
             is_available, stats = self._memory_guard.check_memory(
                 required_mb=settings.structure_model_memory_mb
             )
@@ -2204,6 +2302,81 @@
             file_path, lang, detect_layout, confidence_threshold, output_dir
         )

+    @property
+    def orchestrator(self) -> Optional['ProcessingOrchestrator']:
+        """Get the ProcessingOrchestrator instance (if available)."""
+        return self._orchestrator
+
+    def process_with_orchestrator(
+        self,
+        file_path: Path,
+        lang: str = 'ch',
+        detect_layout: bool = True,
+        confidence_threshold: Optional[float] = None,
+        output_dir: Optional[Path] = None,
+        force_track: Optional[str] = None,
+        layout_model: Optional[str] = None,
+        preprocessing_mode: Optional[PreprocessingModeEnum] = None,
+        preprocessing_config: Optional[PreprocessingConfig] = None,
+        table_detection_config: Optional[TableDetectionConfig] = None
+    ) -> Union[UnifiedDocument, Dict]:
+        """
+        Process document using the ProcessingOrchestrator.
+
+        This method provides a cleaner separation of concerns by delegating
+        to the orchestrator, which coordinates the processing pipelines.
+
+        Args:
+            file_path: Path to document file
+            lang: Language for OCR (if needed)
+            detect_layout: Whether to perform layout analysis
+            confidence_threshold: Minimum confidence threshold
+            output_dir: Optional output directory
+            force_track: Force specific track ("ocr" or "direct")
+            layout_model: Layout detection model
+            preprocessing_mode: Layout preprocessing mode
+            preprocessing_config: Manual preprocessing config
+            table_detection_config: Table detection config
+
+        Returns:
+            UnifiedDocument with processed results
+        """
+        if not self._orchestrator:
+            logger.warning("ProcessingOrchestrator not available, falling back to legacy processing")
+            return self.process_with_dual_track(
+                file_path, lang, detect_layout, confidence_threshold, output_dir,
+                force_track, layout_model, preprocessing_mode, preprocessing_config, table_detection_config
+            )
+
+        # Build ProcessingConfig
+        config = ProcessingConfig(
+            detect_layout=detect_layout,
+            confidence_threshold=confidence_threshold or self.confidence_threshold,
+            output_dir=Path(output_dir) if output_dir else None,
+            lang=lang,
+            layout_model=layout_model or "default",
+            preprocessing_mode=preprocessing_mode.value if preprocessing_mode else "auto",
+            preprocessing_config=preprocessing_config.dict() if preprocessing_config else None,
+            table_detection_config=table_detection_config.dict() if table_detection_config else None,
+            force_track=force_track,
+            use_dual_track=True
+        )
+
+        # Process using orchestrator
+        result = self._orchestrator.process(Path(file_path), config)
+
+        if result.success and result.document:
+            return result.document
+        elif result.legacy_result:
+            return result.legacy_result
+        else:
+            logger.error(f"Orchestrator processing failed: {result.error}")
+            # Fallback to legacy processing
+            return self.process_with_dual_track(
+                file_path, lang, detect_layout, confidence_threshold, output_dir,
+                force_track, layout_model, preprocessing_mode, preprocessing_config, table_detection_config
+            )
+
     def get_track_recommendation(self, file_path: Path) -> Optional[ProcessingTrackRecommendation]:
         """
         Get processing track recommendation for a file.