feat: implement hybrid image extraction and memory management

Backend:
- Add hybrid image extraction for Direct track (inline image blocks)
- Add render_inline_image_regions() fallback when OCR doesn't find images
- Add check_document_for_missing_images() for detecting missing images
- Add memory management system (MemoryGuard, ModelManager, ServicePool)
- Update pdf_generator_service to handle HYBRID processing track
- Add ElementType.LOGO for logo extraction

Frontend:
- Fix PDF viewer re-rendering issues with memoization
- Add TaskNotFound component and useTaskValidation hook
- Disable StrictMode due to react-pdf incompatibility
- Fix task detail and results page loading states

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-26 10:56:22 +08:00
parent ba8ddf2b68
commit 1afdb822c3
26 changed files with 8273 additions and 366 deletions
--- a/backend/app/services/ocr_service.py
+++ b/backend/app/services/ocr_service.py
@@ -25,6 +25,7 @@ except ImportError:

 from app.core.config import settings
 from app.services.office_converter import OfficeConverter, OfficeConverterError
+from app.services.memory_manager import get_model_manager, MemoryConfig, MemoryGuard, prediction_context

 # Import dual-track components
 try:
@@ -96,6 +97,26 @@ class OCRService:
        self._model_last_used = {}  # Track last usage time for each model
        self._memory_warning_logged = False

+        # Initialize MemoryGuard for enhanced memory monitoring
+        self._memory_guard = None
+        if settings.enable_model_lifecycle_management:
+            try:
+                memory_config = MemoryConfig(
+                    warning_threshold=settings.memory_warning_threshold,
+                    critical_threshold=settings.memory_critical_threshold,
+                    emergency_threshold=settings.memory_emergency_threshold,
+                    model_idle_timeout_seconds=settings.pp_structure_idle_timeout_seconds,
+                    gpu_memory_limit_mb=settings.gpu_memory_limit_mb,
+                    enable_cpu_fallback=settings.enable_cpu_fallback,
+                )
+                self._memory_guard = MemoryGuard(memory_config)
+                logger.debug("MemoryGuard initialized for OCRService")
+            except Exception as e:
+                logger.warning(f"Failed to initialize MemoryGuard: {e}")
+
+        # Track if CPU fallback was activated
+        self._cpu_fallback_active = False
+
        self._detect_and_configure_gpu()

        # Log GPU optimization settings
@@ -217,53 +238,91 @@ class OCRService:
     def _check_gpu_memory_usage(self):
         """
         Check GPU memory usage and log warnings if approaching limits.
-        Implements memory optimization for RTX 4060 8GB.
+        Uses MemoryGuard when one was created; otherwise falls back to paddle's allocated-memory counter.
         """
         if not self.use_gpu or not settings.enable_memory_optimization:
             return

         try:
-            device_id = self.gpu_info.get('device_id', 0)
-            memory_allocated = paddle.device.cuda.memory_allocated(device_id)
-            memory_allocated_mb = memory_allocated / (1024**2)
-            memory_limit_mb = settings.gpu_memory_limit_mb
+            # Prefer MemoryGuard stats (used/total MB and used ratio) when a guard exists
+            if self._memory_guard:
+                stats = self._memory_guard.get_memory_stats()

-            utilization = (memory_allocated_mb / memory_limit_mb * 100) if memory_limit_mb > 0 else 0
+                # Above 90%: warn once (gated by _memory_warning_logged); above 75%: info
+                if stats.gpu_used_ratio > 0.90 and not self._memory_warning_logged:
+                    logger.warning(
+                        f"GPU memory usage critical: {stats.gpu_used_mb:.0f}MB / {stats.gpu_total_mb:.0f}MB "
+                        f"({stats.gpu_used_ratio*100:.1f}%)"
+                    )
+                    logger.warning("Consider enabling auto_unload_unused_models or reducing batch size")
+                    self._memory_warning_logged = True

-            if utilization > 90 and not self._memory_warning_logged:
-                logger.warning(f"GPU memory usage high: {memory_allocated_mb:.0f}MB / {memory_limit_mb}MB ({utilization:.1f}%)")
-                logger.warning("Consider enabling auto_unload_unused_models or reducing batch size")
-                self._memory_warning_logged = True
-            elif utilization > 75:
-                logger.info(f"GPU memory: {memory_allocated_mb:.0f}MB / {memory_limit_mb}MB ({utilization:.1f}%)")
+                    # Emergency cleanup; nested under the one-shot warning, so it runs only while _memory_warning_logged is False
+                    if settings.enable_emergency_cleanup:
+                        self._cleanup_unused_models()
+                        self._memory_guard.clear_gpu_cache()
+
+                elif stats.gpu_used_ratio > 0.75:
+                    logger.info(
+                        f"GPU memory: {stats.gpu_used_mb:.0f}MB / {stats.gpu_total_mb:.0f}MB "
+                        f"({stats.gpu_used_ratio*100:.1f}%)"
+                    )
+            else:
+                # No MemoryGuard: compare paddle's allocated memory with settings.gpu_memory_limit_mb
+                device_id = self.gpu_info.get('device_id', 0)
+                memory_allocated = paddle.device.cuda.memory_allocated(device_id)
+                memory_allocated_mb = memory_allocated / (1024**2)
+                memory_limit_mb = settings.gpu_memory_limit_mb
+
+                utilization = (memory_allocated_mb / memory_limit_mb * 100) if memory_limit_mb > 0 else 0
+
+                if utilization > 90 and not self._memory_warning_logged:
+                    logger.warning(f"GPU memory usage high: {memory_allocated_mb:.0f}MB / {memory_limit_mb}MB ({utilization:.1f}%)")
+                    logger.warning("Consider enabling auto_unload_unused_models or reducing batch size")
+                    self._memory_warning_logged = True
+                elif utilization > 75:
+                    logger.info(f"GPU memory: {memory_allocated_mb:.0f}MB / {memory_limit_mb}MB ({utilization:.1f}%)")

         except Exception as e:
             logger.debug(f"Memory check failed: {e}")

    def _cleanup_unused_models(self):
        """
-        Clean up unused language models to free GPU memory.
+        Clean up unused models (including PP-StructureV3) to free GPU memory.
        Models idle longer than model_idle_timeout_seconds will be unloaded.
+
+        Note: PP-StructureV3 is NO LONGER exempted from cleanup - it will be
+        unloaded based on pp_structure_idle_timeout_seconds configuration.
        """
        if not settings.auto_unload_unused_models:
            return

        current_time = datetime.now()
-        timeout = settings.model_idle_timeout_seconds
        models_to_remove = []

        for lang, last_used in self._model_last_used.items():
-            if lang == 'structure':  # Don't unload structure engine
-                continue
+            # Use different timeout for structure engine vs language models
+            if lang == 'structure':
+                timeout = settings.pp_structure_idle_timeout_seconds
+            else:
+                timeout = settings.model_idle_timeout_seconds
+
            idle_seconds = (current_time - last_used).total_seconds()
            if idle_seconds > timeout:
                models_to_remove.append(lang)

-        for lang in models_to_remove:
-            if lang in self.ocr_engines:
-                logger.info(f"Unloading idle OCR engine for {lang} (idle {timeout}s)")
-                del self.ocr_engines[lang]
-                del self._model_last_used[lang]
+        for model_key in models_to_remove:
+            if model_key == 'structure':
+                if self.structure_engine is not None:
+                    logger.info(f"Unloading idle PP-StructureV3 engine (idle {settings.pp_structure_idle_timeout_seconds}s)")
+                    self._unload_structure_engine()
+                    if model_key in self._model_last_used:
+                        del self._model_last_used[model_key]
+            elif model_key in self.ocr_engines:
+                logger.info(f"Unloading idle OCR engine for {model_key} (idle {settings.model_idle_timeout_seconds}s)")
+                del self.ocr_engines[model_key]
+                if model_key in self._model_last_used:
+                    del self._model_last_used[model_key]

        if models_to_remove and self.use_gpu:
            # Clear CUDA cache
@@ -273,6 +332,41 @@ class OCRService:
            except Exception as e:
                logger.debug(f"Cache clear failed: {e}")

+    def _unload_structure_engine(self):
+        """
+        Release the PP-StructureV3 engine and reclaim the memory it held.
+
+        Does nothing when no engine is loaded. Any failure is logged as a
+        warning and the engine reference is cleared regardless.
+        """
+        if self.structure_engine is None:
+            return
+
+        # Sub-components whose references are dropped before the engine itself
+        component_names = (
+            'table_engine', 'text_detector', 'text_recognizer', 'layout_predictor',
+        )
+
+        try:
+            for component in component_names:
+                if hasattr(self.structure_engine, component):
+                    setattr(self.structure_engine, component, None)
+
+            # Remove the attribute, then rebind it so later None-checks still work
+            del self.structure_engine
+            self.structure_engine = None
+
+            # Run a garbage-collection pass before flushing the CUDA cache
+            gc.collect()
+            if self.use_gpu:
+                paddle.device.cuda.empty_cache()
+
+            logger.info("PP-StructureV3 engine unloaded successfully")
+
+        except Exception as e:
+            logger.warning(f"Error unloading PP-StructureV3: {e}")
+            self.structure_engine = None
+
    def clear_gpu_cache(self):
        """
        Manually clear GPU memory cache.
@@ -519,46 +613,160 @@ class OCRService:
            logger.warning(f"GPU memory cleanup failed (non-critical): {e}")
            # Don't fail the processing if cleanup fails

-    def check_gpu_memory(self, required_mb: int = 2000) -> bool:
+    def check_gpu_memory(self, required_mb: int = 2000, enable_fallback: bool = True) -> bool:
         """
-        Check if sufficient GPU memory is available.
+        Check if sufficient GPU memory is available using MemoryGuard.
+
+        This method now uses MemoryGuard for accurate memory queries across
+        multiple backends (pynvml, torch, paddle) instead of returning True
+        blindly for PaddlePaddle-only environments.

         Args:
             required_mb: Required memory in MB (default 2000MB for OCR models)
+            enable_fallback: If True and CPU fallback is enabled, switch to CPU mode
+                           when memory is insufficient instead of returning False

         Returns:
-            True if sufficient memory is available or GPU is not used
+            True if memory is sufficient, GPU is not used, CPU fallback was activated, or the check itself raised
         """
-        try:
-            # Check GPU memory using torch if available, otherwise use PaddlePaddle
-            free_memory = None
+        # If not using GPU, always return True
+        if not self.use_gpu:
+            return True

-            if TORCH_AVAILABLE and torch.cuda.is_available():
-                free_memory = torch.cuda.mem_get_info()[0] / 1024**2
-            elif paddle.device.is_compiled_with_cuda():
-                # PaddlePaddle doesn't have direct API to get free memory,
-                # so we rely on cleanup and continue
-                logger.debug("Using PaddlePaddle GPU, memory info not directly available")
+        try:
+            # Use MemoryGuard if available for accurate multi-backend memory queries
+            if self._memory_guard:
+                device_id = self.gpu_info.get('device_id', 0)
+                is_available, stats = self._memory_guard.check_memory(
+                    required_mb=required_mb, device_id=device_id
+                )
+
+                if not is_available:
+                    logger.warning(
+                        f"GPU memory check failed: {stats.gpu_free_mb:.0f}MB free, "
+                        f"{required_mb}MB required ({stats.gpu_used_ratio*100:.1f}% used)"
+                    )
+
+                    # Try to free memory
+                    logger.info("Attempting memory cleanup before retry...")
+                    self._cleanup_unused_models()
+                    self._memory_guard.clear_gpu_cache()
+
+                    # Check again, on the same device as the first query
+                    is_available, stats = self._memory_guard.check_memory(required_mb=required_mb, device_id=device_id)
+
+                    if not is_available:
+                        # Memory still insufficient after cleanup
+                        if enable_fallback and settings.enable_cpu_fallback:
+                            logger.warning(
+                                f"Insufficient GPU memory ({stats.gpu_free_mb:.0f}MB) after cleanup. "
+                                f"Activating CPU fallback mode."
+                            )
+                            self._activate_cpu_fallback()
+                            return True  # Continue with CPU
+                        else:
+                            logger.error(
+                                f"Insufficient GPU memory: {stats.gpu_free_mb:.0f}MB available, "
+                                f"{required_mb}MB required"
+                            )
+                            return False
+
+                logger.debug(
+                    f"GPU memory check passed: {stats.gpu_free_mb:.0f}MB free "
+                    f"({stats.gpu_used_ratio*100:.1f}% used)"
+                )
                 return True

-            if free_memory is not None:
-                if free_memory < required_mb:
-                    logger.warning(f"Low GPU memory: {free_memory:.0f}MB available, {required_mb}MB required")
-                    # Try to free memory
-                    self.cleanup_gpu_memory()
-                    # Check again
-                    if TORCH_AVAILABLE and torch.cuda.is_available():
-                        free_memory = torch.cuda.mem_get_info()[0] / 1024**2
-                        if free_memory < required_mb:
-                            logger.error(f"Insufficient GPU memory after cleanup: {free_memory:.0f}MB")
-                            return False
-                logger.debug(f"GPU memory check passed: {free_memory:.0f}MB available")
+            else:
+                # Fallback to original implementation
+                free_memory = None
+
+                if TORCH_AVAILABLE and torch.cuda.is_available():
+                    free_memory = torch.cuda.mem_get_info()[0] / 1024**2
+                elif paddle.device.is_compiled_with_cuda():
+                    # PaddlePaddle doesn't have direct API to get free memory,
+                    # use allocated memory to estimate
+                    device_id = self.gpu_info.get('device_id', 0)
+                    allocated = paddle.device.cuda.memory_allocated(device_id) / (1024**2)
+                    total = settings.gpu_memory_limit_mb
+                    free_memory = max(0, total - allocated)
+                    logger.debug(f"Estimated free GPU memory: {free_memory:.0f}MB (total: {total}MB, allocated: {allocated:.0f}MB)")
+
+                if free_memory is not None:
+                    if free_memory < required_mb:
+                        logger.warning(f"Low GPU memory: {free_memory:.0f}MB available, {required_mb}MB required")
+                        self.cleanup_gpu_memory()
+
+                        # Recheck
+                        if TORCH_AVAILABLE and torch.cuda.is_available():
+                            free_memory = torch.cuda.mem_get_info()[0] / 1024**2
+                        else:
+                            allocated = paddle.device.cuda.memory_allocated(device_id) / (1024**2)
+                            free_memory = max(0, total - allocated)
+
+                        if free_memory < required_mb:
+                            if enable_fallback and settings.enable_cpu_fallback:
+                                logger.warning(f"Insufficient GPU memory after cleanup. Activating CPU fallback.")
+                                self._activate_cpu_fallback()
+                                return True
+                            else:
+                                logger.error(f"Insufficient GPU memory after cleanup: {free_memory:.0f}MB")
+                                return False
+
+                    logger.debug(f"GPU memory check passed: {free_memory:.0f}MB available")
+
+                return True

-            return True
         except Exception as e:
             logger.warning(f"GPU memory check failed: {e}")
             return True  # Continue processing even if check fails

+    def _activate_cpu_fallback(self):
+        """
+        Switch this service instance to CPU processing.
+
+        Idempotent: returns immediately when the fallback is already active.
+        Otherwise logs the switch, turns off ``use_gpu``, records the fallback
+        in ``gpu_info`` and asks MemoryGuard (if present) to clear the GPU cache.
+        """
+        if self._cpu_fallback_active:
+            return
+
+        for message in (
+            "=== CPU FALLBACK MODE ACTIVATED ===",
+            "GPU memory insufficient, switching to CPU processing",
+            "Performance will be significantly reduced",
+        ):
+            logger.warning(message)
+
+        self._cpu_fallback_active, self.use_gpu = True, False
+        self.gpu_info.update(cpu_fallback=True, fallback_reason='GPU memory insufficient')
+        if self._memory_guard:
+            self._memory_guard.clear_gpu_cache()
+
+    def _restore_gpu_mode(self):
+        """
+        Attempt to leave CPU fallback and resume GPU processing.
+
+        No-op unless the fallback is active, ``gpu_available`` is set and MemoryGuard
+        reports ``settings.structure_model_memory_mb`` as available on the service's
+        GPU device. On success the fallback markers are removed from ``gpu_info``.
+        """
+        # Without a MemoryGuard free memory cannot be verified, so stay on CPU
+        if not (self._cpu_fallback_active and self.gpu_available and self._memory_guard):
+            return
+
+        # Query the configured device, matching the first query in check_gpu_memory()
+        is_available, _stats = self._memory_guard.check_memory(
+            required_mb=settings.structure_model_memory_mb,
+            device_id=self.gpu_info.get('device_id', 0),
+        )
+        if is_available:
+            logger.info("GPU memory available, restoring GPU mode")
+            self._cpu_fallback_active, self.use_gpu = False, True
+            self.gpu_info.pop('cpu_fallback', None)
+            self.gpu_info.pop('fallback_reason', None)
+
    def convert_pdf_to_images(self, pdf_path: Path, output_dir: Path) -> List[Path]:
        """
        Convert PDF to images (one per page)
@@ -626,6 +834,24 @@ class OCRService:
        threshold = confidence_threshold if confidence_threshold is not None else self.confidence_threshold

        try:
+            # Pre-operation memory check: Try to restore GPU if in fallback and memory available
+            if self._cpu_fallback_active:
+                self._restore_gpu_mode()
+                if not self._cpu_fallback_active:
+                    logger.info("GPU mode restored for processing")
+
+            # Initial memory check before starting any heavy processing
+            # Estimate memory requirement based on image type
+            estimated_memory_mb = 2500  # Conservative estimate for full OCR + layout
+            if detect_layout:
+                estimated_memory_mb += 500  # Additional for PP-StructureV3
+
+            if not self.check_gpu_memory(required_mb=estimated_memory_mb, enable_fallback=True):
+                logger.warning(
+                    f"Pre-operation memory check failed ({estimated_memory_mb}MB required). "
+                    f"Processing will attempt to proceed but may encounter issues."
+                )
+
            # Check if file is Office document
            if self.office_converter.is_office_document(image_path):
                logger.info(f"Detected Office document: {image_path.name}, converting to PDF")
@@ -748,9 +974,12 @@ class OCRService:
            # Get OCR engine (for non-PDF images)
            ocr_engine = self.get_ocr_engine(lang)

-            # Check GPU memory before OCR processing
-            if not self.check_gpu_memory(required_mb=1500):
-                logger.warning("Insufficient GPU memory for OCR, attempting to proceed anyway")
+            # Secondary memory check before OCR processing
+            if not self.check_gpu_memory(required_mb=1500, enable_fallback=True):
+                logger.warning(
+                    f"OCR memory check: insufficient GPU memory (1500MB required). "
+                    f"Mode: {'CPU fallback' if self._cpu_fallback_active else 'GPU (low memory)'}"
+                )

            # Get the actual image dimensions that OCR will use
            from PIL import Image
@@ -950,6 +1179,18 @@ class OCRService:
            Tuple of (layout_data, images_metadata)
        """
        try:
+            # Pre-operation memory check for layout analysis
+            if self._cpu_fallback_active:
+                self._restore_gpu_mode()
+                if not self._cpu_fallback_active:
+                    logger.info("GPU mode restored for layout analysis")
+
+            if not self.check_gpu_memory(required_mb=2000, enable_fallback=True):
+                logger.warning(
+                    f"Layout analysis pre-check: insufficient GPU memory (2000MB required). "
+                    f"Mode: {'CPU fallback' if self._cpu_fallback_active else 'GPU'}"
+                )
+
            structure_engine = self._ensure_structure_engine(pp_structure_params)

            # Try enhanced processing first
@@ -998,11 +1239,21 @@ class OCRService:
            # Standard processing (original implementation)
            logger.info(f"Running standard layout analysis on {image_path.name}")

-            # Check GPU memory before processing
-            if not self.check_gpu_memory(required_mb=2000):
-                logger.warning("Insufficient GPU memory for PP-StructureV3, attempting to proceed anyway")
+            # Memory check before PP-StructureV3 processing
+            if not self.check_gpu_memory(required_mb=2000, enable_fallback=True):
+                logger.warning(
+                    f"PP-StructureV3 memory check: insufficient GPU memory (2000MB required). "
+                    f"Mode: {'CPU fallback' if self._cpu_fallback_active else 'GPU (low memory)'}"
+                )

-            results = structure_engine.predict(str(image_path))
+            # Use prediction semaphore to control concurrent predictions
+            # This prevents OOM errors from multiple simultaneous PP-StructureV3.predict() calls
+            with prediction_context(timeout=settings.service_acquire_timeout_seconds) as acquired:
+                if not acquired:
+                    logger.error("Failed to acquire prediction slot (timeout), returning empty layout")
+                    return None, []
+
+                results = structure_engine.predict(str(image_path))

            layout_elements = []
            images_metadata = []
@@ -1254,6 +1505,46 @@ class OCRService:
                    if temp_pdf_path:
                        unified_doc.metadata.original_filename = file_path.name

+                    # HYBRID MODE: Check if Direct track missed images (e.g., inline image blocks)
+                    # If so, use OCR to extract images and merge them into the Direct result
+                    pages_with_missing_images = self.direct_extraction_engine.check_document_for_missing_images(
+                        actual_file_path
+                    )
+                    if pages_with_missing_images:
+                        logger.info(f"Hybrid mode: Direct track missing images on pages {pages_with_missing_images}, using OCR to extract images")
+                        try:
+                            # Run OCR on the file to extract images
+                            ocr_result = self.process_file_traditional(
+                                actual_file_path, lang, detect_layout=True,
+                                confidence_threshold=confidence_threshold,
+                                output_dir=output_dir, pp_structure_params=pp_structure_params
+                            )
+
+                            # Convert OCR result to extract images
+                            ocr_unified = self.ocr_to_unified_converter.convert(
+                                ocr_result, actual_file_path, 0.0, lang
+                            )
+
+                            # Merge OCR-extracted images into Direct track result
+                            images_added = self._merge_ocr_images_into_direct(
+                                unified_doc, ocr_unified, pages_with_missing_images
+                            )
+                            if images_added > 0:
+                                logger.info(f"Hybrid mode: Added {images_added} images from OCR to Direct track result")
+                                unified_doc.metadata.processing_track = ProcessingTrack.HYBRID
+                            else:
+                                # Fallback: OCR didn't find images either, render inline image blocks directly
+                                logger.info("Hybrid mode: OCR didn't find images, falling back to inline image rendering")
+                                images_added = self.direct_extraction_engine.render_inline_image_regions(
+                                    actual_file_path, unified_doc, pages_with_missing_images, output_dir
+                                )
+                                if images_added > 0:
+                                    logger.info(f"Hybrid mode: Rendered {images_added} inline image regions")
+                                    unified_doc.metadata.processing_track = ProcessingTrack.HYBRID
+                        except Exception as e:
+                            logger.warning(f"Hybrid mode image extraction failed: {e}")
+                            # Continue with Direct track result without images
+
            # Use OCR track (either by recommendation or fallback)
            if recommendation.track == "ocr":
                # Use OCR for scanned documents, images, etc.
@@ -1269,17 +1560,19 @@ class OCRService:
                )
                unified_doc.document_id = document_id

-            # Update processing track metadata
-            unified_doc.metadata.processing_track = (
-                ProcessingTrack.DIRECT if recommendation.track == "direct"
-                else ProcessingTrack.OCR
-            )
+            # Update processing track metadata (only if not already set to HYBRID)
+            if unified_doc.metadata.processing_track != ProcessingTrack.HYBRID:
+                unified_doc.metadata.processing_track = (
+                    ProcessingTrack.DIRECT if recommendation.track == "direct"
+                    else ProcessingTrack.OCR
+                )

            # Calculate total processing time
            processing_time = (datetime.now() - start_time).total_seconds()
            unified_doc.metadata.processing_time = processing_time

-            logger.info(f"Document processing completed in {processing_time:.2f}s using {recommendation.track} track")
+            actual_track = unified_doc.metadata.processing_track.value
+            logger.info(f"Document processing completed in {processing_time:.2f}s using {actual_track} track")

            return unified_doc

@@ -1290,6 +1583,75 @@ class OCRService:
                file_path, lang, detect_layout, confidence_threshold, output_dir, pp_structure_params
            )

+    def _merge_ocr_images_into_direct(
+        self,
+        direct_doc: 'UnifiedDocument',
+        ocr_doc: 'UnifiedDocument',
+        pages_with_missing_images: List[int]
+    ) -> int:
+        """
+        Merge OCR-extracted images into Direct track result.
+
+        This is used in hybrid mode when Direct track couldn't extract certain
+        images (like logos composed of inline image blocks). Matching elements
+        are not copied: their ``element_id`` is rewritten in place with a
+        ``hybrid_`` prefix and the same objects are appended to the Direct page.
+
+        Args:
+            direct_doc: UnifiedDocument from Direct track (modified in place)
+            ocr_doc: UnifiedDocument from OCR track
+            pages_with_missing_images: List of page numbers (1-indexed) that need
+                images; a page number listed more than once is processed once
+
+        Returns:
+            Number of images added (on error, the count reached before the failure)
+        """
+        images_added = 0
+
+        try:
+            # Get image element types to look for
+            image_types = {ElementType.FIGURE, ElementType.IMAGE, ElementType.LOGO}
+
+            # Index both documents by page number once instead of rescanning them
+            # for every requested page; setdefault keeps the first page per number
+            direct_pages = {}
+            for page in direct_doc.pages:
+                direct_pages.setdefault(page.page_number, page)
+            ocr_pages = {}
+            for page in ocr_doc.pages:
+                ocr_pages.setdefault(page.page_number, page)
+
+            # dict.fromkeys de-duplicates while preserving order, so a repeated page
+            # number cannot append the same elements twice or double-prefix their IDs
+            for page_num in dict.fromkeys(pages_with_missing_images):
+                direct_page = direct_pages.get(page_num)
+                ocr_page = ocr_pages.get(page_num)
+                if not direct_page or not ocr_page:
+                    continue
+
+                # Extract image elements from OCR page
+                image_elements = [
+                    element for element in ocr_page.elements
+                    if element.type in image_types
+                ]
+                for element in image_elements:
+                    # Assign new element ID to avoid conflicts
+                    new_element_id = f"hybrid_{element.element_id}"
+                    element.element_id = new_element_id
+                    direct_page.elements.append(element)
+                    images_added += 1
+                    logger.debug(f"Added image element {new_element_id} to page {page_num}")
+
+            # Update image count in direct_doc metadata
+            if images_added > 0:
+                current_images = direct_doc.metadata.total_images or 0
+                direct_doc.metadata.total_images = current_images + images_added
+
+        except Exception as e:
+            logger.error(f"Error merging OCR images into Direct track: {e}")
+
+        return images_added
+
    def process_file_traditional(
        self,
        file_path: Path,
@@ -1441,13 +1803,16 @@ class OCRService:
            UnifiedDocument if dual-track is enabled and use_dual_track=True,
            Dict with legacy format otherwise
        """
-        if use_dual_track and self.dual_track_enabled:
-            # Use dual-track processing
+        # Use dual-track processing if:
+        # 1. use_dual_track is True (auto-detection), OR
+        # 2. force_track is specified (explicit track selection)
+        if (use_dual_track or force_track) and self.dual_track_enabled:
+            # Use dual-track processing (or forced track)
            return self.process_with_dual_track(
                file_path, lang, detect_layout, confidence_threshold, output_dir, force_track, pp_structure_params
            )
        else:
-            # Use traditional OCR processing
+            # Use traditional OCR processing (no force_track support)
            return self.process_file_traditional(
                file_path, lang, detect_layout, confidence_threshold, output_dir, pp_structure_params
            )