feat: implement hybrid image extraction and memory management

Backend: - Add hybrid image extraction for Direct track (inline image blocks) - Add render_inline_image_regions() fallback when OCR doesn't find images - Add check_document_for_missing_images() for detecting missing images - Add memory management system (MemoryGuard, ModelManager, ServicePool) - Update pdf_generator_service to handle HYBRID processing track - Add ElementType.LOGO for logo extraction Frontend: - Fix PDF viewer re-rendering issues with memoization - Add TaskNotFound component and useTaskValidation hook - Disable StrictMode due to react-pdf incompatibility - Fix task detail and results page loading states 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-26 10:56:22 +08:00
parent ba8ddf2b68
commit 1afdb822c3
26 changed files with 8273 additions and 366 deletions
--- a/backend/app/core/config.py
+++ b/backend/app/core/config.py
@@ -104,6 +104,37 @@ class Settings(BaseSettings):
    enable_cudnn_benchmark: bool = Field(default=True)  # Optimize convolution algorithms
    num_threads: int = Field(default=4)  # CPU threads for preprocessing
    # ===== Enhanced Memory Management Configuration =====
    # Memory thresholds (as ratio of total GPU memory)
    # NOTE(review): the defaults are ordered warning < critical < emergency, but no
    # validation of that ordering is visible in this section - confirm it is
    # enforced elsewhere before relying on it.
    memory_warning_threshold: float = Field(default=0.80)  # 80% - start warning
    memory_critical_threshold: float = Field(default=0.95)  # 95% - throttle operations
    memory_emergency_threshold: float = Field(default=0.98)  # 98% - emergency cleanup
    # Memory monitoring
    memory_check_interval_seconds: int = Field(default=30)  # Background check interval
    enable_memory_alerts: bool = Field(default=True)  # Enable memory alerts
    # Model lifecycle management
    enable_model_lifecycle_management: bool = Field(default=True)  # Use ModelManager
    pp_structure_idle_timeout_seconds: int = Field(default=300)  # Unload PP-Structure after idle
    # The two *_memory_mb values below are estimates (in MB), not measured figures.
    structure_model_memory_mb: int = Field(default=2000)  # Estimated memory for PP-StructureV3
    ocr_model_memory_mb: int = Field(default=500)  # Estimated memory per OCR language model
    # Service pool configuration
    enable_service_pool: bool = Field(default=True)  # Use OCRServicePool
    max_services_per_device: int = Field(default=1)  # Max OCRService per GPU
    max_total_services: int = Field(default=2)  # Max total OCRService instances
    service_acquire_timeout_seconds: float = Field(default=300.0)  # Timeout for acquiring service
    max_queue_size: int = Field(default=50)  # Max pending tasks per device
    # Concurrency control
    max_concurrent_predictions: int = Field(default=2)  # Max concurrent PP-StructureV3 predictions
    enable_cpu_fallback: bool = Field(default=True)  # Fall back to CPU when GPU memory low
    # Emergency recovery
    enable_emergency_cleanup: bool = Field(default=True)  # Auto-cleanup on memory pressure
    # Disabled by default; per the inline note it needs an external supervisor.
    enable_worker_restart: bool = Field(default=False)  # Restart workers on OOM (requires supervisor)
    # ===== File Upload Configuration =====
    max_upload_size: int = Field(default=52428800)  # 50MB
    allowed_extensions: str = Field(default="png,jpg,jpeg,pdf,bmp,tiff,doc,docx,ppt,pptx")
--- a/backend/app/main.py
+++ b/backend/app/main.py
@@ -7,10 +7,103 @@ from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
 from contextlib import asynccontextmanager
 import logging
 import signal
 import sys
 import asyncio
 from pathlib import Path
 from typing import Optional
 from app.core.config import settings
 # =============================================================================
 # Section 6.1: Signal Handlers
 # =============================================================================
 # Flag to indicate graceful shutdown is in progress
 # (set to True by _signal_handler on the first SIGTERM/SIGINT)
 _shutdown_requested = False
 # Event that _signal_handler schedules to be set on the running event loop.
 # NOTE(review): no code in the visible portion of this module awaits this
 # event - confirm whether anything actually consumes it.
 # NOTE(review): this Event and the Lock below are constructed at import time;
 # on Python < 3.10 asyncio primitives bind to the event loop current at
 # construction - confirm the supported Python version.
 _shutdown_complete = asyncio.Event()
 # Track active connections for draining
 _active_connections = 0
 # Guards updates to _active_connections in increment/decrement_connections
 _connection_lock = asyncio.Lock()
 async def increment_connections():
    """Add one to the module-level active-connection counter.

    The update is performed while holding ``_connection_lock``.
    """
    global _active_connections
    async with _connection_lock:
        _active_connections = _active_connections + 1
 async def decrement_connections():
    """Subtract one from the module-level active-connection counter.

    The update is performed while holding ``_connection_lock``.
    """
    global _active_connections
    async with _connection_lock:
        _active_connections = _active_connections - 1
 def get_active_connections() -> int:
    """Return the current value of the active-connection counter.

    The counter is read without taking ``_connection_lock``.
    """
    current = _active_connections
    return current
 def is_shutdown_requested() -> bool:
    """Return the module-level graceful-shutdown flag."""
    requested = _shutdown_requested
    return requested
 def _signal_handler(signum: int, frame) -> None:
    """
    Signal handler for SIGTERM and SIGINT.

    On the first signal it sets the module-level ``_shutdown_requested`` flag
    and, when an event loop is running in the current thread, schedules
    ``_shutdown_complete.set`` on that loop. If the flag is already set when a
    signal arrives, it logs a warning and exits with status 1.

    The actual cleanup is handled by the lifespan context manager.

    Args:
        signum: Number of the received signal.
        frame: Stack frame passed in by the signal machinery (unused).

    Raises:
        SystemExit: With code 1 when a signal arrives after shutdown was
            already requested.
    """
    global _shutdown_requested
    signal_name = signal.Signals(signum).name
    logger = logging.getLogger(__name__)
    if _shutdown_requested:
        logger.warning(f"Received {signal_name} again, forcing immediate exit...")
        sys.exit(1)
    logger.info(f"Received {signal_name}, initiating graceful shutdown...")
    _shutdown_requested = True
    # Mark the shutdown event on the running loop, if there is one.
    # get_running_loop() is used instead of get_event_loop(): it never creates
    # a loop as a side effect and simply raises RuntimeError when none is
    # running, which is the "nothing to notify" case handled below.
    try:
        asyncio.get_running_loop().call_soon_threadsafe(_shutdown_complete.set)
    except RuntimeError:
        pass  # No event loop running (or the loop is already closed)
 def setup_signal_handlers():
    """
    Register ``_signal_handler`` for the process termination signals.

    Handles:
    - SIGTERM: Standard termination signal (from systemd, docker, etc.)
    - SIGINT: Keyboard interrupt (Ctrl+C)

    A ``ValueError`` or ``OSError`` raised during registration is logged as a
    warning and not propagated.
    """
    log = logging.getLogger(__name__)
    registrations = (
        (signal.SIGTERM, "SIGTERM handler installed"),
        (signal.SIGINT, "SIGINT handler installed"),
    )
    try:
        for signum, installed_message in registrations:
            signal.signal(signum, _signal_handler)
            log.info(installed_message)
    except (ValueError, OSError) as e:
        # Signal handling may not be available in all contexts
        log.warning(f"Could not install signal handlers: {e}")
 # Ensure log directory exists before configuring logging
 Path(settings.log_file).parent.mkdir(parents=True, exist_ok=True)
@@ -38,16 +131,91 @@ for logger_name in ['app', 'app.services', 'app.services.pdf_generator_service',
 logger = logging.getLogger(__name__)
 async def drain_connections(timeout: float = 30.0):
    """
    Wait for active connections to complete (connection draining).

    Polls ``get_active_connections()`` roughly once per second until it
    reports zero or ``timeout`` seconds have elapsed on the event loop clock.
    Each wait is capped at the time remaining, so the function does not
    overshoot the timeout by a full polling interval.

    Args:
        timeout: Maximum time to wait for connections to drain
    """
    logger.info(f"Draining connections (timeout={timeout}s)...")
    # We are inside a coroutine, so a running loop is guaranteed to exist.
    loop = asyncio.get_running_loop()
    start_time = loop.time()
    while get_active_connections() > 0:
        elapsed = loop.time() - start_time
        if elapsed >= timeout:
            logger.warning(
                f"Connection drain timeout after {timeout}s. "
                f"{get_active_connections()} connections still active."
            )
            break
        logger.info(f"Waiting for {get_active_connections()} active connections...")
        # Never sleep past the deadline (matters for short or fractional timeouts).
        await asyncio.sleep(min(1.0, timeout - elapsed))
    if get_active_connections() == 0:
        logger.info("All connections drained successfully")
@asynccontextmanager
 async def lifespan(app: FastAPI):
    """Application lifespan events"""
    # Startup
    logger.info("Starting Tool_OCR V2 application...")
    # Set up signal handlers for graceful shutdown
    setup_signal_handlers()
    # Ensure all directories exist
    settings.ensure_directories()
    logger.info("All directories created/verified")
    # Initialize memory management if enabled
    if settings.enable_model_lifecycle_management:
        try:
            from app.services.memory_manager import get_model_manager, MemoryConfig
            memory_config = MemoryConfig(
                warning_threshold=settings.memory_warning_threshold,
                critical_threshold=settings.memory_critical_threshold,
                emergency_threshold=settings.memory_emergency_threshold,
                model_idle_timeout_seconds=settings.pp_structure_idle_timeout_seconds,
                memory_check_interval_seconds=settings.memory_check_interval_seconds,
                enable_auto_cleanup=settings.enable_memory_optimization,
                enable_emergency_cleanup=settings.enable_emergency_cleanup,
                max_concurrent_predictions=settings.max_concurrent_predictions,
                enable_cpu_fallback=settings.enable_cpu_fallback,
                gpu_memory_limit_mb=settings.gpu_memory_limit_mb,
            )
            get_model_manager(memory_config)
            logger.info("Memory management initialized")
        except Exception as e:
            logger.warning(f"Failed to initialize memory management: {e}")
    # Initialize service pool if enabled
    if settings.enable_service_pool:
        try:
            from app.services.service_pool import get_service_pool, PoolConfig
            pool_config = PoolConfig(
                max_services_per_device=settings.max_services_per_device,
                max_total_services=settings.max_total_services,
                acquire_timeout_seconds=settings.service_acquire_timeout_seconds,
                max_queue_size=settings.max_queue_size,
            )
            get_service_pool(pool_config)
            logger.info("OCR service pool initialized")
        except Exception as e:
            logger.warning(f"Failed to initialize service pool: {e}")
    # Initialize prediction semaphore for controlling concurrent PP-StructureV3 predictions
    try:
        from app.services.memory_manager import get_prediction_semaphore
        get_prediction_semaphore(max_concurrent=settings.max_concurrent_predictions)
        logger.info(f"Prediction semaphore initialized (max_concurrent={settings.max_concurrent_predictions})")
    except Exception as e:
        logger.warning(f"Failed to initialize prediction semaphore: {e}")
    logger.info("Application startup complete")
    yield
@@ -55,6 +223,45 @@ async def lifespan(app: FastAPI):
    # Shutdown
    logger.info("Shutting down Tool_OCR application...")
    # Connection draining - wait for active requests to complete
    await drain_connections(timeout=30.0)
    # Shutdown recovery manager if initialized
    try:
        from app.services.memory_manager import shutdown_recovery_manager
        shutdown_recovery_manager()
        logger.info("Recovery manager shutdown complete")
    except Exception as e:
        logger.debug(f"Recovery manager shutdown skipped: {e}")
    # Shutdown service pool
    if settings.enable_service_pool:
        try:
            from app.services.service_pool import shutdown_service_pool
            shutdown_service_pool()
            logger.info("Service pool shutdown complete")
        except Exception as e:
            logger.warning(f"Error shutting down service pool: {e}")
    # Shutdown prediction semaphore
    try:
        from app.services.memory_manager import shutdown_prediction_semaphore
        shutdown_prediction_semaphore()
        logger.info("Prediction semaphore shutdown complete")
    except Exception as e:
        logger.warning(f"Error shutting down prediction semaphore: {e}")
    # Shutdown memory manager
    if settings.enable_model_lifecycle_management:
        try:
            from app.services.memory_manager import shutdown_model_manager
            shutdown_model_manager()
            logger.info("Memory manager shutdown complete")
        except Exception as e:
            logger.warning(f"Error shutting down memory manager: {e}")
    logger.info("Tool_OCR shutdown complete")
 # Create FastAPI application
 app = FastAPI(
@@ -77,9 +284,7 @@ app.add_middleware(
 # Health check endpoint
@app.get("/health")
 async def health_check():
-    """Health check endpoint with GPU status"""
+    """Health check endpoint with GPU status and memory management info"""
    from app.services.ocr_service import OCRService
    response = {
        "status": "healthy",
        "service": "Tool_OCR V2",
@@ -88,8 +293,29 @@ async def health_check():
    # Add GPU status information
    try:
-        # Create temporary OCRService instance to get GPU status
+        # Use service pool if available to avoid creating new instances
-        # In production, this should be a singleton service
+        gpu_status = None
        if settings.enable_service_pool:
            try:
                from app.services.service_pool import get_service_pool
                pool = get_service_pool()
                pool_stats = pool.get_pool_stats()
                response["service_pool"] = pool_stats
                # Get GPU status from first available service
                for device, services in pool.services.items():
                    for pooled in services:
                        if hasattr(pooled.service, 'get_gpu_status'):
                            gpu_status = pooled.service.get_gpu_status()
                            break
                    if gpu_status:
                        break
            except Exception as e:
                logger.debug(f"Could not get service pool stats: {e}")
        # Fallback: create temporary instance if no pool or no service available
        if gpu_status is None:
            from app.services.ocr_service import OCRService
            ocr_service = OCRService()
            gpu_status = ocr_service.get_gpu_status()
@@ -120,6 +346,15 @@ async def health_check():
            "error": str(e),
        }
    # Add memory management status
    if settings.enable_model_lifecycle_management:
        try:
            from app.services.memory_manager import get_model_manager
            model_manager = get_model_manager()
            response["memory_management"] = model_manager.get_model_stats()
        except Exception as e:
            logger.debug(f"Could not get memory management stats: {e}")
    return response
--- a/backend/app/models/unified_document.py
+++ b/backend/app/models/unified_document.py
@@ -212,26 +212,44 @@ class TableData:
        if self.caption:
            html.append(f"<caption>{self.caption}</caption>")
-        # Group cells by row
+        # Group cells by row and column for quick lookup
-        rows_data = {}
+        cell_map = {}
        for cell in self.cells:
-            if cell.row not in rows_data:
+            cell_map[(cell.row, cell.col)] = cell
                rows_data[cell.row] = []
            rows_data[cell.row].append(cell)
-        # Generate HTML
+        # Track which cells are covered by row/col spans
        covered = set()
        for cell in self.cells:
            if cell.row_span > 1 or cell.col_span > 1:
                for r in range(cell.row, cell.row + cell.row_span):
                    for c in range(cell.col, cell.col + cell.col_span):
                        if (r, c) != (cell.row, cell.col):
                            covered.add((r, c))
        # Generate HTML with proper column filling
        for row_idx in range(self.rows):
            html.append("<tr>")
-            if row_idx in rows_data:
+            for col_idx in range(self.cols):
-                for cell in sorted(rows_data[row_idx], key=lambda c: c.col):
+                # Skip cells covered by row/col spans
                if (row_idx, col_idx) in covered:
                    continue
                cell = cell_map.get((row_idx, col_idx))
                tag = "th" if row_idx == 0 and self.headers else "td"
                if cell:
                    span_attrs = []
                    if cell.row_span > 1:
                        span_attrs.append(f'rowspan="{cell.row_span}"')
                    if cell.col_span > 1:
                        span_attrs.append(f'colspan="{cell.col_span}"')
                    span_str = " ".join(span_attrs)
-                    tag = "th" if row_idx == 0 and self.headers else "td"
+                    content = cell.content if cell.content else ""
-                    html.append(f'<{tag} {span_str}>{cell.content}</{tag}>')
+                    html.append(f'<{tag} {span_str}>{content}</{tag}>')
                else:
                    # Fill in empty cell for missing positions
                    html.append(f'<{tag}></{tag}>')
            html.append("</tr>")
        html.append("</table>")
--- a/backend/app/routers/tasks.py
+++ b/backend/app/routers/tasks.py
@@ -39,6 +39,7 @@ from app.schemas.task import (
 from app.services.task_service import task_service
 from app.services.file_access_service import file_access_service
 from app.services.ocr_service import OCRService
 from app.services.service_pool import get_service_pool, PoolConfig
 # Import dual-track components
 try:
@@ -47,6 +48,13 @@ try:
 except ImportError:
    DUAL_TRACK_AVAILABLE = False
 # Service pool availability
 # The flag is cleared only when importing get_model_manager from
 # app.services.memory_manager raises ImportError.
 # NOTE(review): the probe targets memory_manager, while
 # app.services.service_pool itself is imported unconditionally near the top
 # of this module - confirm that this is the intended availability check.
 SERVICE_POOL_AVAILABLE = True
 try:
    from app.services.memory_manager import get_model_manager
 except ImportError:
    SERVICE_POOL_AVAILABLE = False
 logger = logging.getLogger(__name__)
 router = APIRouter(prefix="/api/v2/tasks", tags=["Tasks"])
@@ -63,7 +71,10 @@ def process_task_ocr(
    pp_structure_params: Optional[dict] = None
 ):
    """
-    Background task to process OCR for a task with dual-track support
+    Background task to process OCR for a task with dual-track support.
    Uses OCRServicePool to acquire a shared service instance instead of
    creating a new one, preventing GPU memory proliferation.
    Args:
        task_id: Task UUID string
@@ -80,6 +91,7 @@ def process_task_ocr(
    db = SessionLocal()
    start_time = datetime.now()
    pooled_service = None
    try:
        logger.info(f"Starting OCR processing for task {task_id}, file: {filename}")
@@ -91,16 +103,39 @@ def process_task_ocr(
            logger.error(f"Task {task_id} not found in database")
            return
-        # Initialize OCR service
+        # Acquire OCR service from pool (or create new if pool disabled)
        ocr_service = None
        if settings.enable_service_pool and SERVICE_POOL_AVAILABLE:
            try:
                service_pool = get_service_pool()
                pooled_service = service_pool.acquire(
                    device="GPU:0",
                    timeout=settings.service_acquire_timeout_seconds,
                    task_id=task_id
                )
                if pooled_service:
                    ocr_service = pooled_service.service
                    logger.info(f"Acquired OCR service from pool for task {task_id}")
                else:
                    logger.warning(f"Timeout acquiring service from pool, creating new instance")
            except Exception as e:
                logger.warning(f"Failed to acquire service from pool: {e}, creating new instance")
        # Fallback: create new instance if pool acquisition failed
        if ocr_service is None:
            logger.info("Creating new OCRService instance (pool disabled or unavailable)")
            ocr_service = OCRService()
        # Create result directory before OCR processing (needed for saving extracted images)
        result_dir = Path(settings.result_dir) / task_id
        result_dir.mkdir(parents=True, exist_ok=True)
-        # Process the file with OCR (use dual-track if available)
+        # Process the file with OCR
-        if use_dual_track and hasattr(ocr_service, 'process'):
+        # Use dual-track processing if:
-            # Use new dual-track processing
+        # 1. use_dual_track is True (auto-detection)
        # 2. OR force_track is specified (explicit track selection)
        if (use_dual_track or force_track) and hasattr(ocr_service, 'process'):
            # Use new dual-track processing (or forced track)
            ocr_result = ocr_service.process(
                file_path=Path(file_path),
                lang=language,
@@ -111,7 +146,7 @@ def process_task_ocr(
                pp_structure_params=pp_structure_params
            )
        else:
-            # Fall back to traditional processing
+            # Fall back to traditional processing (no force_track support)
            ocr_result = ocr_service.process_image(
                image_path=Path(file_path),
                lang=language,
@@ -131,6 +166,16 @@ def process_task_ocr(
            source_file_path=Path(file_path)
        )
        # Release service back to pool (success case)
        if pooled_service:
            try:
                service_pool = get_service_pool()
                service_pool.release(pooled_service, error=None)
                logger.info(f"Released OCR service back to pool for task {task_id}")
                pooled_service = None  # Prevent double release in finally
            except Exception as e:
                logger.warning(f"Failed to release service to pool: {e}")
        # Close old session and create fresh one to avoid MySQL timeout
        # (long OCR processing may cause connection to become stale)
        db.close()
@@ -158,6 +203,15 @@ def process_task_ocr(
    except Exception as e:
        logger.exception(f"OCR processing failed for task {task_id}")
        # Release service back to pool with error
        if pooled_service:
            try:
                service_pool = get_service_pool()
                service_pool.release(pooled_service, error=e)
                pooled_service = None
            except Exception as release_error:
                logger.warning(f"Failed to release service to pool: {release_error}")
        # Update task status to failed (direct database update)
        try:
            task = db.query(Task).filter(Task.id == task_db_id).first()
@@ -170,6 +224,13 @@ def process_task_ocr(
            logger.error(f"Failed to update task status: {update_error}")
    finally:
        # Ensure service is released in case of any missed release
        if pooled_service:
            try:
                service_pool = get_service_pool()
                service_pool.release(pooled_service, error=None)
            except Exception:
                pass
        db.close()
@@ -330,7 +391,13 @@ async def get_task(
                with open(result_path) as f:
                    result_data = json.load(f)
                    metadata = result_data.get("metadata", {})
-                    processing_track = metadata.get("processing_track")
+                    track_str = metadata.get("processing_track")
                    # Convert string to enum to avoid Pydantic serialization warning
                    if track_str:
                        try:
                            processing_track = ProcessingTrackEnum(track_str)
                        except ValueError:
                            processing_track = None
        except Exception:
            pass  # Silently ignore errors reading the result file
--- a/backend/app/services/direct_extraction_engine.py
+++ b/backend/app/services/direct_extraction_engine.py
@@ -247,9 +247,11 @@ class DirectExtractionEngine:
            element_counter += len(image_elements)
        # Extract vector graphics (charts, diagrams) from drawing commands
        # Pass table_bboxes to filter out table border drawings before clustering
        if self.enable_image_extraction:
            vector_elements = self._extract_vector_graphics(
-                page, page_num, document_id, element_counter, output_dir
+                page, page_num, document_id, element_counter, output_dir,
                table_bboxes=table_bboxes
            )
            elements.extend(vector_elements)
            element_counter += len(vector_elements)
@@ -705,35 +707,47 @@ class DirectExtractionEngine:
                y1=bbox_data[3]
            )
-            # Extract column widths from table cells
+            # Extract column widths from table cells by analyzing X boundaries
            column_widths = []
            if hasattr(table, 'cells') and table.cells:
-                # Group cells by column
+                # Collect all unique X boundaries (both left and right edges)
-                cols_x = {}
+                x_boundaries = set()
                for cell in table.cells:
-                    col_idx = None
+                    x_boundaries.add(round(cell[0], 1))  # x0 (left edge)
-                    # Determine column index by x0 position
+                    x_boundaries.add(round(cell[2], 1))  # x1 (right edge)
                    for idx, x0 in enumerate(sorted(set(c[0] for c in table.cells))):
                        if abs(cell[0] - x0) < 1.0:  # Within 1pt tolerance
                            col_idx = idx
                            break
-                    if col_idx is not None:
+                # Sort boundaries to get column edges
-                        if col_idx not in cols_x:
+                sorted_x = sorted(x_boundaries)
                            cols_x[col_idx] = {'x0': cell[0], 'x1': cell[2]}
                        else:
                            cols_x[col_idx]['x1'] = max(cols_x[col_idx]['x1'], cell[2])
-                # Calculate width for each column
+                # Calculate column widths from adjacent boundaries
-                for col_idx in sorted(cols_x.keys()):
+                if len(sorted_x) >= 2:
-                    width = cols_x[col_idx]['x1'] - cols_x[col_idx]['x0']
+                    column_widths = [sorted_x[i+1] - sorted_x[i] for i in range(len(sorted_x)-1)]
-                    column_widths.append(width)
+                    logger.debug(f"Calculated column widths from {len(sorted_x)} boundaries: {column_widths}")
            # Extract row heights from table cells by analyzing Y boundaries
            row_heights = []
            if hasattr(table, 'cells') and table.cells:
                # Collect all unique Y boundaries (both top and bottom edges)
                y_boundaries = set()
                for cell in table.cells:
                    y_boundaries.add(round(cell[1], 1))  # y0 (top edge)
                    y_boundaries.add(round(cell[3], 1))  # y1 (bottom edge)
                # Sort boundaries to get row edges
                sorted_y = sorted(y_boundaries)
                # Calculate row heights from adjacent boundaries
                if len(sorted_y) >= 2:
                    row_heights = [sorted_y[i+1] - sorted_y[i] for i in range(len(sorted_y)-1)]
                    logger.debug(f"Calculated row heights from {len(sorted_y)} boundaries: {row_heights}")
            # Create table cells
            # Note: Include ALL cells (even empty ones) to preserve table structure
            # This is critical for correct HTML generation and PDF rendering
            cells = []
            for row_idx, row in enumerate(data):
                for col_idx, cell_text in enumerate(row):
-                    if cell_text:
+                    # Always add cell, even if empty, to maintain table structure
                    cells.append(TableCell(
                        row=row_idx,
                        col=col_idx,
@@ -748,8 +762,13 @@ class DirectExtractionEngine:
                headers=data[0] if data else None  # Assume first row is header
            )
-            # Store column widths in metadata
+            # Store column widths and row heights in metadata
-            metadata = {"column_widths": column_widths} if column_widths else None
+            metadata = {}
            if column_widths:
                metadata["column_widths"] = column_widths
            if row_heights:
                metadata["row_heights"] = row_heights
            metadata = metadata if metadata else None
            return DocumentElement(
                element_id=f"table_{page_num}_{counter}",
@@ -978,7 +997,9 @@ class DirectExtractionEngine:
                    image_filename = f"{document_id}_p{page_num}_img{img_idx}.png"
                    image_path = output_dir / image_filename
                    pix.save(str(image_path))
-                    image_data["saved_path"] = str(image_path)
+                    # Store relative filename only (consistent with OCR track)
                    # PDF generator will join with result_dir to get full path
                    image_data["saved_path"] = image_filename
                    logger.debug(f"Saved image to {image_path}")
                element = DocumentElement(
@@ -1001,12 +1022,272 @@ class DirectExtractionEngine:
        return elements
    def has_missing_images(self, page: fitz.Page) -> bool:
        """
        Report whether a page probably holds graphics that get_images() missed.

        Some graphics (such as logos) are composed of many small inline image
        blocks. Those appear as type=1 blocks in the page text dict, but are
        not reported by page.get_images().

        Args:
            page: PyMuPDF page object

        Returns:
            True when get_images() found nothing but the page carries at least
            10 inline image blocks; False otherwise, including on any error.
        """
        try:
            # Standard images are handled by the regular extraction path
            if page.get_images():
                return False

            # Collect the inline image blocks (type=1) from the text dict
            inline_blocks = [
                blk
                for blk in page.get_text("dict", sort=True).get("blocks", [])
                if blk.get("type") == 1
            ]
            image_block_count = len(inline_blocks)

            # Only a large number of inline blocks hints at a logo or graphic
            if image_block_count < 10:
                return False

            logger.info(f"Detected {image_block_count} inline image blocks - may need OCR for image extraction")
            return True
        except Exception as e:
            logger.warning(f"Error checking for missing images: {e}")
            return False
    def check_document_for_missing_images(self, pdf_path: Path) -> List[int]:
        """
        Check a PDF document for pages that likely have missing images.

        Opens the PDF and runs has_missing_images() on every page, i.e. looks
        for inline image blocks that get_images() did not report.

        Args:
            pdf_path: Path to the PDF file

        Returns:
            List of page numbers (1-indexed) that have missing images. Pages
            collected before an error are still returned; if the document
            cannot be opened the list is empty.
        """
        pages_with_missing_images: List[int] = []
        try:
            doc = fitz.open(str(pdf_path))
            try:
                for page_index in range(len(doc)):
                    if self.has_missing_images(doc[page_index]):
                        pages_with_missing_images.append(page_index + 1)  # 1-indexed
            finally:
                # Release the file handle even when a page access fails
                doc.close()

            if pages_with_missing_images:
                logger.info(f"Document has missing images on pages: {pages_with_missing_images}")
        except Exception as e:
            logger.error(f"Error checking document for missing images: {e}")
        return pages_with_missing_images
    def render_inline_image_regions(
        self,
        pdf_path: Path,
        unified_doc: 'UnifiedDocument',
        pages: List[int],
        output_dir: Optional[Path] = None
    ) -> int:
        """
        Render inline image regions and add them to the unified document.

        This is a fallback when OCR doesn't detect images. Inline image blocks
        (type=1 in the page text dict) are clustered into regions; each region
        is rendered to a pixmap and appended to the matching page of
        unified_doc as an ElementType.LOGO element.

        Args:
            pdf_path: Path to the PDF file
            unified_doc: UnifiedDocument to add images to
            pages: List of page numbers (1-indexed) to process
            output_dir: Directory to save rendered images; when omitted the
                elements are still added but no PNG file is written

        Returns:
            Number of images added
        """
        images_added = 0
        try:
            doc = fitz.open(str(pdf_path))
            try:
                for page_num in pages:
                    if page_num < 1 or page_num > len(doc):
                        continue
                    page = doc[page_num - 1]  # 0-indexed
                    page_rect = page.rect

                    # Get inline image blocks
                    text_dict = page.get_text("dict", sort=True)
                    image_blocks = []
                    for block in text_dict.get("blocks", []):
                        if block.get("type") == 1:  # Image block
                            block_bbox = block.get("bbox")
                            if block_bbox:
                                image_blocks.append(fitz.Rect(block_bbox))

                    if len(image_blocks) < 5:  # Reduced from 10
                        logger.debug(f"Page {page_num}: Only {len(image_blocks)} inline image blocks, skipping")
                        continue
                    logger.info(f"Page {page_num}: Found {len(image_blocks)} inline image blocks")

                    # Cluster nearby image blocks
                    regions = self._cluster_nearby_rects(image_blocks, tolerance=5.0)
                    logger.info(f"Page {page_num}: Clustered into {len(regions)} regions")

                    # Find the corresponding page in unified_doc
                    target_page = None
                    for p in unified_doc.pages:
                        if p.page_number == page_num:
                            target_page = p
                            break
                    if not target_page:
                        continue

                    # Regions starting below the top 40% of the page are skipped
                    # (treated as table area); the cutoff is constant per page.
                    page_40_pct = page_rect.height * 0.4

                    for region_idx, region_rect in enumerate(regions):
                        logger.info(f"Page {page_num} region {region_idx}: {region_rect} (w={region_rect.width:.1f}, h={region_rect.height:.1f})")

                        # Skip very small regions
                        if region_rect.width < 30 or region_rect.height < 30:
                            logger.info(f"  -> Skipped: too small (min 30x30)")
                            continue

                        # Regions that START in the top portion are allowed
                        if region_rect.y0 > page_40_pct:
                            logger.info(f"  -> Skipped: y0={region_rect.y0:.1f} > 40% of page ({page_40_pct:.1f})")
                            continue

                        logger.info(f"Rendering inline image region {region_idx} on page {page_num}: {region_rect}")
                        try:
                            # Add small padding, clamped to the page
                            clip_rect = region_rect + (-2, -2, 2, 2)
                            clip_rect.intersect(page_rect)

                            # Render at 2x resolution
                            mat = fitz.Matrix(2, 2)
                            pix = page.get_pixmap(clip=clip_rect, matrix=mat, alpha=False)

                            # Create bounding box
                            bbox = BoundingBox(
                                x0=clip_rect.x0,
                                y0=clip_rect.y0,
                                x1=clip_rect.x1,
                                y1=clip_rect.y1
                            )
                            image_data = {
                                "width": pix.width,
                                "height": pix.height,
                                "colorspace": "rgb",
                                "type": "inline_region"
                            }

                            # Save image if output directory provided
                            if output_dir:
                                output_dir.mkdir(parents=True, exist_ok=True)
                                doc_id = unified_doc.document_id or "unknown"
                                image_filename = f"{doc_id}_p{page_num}_logo{region_idx}.png"
                                image_path = output_dir / image_filename
                                pix.save(str(image_path))
                                # Store the relative filename only
                                image_data["saved_path"] = image_filename
                                logger.info(f"Saved inline image region to {image_path}")

                            element = DocumentElement(
                                element_id=f"logo_{page_num}_{region_idx}",
                                type=ElementType.LOGO,
                                content=image_data,
                                bbox=bbox,
                                confidence=0.9,
                                metadata={
                                    "region_type": "inline_image_blocks",
                                    "block_count": len(image_blocks)
                                }
                            )
                            target_page.elements.append(element)
                            images_added += 1
                            pix = None  # Free memory
                        except Exception as e:
                            logger.error(f"Error rendering inline image region {region_idx}: {e}")
            finally:
                # Always release the PDF handle, even if a page fails mid-way
                doc.close()

            if images_added > 0:
                current_images = unified_doc.metadata.total_images or 0
                unified_doc.metadata.total_images = current_images + images_added
                logger.info(f"Added {images_added} inline image regions to document")
        except Exception as e:
            logger.error(f"Error rendering inline image regions: {e}")
        return images_added
    def _cluster_nearby_rects(self, rects: List[fitz.Rect], tolerance: float = 5.0) -> List[fitz.Rect]:
        """
        Merge rectangles lying within `tolerance` of each other into regions.

        Args:
            rects: Rectangles to cluster
            tolerance: Padding applied on every side of a region before
                testing whether another rectangle touches it

        Returns:
            List of merged rectangles (empty when `rects` is empty)
        """
        if not rects:
            return []

        # Growing a region by this amount turns "nearby" into "intersecting"
        grow = (-tolerance, -tolerance, tolerance, tolerance)

        # First pass: greedily attach each rect (top-to-bottom, left-to-right)
        # to the first region it touches, or start a new region.
        regions = []
        for candidate in sorted(rects, key=lambda r: (r.y0, r.x0)):
            for idx, region in enumerate(regions):
                if (region + grow).intersects(candidate):
                    regions[idx] = region | candidate
                    break
            else:
                regions.append(candidate)

        # Second pass: regions that grew may now touch each other, so keep
        # consolidating until a full sweep absorbs nothing.
        while True:
            absorbed = set()
            consolidated = []
            for i, seed in enumerate(regions):
                if i in absorbed:
                    continue
                current = seed
                for j in range(i + 1, len(regions)):
                    if j in absorbed:
                        continue
                    if (current + grow).intersects(regions[j]):
                        current = current | regions[j]
                        absorbed.add(j)
                consolidated.append(current)
            regions = consolidated
            if not absorbed:
                return regions
    def _extract_vector_graphics(self,
                                 page: fitz.Page,
                                 page_num: int,
                                 document_id: str,
                                 counter: int,
-                                 output_dir: Optional[Path]) -> List[DocumentElement]:
+                                 output_dir: Optional[Path],
                                 table_bboxes: Optional[List[BoundingBox]] = None) -> List[DocumentElement]:
        """
        Extract vector graphics (charts, diagrams) from page.
@@ -1020,6 +1301,7 @@ class DirectExtractionEngine:
            document_id: Unique document identifier
            counter: Starting counter for element IDs
            output_dir: Directory to save rendered graphics
            table_bboxes: List of table bounding boxes to exclude table border drawings
        Returns:
            List of DocumentElement objects representing vector graphics
@@ -1034,16 +1316,25 @@ class DirectExtractionEngine:
            logger.debug(f"Page {page_num} contains {len(drawings)} vector drawing commands")
            # Filter out drawings that are likely table borders
            # Table borders are typically thin rectangular lines within table regions
            non_table_drawings = self._filter_table_border_drawings(drawings, table_bboxes)
            logger.debug(f"After filtering table borders: {len(non_table_drawings)} drawings remain")
            if not non_table_drawings:
                logger.debug("All drawings appear to be table borders, no vector graphics to extract")
                return elements
            # Cluster drawings into groups (charts, diagrams, etc.)
            try:
-                # PyMuPDF's cluster_drawings() groups nearby drawings automatically
+                # Use custom clustering that only considers non-table drawings
-                drawing_clusters = page.cluster_drawings()
+                drawing_clusters = self._cluster_non_table_drawings(page, non_table_drawings)
                logger.debug(f"Clustered into {len(drawing_clusters)} groups")
            except (AttributeError, TypeError) as e:
                # cluster_drawings not available or has different signature
                # Fallback: try to identify charts by analyzing drawing density
-                logger.warning(f"cluster_drawings() failed ({e}), using fallback method")
+                logger.warning(f"Custom clustering failed ({e}), using fallback method")
-                drawing_clusters = self._cluster_drawings_fallback(page, drawings)
+                drawing_clusters = self._cluster_drawings_fallback(page, non_table_drawings)
            for cluster_idx, bbox in enumerate(drawing_clusters):
                # Ignore small regions (likely noise or separator lines)
@@ -1148,6 +1439,124 @@ class DirectExtractionEngine:
        return filtered_clusters
    def _filter_table_border_drawings(self, drawings: list, table_bboxes: Optional[List[BoundingBox]]) -> list:
        """
        Filter out drawings that are likely table borders.

        A drawing is treated as a table border when it is BOTH:
        - a thin line (width or height < 5pt), and
        - located within or on the edge of a table bounding box, i.e. more
          than 80% of its area lies inside a table rect grown by 5pt. For
          zero-area drawings (hairlines whose rect has no width or height) the
          whole rect must lie inside the grown table rect.

        Args:
            drawings: List of PyMuPDF drawing objects
            table_bboxes: List of table bounding boxes

        Returns:
            List of drawings that are NOT table borders (likely logos, charts, etc.).
            The input list is returned unchanged when no table boxes are given;
            drawings without a 'rect' are dropped.
        """
        if not table_bboxes:
            return drawings

        # Expand table rects slightly to include border lines on edges
        expanded_tables = [
            fitz.Rect(tb.x0, tb.y0, tb.x1, tb.y1) + (-5, -5, 5, 5)
            for tb in table_bboxes
        ]

        def overlaps(draw_rect, table_rect) -> bool:
            """Return True if draw_rect lies (mostly) inside table_rect."""
            area = draw_rect.get_area()
            if area > 0:
                intersection = draw_rect & table_rect
                if intersection.is_empty:
                    return False
                # If drawing is mostly inside table region, it's likely a border
                return intersection.get_area() / area > 0.8
            # Degenerate (zero-area) rects always yield an empty intersection,
            # so an area ratio cannot be used; compare coordinates instead.
            return (
                table_rect.x0 <= draw_rect.x0
                and draw_rect.x1 <= table_rect.x1
                and table_rect.y0 <= draw_rect.y0
                and draw_rect.y1 <= table_rect.y1
            )

        non_table_drawings = []
        table_border_count = 0
        for drawing in drawings:
            rect = drawing.get('rect')
            if not rect:
                continue
            draw_rect = fitz.Rect(rect)

            # Check if this drawing is a thin line (potential table border)
            is_thin_line = draw_rect.width < 5 or draw_rect.height < 5

            # Keep drawing if it's NOT (thin line AND overlapping table)
            # This keeps: logos (complex shapes), charts outside tables, etc.
            if is_thin_line and any(overlaps(draw_rect, t) for t in expanded_tables):
                table_border_count += 1
            else:
                non_table_drawings.append(drawing)

        if table_border_count > 0:
            logger.debug(f"Filtered out {table_border_count} table border drawings")
        return non_table_drawings
    def _cluster_non_table_drawings(self, page: fitz.Page, drawings: list) -> list:
        """
        Group pre-filtered (non-table) drawings into clustered regions.

        Each drawing's rect is merged into the first existing cluster lying
        within 10pt of it, otherwise it starts a new cluster. Clusters smaller
        than 30x30 are discarded as noise.

        Args:
            page: PyMuPDF page object (not used by the clustering itself)
            drawings: Pre-filtered list of drawings (excluding table borders)

        Returns:
            List of fitz.Rect representing clustered drawing regions
        """
        if not drawings:
            return []

        # Gather the bounding box of every drawing that has one
        raw_rects = (item.get('rect') for item in drawings)
        bboxes = [fitz.Rect(r) for r in raw_rects if r]
        if not bboxes:
            return []

        # Conservative tolerance (the fallback clustering used 20) so that
        # distant graphics are not grouped together
        tolerance = 10
        pad = (-tolerance, -tolerance, tolerance, tolerance)

        clusters = []
        for box in bboxes:
            for pos, existing in enumerate(clusters):
                if (existing + pad).intersects(box):
                    # Close enough: grow the cluster to the union
                    clusters[pos] = existing | box
                    break
            else:
                # Nothing nearby: this box seeds a new cluster
                clusters.append(box)

        # Drop noise; 30x30 minimum keeps small logos (default was 50x50)
        filtered_clusters = []
        for cluster in clusters:
            if cluster.width >= 30 and cluster.height >= 30:
                filtered_clusters.append(cluster)

        logger.debug(f"Non-table clustering: {len(bboxes)} drawings -> {len(clusters)} clusters -> {len(filtered_clusters)} filtered")
        return filtered_clusters
    def _deduplicate_table_chart_overlap(self, elements: List[DocumentElement]) -> List[DocumentElement]:
        """
        Intelligently resolve TABLE-CHART overlaps based on table structure completeness.
--- a/backend/app/services/memory_manager.py
+++ b/backend/app/services/memory_manager.py
--- a/backend/app/services/ocr_service.py
+++ b/backend/app/services/ocr_service.py
@@ -25,6 +25,7 @@ except ImportError:
 from app.core.config import settings
 from app.services.office_converter import OfficeConverter, OfficeConverterError
 from app.services.memory_manager import get_model_manager, MemoryConfig, MemoryGuard, prediction_context
 # Import dual-track components
 try:
@@ -96,6 +97,26 @@ class OCRService:
        self._model_last_used = {}  # Track last usage time for each model
        self._memory_warning_logged = False
        # Initialize MemoryGuard for enhanced memory monitoring
        self._memory_guard = None
        if settings.enable_model_lifecycle_management:
            try:
                memory_config = MemoryConfig(
                    warning_threshold=settings.memory_warning_threshold,
                    critical_threshold=settings.memory_critical_threshold,
                    emergency_threshold=settings.memory_emergency_threshold,
                    model_idle_timeout_seconds=settings.pp_structure_idle_timeout_seconds,
                    gpu_memory_limit_mb=settings.gpu_memory_limit_mb,
                    enable_cpu_fallback=settings.enable_cpu_fallback,
                )
                self._memory_guard = MemoryGuard(memory_config)
                logger.debug("MemoryGuard initialized for OCRService")
            except Exception as e:
                logger.warning(f"Failed to initialize MemoryGuard: {e}")
        # Track if CPU fallback was activated
        self._cpu_fallback_active = False
        self._detect_and_configure_gpu()
        # Log GPU optimization settings
@@ -217,12 +238,37 @@ class OCRService:
    def _check_gpu_memory_usage(self):
        """
        Check GPU memory usage and log warnings if approaching limits.
-        Implements memory optimization for RTX 4060 8GB.
+        Uses MemoryGuard for enhanced monitoring with multiple backends.
        """
        if not self.use_gpu or not settings.enable_memory_optimization:
            return
        try:
            # Use MemoryGuard if available for better monitoring
            if self._memory_guard:
                stats = self._memory_guard.get_memory_stats()
                # Log based on usage ratio
                if stats.gpu_used_ratio > 0.90 and not self._memory_warning_logged:
                    logger.warning(
                        f"GPU memory usage critical: {stats.gpu_used_mb:.0f}MB / {stats.gpu_total_mb:.0f}MB "
                        f"({stats.gpu_used_ratio*100:.1f}%)"
                    )
                    logger.warning("Consider enabling auto_unload_unused_models or reducing batch size")
                    self._memory_warning_logged = True
                    # Trigger emergency cleanup if enabled
                    if settings.enable_emergency_cleanup:
                        self._cleanup_unused_models()
                        self._memory_guard.clear_gpu_cache()
                elif stats.gpu_used_ratio > 0.75:
                    logger.info(
                        f"GPU memory: {stats.gpu_used_mb:.0f}MB / {stats.gpu_total_mb:.0f}MB "
                        f"({stats.gpu_used_ratio*100:.1f}%)"
                    )
            else:
                # Fallback to original implementation
                device_id = self.gpu_info.get('device_id', 0)
                memory_allocated = paddle.device.cuda.memory_allocated(device_id)
                memory_allocated_mb = memory_allocated / (1024**2)
@@ -242,28 +288,41 @@ class OCRService:
    def _cleanup_unused_models(self):
        """
-        Clean up unused language models to free GPU memory.
+        Clean up unused models (including PP-StructureV3) to free GPU memory.
        Models idle longer than model_idle_timeout_seconds will be unloaded.
        Note: PP-StructureV3 is NO LONGER exempted from cleanup - it will be
        unloaded based on pp_structure_idle_timeout_seconds configuration.
        """
        if not settings.auto_unload_unused_models:
            return
        current_time = datetime.now()
        timeout = settings.model_idle_timeout_seconds
        models_to_remove = []
        for lang, last_used in self._model_last_used.items():
-            if lang == 'structure':  # Don't unload structure engine
+            # Use different timeout for structure engine vs language models
-                continue
+            if lang == 'structure':
                timeout = settings.pp_structure_idle_timeout_seconds
            else:
                timeout = settings.model_idle_timeout_seconds
            idle_seconds = (current_time - last_used).total_seconds()
            if idle_seconds > timeout:
                models_to_remove.append(lang)
-        for lang in models_to_remove:
+        for model_key in models_to_remove:
-            if lang in self.ocr_engines:
+            if model_key == 'structure':
-                logger.info(f"Unloading idle OCR engine for {lang} (idle {timeout}s)")
+                if self.structure_engine is not None:
-                del self.ocr_engines[lang]
+                    logger.info(f"Unloading idle PP-StructureV3 engine (idle {settings.pp_structure_idle_timeout_seconds}s)")
-                del self._model_last_used[lang]
+                    self._unload_structure_engine()
                    if model_key in self._model_last_used:
                        del self._model_last_used[model_key]
            elif model_key in self.ocr_engines:
                logger.info(f"Unloading idle OCR engine for {model_key} (idle {settings.model_idle_timeout_seconds}s)")
                del self.ocr_engines[model_key]
                if model_key in self._model_last_used:
                    del self._model_last_used[model_key]
        if models_to_remove and self.use_gpu:
            # Clear CUDA cache
@@ -273,6 +332,41 @@ class OCRService:
            except Exception as e:
                logger.debug(f"Cache clear failed: {e}")
    def _unload_structure_engine(self):
        """
        Unload the PP-StructureV3 engine and release its GPU memory.

        Does nothing when no engine is loaded. Any error during teardown is
        logged as a warning and the engine reference is still reset to None.
        """
        if self.structure_engine is None:
            return

        # Sub-components of the engine that are cleared before deletion
        component_names = (
            'table_engine',
            'text_detector',
            'text_recognizer',
            'layout_predictor',
        )
        try:
            for component in component_names:
                if hasattr(self.structure_engine, component):
                    setattr(self.structure_engine, component, None)

            # Drop the engine itself, leaving the attribute defined as None
            del self.structure_engine
            self.structure_engine = None

            # Collect the released objects before clearing the GPU cache
            gc.collect()
            if self.use_gpu:
                paddle.device.cuda.empty_cache()

            logger.info("PP-StructureV3 engine unloaded successfully")
        except Exception as e:
            logger.warning(f"Error unloading PP-StructureV3: {e}")
            self.structure_engine = None
    def clear_gpu_cache(self):
        """
        Manually clear GPU memory cache.
@@ -519,46 +613,160 @@ class OCRService:
            logger.warning(f"GPU memory cleanup failed (non-critical): {e}")
            # Don't fail the processing if cleanup fails
-    def check_gpu_memory(self, required_mb: int = 2000) -> bool:
+    def check_gpu_memory(self, required_mb: int = 2000, enable_fallback: bool = True) -> bool:
        """
-        Check if sufficient GPU memory is available.
+        Check if sufficient GPU memory is available using MemoryGuard.
        This method now uses MemoryGuard for accurate memory queries across
        multiple backends (pynvml, torch, paddle) instead of returning True
        blindly for PaddlePaddle-only environments.
        Args:
            required_mb: Required memory in MB (default 2000MB for OCR models)
            enable_fallback: If True and CPU fallback is enabled, switch to CPU mode
                           when memory is insufficient instead of returning False
        Returns:
-            True if sufficient memory is available or GPU is not used
+            True if sufficient memory is available, GPU is not used, or CPU fallback activated
        """
        # If not using GPU, always return True
        if not self.use_gpu:
            return True
        try:
-            # Check GPU memory using torch if available, otherwise use PaddlePaddle
+            # Use MemoryGuard if available for accurate multi-backend memory queries
            if self._memory_guard:
                is_available, stats = self._memory_guard.check_memory(
                    required_mb=required_mb,
                    device_id=self.gpu_info.get('device_id', 0)
                )
                if not is_available:
                    logger.warning(
                        f"GPU memory check failed: {stats.gpu_free_mb:.0f}MB free, "
                        f"{required_mb}MB required ({stats.gpu_used_ratio*100:.1f}% used)"
                    )
                    # Try to free memory
                    logger.info("Attempting memory cleanup before retry...")
                    self._cleanup_unused_models()
                    self._memory_guard.clear_gpu_cache()
                    # Check again
                    is_available, stats = self._memory_guard.check_memory(required_mb=required_mb)
                    if not is_available:
                        # Memory still insufficient after cleanup
                        if enable_fallback and settings.enable_cpu_fallback:
                            logger.warning(
                                f"Insufficient GPU memory ({stats.gpu_free_mb:.0f}MB) after cleanup. "
                                f"Activating CPU fallback mode."
                            )
                            self._activate_cpu_fallback()
                            return True  # Continue with CPU
                        else:
                            logger.error(
                                f"Insufficient GPU memory: {stats.gpu_free_mb:.0f}MB available, "
                                f"{required_mb}MB required"
                            )
                            return False
                logger.debug(
                    f"GPU memory check passed: {stats.gpu_free_mb:.0f}MB free "
                    f"({stats.gpu_used_ratio*100:.1f}% used)"
                )
                return True
            else:
                # Fallback to original implementation
                free_memory = None
                if TORCH_AVAILABLE and torch.cuda.is_available():
                    free_memory = torch.cuda.mem_get_info()[0] / 1024**2
                elif paddle.device.is_compiled_with_cuda():
                    # PaddlePaddle doesn't have direct API to get free memory,
-                # so we rely on cleanup and continue
+                    # use allocated memory to estimate
-                logger.debug("Using PaddlePaddle GPU, memory info not directly available")
+                    device_id = self.gpu_info.get('device_id', 0)
-                return True
+                    allocated = paddle.device.cuda.memory_allocated(device_id) / (1024**2)
                    total = settings.gpu_memory_limit_mb
                    free_memory = max(0, total - allocated)
                    logger.debug(f"Estimated free GPU memory: {free_memory:.0f}MB (total: {total}MB, allocated: {allocated:.0f}MB)")
                if free_memory is not None:
                    if free_memory < required_mb:
                        logger.warning(f"Low GPU memory: {free_memory:.0f}MB available, {required_mb}MB required")
                    # Try to free memory
                        self.cleanup_gpu_memory()
-                    # Check again
+
                        # Recheck
                        if TORCH_AVAILABLE and torch.cuda.is_available():
                            free_memory = torch.cuda.mem_get_info()[0] / 1024**2
                        else:
                            allocated = paddle.device.cuda.memory_allocated(device_id) / (1024**2)
                            free_memory = max(0, total - allocated)
                        if free_memory < required_mb:
                            if enable_fallback and settings.enable_cpu_fallback:
                                logger.warning(f"Insufficient GPU memory after cleanup. Activating CPU fallback.")
                                self._activate_cpu_fallback()
                                return True
                            else:
                                logger.error(f"Insufficient GPU memory after cleanup: {free_memory:.0f}MB")
                                return False
                    logger.debug(f"GPU memory check passed: {free_memory:.0f}MB available")
                return True
        except Exception as e:
            logger.warning(f"GPU memory check failed: {e}")
            return True  # Continue processing even if check fails
    def _activate_cpu_fallback(self):
        """
        Switch this service instance to CPU processing.

        Used when GPU memory is insufficient. Sets the fallback flag, turns
        off GPU usage, records the reason in gpu_info and clears the GPU cache
        through MemoryGuard when one is configured. A no-op if fallback is
        already active.
        """
        if self._cpu_fallback_active:
            return  # Already in CPU mode

        for message in (
            "=== CPU FALLBACK MODE ACTIVATED ===",
            "GPU memory insufficient, switching to CPU processing",
            "Performance will be significantly reduced",
        ):
            logger.warning(message)

        self._cpu_fallback_active = True
        self.use_gpu = False

        # Record the fallback in the GPU info
        self.gpu_info['cpu_fallback'] = True
        self.gpu_info['fallback_reason'] = 'GPU memory insufficient'

        # Clear the GPU cache to free memory
        guard = self._memory_guard
        if guard:
            guard.clear_gpu_cache()
    def _restore_gpu_mode(self):
        """
        Attempt to restore GPU mode after CPU fallback.

        Called when memory pressure may have been relieved. GPU mode is
        re-enabled only if CPU fallback is active, a GPU is available, a
        MemoryGuard is configured and it reports at least
        settings.structure_model_memory_mb of memory on this service's device.
        Otherwise the service stays in CPU mode.
        """
        if not self._cpu_fallback_active:
            return
        if not self.gpu_available:
            return
        if not self._memory_guard:
            # Without MemoryGuard there is no check to justify switching back
            return

        # Query the same device the primary memory check uses, not the default
        is_available, _ = self._memory_guard.check_memory(
            required_mb=settings.structure_model_memory_mb,
            device_id=self.gpu_info.get('device_id', 0)
        )
        if is_available:
            logger.info("GPU memory available, restoring GPU mode")
            self._cpu_fallback_active = False
            self.use_gpu = True
            self.gpu_info.pop('cpu_fallback', None)
            self.gpu_info.pop('fallback_reason', None)
    def convert_pdf_to_images(self, pdf_path: Path, output_dir: Path) -> List[Path]:
        """
        Convert PDF to images (one per page)
@@ -626,6 +834,24 @@ class OCRService:
        threshold = confidence_threshold if confidence_threshold is not None else self.confidence_threshold
        try:
            # Pre-operation memory check: Try to restore GPU if in fallback and memory available
            if self._cpu_fallback_active:
                self._restore_gpu_mode()
                if not self._cpu_fallback_active:
                    logger.info("GPU mode restored for processing")
            # Initial memory check before starting any heavy processing
            # Estimate memory requirement based on image type
            estimated_memory_mb = 2500  # Conservative estimate for full OCR + layout
            if detect_layout:
                estimated_memory_mb += 500  # Additional for PP-StructureV3
            if not self.check_gpu_memory(required_mb=estimated_memory_mb, enable_fallback=True):
                logger.warning(
                    f"Pre-operation memory check failed ({estimated_memory_mb}MB required). "
                    f"Processing will attempt to proceed but may encounter issues."
                )
            # Check if file is Office document
            if self.office_converter.is_office_document(image_path):
                logger.info(f"Detected Office document: {image_path.name}, converting to PDF")
@@ -748,9 +974,12 @@ class OCRService:
            # Get OCR engine (for non-PDF images)
            ocr_engine = self.get_ocr_engine(lang)
-            # Check GPU memory before OCR processing
+            # Secondary memory check before OCR processing
-            if not self.check_gpu_memory(required_mb=1500):
+            if not self.check_gpu_memory(required_mb=1500, enable_fallback=True):
-                logger.warning("Insufficient GPU memory for OCR, attempting to proceed anyway")
+                logger.warning(
                    f"OCR memory check: insufficient GPU memory (1500MB required). "
                    f"Mode: {'CPU fallback' if self._cpu_fallback_active else 'GPU (low memory)'}"
                )
            # Get the actual image dimensions that OCR will use
            from PIL import Image
@@ -950,6 +1179,18 @@ class OCRService:
            Tuple of (layout_data, images_metadata)
        """
        try:
            # Pre-operation memory check for layout analysis
            if self._cpu_fallback_active:
                self._restore_gpu_mode()
                if not self._cpu_fallback_active:
                    logger.info("GPU mode restored for layout analysis")
            if not self.check_gpu_memory(required_mb=2000, enable_fallback=True):
                logger.warning(
                    f"Layout analysis pre-check: insufficient GPU memory (2000MB required). "
                    f"Mode: {'CPU fallback' if self._cpu_fallback_active else 'GPU'}"
                )
            structure_engine = self._ensure_structure_engine(pp_structure_params)
            # Try enhanced processing first
@@ -998,9 +1239,19 @@ class OCRService:
            # Standard processing (original implementation)
            logger.info(f"Running standard layout analysis on {image_path.name}")
-            # Check GPU memory before processing
+            # Memory check before PP-StructureV3 processing
-            if not self.check_gpu_memory(required_mb=2000):
+            if not self.check_gpu_memory(required_mb=2000, enable_fallback=True):
-                logger.warning("Insufficient GPU memory for PP-StructureV3, attempting to proceed anyway")
+                logger.warning(
                    f"PP-StructureV3 memory check: insufficient GPU memory (2000MB required). "
                    f"Mode: {'CPU fallback' if self._cpu_fallback_active else 'GPU (low memory)'}"
                )
            # Use prediction semaphore to control concurrent predictions
            # This prevents OOM errors from multiple simultaneous PP-StructureV3.predict() calls
            with prediction_context(timeout=settings.service_acquire_timeout_seconds) as acquired:
                if not acquired:
                    logger.error("Failed to acquire prediction slot (timeout), returning empty layout")
                    return None, []
                results = structure_engine.predict(str(image_path))
@@ -1254,6 +1505,46 @@ class OCRService:
                    if temp_pdf_path:
                        unified_doc.metadata.original_filename = file_path.name
                    # HYBRID MODE: Check if Direct track missed images (e.g., inline image blocks)
                    # If so, use OCR to extract images and merge them into the Direct result
                    pages_with_missing_images = self.direct_extraction_engine.check_document_for_missing_images(
                        actual_file_path
                    )
                    if pages_with_missing_images:
                        logger.info(f"Hybrid mode: Direct track missing images on pages {pages_with_missing_images}, using OCR to extract images")
                        try:
                            # Run OCR on the file to extract images
                            ocr_result = self.process_file_traditional(
                                actual_file_path, lang, detect_layout=True,
                                confidence_threshold=confidence_threshold,
                                output_dir=output_dir, pp_structure_params=pp_structure_params
                            )
                            # Convert OCR result to extract images
                            ocr_unified = self.ocr_to_unified_converter.convert(
                                ocr_result, actual_file_path, 0.0, lang
                            )
                            # Merge OCR-extracted images into Direct track result
                            images_added = self._merge_ocr_images_into_direct(
                                unified_doc, ocr_unified, pages_with_missing_images
                            )
                            if images_added > 0:
                                logger.info(f"Hybrid mode: Added {images_added} images from OCR to Direct track result")
                                unified_doc.metadata.processing_track = ProcessingTrack.HYBRID
                            else:
                                # Fallback: OCR didn't find images either, render inline image blocks directly
                                logger.info("Hybrid mode: OCR didn't find images, falling back to inline image rendering")
                                images_added = self.direct_extraction_engine.render_inline_image_regions(
                                    actual_file_path, unified_doc, pages_with_missing_images, output_dir
                                )
                                if images_added > 0:
                                    logger.info(f"Hybrid mode: Rendered {images_added} inline image regions")
                                    unified_doc.metadata.processing_track = ProcessingTrack.HYBRID
                        except Exception as e:
                            logger.warning(f"Hybrid mode image extraction failed: {e}")
                            # Continue with Direct track result without images
            # Use OCR track (either by recommendation or fallback)
            if recommendation.track == "ocr":
                # Use OCR for scanned documents, images, etc.
@@ -1269,7 +1560,8 @@ class OCRService:
                )
                unified_doc.document_id = document_id
-            # Update processing track metadata
+            # Update processing track metadata (only if not already set to HYBRID)
            if unified_doc.metadata.processing_track != ProcessingTrack.HYBRID:
                unified_doc.metadata.processing_track = (
                    ProcessingTrack.DIRECT if recommendation.track == "direct"
                    else ProcessingTrack.OCR
@@ -1279,7 +1571,8 @@ class OCRService:
            processing_time = (datetime.now() - start_time).total_seconds()
            unified_doc.metadata.processing_time = processing_time
-            logger.info(f"Document processing completed in {processing_time:.2f}s using {recommendation.track} track")
+            actual_track = unified_doc.metadata.processing_track.value
            logger.info(f"Document processing completed in {processing_time:.2f}s using {actual_track} track")
            return unified_doc
@@ -1290,6 +1583,75 @@ class OCRService:
                file_path, lang, detect_layout, confidence_threshold, output_dir, pp_structure_params
            )
    def _merge_ocr_images_into_direct(
        self,
        direct_doc: 'UnifiedDocument',
        ocr_doc: 'UnifiedDocument',
        pages_with_missing_images: List[int]
    ) -> int:
        """
        Copy image elements found by the OCR track into the Direct track result.

        Used in hybrid mode when the Direct track could not extract some images
        (for example logos made of inline image blocks). For each listed page
        present in both documents, every FIGURE / IMAGE / LOGO element of the
        OCR page is appended to the matching Direct page. The moved element
        object itself is renamed with a ``hybrid_`` prefix, so ``ocr_doc`` is
        modified as a side effect.

        Args:
            direct_doc: UnifiedDocument from Direct track (modified in place)
            ocr_doc: UnifiedDocument from OCR track
            pages_with_missing_images: Page numbers to fill in; compared
                against each page's ``page_number``

        Returns:
            Number of images added. Errors are logged rather than raised, so
            this is the count reached before any failure.
        """
        def find_page(document, number):
            # First page whose page_number matches, or None when absent
            return next(
                (candidate for candidate in document.pages
                 if candidate.page_number == number),
                None
            )

        merged_count = 0
        try:
            wanted_types = {ElementType.FIGURE, ElementType.IMAGE, ElementType.LOGO}

            for page_num in pages_with_missing_images:
                # Both documents must contain the page, otherwise skip it
                target_page = find_page(direct_doc, page_num)
                if not target_page:
                    continue
                source_page = find_page(ocr_doc, page_num)
                if not source_page:
                    continue

                for element in source_page.elements:
                    if element.type not in wanted_types:
                        continue
                    # Prefix the ID so it cannot collide with Direct track IDs
                    new_element_id = f"hybrid_{element.element_id}"
                    element.element_id = new_element_id
                    target_page.elements.append(element)
                    merged_count += 1
                    logger.debug(f"Added image element {new_element_id} to page {page_num}")

            # Keep the document-level image counter in step with the merge
            if merged_count > 0:
                direct_doc.metadata.total_images = (
                    (direct_doc.metadata.total_images or 0) + merged_count
                )
        except Exception as e:
            logger.error(f"Error merging OCR images into Direct track: {e}")
        return merged_count
    def process_file_traditional(
        self,
        file_path: Path,
@@ -1441,13 +1803,16 @@ class OCRService:
            UnifiedDocument if dual-track is enabled and use_dual_track=True,
            Dict with legacy format otherwise
        """
-        if use_dual_track and self.dual_track_enabled:
+        # Use dual-track processing if:
-            # Use dual-track processing
+        # 1. use_dual_track is True (auto-detection), OR
        # 2. force_track is specified (explicit track selection)
        if (use_dual_track or force_track) and self.dual_track_enabled:
            # Use dual-track processing (or forced track)
            return self.process_with_dual_track(
                file_path, lang, detect_layout, confidence_threshold, output_dir, force_track, pp_structure_params
            )
        else:
-            # Use traditional OCR processing
+            # Use traditional OCR processing (no force_track support)
            return self.process_file_traditional(
                file_path, lang, detect_layout, confidence_threshold, output_dir, pp_structure_params
            )
--- a/backend/app/services/pdf_generator_service.py
+++ b/backend/app/services/pdf_generator_service.py
@@ -572,8 +572,10 @@ class PDFGeneratorService:
                    processing_track = unified_doc.metadata.get('processing_track')
            # Route to track-specific rendering method
-            is_direct_track = (processing_track == 'direct' or
+            # ProcessingTrack is (str, Enum), so comparing with enum value works for both string and enum
-                              processing_track == ProcessingTrack.DIRECT)
+            # HYBRID track uses Direct track rendering (Direct text/tables + OCR images)
            is_direct_track = (processing_track == ProcessingTrack.DIRECT or
                               processing_track == ProcessingTrack.HYBRID)
            logger.info(f"Processing track: {processing_track}, using {'Direct' if is_direct_track else 'OCR'} track rendering")
@@ -675,8 +677,11 @@ class PDFGeneratorService:
            logger.info("=== Direct Track PDF Generation ===")
            logger.info(f"Total pages: {len(unified_doc.pages)}")
-            # Set current track for helper methods
+            # Set current track for helper methods (may be DIRECT or HYBRID)
-            self.current_processing_track = 'direct'
+            if hasattr(unified_doc, 'metadata') and unified_doc.metadata:
                self.current_processing_track = unified_doc.metadata.processing_track
            else:
                self.current_processing_track = ProcessingTrack.DIRECT
            # Get page dimensions from first page (for canvas initialization)
            if not unified_doc.pages:
@@ -1074,6 +1079,11 @@ class PDFGeneratorService:
        # *** Priority 1: check ocr_dimensions (converted from UnifiedDocument) ***
        if 'ocr_dimensions' in ocr_data:
            dims = ocr_data['ocr_dimensions']
            # Handle both dict format {'width': w, 'height': h} and
            # list format [{'page': 1, 'width': w, 'height': h}, ...]
            if isinstance(dims, list) and len(dims) > 0:
                dims = dims[0]  # Use first page dimensions
            if isinstance(dims, dict):
                w = float(dims.get('width', 0))
                h = float(dims.get('height', 0))
                if w > 0 and h > 0:
@@ -1418,8 +1428,8 @@ class PDFGeneratorService:
            # Set font with track-specific styling
            # Note: OCR track has no StyleInfo (extracted from images), so no advanced formatting
            style_info = region.get('style')
-            is_direct_track = (self.current_processing_track == 'direct' or
+            is_direct_track = (self.current_processing_track == ProcessingTrack.DIRECT or
-                              self.current_processing_track == ProcessingTrack.DIRECT)
+                               self.current_processing_track == ProcessingTrack.HYBRID)
            if style_info and is_direct_track:
                # Direct track: Apply rich styling from StyleInfo
@@ -1661,10 +1671,15 @@ class PDFGeneratorService:
                return
            # Construct full path to image
            # saved_path is relative to result_dir (e.g., "imgs/element_id.png")
            image_path = result_dir / image_path_str
            # Fallback for legacy data
            if not image_path.exists():
-                logger.warning(f"Image not found: {image_path}")
+                image_path = result_dir / Path(image_path_str).name
            if not image_path.exists():
                logger.warning(f"Image not found: {image_path_str} (in {result_dir})")
                return
            # Get bbox for positioning
@@ -2289,12 +2304,30 @@ class PDFGeneratorService:
                col_widths = element.metadata['column_widths']
                logger.debug(f"Using extracted column widths: {col_widths}")
-            # Create table without rowHeights (will use canvas scaling instead)
+            # Use original row heights from extraction if available
-            t = Table(table_content, colWidths=col_widths)
+            # Row heights must match the number of data rows exactly
            row_heights_list = None
            if element.metadata and 'row_heights' in element.metadata:
                extracted_row_heights = element.metadata['row_heights']
                num_data_rows = len(table_content)
                num_height_rows = len(extracted_row_heights)
                if num_height_rows == num_data_rows:
                    row_heights_list = extracted_row_heights
                    logger.debug(f"Using extracted row heights ({num_height_rows} rows): {row_heights_list}")
                else:
                    # Row counts don't match - this can happen with merged cells or empty rows
                    logger.warning(f"Row height mismatch: {num_height_rows} heights for {num_data_rows} data rows, falling back to auto-sizing")
            # Create table with both column widths and row heights for accurate sizing
            t = Table(table_content, colWidths=col_widths, rowHeights=row_heights_list)
            # Apply style with minimal padding to reduce table extension
            # Use Chinese font to support special characters (℃, μm, ≦, ×, Ω, etc.)
            font_for_table = self.font_name if self.font_registered else 'Helvetica'
            style = TableStyle([
                ('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
                ('FONTNAME', (0, 0), (-1, -1), font_for_table),
                ('FONTSIZE', (0, 0), (-1, -1), 8),
                ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
                ('VALIGN', (0, 0), (-1, -1), 'TOP'),
@@ -2307,8 +2340,8 @@ class PDFGeneratorService:
            ])
            t.setStyle(style)
-            # CRITICAL: Use canvas scaling to fit table within bbox
+            # Use canvas scaling as fallback to fit table within bbox
-            # This is more reliable than rowHeights which doesn't always work
+            # With proper row heights, scaling should be minimal (close to 1.0)
            # Step 1: Wrap to get actual rendered size
            actual_width, actual_height = t.wrapOn(pdf_canvas, table_width * 10, table_height * 10)
@@ -2358,11 +2391,16 @@ class PDFGeneratorService:
                logger.warning(f"No image path for element {element.element_id}")
                return
-            # Construct full path
+            # Construct full path to image
            # saved_path is relative to result_dir (e.g., "document_id_p1_img0.png")
            image_path = result_dir / image_path_str
            # Fallback for legacy data
            if not image_path.exists():
-                logger.warning(f"Image not found: {image_path}")
+                image_path = result_dir / Path(image_path_str).name
            if not image_path.exists():
                logger.warning(f"Image not found: {image_path_str} (in {result_dir})")
                return
            # Get bbox
@@ -2388,7 +2426,7 @@ class PDFGeneratorService:
                preserveAspectRatio=True
            )
-            logger.debug(f"Drew image: {image_path_str}")
+            logger.debug(f"Drew image: {image_path} (from: {original_path_str})")
        except Exception as e:
            logger.error(f"Failed to draw image element {element.element_id}: {e}")
--- a/backend/app/services/pp_structure_enhanced.py
+++ b/backend/app/services/pp_structure_enhanced.py
@@ -21,6 +21,8 @@ except ImportError:
 import paddle
 from paddleocr import PPStructureV3
 from app.models.unified_document import ElementType
 from app.core.config import settings
 from app.services.memory_manager import prediction_context
 logger = logging.getLogger(__name__)
@@ -96,7 +98,21 @@ class PPStructureEnhanced:
        try:
            logger.info(f"Enhanced PP-StructureV3 analysis on {image_path.name}")
-            # Perform structure analysis
+            # Perform structure analysis with semaphore control
            # This prevents OOM errors from multiple simultaneous predictions
            with prediction_context(timeout=settings.service_acquire_timeout_seconds) as acquired:
                if not acquired:
                    logger.error("Failed to acquire prediction slot (timeout), returning empty result")
                    return {
                        'has_parsing_res_list': False,
                        'elements': [],
                        'total_elements': 0,
                        'images': [],
                        'tables': [],
                        'element_types': {},
                        'error': 'Prediction slot timeout'
                    }
                results = self.structure_engine.predict(str(image_path))
            all_elements = []
--- a/backend/app/services/service_pool.py
+++ b/backend/app/services/service_pool.py
@@ -0,0 +1,468 @@
 """
 Tool_OCR - OCR Service Pool
 Manages a pool of OCRService instances to prevent duplicate model loading
 and control concurrent GPU operations.
 """
 import asyncio
 import logging
 import threading
 import time
 from contextlib import contextmanager
 from dataclasses import dataclass, field
 from enum import Enum
 from typing import Any, Dict, List, Optional, TYPE_CHECKING
 from app.services.memory_manager import get_model_manager, MemoryConfig
 if TYPE_CHECKING:
    from app.services.ocr_service import OCRService
 logger = logging.getLogger(__name__)
 class ServiceState(Enum):
    """State of a pooled service"""
    # Idle; the only state _try_acquire_service hands out to a caller.
    AVAILABLE = "available"
    # Held by a caller between acquire() and release().
    IN_USE = "in_use"
    # Set by release() once error_count reaches max_consecutive_errors.
    UNHEALTHY = "unhealthy"
    # NOTE(review): not assigned anywhere in the visible part of this file.
    INITIALIZING = "initializing"
@dataclass
 class PooledService:
    """Wrapper for a pooled OCRService instance"""
    # The wrapped service object; typed Any to avoid importing OCRService here.
    service: Any  # OCRService
    # Device identifier this wrapper is filed under (e.g. "GPU:0").
    device: str
    # Lifecycle state; new wrappers start out AVAILABLE.
    state: ServiceState = ServiceState.AVAILABLE
    # time.time() timestamp taken when the wrapper is constructed.
    created_at: float = field(default_factory=time.time)
    # time.time() timestamp refreshed by the pool on acquire and on release.
    last_used: float = field(default_factory=time.time)
    # Number of times the pool has handed this service out.
    use_count: int = 0
    # Errors since the last successful release (release() resets it to 0 on success).
    error_count: int = 0
    # Task ID given to acquire(); cleared again by release().
    current_task_id: Optional[str] = None
 class PoolConfig:
    """
    Limits and timeouts used by the OCR service pool.

    A plain settings holder: every constructor argument is stored unchanged
    as an attribute of the same name, with no validation.
    """

    def __init__(
        self,
        max_services_per_device: int = 1,
        max_total_services: int = 2,
        acquire_timeout_seconds: float = 300.0,
        max_queue_size: int = 50,
        health_check_interval_seconds: int = 60,
        max_consecutive_errors: int = 3,
        service_idle_timeout_seconds: int = 600,
        enable_auto_scaling: bool = False,
    ) -> None:
        """
        Store the pool settings.

        Args:
            max_services_per_device: Upper bound on services per device; also
                the initial value of the per-device semaphore.
            max_total_services: Upper bound on services across all devices.
            acquire_timeout_seconds: Default wait used by ``acquire`` when the
                caller passes no timeout.
            max_queue_size: NOTE(review): not read in the visible pool code.
            health_check_interval_seconds: NOTE(review): not read in the
                visible pool code.
            max_consecutive_errors: Error count at which ``release`` marks a
                service unhealthy.
            service_idle_timeout_seconds: NOTE(review): not read in the
                visible pool code.
            enable_auto_scaling: NOTE(review): not read in the visible pool
                code.
        """
        # Capacity limits
        self.max_services_per_device = max_services_per_device
        self.max_total_services = max_total_services

        # Waiting and queueing
        self.acquire_timeout_seconds = acquire_timeout_seconds
        self.max_queue_size = max_queue_size

        # Health and lifecycle
        self.health_check_interval_seconds = health_check_interval_seconds
        self.max_consecutive_errors = max_consecutive_errors
        self.service_idle_timeout_seconds = service_idle_timeout_seconds
        self.enable_auto_scaling = enable_auto_scaling
 class OCRServicePool:
    """
    Pool of OCRService instances with concurrency control.
    Features:
    - Per-device instance management (one service per GPU)
    - Queue-based task distribution
    - Semaphore-based concurrency limits
    - Health monitoring
    - Automatic service recovery
    """
    _instance = None
    _lock = threading.Lock()
    def __new__(cls, *args, **kwargs):
        """Singleton pattern"""
        with cls._lock:
            if cls._instance is None:
                cls._instance = super().__new__(cls)
                cls._instance._initialized = False
            return cls._instance
    def __init__(self, config: Optional[PoolConfig] = None):
        """
        Set up the shared pool state the first time the singleton is built.

        Because ``__new__`` always returns the same object, this method runs
        on every ``OCRServicePool(...)`` call. Only the first call does any
        work; later calls return immediately and their ``config`` argument is
        ignored.

        Args:
            config: Pool settings; a default ``PoolConfig()`` is used when
                omitted.
        """
        # Hold the class-level lock across the check *and* the initialisation.
        # Without it, two threads constructing the pool at the same time could
        # both see _initialized == False and both rebuild (and so replace) the
        # service lists, semaphores and locks.
        with self._lock:
            if self._initialized:
                return
            self.config = config or PoolConfig()
            self.services: Dict[str, List[PooledService]] = {}
            self.semaphores: Dict[str, threading.Semaphore] = {}
            self.queues: Dict[str, List] = {}
            # The condition shares the re-entrant pool lock, so code holding
            # either one holds the same underlying lock.
            self._pool_lock = threading.RLock()
            self._condition = threading.Condition(self._pool_lock)
            # Metrics
            self._metrics = {
                "total_acquisitions": 0,
                "total_releases": 0,
                "total_timeouts": 0,
                "total_errors": 0,
                "queue_waits": 0,
            }
            # Initialize default device pool
            self._initialize_device("GPU:0")
            self._initialized = True
            logger.info("OCRServicePool initialized")
    def _initialize_device(self, device: str):
        """Initialize pool resources for a device"""
        with self._pool_lock:
            if device not in self.services:
                self.services[device] = []
                self.semaphores[device] = threading.Semaphore(
                    self.config.max_services_per_device
                )
                self.queues[device] = []
                logger.info(f"Initialized pool for device {device}")
    def _create_service(self, device: str) -> PooledService:
        """
        Build a fresh OCRService and wrap it for the pool.

        The service is constructed with no arguments; ``device`` is only
        recorded on the returned wrapper and used in log messages.

        Args:
            device: Device identifier (e.g., "GPU:0", "CPU")

        Returns:
            PooledService wrapper in the AVAILABLE state
        """
        # Deferred import: avoids a circular import with ocr_service
        from app.services.ocr_service import OCRService

        logger.info(f"Creating new OCRService for device {device}")

        # Time the construction so slow start-up shows in the logs
        started_at = time.time()
        instance = OCRService()
        elapsed = time.time() - started_at
        logger.info(f"OCRService created in {elapsed:.2f}s for device {device}")

        return PooledService(
            service=instance,
            device=device,
            state=ServiceState.AVAILABLE
        )
    def acquire(
        self,
        device: str = "GPU:0",
        timeout: Optional[float] = None,
        task_id: Optional[str] = None
    ) -> Optional[PooledService]:
        """
        Acquire an OCRService from the pool.

        Reuses an AVAILABLE service for ``device`` when there is one, creates
        a new one while the pool limits allow it, and otherwise waits until a
        service is released or the timeout expires.

        Args:
            device: Preferred device (e.g., "GPU:0")
            timeout: Maximum time in seconds to wait for a service. ``None``
                uses ``config.acquire_timeout_seconds``; ``0`` makes a single
                attempt without waiting.
            task_id: Optional task ID for tracking

        Returns:
            PooledService if available, None if timeout
        """
        # Substitute the default only when no timeout was supplied. The former
        # `timeout or default` also replaced an explicit 0, turning a
        # "do not wait" request into the full default wait.
        if timeout is None:
            timeout = self.config.acquire_timeout_seconds
        self._initialize_device(device)
        deadline = time.time() + timeout
        with self._condition:
            while True:
                # Try to get an available service
                service = self._try_acquire_service(device, task_id)
                if service:
                    self._metrics["total_acquisitions"] += 1
                    return service
                # Check if we can create a new service.
                # NOTE: creation happens while the pool lock is held.
                if self._can_create_service(device):
                    try:
                        pooled = self._create_service(device)
                        pooled.state = ServiceState.IN_USE
                        pooled.current_task_id = task_id
                        pooled.use_count += 1
                        self.services[device].append(pooled)
                        self._metrics["total_acquisitions"] += 1
                        logger.info(f"Created and acquired new service for {device}")
                        return pooled
                    except Exception as e:
                        # Creation failed: record it and fall through to the
                        # wait below; the next loop iteration tries again.
                        logger.error(f"Failed to create service for {device}: {e}")
                        self._metrics["total_errors"] += 1
                # Wait for a service to become available
                remaining = deadline - time.time()
                if remaining <= 0:
                    self._metrics["total_timeouts"] += 1
                    logger.warning(f"Timeout waiting for service on {device}")
                    return None
                # Counted once per wait iteration, not once per acquire() call
                self._metrics["queue_waits"] += 1
                logger.debug(f"Waiting for service on {device} (timeout: {remaining:.1f}s)")
                # Wake at least once a second to re-check state and deadline
                self._condition.wait(timeout=min(remaining, 1.0))
    def _try_acquire_service(self, device: str, task_id: Optional[str]) -> Optional[PooledService]:
        """Try to acquire an available service without waiting"""
        for pooled in self.services.get(device, []):
            if pooled.state == ServiceState.AVAILABLE:
                pooled.state = ServiceState.IN_USE
                pooled.last_used = time.time()
                pooled.use_count += 1
                pooled.current_task_id = task_id
                logger.debug(f"Acquired existing service for {device} (use #{pooled.use_count})")
                return pooled
        return None
    def _can_create_service(self, device: str) -> bool:
        """Check if a new service can be created"""
        device_count = len(self.services.get(device, []))
        total_count = sum(len(services) for services in self.services.values())
        return (
            device_count < self.config.max_services_per_device and
            total_count < self.config.max_total_services
        )
    def release(self, pooled: PooledService, error: Optional[Exception] = None):
        """
        Hand a service back to the pool and wake up waiting acquirers.

        A release without an error resets the service's error counter. A
        release with an error increments it, and the service is marked
        UNHEALTHY once the counter reaches ``config.max_consecutive_errors``.
        A best-effort GPU cache clear runs before waiters are notified.

        Args:
            pooled: The pooled service to release
            error: Optional error that occurred during use
        """
        with self._condition:
            if not error:
                # Success: forget earlier errors
                pooled.error_count = 0
                pooled.state = ServiceState.AVAILABLE
            else:
                pooled.error_count += 1
                self._metrics["total_errors"] += 1
                logger.warning(f"Service released with error: {error}")

                too_many_errors = (
                    pooled.error_count >= self.config.max_consecutive_errors
                )
                if too_many_errors:
                    pooled.state = ServiceState.UNHEALTHY
                    logger.error(f"Service marked unhealthy after {pooled.error_count} errors")
                else:
                    pooled.state = ServiceState.AVAILABLE

            pooled.last_used = time.time()
            pooled.current_task_id = None
            self._metrics["total_releases"] += 1

            # Best-effort GPU cache clear; a failure must not block the release
            try:
                get_model_manager().memory_guard.clear_gpu_cache()
            except Exception as e:
                logger.debug(f"Cache clear after release failed: {e}")

            # Let blocked acquire() calls re-check the pool
            self._condition.notify_all()
        logger.debug(f"Service released for device {pooled.device}")
    @contextmanager
    def acquire_context(
        self,
        device: str = "GPU:0",
        timeout: Optional[float] = None,
        task_id: Optional[str] = None
    ):
        """
        Context manager for acquiring and releasing a service.
        Usage:
            with pool.acquire_context("GPU:0") as pooled:
                result = pooled.service.process(...)
        """
        pooled = None
        error = None
        try:
            pooled = self.acquire(device, timeout, task_id)
            if pooled is None:
                raise TimeoutError(f"Failed to acquire service for {device}")
            yield pooled
        except Exception as e:
            error = e
            raise
        finally:
            if pooled:
                self.release(pooled, error)
    def get_service(self, device: str = "GPU:0") -> Optional["OCRService"]:
        """
        Get a service directly (for backward compatibility).
        This acquires a service and returns the underlying OCRService.
        The caller is responsible for calling release_service() when done.
        Args:
            device: Device identifier
        Returns:
            OCRService instance or None
        """
        pooled = self.acquire(device)
        if pooled:
            return pooled.service
        return None
    def get_pool_stats(self) -> Dict:
        """
        Summarise the pool: per-device counts plus pool-wide totals.
        Returns:
            Dict with "devices" (per-device total/available/in_use/unhealthy/
            max_allowed), a copy of the metrics, and the overall
            total/available/in-use service counts.
        """
        with self._pool_lock:
            per_device = {}
            total_count = 0
            available_count = 0
            in_use_count = 0
            for device, services in self.services.items():
                # Snapshot the states once, then count each state of interest
                states = [entry.state for entry in services]
                n_available = states.count(ServiceState.AVAILABLE)
                n_in_use = states.count(ServiceState.IN_USE)
                n_unhealthy = states.count(ServiceState.UNHEALTHY)
                per_device[device] = {
                    "total": len(services),
                    "available": n_available,
                    "in_use": n_in_use,
                    "unhealthy": n_unhealthy,
                    "max_allowed": self.config.max_services_per_device,
                }
                total_count += len(services)
                available_count += n_available
                in_use_count += n_in_use
            return {
                "devices": per_device,
                "metrics": self._metrics.copy(),
                "total_services": total_count,
                "available_services": available_count,
                "in_use_services": in_use_count,
            }
    def health_check(self) -> Dict:
        """
        Inspect every pooled service and report its health.
        Returns:
            Dict with an overall "healthy" flag, one entry per service under
            "services", and the "timestamp" of the check.
        """
        report = {
            "healthy": True,
            "services": [],
            "timestamp": time.time()
        }
        with self._pool_lock:
            for device, services in self.services.items():
                for idx, pooled in enumerate(services):
                    entry = {
                        "device": device,
                        "index": idx,
                        "state": pooled.state.value,
                        "error_count": pooled.error_count,
                        "use_count": pooled.use_count,
                        "idle_seconds": time.time() - pooled.last_used,
                    }
                    if pooled.state != ServiceState.AVAILABLE:
                        # Busy services count as responsive; unhealthy ones do not
                        entry["responsive"] = pooled.state != ServiceState.UNHEALTHY
                    else:
                        try:
                            # Lightweight probe: the service must expose both entry points
                            probes = [
                                hasattr(pooled.service, 'process'),
                                hasattr(pooled.service, 'get_gpu_status'),
                            ]
                            entry["responsive"] = all(probes)
                        except Exception as e:
                            entry["responsive"] = False
                            entry["error"] = str(e)
                            report["healthy"] = False
                    if pooled.state == ServiceState.UNHEALTHY:
                        report["healthy"] = False
                    report["services"].append(entry)
        return report
    def recover_unhealthy(self):
        """
        Attempt to recover unhealthy services.
        Every service whose state is UNHEALTHY is removed from its device
        list and replaced with a freshly created one. A failure to create
        the replacement is logged and does not stop the remaining
        recoveries.
        """
        with self._pool_lock:
            # Iterate over snapshots: the device lists are mutated below, and
            # removing from a list while iterating it skips the element that
            # follows the removed one (and shifts the reported indices).
            for device, services in list(self.services.items()):
                for idx, pooled in enumerate(list(services)):
                    if pooled.state != ServiceState.UNHEALTHY:
                        continue
                    logger.info(f"Attempting to recover unhealthy service {device}:{idx}")
                    try:
                        # Remove old service
                        services.remove(pooled)
                        # Create new service
                        new_pooled = self._create_service(device)
                        services.append(new_pooled)
                        logger.info(f"Successfully recovered service {device}:{idx}")
                    except Exception as e:
                        logger.error(f"Failed to recover service {device}:{idx}: {e}")
    def shutdown(self):
        """
        Shut the pool down: clean up each service, then drop all pool state.
        A cleanup failure on one service is logged as a warning and does not
        prevent the remaining services from being cleaned up.
        """
        logger.info("OCRServicePool shutdown started")
        with self._pool_lock:
            for services in self.services.values():
                for pooled in services:
                    try:
                        # Only services exposing the GPU cleanup hook are cleaned
                        if not hasattr(pooled.service, 'cleanup_gpu_memory'):
                            continue
                        pooled.service.cleanup_gpu_memory()
                    except Exception as e:
                        logger.warning(f"Error cleaning up service: {e}")
            # Forget every service, semaphore and queue
            for registry in (self.services, self.semaphores, self.queues):
                registry.clear()
        logger.info("OCRServicePool shutdown completed")
 # Global singleton instance.
 # Created lazily by get_service_pool() and set back to None by
 # shutdown_service_pool().
 _service_pool: Optional[OCRServicePool] = None
 def get_service_pool(config: Optional[PoolConfig] = None) -> OCRServicePool:
    """
    Return the process-wide OCRServicePool, creating it on first use.
    Args:
        config: Optional configuration; it is only passed along when the
            pool is created by this call
    Returns:
        OCRServicePool singleton instance
    """
    global _service_pool
    pool = _service_pool
    if pool is None:
        pool = OCRServicePool(config)
        _service_pool = pool
    return pool
 def shutdown_service_pool():
    """
    Shutdown the global service pool and forget it.
    Does nothing when no global pool has been created.
    """
    global _service_pool
    if _service_pool is not None:
        _service_pool.shutdown()
        _service_pool = None
        # shutdown() empties the pool's services/semaphores/queues but leaves
        # the instance itself in place. Also drop the class-level singleton
        # reference so the next get_service_pool() builds a fresh pool rather
        # than receiving the emptied one.
        # NOTE(review): assumes OCRServicePool caches its singleton in
        # `_instance` (the test fixtures reset that attribute by hand) -
        # confirm against OCRServicePool.__new__/__init__.
        OCRServicePool._instance = None
--- a/backend/tests/services/test_memory_manager.py
+++ b/backend/tests/services/test_memory_manager.py
--- a/backend/tests/services/test_ocr_memory_integration.py
+++ b/backend/tests/services/test_ocr_memory_integration.py
@@ -0,0 +1,380 @@
 """
 Tests for OCR Service Memory Integration
 Tests the integration of MemoryGuard with OCRService patterns,
 including pre-operation memory checks and CPU fallback logic.
 """
 import pytest
 from unittest.mock import Mock, patch, MagicMock
 import sys
 # Mock paddle before importing memory_manager.
 # The mock reports a CUDA-less build with zero devices and zero memory usage.
 # NOTE: it is installed in sys.modules at import time and nothing in this
 # module removes it again, so it stays registered as 'paddle' after this
 # module has been imported.
 paddle_mock = MagicMock()
 paddle_mock.is_compiled_with_cuda.return_value = False
 paddle_mock.device.cuda.device_count.return_value = 0
 paddle_mock.device.cuda.memory_allocated.return_value = 0
 paddle_mock.device.cuda.memory_reserved.return_value = 0
 paddle_mock.device.cuda.empty_cache = MagicMock()
 sys.modules['paddle'] = paddle_mock
 from app.services.memory_manager import (
    MemoryGuard,
    MemoryConfig,
    MemoryStats,
 )
 class TestMemoryGuardIntegration:
    """Tests for MemoryGuard integration patterns used in OCRService"""
    def setup_method(self):
        """Build a fresh config and guard for each test"""
        self.config = MemoryConfig(
            warning_threshold=0.80,
            critical_threshold=0.95,
            emergency_threshold=0.98,
            enable_cpu_fallback=True,
        )
        self.guard = MemoryGuard(self.config)
    def teardown_method(self):
        """Shut the guard down after each test, including failed ones"""
        # Doing this here rather than at the end of each test body keeps the
        # guard from outliving a test whose assertion fails.
        self.guard.shutdown()
    def _check_with_stats(self, required_mb, **stats_kwargs):
        """Run check_memory() with get_memory_stats() patched to fixed stats"""
        with patch.object(self.guard, 'get_memory_stats') as mock_stats:
            mock_stats.return_value = MemoryStats(**stats_kwargs)
            return self.guard.check_memory(required_mb=required_mb)
    def test_memory_check_below_threshold_allows_processing(self):
        """Test that memory check returns True when below thresholds"""
        # Stats below warning threshold
        is_available, stats = self._check_with_stats(
            2000,
            gpu_used_ratio=0.50,
            gpu_free_mb=4000,
            gpu_total_mb=8000,
        )
        assert is_available is True
        assert stats.gpu_free_mb >= 2000
    def test_memory_check_above_critical_blocks_processing(self):
        """Test that memory check returns False when above critical threshold"""
        # Stats above critical threshold
        is_available, stats = self._check_with_stats(
            1000,
            gpu_used_ratio=0.96,
            gpu_free_mb=320,
            gpu_total_mb=8000,
        )
        assert is_available is False
    def test_memory_check_insufficient_free_memory(self):
        """Test that memory check returns False when free memory < required"""
        # Insufficient free memory but below critical ratio
        is_available, stats = self._check_with_stats(
            1000,
            gpu_used_ratio=0.70,
            gpu_free_mb=500,
            gpu_total_mb=8000,
        )
        # Should return False (not enough free memory)
        assert is_available is False
 class TestCPUFallbackPattern:
    """Checks the CPU fallback / GPU restore pattern as used in OCRService"""
    def test_cpu_fallback_activation_pattern(self):
        """Activating the fallback flips the flags, tags gpu_info and clears the cache"""
        # Stand-in following the pattern used in OCRService._activate_cpu_fallback
        class FallbackStub:
            def __init__(self):
                self.gpu_info = {'device_id': 0}
                self.gpu_available = True
                self.use_gpu = True
                self._cpu_fallback_active = False
                self._memory_guard = Mock()
            def _activate_cpu_fallback(self):
                if not self._cpu_fallback_active:
                    self._cpu_fallback_active = True
                    self.use_gpu = False
                    self.gpu_info['cpu_fallback'] = True
                    self.gpu_info['fallback_reason'] = 'GPU memory insufficient'
                    if self._memory_guard:
                        self._memory_guard.clear_gpu_cache()
        stub = FallbackStub()
        # Starts out in GPU mode
        assert stub._cpu_fallback_active is False
        assert stub.use_gpu is True
        stub._activate_cpu_fallback()
        # Now in fallback mode, with the cache cleared exactly once
        assert stub._cpu_fallback_active is True
        assert stub.use_gpu is False
        assert stub.gpu_info.get('cpu_fallback') is True
        stub._memory_guard.clear_gpu_cache.assert_called_once()
    def test_cpu_fallback_idempotent(self):
        """A second activation is a no-op"""
        class FallbackStub:
            def __init__(self):
                self.gpu_info = {}
                self.use_gpu = True
                self._cpu_fallback_active = False
                self._memory_guard = Mock()
            def _activate_cpu_fallback(self):
                if not self._cpu_fallback_active:
                    self._cpu_fallback_active = True
                    self.use_gpu = False
                    if self._memory_guard:
                        self._memory_guard.clear_gpu_cache()
        stub = FallbackStub()
        for _ in range(2):
            stub._activate_cpu_fallback()
        # The cache is cleared by the first activation only
        assert stub._memory_guard.clear_gpu_cache.call_count == 1
    def test_gpu_mode_restoration_pattern(self):
        """GPU mode comes back once the memory guard reports enough memory"""
        # Stand-in following the pattern used in OCRService._restore_gpu_mode
        class RestoreStub:
            def __init__(self):
                self.gpu_info = {
                    'device_id': 0,
                    'cpu_fallback': True,
                    'fallback_reason': 'test'
                }
                self.gpu_available = True
                self.use_gpu = False
                self._cpu_fallback_active = True
                self._memory_guard = Mock()
            def _restore_gpu_mode(self):
                if not (self._cpu_fallback_active and self.gpu_available):
                    return
                if not self._memory_guard:
                    return
                # Only switch back when GPU memory is available again
                is_available, stats = self._memory_guard.check_memory(required_mb=2000)
                if not is_available:
                    return
                self._cpu_fallback_active = False
                self.use_gpu = True
                self.gpu_info.pop('cpu_fallback', None)
                self.gpu_info.pop('fallback_reason', None)
        stub = RestoreStub()
        # Memory guard reports sufficient memory
        plenty = Mock()
        plenty.gpu_free_mb = 5000
        stub._memory_guard.check_memory.return_value = (True, plenty)
        stub._restore_gpu_mode()
        assert stub._cpu_fallback_active is False
        assert stub.use_gpu is True
        assert 'cpu_fallback' not in stub.gpu_info
    def test_gpu_mode_not_restored_when_memory_still_low(self):
        """The fallback stays active while the memory guard reports low memory"""
        class RestoreStub:
            def __init__(self):
                self.gpu_info = {'cpu_fallback': True}
                self.gpu_available = True
                self.use_gpu = False
                self._cpu_fallback_active = True
                self._memory_guard = Mock()
            def _restore_gpu_mode(self):
                if not (self._cpu_fallback_active and self.gpu_available):
                    return
                if not self._memory_guard:
                    return
                is_available, stats = self._memory_guard.check_memory(required_mb=2000)
                if is_available:
                    self._cpu_fallback_active = False
                    self.use_gpu = True
        stub = RestoreStub()
        # Memory guard reports insufficient memory
        scarce = Mock()
        scarce.gpu_free_mb = 500
        stub._memory_guard.check_memory.return_value = (False, scarce)
        stub._restore_gpu_mode()
        assert stub._cpu_fallback_active is True
        assert stub.use_gpu is False
 class TestPreOperationMemoryCheckPattern:
    """Tests for pre-operation memory check pattern as used in OCRService"""
    def test_pre_operation_check_with_fallback(self):
        """Test the pre-operation memory check pattern with fallback"""
        guard = MemoryGuard(MemoryConfig(
            warning_threshold=0.80,
            critical_threshold=0.95,
            enable_cpu_fallback=True,
        ))
        # Simulate the pattern:
        # 1. Check if in CPU fallback mode
        # 2. Try to restore GPU mode if memory available
        # 3. Perform memory check for operation
        class MockService:
            def __init__(self):
                self._cpu_fallback_active = False
                self.use_gpu = True
                self.gpu_available = True
                self._memory_guard = guard
            def _restore_gpu_mode(self):
                pass  # Simplified
            def pre_operation_check(self, required_mb: int) -> bool:
                # Try restore first
                if self._cpu_fallback_active:
                    self._restore_gpu_mode()
                # Perform memory check
                if not self.use_gpu:
                    return True  # CPU mode, no GPU check needed
                is_available, stats = self._memory_guard.check_memory(required_mb=required_mb)
                return is_available
        service = MockService()
        # Mock sufficient memory
        with patch.object(guard, 'get_memory_stats') as mock_stats:
            mock_stats.return_value = MemoryStats(
                gpu_used_ratio=0.50,
                gpu_free_mb=4000,
                gpu_total_mb=8000,
            )
            result = service.pre_operation_check(required_mb=2000)
            assert result is True
        guard.shutdown()
    def test_pre_operation_check_returns_true_in_cpu_mode(self):
        """Test that pre-operation check returns True when in CPU mode"""
        class MockService:
            def __init__(self):
                self._cpu_fallback_active = True
                self.use_gpu = False
                self._memory_guard = Mock()
            def pre_operation_check(self, required_mb: int) -> bool:
                if not self.use_gpu:
                    return True  # CPU mode, no GPU check needed
                return False
        service = MockService()
        result = service.pre_operation_check(required_mb=5000)
        # Should return True because we're in CPU mode
        assert result is True
        # Memory guard should not be called
        service._memory_guard.check_memory.assert_not_called()
 class TestMemoryCheckWithCleanup:
    """Tests for memory check with cleanup pattern"""
    def test_memory_check_triggers_cleanup_on_failure(self):
        """Test that memory check triggers cleanup when insufficient"""
        guard = MemoryGuard(MemoryConfig(
            warning_threshold=0.80,
            critical_threshold=0.95,
        ))
        # Track cleanup calls
        cleanup_called = False
        def mock_cleanup():
            nonlocal cleanup_called
            cleanup_called = True
        class MockService:
            def __init__(self):
                self._memory_guard = guard
                self.cleanup_func = mock_cleanup
            def check_gpu_memory(self, required_mb: int) -> bool:
                # The guard's stats are patched to report low memory
                with patch.object(self._memory_guard, 'get_memory_stats') as mock_stats:
                    mock_stats.return_value = MemoryStats(
                        gpu_used_ratio=0.96,
                        gpu_free_mb=300,
                        gpu_total_mb=8000,
                    )
                    is_available, stats = self._memory_guard.check_memory(required_mb=required_mb)
                    if not is_available:
                        # Trigger cleanup
                        self.cleanup_func()
                        self._memory_guard.clear_gpu_cache()
                        return False
                return True
        # try/finally so the guard is shut down even when an assertion fails
        try:
            service = MockService()
            result = service.check_gpu_memory(required_mb=1000)
            # Cleanup should have been triggered
            assert cleanup_called is True
            assert result is False
        finally:
            guard.shutdown()
 # When executed as a script, run this module's tests through pytest in verbose mode
 if __name__ == "__main__":
    pytest.main([__file__, "-v"])
--- a/backend/tests/services/test_service_pool.py
+++ b/backend/tests/services/test_service_pool.py
@@ -0,0 +1,387 @@
 """
 Tests for OCR Service Pool
 Tests OCRServicePool functionality including acquire, release, and concurrency.
 """
 import pytest
 import threading
 import time
 from unittest.mock import Mock, patch, MagicMock
 import sys
 # Mock paddle before importing service_pool to avoid import errors
 # when paddle is not installed in the test environment.
 # The mock reports a CUDA-less build with zero devices and zero memory usage.
 # NOTE: it is installed in sys.modules at import time and nothing in this
 # module removes it again, so it stays registered as 'paddle' after this
 # module has been imported.
 paddle_mock = MagicMock()
 paddle_mock.is_compiled_with_cuda.return_value = False
 paddle_mock.device.cuda.device_count.return_value = 0
 paddle_mock.device.cuda.memory_allocated.return_value = 0
 paddle_mock.device.cuda.memory_reserved.return_value = 0
 paddle_mock.device.cuda.empty_cache = MagicMock()
 sys.modules['paddle'] = paddle_mock
 from app.services.service_pool import (
    OCRServicePool,
    PooledService,
    PoolConfig,
    ServiceState,
    get_service_pool,
    shutdown_service_pool,
 )
 class TestPoolConfig:
    """Checks PoolConfig defaults and overrides"""
    def test_default_values(self):
        """A PoolConfig built without arguments exposes the expected defaults"""
        defaults = PoolConfig()
        expected = {
            "max_services_per_device": 1,
            "max_total_services": 2,
            "acquire_timeout_seconds": 300.0,
            "max_queue_size": 50,
            "max_consecutive_errors": 3,
        }
        for field_name, value in expected.items():
            assert getattr(defaults, field_name) == value
    def test_custom_values(self):
        """Values passed to PoolConfig are stored as given"""
        overrides = {
            "max_services_per_device": 2,
            "max_total_services": 4,
            "acquire_timeout_seconds": 60.0,
        }
        custom = PoolConfig(**overrides)
        for field_name, value in overrides.items():
            assert getattr(custom, field_name) == value
 class TestPooledService:
    """Checks the initial state of a PooledService"""
    def test_creation(self):
        """A new PooledService keeps its service/device and starts available with zeroed counters"""
        inner = Mock()
        wrapper = PooledService(service=inner, device="GPU:0")
        assert wrapper.service is inner
        assert wrapper.device == "GPU:0"
        assert wrapper.state == ServiceState.AVAILABLE
        assert (wrapper.use_count, wrapper.error_count) == (0, 0)
 class TestOCRServicePool:
    """Tests for OCRServicePool class"""
    def setup_method(self):
        """Reset singleton before each test and start tracking created pools"""
        shutdown_service_pool()
        OCRServicePool._instance = None
        OCRServicePool._lock = threading.Lock()
        self._pools = []
    def teardown_method(self):
        """Shut down every pool the test created, then reset the singleton"""
        # Shutting down here (not at the end of each test body) keeps a pool
        # from outliving a test whose assertion fails.
        for pool in self._pools:
            pool.shutdown()
        shutdown_service_pool()
        OCRServicePool._instance = None
    def _make_pool(self, config=None):
        """Create a pool and register it for shutdown in teardown_method"""
        pool = OCRServicePool(config)
        self._pools.append(pool)
        return pool
    @staticmethod
    def _add_mock_service(pool, device="GPU:0"):
        """Pre-populate the pool with a Mock-backed service and return its wrapper"""
        pooled_service = PooledService(service=Mock(), device=device)
        pool.services[device].append(pooled_service)
        return pooled_service
    def test_singleton_pattern(self):
        """Test that OCRServicePool is a singleton"""
        pool1 = self._make_pool()
        pool2 = self._make_pool()
        assert pool1 is pool2
    def test_initialize_device(self):
        """Test device initialization"""
        pool = self._make_pool(PoolConfig())
        # Default device should be initialized
        assert "GPU:0" in pool.services
        assert "GPU:0" in pool.semaphores
        # Test adding new device
        pool._initialize_device("GPU:1")
        assert "GPU:1" in pool.services
        assert "GPU:1" in pool.semaphores
    def test_acquire_creates_service(self):
        """Test that acquire hands out a pre-populated service and marks it in use"""
        # The pool is pre-populated, so this covers handing out an existing
        # service rather than creating a new one.
        pool = self._make_pool(PoolConfig(max_services_per_device=1))
        self._add_mock_service(pool)
        pooled = pool.acquire(device="GPU:0", timeout=5.0)
        assert pooled is not None
        assert pooled.state == ServiceState.IN_USE
        assert pooled.use_count == 1
    def test_acquire_reuses_available_service(self):
        """Test that acquire reuses available services"""
        pool = self._make_pool(PoolConfig(max_services_per_device=1))
        self._add_mock_service(pool)
        # First acquire
        pooled1 = pool.acquire(device="GPU:0")
        service_id = id(pooled1.service)
        pool.release(pooled1)
        # Second acquire should get the same service
        pooled2 = pool.acquire(device="GPU:0")
        assert id(pooled2.service) == service_id
        assert pooled2.use_count == 2
    def test_release_makes_service_available(self):
        """Test that release makes service available again"""
        pool = self._make_pool(PoolConfig())
        self._add_mock_service(pool)
        pooled = pool.acquire(device="GPU:0")
        assert pooled.state == ServiceState.IN_USE
        pool.release(pooled)
        assert pooled.state == ServiceState.AVAILABLE
    def test_release_with_error(self):
        """Test that release with error increments error count"""
        pool = self._make_pool(PoolConfig(max_consecutive_errors=3))
        self._add_mock_service(pool)
        pooled = pool.acquire(device="GPU:0")
        pool.release(pooled, error=Exception("Test error"))
        assert pooled.error_count == 1
        assert pooled.state == ServiceState.AVAILABLE
    def test_release_marks_unhealthy_after_errors(self):
        """Test that service is marked unhealthy after too many errors"""
        pool = self._make_pool(PoolConfig(max_consecutive_errors=2))
        self._add_mock_service(pool)
        pooled = pool.acquire(device="GPU:0")
        pool.release(pooled, error=Exception("Error 1"))
        pooled = pool.acquire(device="GPU:0")
        pool.release(pooled, error=Exception("Error 2"))
        assert pooled.state == ServiceState.UNHEALTHY
        assert pooled.error_count == 2
    def test_acquire_context_manager(self):
        """Test context manager for acquire/release"""
        pool = self._make_pool(PoolConfig())
        self._add_mock_service(pool)
        with pool.acquire_context(device="GPU:0") as pooled:
            assert pooled is not None
            assert pooled.state == ServiceState.IN_USE
        # After context, service should be available
        assert pooled.state == ServiceState.AVAILABLE
    def test_acquire_context_manager_with_error(self):
        """Test context manager releases on error"""
        pool = self._make_pool(PoolConfig(max_consecutive_errors=3))
        self._add_mock_service(pool)
        with pytest.raises(ValueError):
            with pool.acquire_context(device="GPU:0") as pooled:
                raise ValueError("Test error")
        # The error is recorded, and a single error (below
        # max_consecutive_errors) leaves the service available
        assert pooled.error_count == 1
        assert pooled.state == ServiceState.AVAILABLE
    def test_acquire_timeout(self):
        """Test that acquire times out when no service available"""
        config = PoolConfig(
            max_services_per_device=1,
            max_total_services=1,
        )
        pool = self._make_pool(config)
        self._add_mock_service(pool)
        # Acquire the only service
        pooled1 = pool.acquire(device="GPU:0")
        assert pooled1 is not None
        # Try to acquire another - should timeout
        pooled2 = pool.acquire(device="GPU:0", timeout=0.5)
        assert pooled2 is None
    def test_get_pool_stats(self):
        """Test pool statistics"""
        pool = self._make_pool(PoolConfig())
        self._add_mock_service(pool)
        # Acquire a service
        pooled = pool.acquire(device="GPU:0")
        stats = pool.get_pool_stats()
        assert stats["total_services"] == 1
        assert stats["in_use_services"] == 1
        assert stats["available_services"] == 0
        assert stats["metrics"]["total_acquisitions"] == 1
        pool.release(pooled)
        stats = pool.get_pool_stats()
        assert stats["available_services"] == 1
        assert stats["metrics"]["total_releases"] == 1
    def test_health_check(self):
        """Test health check functionality"""
        pool = self._make_pool(PoolConfig())
        pooled_service = self._add_mock_service(pool)
        # Make the attributes probed by health_check explicit
        pooled_service.service.process = Mock()
        pooled_service.service.get_gpu_status = Mock()
        # Acquire and release to update use_count
        pooled = pool.acquire(device="GPU:0")
        pool.release(pooled)
        health = pool.health_check()
        assert health["healthy"] is True
        assert len(health["services"]) == 1
        assert health["services"][0]["responsive"] is True
    def test_concurrent_acquire(self):
        """Test concurrent service acquisition"""
        config = PoolConfig(
            max_services_per_device=2,
            max_total_services=2,
        )
        pool = self._make_pool(config)
        # Pre-populate with 2 mock services
        for _ in range(2):
            self._add_mock_service(pool)
        results = []
        def worker(worker_id):
            pooled = pool.acquire(device="GPU:0", timeout=5.0, task_id=f"task_{worker_id}")
            if pooled:
                results.append((worker_id, pooled))
                time.sleep(0.1)  # Simulate work
                pool.release(pooled)
        threads = [threading.Thread(target=worker, args=(i,)) for i in range(4)]
        for t in threads:
            t.start()
        for t in threads:
            t.join()
        # All workers should have acquired a service
        assert len(results) == 4
 class TestGetServicePool:
    """Checks the module-level get_service_pool / shutdown_service_pool helpers"""
    def setup_method(self):
        """Start every test without a global pool or cached singleton"""
        shutdown_service_pool()
        OCRServicePool._instance = None
    def teardown_method(self):
        """Drop the global pool and cached singleton again"""
        shutdown_service_pool()
        OCRServicePool._instance = None
    def test_get_service_pool_creates_singleton(self):
        """Repeated get_service_pool() calls hand back the same object"""
        first = get_service_pool()
        second = get_service_pool()
        assert first is second
        shutdown_service_pool()
    def test_shutdown_service_pool(self):
        """A pool can be obtained again after shutdown_service_pool()"""
        get_service_pool()
        shutdown_service_pool()
        # A subsequent request must still yield an initialized pool
        replacement = get_service_pool()
        assert replacement._initialized is True
        shutdown_service_pool()
 # When executed as a script, run this module's tests through pytest in verbose mode
 if __name__ == "__main__":
    pytest.main([__file__, "-v"])
--- a/frontend/package-lock.json
+++ b/frontend/package-lock.json
@@ -8,6 +8,7 @@
      "name": "frontend",
      "version": "0.0.0",
      "dependencies": {
        "@radix-ui/react-select": "^2.2.6",
        "@tanstack/react-query": "^5.90.7",
        "axios": "^1.13.2",
        "class-variance-authority": "^0.7.0",
@@ -87,7 +88,6 @@
      "integrity": "sha512-e7jT4DxYvIDLk1ZHmU/m/mB19rex9sv0c2ftBtjSBv+kVM/902eh0fINUzD7UwLLNR+jU585GxUJ8/EBfAM5fw==",
      "dev": true,
      "license": "MIT",
      "peer": true,
      "dependencies": {
        "@babel/code-frame": "^7.27.1",
        "@babel/generator": "^7.28.5",
@@ -947,6 +947,44 @@
        "node": "^18.18.0 || ^20.9.0 || >=21.1.0"
      }
    },
    "node_modules/@floating-ui/core": {
      "version": "1.7.3",
      "resolved": "https://registry.npmjs.org/@floating-ui/core/-/core-1.7.3.tgz",
      "integrity": "sha512-sGnvb5dmrJaKEZ+LDIpguvdX3bDlEllmv4/ClQ9awcmCZrlx5jQyyMWFM5kBI+EyNOCDDiKk8il0zeuX3Zlg/w==",
      "license": "MIT",
      "dependencies": {
        "@floating-ui/utils": "^0.2.10"
      }
    },
    "node_modules/@floating-ui/dom": {
      "version": "1.7.4",
      "resolved": "https://registry.npmjs.org/@floating-ui/dom/-/dom-1.7.4.tgz",
      "integrity": "sha512-OOchDgh4F2CchOX94cRVqhvy7b3AFb+/rQXyswmzmGakRfkMgoWVjfnLWkRirfLEfuD4ysVW16eXzwt3jHIzKA==",
      "license": "MIT",
      "dependencies": {
        "@floating-ui/core": "^1.7.3",
        "@floating-ui/utils": "^0.2.10"
      }
    },
    "node_modules/@floating-ui/react-dom": {
      "version": "2.1.6",
      "resolved": "https://registry.npmjs.org/@floating-ui/react-dom/-/react-dom-2.1.6.tgz",
      "integrity": "sha512-4JX6rEatQEvlmgU80wZyq9RT96HZJa88q8hp0pBd+LrczeDI4o6uA2M+uvxngVHo4Ihr8uibXxH6+70zhAFrVw==",
      "license": "MIT",
      "dependencies": {
        "@floating-ui/dom": "^1.7.4"
      },
      "peerDependencies": {
        "react": ">=16.8.0",
        "react-dom": ">=16.8.0"
      }
    },
    "node_modules/@floating-ui/utils": {
      "version": "0.2.10",
      "resolved": "https://registry.npmjs.org/@floating-ui/utils/-/utils-0.2.10.tgz",
      "integrity": "sha512-aGTxbpbg8/b5JfU1HXSrbH3wXZuLPJcNEcZQFMxLs3oSzgtVu6nFPkbbGGUvBcUjKV2YyB9Wxxabo+HEH9tcRQ==",
      "license": "MIT"
    },
    "node_modules/@humanfs/core": {
      "version": "0.19.1",
      "resolved": "https://registry.npmjs.org/@humanfs/core/-/core-0.19.1.tgz",
@@ -1272,6 +1310,502 @@
        "node": ">= 8"
      }
    },
    "node_modules/@radix-ui/number": {
      "version": "1.1.1",
      "resolved": "https://registry.npmjs.org/@radix-ui/number/-/number-1.1.1.tgz",
      "integrity": "sha512-MkKCwxlXTgz6CFoJx3pCwn07GKp36+aZyu/u2Ln2VrA5DcdyCZkASEDBTd8x5whTQQL5CiYf4prXKLcgQdv29g==",
      "license": "MIT"
    },
    "node_modules/@radix-ui/primitive": {
      "version": "1.1.3",
      "resolved": "https://registry.npmjs.org/@radix-ui/primitive/-/primitive-1.1.3.tgz",
      "integrity": "sha512-JTF99U/6XIjCBo0wqkU5sK10glYe27MRRsfwoiq5zzOEZLHU3A3KCMa5X/azekYRCJ0HlwI0crAXS/5dEHTzDg==",
      "license": "MIT"
    },
    "node_modules/@radix-ui/react-arrow": {
      "version": "1.1.7",
      "resolved": "https://registry.npmjs.org/@radix-ui/react-arrow/-/react-arrow-1.1.7.tgz",
      "integrity": "sha512-F+M1tLhO+mlQaOWspE8Wstg+z6PwxwRd8oQ8IXceWz92kfAmalTRf0EjrouQeo7QssEPfCn05B4Ihs1K9WQ/7w==",
      "license": "MIT",
      "dependencies": {
        "@radix-ui/react-primitive": "2.1.3"
      },
      "peerDependencies": {
        "@types/react": "*",
        "@types/react-dom": "*",
        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
      },
      "peerDependenciesMeta": {
        "@types/react": {
          "optional": true
        },
        "@types/react-dom": {
          "optional": true
        }
      }
    },
    "node_modules/@radix-ui/react-collection": {
      "version": "1.1.7",
      "resolved": "https://registry.npmjs.org/@radix-ui/react-collection/-/react-collection-1.1.7.tgz",
      "integrity": "sha512-Fh9rGN0MoI4ZFUNyfFVNU4y9LUz93u9/0K+yLgA2bwRojxM8JU1DyvvMBabnZPBgMWREAJvU2jjVzq+LrFUglw==",
      "license": "MIT",
      "dependencies": {
        "@radix-ui/react-compose-refs": "1.1.2",
        "@radix-ui/react-context": "1.1.2",
        "@radix-ui/react-primitive": "2.1.3",
        "@radix-ui/react-slot": "1.2.3"
      },
      "peerDependencies": {
        "@types/react": "*",
        "@types/react-dom": "*",
        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
      },
      "peerDependenciesMeta": {
        "@types/react": {
          "optional": true
        },
        "@types/react-dom": {
          "optional": true
        }
      }
    },
    "node_modules/@radix-ui/react-compose-refs": {
      "version": "1.1.2",
      "resolved": "https://registry.npmjs.org/@radix-ui/react-compose-refs/-/react-compose-refs-1.1.2.tgz",
      "integrity": "sha512-z4eqJvfiNnFMHIIvXP3CY57y2WJs5g2v3X0zm9mEJkrkNv4rDxu+sg9Jh8EkXyeqBkB7SOcboo9dMVqhyrACIg==",
      "license": "MIT",
      "peerDependencies": {
        "@types/react": "*",
        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
      },
      "peerDependenciesMeta": {
        "@types/react": {
          "optional": true
        }
      }
    },
    "node_modules/@radix-ui/react-context": {
      "version": "1.1.2",
      "resolved": "https://registry.npmjs.org/@radix-ui/react-context/-/react-context-1.1.2.tgz",
      "integrity": "sha512-jCi/QKUM2r1Ju5a3J64TH2A5SpKAgh0LpknyqdQ4m6DCV0xJ2HG1xARRwNGPQfi1SLdLWZ1OJz6F4OMBBNiGJA==",
      "license": "MIT",
      "peerDependencies": {
        "@types/react": "*",
        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
      },
      "peerDependenciesMeta": {
        "@types/react": {
          "optional": true
        }
      }
    },
    "node_modules/@radix-ui/react-direction": {
      "version": "1.1.1",
      "resolved": "https://registry.npmjs.org/@radix-ui/react-direction/-/react-direction-1.1.1.tgz",
      "integrity": "sha512-1UEWRX6jnOA2y4H5WczZ44gOOjTEmlqv1uNW4GAJEO5+bauCBhv8snY65Iw5/VOS/ghKN9gr2KjnLKxrsvoMVw==",
      "license": "MIT",
      "peerDependencies": {
        "@types/react": "*",
        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
      },
      "peerDependenciesMeta": {
        "@types/react": {
          "optional": true
        }
      }
    },
    "node_modules/@radix-ui/react-dismissable-layer": {
      "version": "1.1.11",
      "resolved": "https://registry.npmjs.org/@radix-ui/react-dismissable-layer/-/react-dismissable-layer-1.1.11.tgz",
      "integrity": "sha512-Nqcp+t5cTB8BinFkZgXiMJniQH0PsUt2k51FUhbdfeKvc4ACcG2uQniY/8+h1Yv6Kza4Q7lD7PQV0z0oicE0Mg==",
      "license": "MIT",
      "dependencies": {
        "@radix-ui/primitive": "1.1.3",
        "@radix-ui/react-compose-refs": "1.1.2",
        "@radix-ui/react-primitive": "2.1.3",
        "@radix-ui/react-use-callback-ref": "1.1.1",
        "@radix-ui/react-use-escape-keydown": "1.1.1"
      },
      "peerDependencies": {
        "@types/react": "*",
        "@types/react-dom": "*",
        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
      },
      "peerDependenciesMeta": {
        "@types/react": {
          "optional": true
        },
        "@types/react-dom": {
          "optional": true
        }
      }
    },
    "node_modules/@radix-ui/react-focus-guards": {
      "version": "1.1.3",
      "resolved": "https://registry.npmjs.org/@radix-ui/react-focus-guards/-/react-focus-guards-1.1.3.tgz",
      "integrity": "sha512-0rFg/Rj2Q62NCm62jZw0QX7a3sz6QCQU0LpZdNrJX8byRGaGVTqbrW9jAoIAHyMQqsNpeZ81YgSizOt5WXq0Pw==",
      "license": "MIT",
      "peerDependencies": {
        "@types/react": "*",
        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
      },
      "peerDependenciesMeta": {
        "@types/react": {
          "optional": true
        }
      }
    },
    "node_modules/@radix-ui/react-focus-scope": {
      "version": "1.1.7",
      "resolved": "https://registry.npmjs.org/@radix-ui/react-focus-scope/-/react-focus-scope-1.1.7.tgz",
      "integrity": "sha512-t2ODlkXBQyn7jkl6TNaw/MtVEVvIGelJDCG41Okq/KwUsJBwQ4XVZsHAVUkK4mBv3ewiAS3PGuUWuY2BoK4ZUw==",
      "license": "MIT",
      "dependencies": {
        "@radix-ui/react-compose-refs": "1.1.2",
        "@radix-ui/react-primitive": "2.1.3",
        "@radix-ui/react-use-callback-ref": "1.1.1"
      },
      "peerDependencies": {
        "@types/react": "*",
        "@types/react-dom": "*",
        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
      },
      "peerDependenciesMeta": {
        "@types/react": {
          "optional": true
        },
        "@types/react-dom": {
          "optional": true
        }
      }
    },
    "node_modules/@radix-ui/react-id": {
      "version": "1.1.1",
      "resolved": "https://registry.npmjs.org/@radix-ui/react-id/-/react-id-1.1.1.tgz",
      "integrity": "sha512-kGkGegYIdQsOb4XjsfM97rXsiHaBwco+hFI66oO4s9LU+PLAC5oJ7khdOVFxkhsmlbpUqDAvXw11CluXP+jkHg==",
      "license": "MIT",
      "dependencies": {
        "@radix-ui/react-use-layout-effect": "1.1.1"
      },
      "peerDependencies": {
        "@types/react": "*",
        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
      },
      "peerDependenciesMeta": {
        "@types/react": {
          "optional": true
        }
      }
    },
    "node_modules/@radix-ui/react-popper": {
      "version": "1.2.8",
      "resolved": "https://registry.npmjs.org/@radix-ui/react-popper/-/react-popper-1.2.8.tgz",
      "integrity": "sha512-0NJQ4LFFUuWkE7Oxf0htBKS6zLkkjBH+hM1uk7Ng705ReR8m/uelduy1DBo0PyBXPKVnBA6YBlU94MBGXrSBCw==",
      "license": "MIT",
      "dependencies": {
        "@floating-ui/react-dom": "^2.0.0",
        "@radix-ui/react-arrow": "1.1.7",
        "@radix-ui/react-compose-refs": "1.1.2",
        "@radix-ui/react-context": "1.1.2",
        "@radix-ui/react-primitive": "2.1.3",
        "@radix-ui/react-use-callback-ref": "1.1.1",
        "@radix-ui/react-use-layout-effect": "1.1.1",
        "@radix-ui/react-use-rect": "1.1.1",
        "@radix-ui/react-use-size": "1.1.1",
        "@radix-ui/rect": "1.1.1"
      },
      "peerDependencies": {
        "@types/react": "*",
        "@types/react-dom": "*",
        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
      },
      "peerDependenciesMeta": {
        "@types/react": {
          "optional": true
        },
        "@types/react-dom": {
          "optional": true
        }
      }
    },
    "node_modules/@radix-ui/react-portal": {
      "version": "1.1.9",
      "resolved": "https://registry.npmjs.org/@radix-ui/react-portal/-/react-portal-1.1.9.tgz",
      "integrity": "sha512-bpIxvq03if6UNwXZ+HTK71JLh4APvnXntDc6XOX8UVq4XQOVl7lwok0AvIl+b8zgCw3fSaVTZMpAPPagXbKmHQ==",
      "license": "MIT",
      "dependencies": {
        "@radix-ui/react-primitive": "2.1.3",
        "@radix-ui/react-use-layout-effect": "1.1.1"
      },
      "peerDependencies": {
        "@types/react": "*",
        "@types/react-dom": "*",
        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
      },
      "peerDependenciesMeta": {
        "@types/react": {
          "optional": true
        },
        "@types/react-dom": {
          "optional": true
        }
      }
    },
    "node_modules/@radix-ui/react-primitive": {
      "version": "2.1.3",
      "resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-2.1.3.tgz",
      "integrity": "sha512-m9gTwRkhy2lvCPe6QJp4d3G1TYEUHn/FzJUtq9MjH46an1wJU+GdoGC5VLof8RX8Ft/DlpshApkhswDLZzHIcQ==",
      "license": "MIT",
      "dependencies": {
        "@radix-ui/react-slot": "1.2.3"
      },
      "peerDependencies": {
        "@types/react": "*",
        "@types/react-dom": "*",
        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
      },
      "peerDependenciesMeta": {
        "@types/react": {
          "optional": true
        },
        "@types/react-dom": {
          "optional": true
        }
      }
    },
    "node_modules/@radix-ui/react-select": {
      "version": "2.2.6",
      "resolved": "https://registry.npmjs.org/@radix-ui/react-select/-/react-select-2.2.6.tgz",
      "integrity": "sha512-I30RydO+bnn2PQztvo25tswPH+wFBjehVGtmagkU78yMdwTwVf12wnAOF+AeP8S2N8xD+5UPbGhkUfPyvT+mwQ==",
      "license": "MIT",
      "dependencies": {
        "@radix-ui/number": "1.1.1",
        "@radix-ui/primitive": "1.1.3",
        "@radix-ui/react-collection": "1.1.7",
        "@radix-ui/react-compose-refs": "1.1.2",
        "@radix-ui/react-context": "1.1.2",
        "@radix-ui/react-direction": "1.1.1",
        "@radix-ui/react-dismissable-layer": "1.1.11",
        "@radix-ui/react-focus-guards": "1.1.3",
        "@radix-ui/react-focus-scope": "1.1.7",
        "@radix-ui/react-id": "1.1.1",
        "@radix-ui/react-popper": "1.2.8",
        "@radix-ui/react-portal": "1.1.9",
        "@radix-ui/react-primitive": "2.1.3",
        "@radix-ui/react-slot": "1.2.3",
        "@radix-ui/react-use-callback-ref": "1.1.1",
        "@radix-ui/react-use-controllable-state": "1.2.2",
        "@radix-ui/react-use-layout-effect": "1.1.1",
        "@radix-ui/react-use-previous": "1.1.1",
        "@radix-ui/react-visually-hidden": "1.2.3",
        "aria-hidden": "^1.2.4",
        "react-remove-scroll": "^2.6.3"
      },
      "peerDependencies": {
        "@types/react": "*",
        "@types/react-dom": "*",
        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
      },
      "peerDependenciesMeta": {
        "@types/react": {
          "optional": true
        },
        "@types/react-dom": {
          "optional": true
        }
      }
    },
    "node_modules/@radix-ui/react-slot": {
      "version": "1.2.3",
      "resolved": "https://registry.npmjs.org/@radix-ui/react-slot/-/react-slot-1.2.3.tgz",
      "integrity": "sha512-aeNmHnBxbi2St0au6VBVC7JXFlhLlOnvIIlePNniyUNAClzmtAUEY8/pBiK3iHjufOlwA+c20/8jngo7xcrg8A==",
      "license": "MIT",
      "dependencies": {
        "@radix-ui/react-compose-refs": "1.1.2"
      },
      "peerDependencies": {
        "@types/react": "*",
        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
      },
      "peerDependenciesMeta": {
        "@types/react": {
          "optional": true
        }
      }
    },
    "node_modules/@radix-ui/react-use-callback-ref": {
      "version": "1.1.1",
      "resolved": "https://registry.npmjs.org/@radix-ui/react-use-callback-ref/-/react-use-callback-ref-1.1.1.tgz",
      "integrity": "sha512-FkBMwD+qbGQeMu1cOHnuGB6x4yzPjho8ap5WtbEJ26umhgqVXbhekKUQO+hZEL1vU92a3wHwdp0HAcqAUF5iDg==",
      "license": "MIT",
      "peerDependencies": {
        "@types/react": "*",
        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
      },
      "peerDependenciesMeta": {
        "@types/react": {
          "optional": true
        }
      }
    },
    "node_modules/@radix-ui/react-use-controllable-state": {
      "version": "1.2.2",
      "resolved": "https://registry.npmjs.org/@radix-ui/react-use-controllable-state/-/react-use-controllable-state-1.2.2.tgz",
      "integrity": "sha512-BjasUjixPFdS+NKkypcyyN5Pmg83Olst0+c6vGov0diwTEo6mgdqVR6hxcEgFuh4QrAs7Rc+9KuGJ9TVCj0Zzg==",
      "license": "MIT",
      "dependencies": {
        "@radix-ui/react-use-effect-event": "0.0.2",
        "@radix-ui/react-use-layout-effect": "1.1.1"
      },
      "peerDependencies": {
        "@types/react": "*",
        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
      },
      "peerDependenciesMeta": {
        "@types/react": {
          "optional": true
        }
      }
    },
    "node_modules/@radix-ui/react-use-effect-event": {
      "version": "0.0.2",
      "resolved": "https://registry.npmjs.org/@radix-ui/react-use-effect-event/-/react-use-effect-event-0.0.2.tgz",
      "integrity": "sha512-Qp8WbZOBe+blgpuUT+lw2xheLP8q0oatc9UpmiemEICxGvFLYmHm9QowVZGHtJlGbS6A6yJ3iViad/2cVjnOiA==",
      "license": "MIT",
      "dependencies": {
        "@radix-ui/react-use-layout-effect": "1.1.1"
      },
      "peerDependencies": {
        "@types/react": "*",
        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
      },
      "peerDependenciesMeta": {
        "@types/react": {
          "optional": true
        }
      }
    },
    "node_modules/@radix-ui/react-use-escape-keydown": {
      "version": "1.1.1",
      "resolved": "https://registry.npmjs.org/@radix-ui/react-use-escape-keydown/-/react-use-escape-keydown-1.1.1.tgz",
      "integrity": "sha512-Il0+boE7w/XebUHyBjroE+DbByORGR9KKmITzbR7MyQ4akpORYP/ZmbhAr0DG7RmmBqoOnZdy2QlvajJ2QA59g==",
      "license": "MIT",
      "dependencies": {
        "@radix-ui/react-use-callback-ref": "1.1.1"
      },
      "peerDependencies": {
        "@types/react": "*",
        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
      },
      "peerDependenciesMeta": {
        "@types/react": {
          "optional": true
        }
      }
    },
    "node_modules/@radix-ui/react-use-layout-effect": {
      "version": "1.1.1",
      "resolved": "https://registry.npmjs.org/@radix-ui/react-use-layout-effect/-/react-use-layout-effect-1.1.1.tgz",
      "integrity": "sha512-RbJRS4UWQFkzHTTwVymMTUv8EqYhOp8dOOviLj2ugtTiXRaRQS7GLGxZTLL1jWhMeoSCf5zmcZkqTl9IiYfXcQ==",
      "license": "MIT",
      "peerDependencies": {
        "@types/react": "*",
        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
      },
      "peerDependenciesMeta": {
        "@types/react": {
          "optional": true
        }
      }
    },
    "node_modules/@radix-ui/react-use-previous": {
      "version": "1.1.1",
      "resolved": "https://registry.npmjs.org/@radix-ui/react-use-previous/-/react-use-previous-1.1.1.tgz",
      "integrity": "sha512-2dHfToCj/pzca2Ck724OZ5L0EVrr3eHRNsG/b3xQJLA2hZpVCS99bLAX+hm1IHXDEnzU6by5z/5MIY794/a8NQ==",
      "license": "MIT",
      "peerDependencies": {
        "@types/react": "*",
        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
      },
      "peerDependenciesMeta": {
        "@types/react": {
          "optional": true
        }
      }
    },
    "node_modules/@radix-ui/react-use-rect": {
      "version": "1.1.1",
      "resolved": "https://registry.npmjs.org/@radix-ui/react-use-rect/-/react-use-rect-1.1.1.tgz",
      "integrity": "sha512-QTYuDesS0VtuHNNvMh+CjlKJ4LJickCMUAqjlE3+j8w+RlRpwyX3apEQKGFzbZGdo7XNG1tXa+bQqIE7HIXT2w==",
      "license": "MIT",
      "dependencies": {
        "@radix-ui/rect": "1.1.1"
      },
      "peerDependencies": {
        "@types/react": "*",
        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
      },
      "peerDependenciesMeta": {
        "@types/react": {
          "optional": true
        }
      }
    },
    "node_modules/@radix-ui/react-use-size": {
      "version": "1.1.1",
      "resolved": "https://registry.npmjs.org/@radix-ui/react-use-size/-/react-use-size-1.1.1.tgz",
      "integrity": "sha512-ewrXRDTAqAXlkl6t/fkXWNAhFX9I+CkKlw6zjEwk86RSPKwZr3xpBRso655aqYafwtnbpHLj6toFzmd6xdVptQ==",
      "license": "MIT",
      "dependencies": {
        "@radix-ui/react-use-layout-effect": "1.1.1"
      },
      "peerDependencies": {
        "@types/react": "*",
        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
      },
      "peerDependenciesMeta": {
        "@types/react": {
          "optional": true
        }
      }
    },
    "node_modules/@radix-ui/react-visually-hidden": {
      "version": "1.2.3",
      "resolved": "https://registry.npmjs.org/@radix-ui/react-visually-hidden/-/react-visually-hidden-1.2.3.tgz",
      "integrity": "sha512-pzJq12tEaaIhqjbzpCuv/OypJY/BPavOofm+dbab+MHLajy277+1lLm6JFcGgF5eskJ6mquGirhXY2GD/8u8Ug==",
      "license": "MIT",
      "dependencies": {
        "@radix-ui/react-primitive": "2.1.3"
      },
      "peerDependencies": {
        "@types/react": "*",
        "@types/react-dom": "*",
        "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
        "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
      },
      "peerDependenciesMeta": {
        "@types/react": {
          "optional": true
        },
        "@types/react-dom": {
          "optional": true
        }
      }
    },
    "node_modules/@radix-ui/rect": {
      "version": "1.1.1",
      "resolved": "https://registry.npmjs.org/@radix-ui/rect/-/rect-1.1.1.tgz",
      "integrity": "sha512-HPwpGIzkl28mWyZqG52jiqDJ12waP11Pa1lGoiyUkIEuMLBP0oeK/C89esbXrxsky5we7dfd8U58nm0SgAWpVw==",
      "license": "MIT"
    },
    "node_modules/@rolldown/pluginutils": {
      "version": "1.0.0-beta.47",
      "resolved": "https://registry.npmjs.org/@rolldown/pluginutils/-/pluginutils-1.0.0-beta.47.tgz",
@@ -1990,7 +2524,6 @@
      "integrity": "sha512-GNWcUTRBgIRJD5zj+Tq0fKOJ5XZajIiBroOF0yvj2bSU1WvNdYS/dn9UxwsujGW4JX06dnHyjV2y9rRaybH0iQ==",
      "dev": true,
      "license": "MIT",
      "peer": true,
      "dependencies": {
        "undici-types": "~7.16.0"
      }
@@ -2000,7 +2533,6 @@
      "resolved": "https://registry.npmjs.org/@types/react/-/react-19.2.4.tgz",
      "integrity": "sha512-tBFxBp9Nfyy5rsmefN+WXc1JeW/j2BpBHFdLZbEVfs9wn3E3NRFxwV0pJg8M1qQAexFpvz73hJXFofV0ZAu92A==",
      "license": "MIT",
      "peer": true,
      "dependencies": {
        "csstype": "^3.0.2"
      }
@@ -2009,7 +2541,7 @@
      "version": "19.2.3",
      "resolved": "https://registry.npmjs.org/@types/react-dom/-/react-dom-19.2.3.tgz",
      "integrity": "sha512-jp2L/eY6fn+KgVVQAOqYItbF0VY/YApe5Mz2F0aykSO8gx31bYCZyvSeYxCHKvzHG5eZjc+zyaS5BrBWya2+kQ==",
-      "dev": true,
+      "devOptional": true,
      "license": "MIT",
      "peerDependencies": {
        "@types/react": "^19.2.0"
@@ -2067,7 +2599,6 @@
      "integrity": "sha512-tK3GPFWbirvNgsNKto+UmB/cRtn6TZfyw0D6IKrW55n6Vbs7KJoZtI//kpTKzE/DUmmnAFD8/Ca46s7Obs92/w==",
      "dev": true,
      "license": "MIT",
      "peer": true,
      "dependencies": {
        "@typescript-eslint/scope-manager": "8.46.4",
        "@typescript-eslint/types": "8.46.4",
@@ -2326,7 +2857,6 @@
      "integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==",
      "dev": true,
      "license": "MIT",
      "peer": true,
      "bin": {
        "acorn": "bin/acorn"
      },
@@ -2384,6 +2914,18 @@
      "dev": true,
      "license": "Python-2.0"
    },
    "node_modules/aria-hidden": {
      "version": "1.2.6",
      "resolved": "https://registry.npmjs.org/aria-hidden/-/aria-hidden-1.2.6.tgz",
      "integrity": "sha512-ik3ZgC9dY/lYVVM++OISsaYDeg1tb0VtP5uL3ouh1koGOaUMDPpbFIei4JkFimWUFPn90sbMNMXQAIVOlnYKJA==",
      "license": "MIT",
      "dependencies": {
        "tslib": "^2.0.0"
      },
      "engines": {
        "node": ">=10"
      }
    },
    "node_modules/asynckit": {
      "version": "0.4.0",
      "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz",
@@ -2519,7 +3061,6 @@
        }
      ],
      "license": "MIT",
      "peer": true,
      "dependencies": {
        "baseline-browser-mapping": "^2.8.25",
        "caniuse-lite": "^1.0.30001754",
@@ -2817,6 +3358,12 @@
        "node": ">=8"
      }
    },
    "node_modules/detect-node-es": {
      "version": "1.1.0",
      "resolved": "https://registry.npmjs.org/detect-node-es/-/detect-node-es-1.1.0.tgz",
      "integrity": "sha512-ypdmJU/TbBby2Dxibuv7ZLW3Bs1QEmM7nHjEANfohJLvE0XVujisn1qPJcZxg+qDucsr+bP6fLD1rPS3AhJ7EQ==",
      "license": "MIT"
    },
    "node_modules/devlop": {
      "version": "1.1.0",
      "resolved": "https://registry.npmjs.org/devlop/-/devlop-1.1.0.tgz",
@@ -2981,7 +3528,6 @@
      "integrity": "sha512-BhHmn2yNOFA9H9JmmIVKJmd288g9hrVRDkdoIgRCRuSySRUHH7r/DI6aAXW9T1WwUuY3DFgrcaqB+deURBLR5g==",
      "dev": true,
      "license": "MIT",
      "peer": true,
      "dependencies": {
        "@eslint-community/eslint-utils": "^4.8.0",
        "@eslint-community/regexpp": "^4.12.1",
@@ -3414,6 +3960,15 @@
        "url": "https://github.com/sponsors/ljharb"
      }
    },
    "node_modules/get-nonce": {
      "version": "1.0.1",
      "resolved": "https://registry.npmjs.org/get-nonce/-/get-nonce-1.0.1.tgz",
      "integrity": "sha512-FJhYRoDaiatfEkUK8HKlicmu/3SGFD51q3itKDGoSTysQJBnfOcxU5GxnhE1E6soB76MbT0MBtnKJuXyAx+96Q==",
      "license": "MIT",
      "engines": {
        "node": ">=6"
      }
    },
    "node_modules/get-proto": {
      "version": "1.0.1",
      "resolved": "https://registry.npmjs.org/get-proto/-/get-proto-1.0.1.tgz",
@@ -3606,7 +4161,6 @@
        }
      ],
      "license": "MIT",
      "peer": true,
      "dependencies": {
        "@babel/runtime": "^7.27.6"
      },
@@ -5096,7 +5650,6 @@
        }
      ],
      "license": "MIT",
      "peer": true,
      "dependencies": {
        "nanoid": "^3.3.11",
        "picocolors": "^1.1.1",
@@ -5186,7 +5739,6 @@
      "resolved": "https://registry.npmjs.org/react/-/react-19.2.0.tgz",
      "integrity": "sha512-tmbWg6W31tQLeB5cdIBOicJDJRR2KzXsV7uSK9iNfLWQ5bIZfxuPEHp7M8wiHyHnn0DD1i7w3Zmin0FtkrwoCQ==",
      "license": "MIT",
      "peer": true,
      "engines": {
        "node": ">=0.10.0"
      }
@@ -5196,7 +5748,6 @@
      "resolved": "https://registry.npmjs.org/react-dom/-/react-dom-19.2.0.tgz",
      "integrity": "sha512-UlbRu4cAiGaIewkPyiRGJk0imDN2T3JjieT6spoL2UeSf5od4n5LB/mQ4ejmxhCFT1tYe8IvaFulzynWovsEFQ==",
      "license": "MIT",
      "peer": true,
      "dependencies": {
        "scheduler": "^0.27.0"
      },
@@ -5332,6 +5883,53 @@
        "node": ">=0.10.0"
      }
    },
    "node_modules/react-remove-scroll": {
      "version": "2.7.1",
      "resolved": "https://registry.npmjs.org/react-remove-scroll/-/react-remove-scroll-2.7.1.tgz",
      "integrity": "sha512-HpMh8+oahmIdOuS5aFKKY6Pyog+FNaZV/XyJOq7b4YFwsFHe5yYfdbIalI4k3vU2nSDql7YskmUseHsRrJqIPA==",
      "license": "MIT",
      "dependencies": {
        "react-remove-scroll-bar": "^2.3.7",
        "react-style-singleton": "^2.2.3",
        "tslib": "^2.1.0",
        "use-callback-ref": "^1.3.3",
        "use-sidecar": "^1.1.3"
      },
      "engines": {
        "node": ">=10"
      },
      "peerDependencies": {
        "@types/react": "*",
        "react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0 || ^19.0.0-rc"
      },
      "peerDependenciesMeta": {
        "@types/react": {
          "optional": true
        }
      }
    },
    "node_modules/react-remove-scroll-bar": {
      "version": "2.3.8",
      "resolved": "https://registry.npmjs.org/react-remove-scroll-bar/-/react-remove-scroll-bar-2.3.8.tgz",
      "integrity": "sha512-9r+yi9+mgU33AKcj6IbT9oRCO78WriSj6t/cF8DWBZJ9aOGPOTEDvdUDz1FwKim7QXWwmHqtdHnRJfhAxEG46Q==",
      "license": "MIT",
      "dependencies": {
        "react-style-singleton": "^2.2.2",
        "tslib": "^2.0.0"
      },
      "engines": {
        "node": ">=10"
      },
      "peerDependencies": {
        "@types/react": "*",
        "react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0"
      },
      "peerDependenciesMeta": {
        "@types/react": {
          "optional": true
        }
      }
    },
    "node_modules/react-router": {
      "version": "7.9.6",
      "resolved": "https://registry.npmjs.org/react-router/-/react-router-7.9.6.tgz",
@@ -5370,6 +5968,28 @@
        "react-dom": ">=18"
      }
    },
    "node_modules/react-style-singleton": {
      "version": "2.2.3",
      "resolved": "https://registry.npmjs.org/react-style-singleton/-/react-style-singleton-2.2.3.tgz",
      "integrity": "sha512-b6jSvxvVnyptAiLjbkWLE/lOnR4lfTtDAl+eUC7RZy+QQWc6wRzIV2CE6xBuMmDxc2qIihtDCZD5NPOFl7fRBQ==",
      "license": "MIT",
      "dependencies": {
        "get-nonce": "^1.0.0",
        "tslib": "^2.0.0"
      },
      "engines": {
        "node": ">=10"
      },
      "peerDependencies": {
        "@types/react": "*",
        "react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0 || ^19.0.0-rc"
      },
      "peerDependenciesMeta": {
        "@types/react": {
          "optional": true
        }
      }
    },
    "node_modules/remark-parse": {
      "version": "11.0.0",
      "resolved": "https://registry.npmjs.org/remark-parse/-/remark-parse-11.0.0.tgz",
@@ -5691,7 +6311,6 @@
      "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==",
      "dev": true,
      "license": "MIT",
      "peer": true,
      "engines": {
        "node": ">=12"
      },
@@ -5770,7 +6389,6 @@
      "integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==",
      "devOptional": true,
      "license": "Apache-2.0",
      "peer": true,
      "bin": {
        "tsc": "bin/tsc",
        "tsserver": "bin/tsserver"
@@ -5938,6 +6556,49 @@
        "punycode": "^2.1.0"
      }
    },
    "node_modules/use-callback-ref": {
      "version": "1.3.3",
      "resolved": "https://registry.npmjs.org/use-callback-ref/-/use-callback-ref-1.3.3.tgz",
      "integrity": "sha512-jQL3lRnocaFtu3V00JToYz/4QkNWswxijDaCVNZRiRTO3HQDLsdu1ZtmIUvV4yPp+rvWm5j0y0TG/S61cuijTg==",
      "license": "MIT",
      "dependencies": {
        "tslib": "^2.0.0"
      },
      "engines": {
        "node": ">=10"
      },
      "peerDependencies": {
        "@types/react": "*",
        "react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0 || ^19.0.0-rc"
      },
      "peerDependenciesMeta": {
        "@types/react": {
          "optional": true
        }
      }
    },
    "node_modules/use-sidecar": {
      "version": "1.1.3",
      "resolved": "https://registry.npmjs.org/use-sidecar/-/use-sidecar-1.1.3.tgz",
      "integrity": "sha512-Fedw0aZvkhynoPYlA5WXrMCAMm+nSWdZt6lzJQ7Ok8S6Q+VsHmHpRWndVRJ8Be0ZbkfPc5LRYH+5XrzXcEeLRQ==",
      "license": "MIT",
      "dependencies": {
        "detect-node-es": "^1.1.0",
        "tslib": "^2.0.0"
      },
      "engines": {
        "node": ">=10"
      },
      "peerDependencies": {
        "@types/react": "*",
        "react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0 || ^19.0.0-rc"
      },
      "peerDependenciesMeta": {
        "@types/react": {
          "optional": true
        }
      }
    },
    "node_modules/use-sync-external-store": {
      "version": "1.6.0",
      "resolved": "https://registry.npmjs.org/use-sync-external-store/-/use-sync-external-store-1.6.0.tgz",
@@ -5981,7 +6642,6 @@
      "integrity": "sha512-BxAKBWmIbrDgrokdGZH1IgkIk/5mMHDreLDmCJ0qpyJaAteP8NvMhkwr/ZCQNqNH97bw/dANTE9PDzqwJghfMQ==",
      "dev": true,
      "license": "MIT",
      "peer": true,
      "dependencies": {
        "esbuild": "^0.25.0",
        "fdir": "^6.5.0",
@@ -6075,7 +6735,6 @@
      "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==",
      "dev": true,
      "license": "MIT",
      "peer": true,
      "engines": {
        "node": ">=12"
      },
--- a/frontend/package.json
+++ b/frontend/package.json
@@ -10,6 +10,7 @@
    "preview": "vite preview"
  },
  "dependencies": {
    "@radix-ui/react-select": "^2.2.6",
    "@tanstack/react-query": "^5.90.7",
    "axios": "^1.13.2",
    "class-variance-authority": "^0.7.0",
--- a/frontend/src/components/PDFViewer.tsx
+++ b/frontend/src/components/PDFViewer.tsx
@@ -1,11 +1,17 @@
-import { useState, useMemo } from 'react'
+import { useState, useCallback, useMemo, useRef, useEffect } from 'react'
-import { Document, Page } from 'react-pdf'
+import { Document, Page, pdfjs } from 'react-pdf'
 import type { PDFDocumentProxy } from 'pdfjs-dist'
 import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card'
 import { Button } from '@/components/ui/button'
-import { ChevronLeft, ChevronRight, ZoomIn, ZoomOut } from 'lucide-react'
+import { ChevronLeft, ChevronRight, ZoomIn, ZoomOut, Loader2 } from 'lucide-react'
 import 'react-pdf/dist/Page/AnnotationLayer.css'
 import 'react-pdf/dist/Page/TextLayer.css'
 // Configure standard font data URL for proper font rendering.
 // Declared once at module scope (outside the component), so the same object
 // is reused instead of being rebuilt on every render.
 // The URL targets the standard_fonts/ directory of the pdfjs-dist release on
 // unpkg whose version matches the pdfjs build exported by react-pdf.
 // NOTE(review): presumably passed to <Document options={...}> further down — confirm.
 const pdfOptions = {
  standardFontDataUrl: `https://unpkg.com/pdfjs-dist@${pdfjs.version}/standard_fonts/`,
 }
 interface PDFViewerProps {
  title?: string
  pdfUrl: string
@@ -17,41 +23,56 @@ export default function PDFViewer({ title, pdfUrl, className, httpHeaders }: PDF
  const [numPages, setNumPages] = useState<number>(0)
  const [pageNumber, setPageNumber] = useState<number>(1)
  const [scale, setScale] = useState<number>(1.0)
-  const [loading, setLoading] = useState<boolean>(true)
+  const [documentLoaded, setDocumentLoaded] = useState<boolean>(false)
  const [error, setError] = useState<string | null>(null)
-  // Memoize the file prop to prevent unnecessary reloads
+  // Store PDF document reference
  const pdfDocRef = useRef<PDFDocumentProxy | null>(null)
  // Memoize file config to prevent unnecessary reloads
  const fileConfig = useMemo(() => {
    return httpHeaders ? { url: pdfUrl, httpHeaders } : pdfUrl
  }, [pdfUrl, httpHeaders])
-  const onDocumentLoadSuccess = ({ numPages }: { numPages: number }) => {
+  // Reset state when URL changes
-    setNumPages(numPages)
+  useEffect(() => {
-    setLoading(false)
+    setDocumentLoaded(false)
    setError(null)
-  }
+    setNumPages(0)
    setPageNumber(1)
    pdfDocRef.current = null
  }, [pdfUrl])
-  const onDocumentLoadError = (error: Error) => {
+  const onDocumentLoadSuccess = useCallback((pdf: { numPages: number }) => {
-    console.error('Error loading PDF:', error)
+    pdfDocRef.current = pdf as unknown as PDFDocumentProxy
-    setError('Failed to load PDF. Please try again later.')
+    setNumPages(pdf.numPages)
-    setLoading(false)
+    setPageNumber(1)
-  }
+    setDocumentLoaded(true)
    setError(null)
  }, [])
-  const goToPreviousPage = () => {
+  const onDocumentLoadError = useCallback((err: Error) => {
    console.error('Error loading PDF:', err)
    setError('無法載入 PDF 檔案。請稍後再試。')
    setDocumentLoaded(false)
    pdfDocRef.current = null
  }, [])
  const goToPreviousPage = useCallback(() => {
    setPageNumber((prev) => Math.max(prev - 1, 1))
-  }
+  }, [])
-  const goToNextPage = () => {
+  const goToNextPage = useCallback(() => {
    setPageNumber((prev) => Math.min(prev + 1, numPages))
-  }
+  }, [numPages])
-  const zoomIn = () => {
+  const zoomIn = useCallback(() => {
    setScale((prev) => Math.min(prev + 0.2, 3.0))
-  }
+  }, [])
-  const zoomOut = () => {
+  const zoomOut = useCallback(() => {
    setScale((prev) => Math.max(prev - 0.2, 0.5))
-  }
+  }, [])
  return (
    <Card className={className}>
@@ -69,18 +90,18 @@ export default function PDFViewer({ title, pdfUrl, className, httpHeaders }: PDF
              variant="outline"
              size="sm"
              onClick={goToPreviousPage}
-              disabled={pageNumber <= 1 || loading}
+              disabled={pageNumber <= 1 || !documentLoaded}
            >
              <ChevronLeft className="h-4 w-4" />
            </Button>
            <span className="text-sm whitespace-nowrap">
-              Page {pageNumber} of {numPages || '...'}
+              第 {pageNumber} 頁 / 共 {numPages || '...'} 頁
            </span>
            <Button
              variant="outline"
              size="sm"
              onClick={goToNextPage}
-              disabled={pageNumber >= numPages || loading}
+              disabled={pageNumber >= numPages || !documentLoaded}
            >
              <ChevronRight className="h-4 w-4" />
            </Button>
@@ -92,7 +113,7 @@ export default function PDFViewer({ title, pdfUrl, className, httpHeaders }: PDF
              variant="outline"
              size="sm"
              onClick={zoomOut}
-              disabled={scale <= 0.5 || loading}
+              disabled={scale <= 0.5 || !documentLoaded}
            >
              <ZoomOut className="h-4 w-4" />
            </Button>
@@ -103,7 +124,7 @@ export default function PDFViewer({ title, pdfUrl, className, httpHeaders }: PDF
              variant="outline"
              size="sm"
              onClick={zoomIn}
-              disabled={scale >= 3.0 || loading}
+              disabled={scale >= 3.0 || !documentLoaded}
            >
              <ZoomIn className="h-4 w-4" />
            </Button>
@@ -113,39 +134,48 @@ export default function PDFViewer({ title, pdfUrl, className, httpHeaders }: PDF
        {/* PDF Document */}
        <div className="border rounded-md bg-muted/10 overflow-auto max-h-[800px]">
          <div className="flex justify-center p-4">
-            {loading && (
+            {error ? (
              <div className="flex items-center justify-center min-h-[400px]">
                <div className="animate-spin rounded-full h-12 w-12 border-b-2 border-primary"></div>
              </div>
            )}
            {error && (
              <div className="flex items-center justify-center min-h-[400px]">
                <div className="text-center">
-                  <p className="text-destructive font-semibold mb-2">Error</p>
+                  <p className="text-destructive font-semibold mb-2">錯誤</p>
                  <p className="text-sm text-muted-foreground">{error}</p>
                </div>
              </div>
-            )}
+            ) : (
            {!error && (
              <Document
                key={pdfUrl}
                file={fileConfig}
                options={pdfOptions}
                onLoadSuccess={onDocumentLoadSuccess}
                onLoadError={onDocumentLoadError}
                loading={
                  <div className="flex items-center justify-center min-h-[400px]">
-                    <div className="animate-spin rounded-full h-12 w-12 border-b-2 border-primary"></div>
+                    <div className="text-center">
                      <Loader2 className="w-12 h-12 animate-spin text-primary mx-auto mb-4" />
                      <p className="text-muted-foreground">載入 PDF 中...</p>
                    </div>
                  </div>
                }
              >
                {documentLoaded && (
                  <Page
                    pageNumber={pageNumber}
                    scale={scale}
                    renderTextLayer={true}
                    renderAnnotationLayer={true}
                    className="shadow-lg"
                    loading={
                      <div className="flex items-center justify-center min-h-[400px]">
                        <Loader2 className="w-8 h-8 animate-spin text-primary" />
                      </div>
                    }
                    error={
                      <div className="text-center p-4 text-destructive">
                        無法載入第 {pageNumber} 頁
                      </div>
                    }
                  />
                )}
              </Document>
            )}
          </div>
--- a/frontend/src/components/TaskNotFound.tsx
+++ b/frontend/src/components/TaskNotFound.tsx
@@ -0,0 +1,46 @@
 import { useNavigate } from 'react-router-dom'
 import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card'
 import { Button } from '@/components/ui/button'
 import { Trash2 } from 'lucide-react'
 /** Props accepted by the TaskNotFound component. */
 interface TaskNotFoundProps {
  /** Task identifier shown to the user; the ID line is not rendered when this is null. */
  taskId: string | null
  /** Callback invoked when the button is clicked, right before navigating to '/upload'. */
  onClearAndUpload: () => void
 }
 /**
  * Centered card telling the user that the task was deleted or does not exist.
  *
  * Renders a trash icon, a title, an explanatory message, the task ID (only
  * when `taskId` is truthy) and a button that leads to the upload page.
  *
  * @param taskId - Identifier displayed under the message, if any.
  * @param onClearAndUpload - Called first when the button is clicked; the
  *   component then navigates to '/upload'.
  */
 export default function TaskNotFound({ taskId, onClearAndUpload }: TaskNotFoundProps) {
  const navigate = useNavigate()
  // Run the caller's cleanup callback before leaving the page.
  const handleClick = () => {
    onClearAndUpload()
    navigate('/upload')
  }
  return (
    <div className="flex items-center justify-center min-h-[60vh]">
      <Card className="max-w-md text-center">
        <CardHeader>
          <div className="flex justify-center mb-4">
            <div className="w-16 h-16 bg-destructive/10 rounded-full flex items-center justify-center">
              <Trash2 className="w-8 h-8 text-destructive" />
            </div>
          </div>
          <CardTitle className="text-xl">任務已刪除</CardTitle>
        </CardHeader>
        <CardContent className="space-y-4">
          <p className="text-muted-foreground">
            此任務已被刪除或不存在。請上傳新檔案以建立新任務。
          </p>
          {taskId && (
            <p className="text-xs text-muted-foreground font-mono">
              任務 ID: {taskId}
            </p>
          )}
          <Button onClick={handleClick} size="lg">
            前往上傳頁面
          </Button>
        </CardContent>
      </Card>
    </div>
  )
 }
--- a/frontend/src/components/ui/select.tsx
+++ b/frontend/src/components/ui/select.tsx
@@ -1,12 +1,14 @@
 import * as React from 'react'
 import * as SelectPrimitive from '@radix-ui/react-select'
 import { cn } from '@/lib/utils'
-import { ChevronDown } from 'lucide-react'
+import { Check, ChevronDown, ChevronUp } from 'lucide-react'
-export interface SelectProps extends React.SelectHTMLAttributes<HTMLSelectElement> {
+// Simple native select for backwards compatibility
 export interface NativeSelectProps extends React.SelectHTMLAttributes<HTMLSelectElement> {
  options: Array<{ value: string; label: string }>
 }
-const Select = React.forwardRef<HTMLSelectElement, SelectProps>(
+const NativeSelect = React.forwardRef<HTMLSelectElement, NativeSelectProps>(
  ({ className, options, ...props }, ref) => {
    return (
      <div className="relative">
@@ -33,6 +35,168 @@ const Select = React.forwardRef<HTMLSelectElement, SelectProps>(
    )
  }
 )
-Select.displayName = 'Select'
+NativeSelect.displayName = 'NativeSelect'
-export { Select }
+const Select = SelectPrimitive.Root
 const SelectGroup = SelectPrimitive.Group
 const SelectValue = SelectPrimitive.Value
 /**
  * Styled wrapper around `SelectPrimitive.Trigger`.
  *
  * Forwards the ref and all remaining props to the primitive, merges the
  * caller's `className` after the default classes via `cn`, and appends a
  * ChevronDown icon after the children.
  */
 const SelectTrigger = React.forwardRef<
  React.ComponentRef<typeof SelectPrimitive.Trigger>,
  React.ComponentPropsWithoutRef<typeof SelectPrimitive.Trigger>
 >(({ className, children, ...props }, ref) => (
  <SelectPrimitive.Trigger
    ref={ref}
    className={cn(
      'flex h-10 w-full items-center justify-between rounded-lg border border-border bg-background px-3 py-2 text-sm',
      'ring-offset-background placeholder:text-muted-foreground',
      'focus:outline-none focus:ring-2 focus:ring-primary/20 focus:border-primary',
      'disabled:cursor-not-allowed disabled:opacity-50',
      '[&>span]:line-clamp-1',
      className
    )}
    {...props}
  >
    {children}
    <SelectPrimitive.Icon asChild>
      <ChevronDown className="h-4 w-4 opacity-50" />
    </SelectPrimitive.Icon>
  </SelectPrimitive.Trigger>
 ))
 // Reuse the primitive's display name for the wrapper.
 SelectTrigger.displayName = SelectPrimitive.Trigger.displayName
 /**
  * Styled wrapper around `SelectPrimitive.ScrollUpButton` that renders a
  * ChevronUp icon. Ref and remaining props are forwarded to the primitive;
  * the caller's `className` is merged after the defaults.
  */
 const SelectScrollUpButton = React.forwardRef<
  React.ComponentRef<typeof SelectPrimitive.ScrollUpButton>,
  React.ComponentPropsWithoutRef<typeof SelectPrimitive.ScrollUpButton>
 >(({ className, ...props }, ref) => (
  <SelectPrimitive.ScrollUpButton
    ref={ref}
    className={cn(
      'flex cursor-default items-center justify-center py-1',
      className
    )}
    {...props}
  >
    <ChevronUp className="h-4 w-4" />
  </SelectPrimitive.ScrollUpButton>
 ))
 // Reuse the primitive's display name for the wrapper.
 SelectScrollUpButton.displayName = SelectPrimitive.ScrollUpButton.displayName
 /**
  * Styled wrapper around `SelectPrimitive.ScrollDownButton` that renders a
  * ChevronDown icon. Ref and remaining props are forwarded to the primitive;
  * the caller's `className` is merged after the defaults.
  */
 const SelectScrollDownButton = React.forwardRef<
  React.ComponentRef<typeof SelectPrimitive.ScrollDownButton>,
  React.ComponentPropsWithoutRef<typeof SelectPrimitive.ScrollDownButton>
 >(({ className, ...props }, ref) => (
  <SelectPrimitive.ScrollDownButton
    ref={ref}
    className={cn(
      'flex cursor-default items-center justify-center py-1',
      className
    )}
    {...props}
  >
    <ChevronDown className="h-4 w-4" />
  </SelectPrimitive.ScrollDownButton>
 ))
 // Reuse the primitive's display name for the wrapper.
 SelectScrollDownButton.displayName = SelectPrimitive.ScrollDownButton.displayName
 /**
  * Styled dropdown body for the select.
  *
  * Renders `SelectPrimitive.Content` inside `SelectPrimitive.Portal`, with the
  * scroll-up and scroll-down buttons placed around a `SelectPrimitive.Viewport`
  * that holds the children. `position` defaults to 'popper'; in that mode the
  * content gets extra side-dependent translate classes and the viewport gets
  * sizing classes based on the `--radix-select-trigger-*` CSS variables.
  * Ref and remaining props are forwarded to the content primitive.
  */
 const SelectContent = React.forwardRef<
  React.ComponentRef<typeof SelectPrimitive.Content>,
  React.ComponentPropsWithoutRef<typeof SelectPrimitive.Content>
 >(({ className, children, position = 'popper', ...props }, ref) => (
  <SelectPrimitive.Portal>
    <SelectPrimitive.Content
      ref={ref}
      className={cn(
        'relative z-50 max-h-96 min-w-[8rem] overflow-hidden rounded-lg border border-border bg-popover text-popover-foreground shadow-md',
        'data-[state=open]:animate-in data-[state=closed]:animate-out',
        'data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0',
        'data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-95',
        'data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2',
        'data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2',
        position === 'popper' &&
          'data-[side=bottom]:translate-y-1 data-[side=left]:-translate-x-1 data-[side=right]:translate-x-1 data-[side=top]:-translate-y-1',
        className
      )}
      position={position}
      {...props}
    >
      <SelectScrollUpButton />
      <SelectPrimitive.Viewport
        className={cn(
          'p-1',
          position === 'popper' &&
            'h-[var(--radix-select-trigger-height)] w-full min-w-[var(--radix-select-trigger-width)]'
        )}
      >
        {children}
      </SelectPrimitive.Viewport>
      <SelectScrollDownButton />
    </SelectPrimitive.Content>
  </SelectPrimitive.Portal>
 ))
 // Reuse the primitive's display name for the wrapper.
 SelectContent.displayName = SelectPrimitive.Content.displayName
 /**
  * Styled wrapper around `SelectPrimitive.Label`. Ref and remaining props are
  * forwarded; the caller's `className` is merged after the default classes.
  */
 const SelectLabel = React.forwardRef<
  React.ComponentRef<typeof SelectPrimitive.Label>,
  React.ComponentPropsWithoutRef<typeof SelectPrimitive.Label>
 >(({ className, ...props }, ref) => (
  <SelectPrimitive.Label
    ref={ref}
    className={cn('py-1.5 pl-8 pr-2 text-sm font-semibold', className)}
    {...props}
  />
 ))
 // Reuse the primitive's display name for the wrapper.
 SelectLabel.displayName = SelectPrimitive.Label.displayName
 /**
  * Styled wrapper around `SelectPrimitive.Item`.
  *
  * Places a Check icon inside `SelectPrimitive.ItemIndicator` in an absolutely
  * positioned slot on the left, and renders the children inside
  * `SelectPrimitive.ItemText`. Ref and remaining props are forwarded; the
  * caller's `className` is merged after the default classes.
  */
 const SelectItem = React.forwardRef<
  React.ComponentRef<typeof SelectPrimitive.Item>,
  React.ComponentPropsWithoutRef<typeof SelectPrimitive.Item>
 >(({ className, children, ...props }, ref) => (
  <SelectPrimitive.Item
    ref={ref}
    className={cn(
      'relative flex w-full cursor-default select-none items-center rounded-md py-1.5 pl-8 pr-2 text-sm outline-none',
      'focus:bg-accent focus:text-accent-foreground',
      'data-[disabled]:pointer-events-none data-[disabled]:opacity-50',
      className
    )}
    {...props}
  >
    <span className="absolute left-2 flex h-3.5 w-3.5 items-center justify-center">
      <SelectPrimitive.ItemIndicator>
        <Check className="h-4 w-4" />
      </SelectPrimitive.ItemIndicator>
    </span>
    <SelectPrimitive.ItemText>{children}</SelectPrimitive.ItemText>
  </SelectPrimitive.Item>
 ))
 // Reuse the primitive's display name for the wrapper.
 SelectItem.displayName = SelectPrimitive.Item.displayName
 /**
  * Styled wrapper around `SelectPrimitive.Separator`. Ref and remaining props
  * are forwarded; the caller's `className` is merged after the default classes.
  */
 const SelectSeparator = React.forwardRef<
  React.ComponentRef<typeof SelectPrimitive.Separator>,
  React.ComponentPropsWithoutRef<typeof SelectPrimitive.Separator>
 >(({ className, ...props }, ref) => (
  <SelectPrimitive.Separator
    ref={ref}
    className={cn('-mx-1 my-1 h-px bg-muted', className)}
    {...props}
  />
 ))
 // Reuse the primitive's display name for the wrapper.
 SelectSeparator.displayName = SelectPrimitive.Separator.displayName
 export {
  Select,
  SelectGroup,
  SelectValue,
  SelectTrigger,
  SelectContent,
  SelectLabel,
  SelectItem,
  SelectSeparator,
  SelectScrollUpButton,
  SelectScrollDownButton,
  NativeSelect,
 }
--- a/frontend/src/hooks/useTaskValidation.ts
+++ b/frontend/src/hooks/useTaskValidation.ts
@@ -0,0 +1,64 @@
 import { useEffect, useState } from 'react'
 import { useQuery } from '@tanstack/react-query'
 import { useUploadStore } from '@/store/uploadStore'
 import { apiClientV2 } from '@/services/apiV2'
 import type { TaskDetail } from '@/types/apiV2'
 /** Value returned by the useTaskValidation hook. */
 interface UseTaskValidationResult {
  /** Stringified `batchId` from the upload store, or null when no batch ID is set. */
  taskId: string | null
  /** Data returned by the task query; undefined until a fetch has succeeded. */
  taskDetail: TaskDetail | undefined
  /** True while a task ID exists, it is not marked as not found, a fetch is in flight and no data is available yet. */
  isLoading: boolean
  /** True once the task query has failed with an HTTP 404 response. */
  isNotFound: boolean
  /** Clears the upload store and resets the not-found flag. */
  clearAndReset: () => void
 }
 /**
 * Hook that loads the task referenced by the upload store and detects tasks
 * that no longer exist.
 *
 * The task ID is derived from the store's `batchId`. The task is fetched with
 * `apiClientV2.getTask`; a 404 response is not retried and flips `isNotFound`
 * to true, which also disables the query. Other errors are retried while the
 * failure count is below 2.
 *
 * @param options.refetchInterval - Passed through to the query; defaults to
 *   `false` when omitted.
 * @returns The task ID, the task detail (if loaded), and the loading /
 *   not-found flags together with a reset callback.
 */
 export function useTaskValidation(options?: {
  refetchInterval?: number | false | ((query: any) => number | false)
 }): UseTaskValidationResult {
  const { batchId, clearUpload } = useUploadStore()
  const taskId = batchId ? String(batchId) : null
  // NOTE(review): this flag is only reset by clearAndReset below, not when
  // taskId changes while the hook stays mounted — confirm that is intended.
  const [isNotFound, setIsNotFound] = useState(false)
  const { data: taskDetail, isLoading, error, isFetching } = useQuery({
    queryKey: ['taskDetail', taskId],
    queryFn: () => apiClientV2.getTask(taskId!),
    // Only query when a task ID exists and the task is not already known to be missing.
    enabled: !!taskId && !isNotFound,
    retry: (failureCount, error: any) => {
      // Don't retry on 404
      if (error?.response?.status === 404) {
        return false
      }
      return failureCount < 2
    },
    refetchInterval: options?.refetchInterval ?? false,
    // Disable stale time to ensure we check fresh data
    staleTime: 0,
  })
  // Handle 404 error - mark as not found immediately
  useEffect(() => {
    if (error && (error as any)?.response?.status === 404) {
      setIsNotFound(true)
    }
  }, [error])
  // Clear state and store
  const clearAndReset = () => {
    clearUpload()
    setIsNotFound(false)
  }
  return {
    taskId,
    taskDetail,
    // Show loading if we have a taskId and are still fetching (but not if already marked as not found)
    isLoading: !!taskId && !isNotFound && (isLoading || isFetching) && !taskDetail,
    isNotFound,
    clearAndReset,
  }
 }
--- a/frontend/src/main.tsx
+++ b/frontend/src/main.tsx
@@ -1,4 +1,3 @@
 import { StrictMode } from 'react'
 import { createRoot } from 'react-dom/client'
 import { BrowserRouter } from 'react-router-dom'
 import { QueryClient, QueryClientProvider } from '@tanstack/react-query'
@@ -10,8 +9,8 @@ import App from './App.tsx'
 // Configure PDF.js worker for react-pdf
 import { pdfjs } from 'react-pdf'
-// Use the worker from react-pdf's bundled pdfjs-dist
+// Use CDN for the worker (most reliable for Vite)
-pdfjs.GlobalWorkerOptions.workerSrc = `//unpkg.com/pdfjs-dist@${pdfjs.version}/build/pdf.worker.min.mjs`
+pdfjs.GlobalWorkerOptions.workerSrc = `https://unpkg.com/pdfjs-dist@${pdfjs.version}/build/pdf.worker.min.mjs`
 // Create React Query client
 const queryClient = new QueryClient({
@@ -24,8 +23,9 @@ const queryClient = new QueryClient({
  },
 })
 // Note: StrictMode disabled due to react-pdf incompatibility
 // StrictMode's double-invocation causes PDF worker race conditions
 createRoot(document.getElementById('root')!).render(
  <StrictMode>
  <QueryClientProvider client={queryClient}>
    <I18nextProvider i18n={i18n}>
      <ToastProvider>
@@ -34,6 +34,5 @@ createRoot(document.getElementById('root')!).render(
        </BrowserRouter>
      </ToastProvider>
    </I18nextProvider>
-    </QueryClientProvider>
+  </QueryClientProvider>,
  </StrictMode>,
 )
--- a/frontend/src/pages/ProcessingPage.tsx
+++ b/frontend/src/pages/ProcessingPage.tsx
@@ -1,26 +1,35 @@
-import { useEffect, useState } from 'react'
+import { useState, useEffect } from 'react'
 import { useNavigate } from 'react-router-dom'
 import { useTranslation } from 'react-i18next'
-import { useQuery, useMutation } from '@tanstack/react-query'
+import { useMutation } from '@tanstack/react-query'
 import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card'
 import { Progress } from '@/components/ui/progress'
 import { Button } from '@/components/ui/button'
 import { Badge } from '@/components/ui/badge'
 import { useToast } from '@/components/ui/toast'
 import { useUploadStore } from '@/store/uploadStore'
 import { apiClientV2 } from '@/services/apiV2'
 import { Play, CheckCircle, FileText, AlertCircle, Clock, Activity, Loader2 } from 'lucide-react'
 import PPStructureParams from '@/components/PPStructureParams'
 import TaskNotFound from '@/components/TaskNotFound'
 import { useTaskValidation } from '@/hooks/useTaskValidation'
 import type { PPStructureV3Params, ProcessingOptions } from '@/types/apiV2'
 export default function ProcessingPage() {
  const { t } = useTranslation()
  const navigate = useNavigate()
  const { toast } = useToast()
  const { batchId } = useUploadStore()
-  // In V2, batchId is actually a task_id (string)
+  // Use shared hook for task validation
-  const taskId = batchId ? String(batchId) : null
+  const { taskId, taskDetail, isLoading: isValidating, isNotFound, clearAndReset } = useTaskValidation({
    refetchInterval: (query) => {
      const data = query.state.data
      if (!data) return 2000
      if (data.status === 'completed' || data.status === 'failed') {
        return false
      }
      return 2000
    },
  })
  // PP-StructureV3 parameters state
  const [ppStructureParams, setPpStructureParams] = useState<PPStructureV3Params>({})
@@ -56,22 +65,6 @@ export default function ProcessingPage() {
    },
  })
  // Poll task status
  const { data: taskDetail } = useQuery({
    queryKey: ['taskDetail', taskId],
    queryFn: () => apiClientV2.getTask(taskId!),
    enabled: !!taskId,
    refetchInterval: (query) => {
      const data = query.state.data
      if (!data) return 2000
      // Stop polling if completed or failed
      if (data.status === 'completed' || data.status === 'failed') {
        return false
      }
      return 2000 // Poll every 2 seconds
    },
  })
  // Auto-redirect when completed
  useEffect(() => {
    if (taskDetail?.status === 'completed') {
@@ -115,6 +108,23 @@ export default function ProcessingPage() {
    }
  }
  // Show loading while validating task
  if (isValidating) {
    return (
      <div className="flex items-center justify-center min-h-[60vh]">
        <div className="text-center">
          <Loader2 className="w-12 h-12 animate-spin text-primary mx-auto mb-4" />
          <p className="text-muted-foreground">載入任務資訊...</p>
        </div>
      </div>
    )
  }
  // Show message when task was deleted
  if (isNotFound) {
    return <TaskNotFound taskId={taskId} onClearAndUpload={clearAndReset} />
  }
  // Show helpful message when no task is selected
  if (!taskId) {
    return (
--- a/frontend/src/pages/ResultsPage.tsx
+++ b/frontend/src/pages/ResultsPage.tsx
@@ -1,29 +1,23 @@
 import { useMemo } from 'react'
 import { useNavigate } from 'react-router-dom'
 import { useTranslation } from 'react-i18next'
 import { useQuery } from '@tanstack/react-query'
 import { Button } from '@/components/ui/button'
 import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card'
 import PDFViewer from '@/components/PDFViewer'
 import { useToast } from '@/components/ui/toast'
 import { useUploadStore } from '@/store/uploadStore'
 import { apiClientV2 } from '@/services/apiV2'
 import { FileText, Download, AlertCircle, TrendingUp, Clock, Layers, FileJson, Loader2 } from 'lucide-react'
 import { Badge } from '@/components/ui/badge'
 import TaskNotFound from '@/components/TaskNotFound'
 import { useTaskValidation } from '@/hooks/useTaskValidation'
 export default function ResultsPage() {
  const { t } = useTranslation()
  const navigate = useNavigate()
  const { toast } = useToast()
  const { batchId } = useUploadStore()
-  // In V2, batchId is actually a task_id (string)
+  // Use shared hook for task validation
-  const taskId = batchId ? String(batchId) : null
+  const { taskId, taskDetail, isLoading, isNotFound, clearAndReset } = useTaskValidation({
  // Get task details
  const { data: taskDetail, isLoading } = useQuery({
    queryKey: ['taskDetail', taskId],
    queryFn: () => apiClientV2.getTask(taskId!),
    enabled: !!taskId,
    refetchInterval: (query) => {
      const data = query.state.data
      if (!data) return 2000
@@ -34,6 +28,19 @@ export default function ResultsPage() {
    },
  })
  // Construct PDF URL for preview - memoize to prevent unnecessary reloads
  // Must be called unconditionally before any early returns (React hooks rule)
  const API_BASE_URL = import.meta.env.VITE_API_BASE_URL || 'http://localhost:8000'
  const pdfUrl = useMemo(() => {
    return taskId ? `${API_BASE_URL}/api/v2/tasks/${taskId}/download/pdf` : ''
  }, [taskId, API_BASE_URL])
  // Get auth token for PDF preview - memoize to prevent new object reference each render
  const pdfHttpHeaders = useMemo(() => {
    const authToken = localStorage.getItem('auth_token_v2')
    return authToken ? { Authorization: `Bearer ${authToken}` } : undefined
  }, [])
  const handleDownloadPDF = async () => {
    if (!taskId) return
    try {
@@ -101,6 +108,23 @@ export default function ResultsPage() {
    }
  }
  // Show loading while validating task
  if (isLoading) {
    return (
      <div className="flex items-center justify-center min-h-[60vh]">
        <div className="text-center">
          <Loader2 className="w-12 h-12 animate-spin text-primary mx-auto mb-4" />
          <p className="text-muted-foreground">載入任務結果...</p>
        </div>
      </div>
    )
  }
  // Show message when task was deleted
  if (isNotFound) {
    return <TaskNotFound taskId={taskId} onClearAndUpload={clearAndReset} />
  }
  // Show helpful message when no task is selected
  if (!taskId) {
    return (
@@ -127,17 +151,7 @@ export default function ResultsPage() {
    )
  }
-  if (isLoading) {
+  // Fallback for no task detail (shouldn't happen with proper validation)
    return (
      <div className="flex items-center justify-center min-h-[60vh]">
        <div className="text-center">
          <Loader2 className="w-12 h-12 animate-spin text-primary mx-auto mb-4" />
          <p className="text-muted-foreground">載入任務結果...</p>
        </div>
      </div>
    )
  }
  if (!taskDetail) {
    return (
      <div className="flex items-center justify-center min-h-[60vh]">
@@ -157,14 +171,6 @@ export default function ResultsPage() {
  const isCompleted = taskDetail.status === 'completed'
  // Construct PDF URL for preview
  const API_BASE_URL = import.meta.env.VITE_API_BASE_URL || 'http://localhost:8000'
  const pdfUrl = taskId ? `${API_BASE_URL}/api/v2/tasks/${taskId}/download/pdf` : ''
  // Get auth token for PDF preview
  const authToken = localStorage.getItem('auth_token_v2')
  const pdfHttpHeaders = authToken ? { Authorization: `Bearer ${authToken}` } : undefined
  return (
    <div className="space-y-6">
      {/* Page Header */}
--- a/frontend/src/pages/TaskDetailPage.tsx
+++ b/frontend/src/pages/TaskDetailPage.tsx
@@ -1,3 +1,4 @@
 import { useMemo } from 'react'
 import { useParams, useNavigate } from 'react-router-dom'
 import { useTranslation } from 'react-i18next'
 import { useQuery } from '@tanstack/react-query'
@@ -65,6 +66,19 @@ export default function TaskDetailPage() {
    retry: false,
  })
  // Construct PDF URL for preview - memoize to prevent unnecessary reloads
  // Must be called unconditionally before any early returns (React hooks rule)
  const API_BASE_URL = import.meta.env.VITE_API_BASE_URL || 'http://localhost:8000'
  const pdfUrl = useMemo(() => {
    return taskId ? `${API_BASE_URL}/api/v2/tasks/${taskId}/download/pdf` : ''
  }, [taskId, API_BASE_URL])
  // Get auth token for PDF preview - memoize to prevent new object reference each render
  const pdfHttpHeaders = useMemo(() => {
    const authToken = localStorage.getItem('auth_token_v2')
    return authToken ? { Authorization: `Bearer ${authToken}` } : undefined
  }, [])
  const getTrackBadge = (track?: ProcessingTrack) => {
    if (!track) return null
    switch (track) {
@@ -218,14 +232,6 @@ export default function TaskDetailPage() {
  const isProcessing = taskDetail.status === 'processing'
  const isFailed = taskDetail.status === 'failed'
  // Construct PDF URL for preview
  const API_BASE_URL = import.meta.env.VITE_API_BASE_URL || 'http://localhost:8000'
  const pdfUrl = taskId ? `${API_BASE_URL}/api/v2/tasks/${taskId}/download/pdf` : ''
  // Get auth token for PDF preview
  const authToken = localStorage.getItem('auth_token_v2')
  const pdfHttpHeaders = authToken ? { Authorization: `Bearer ${authToken}` } : undefined
  return (
    <div className="space-y-6">
      {/* Page Header */}
--- a/frontend/src/pages/TaskHistoryPage.tsx
+++ b/frontend/src/pages/TaskHistoryPage.tsx
@@ -28,7 +28,7 @@ import {
  TableHeader,
  TableRow,
 } from '@/components/ui/table'
-import { Select } from '@/components/ui/select'
+import { NativeSelect } from '@/components/ui/select'
 import { Card, CardContent, CardDescription, CardHeader, CardTitle } from '@/components/ui/card'
 export default function TaskHistoryPage() {
@@ -112,6 +112,43 @@ export default function TaskHistoryPage() {
    }
  }
  // Delete all tasks.
  // Alerts and returns early when `tasks` is empty, asks for confirmation
  // (the prompt shows `total`), then deletes tasks sequentially while the
  // `loading` flag is set. The lists and stats are re-fetched on both success
  // and failure, and `loading` is always cleared in `finally`.
  // NOTE(review): `tasks` presumably holds only the currently loaded (and
  // possibly filtered) page — confirm. The follow-up listTasks call below
  // passes no filters, so it may delete tasks outside the current filter.
  const handleDeleteAll = async () => {
    if (tasks.length === 0) {
      alert('沒有可刪除的任務')
      return
    }
    if (!confirm(`確定要刪除所有 ${total} 個任務嗎？此操作無法復原！`)) return
    try {
      setLoading(true)
      // Delete tasks one by one
      for (const task of tasks) {
        await apiClientV2.deleteTask(task.task_id)
      }
      // If there are more pages, keep fetching and deleting
      // Always request page 1: earlier entries were just deleted, so the
      // remaining tasks move up to the first page.
      let hasMoreTasks = hasMore
      while (hasMoreTasks) {
        const response = await apiClientV2.listTasks({ page: 1, page_size: 100 })
        if (response.tasks.length === 0) break
        for (const task of response.tasks) {
          await apiClientV2.deleteTask(task.task_id)
        }
        hasMoreTasks = response.has_more
      }
      // NOTE(review): these refreshes are not awaited, so the success alert
      // and the `finally` block run without waiting for them — confirm intended.
      fetchTasks()
      fetchStats()
      alert('所有任務已刪除')
    } catch (err: any) {
      // Show the server-provided detail when present, otherwise a generic
      // failure message; refresh anyway because some tasks may be gone.
      alert(err.response?.data?.detail || '刪除任務失敗')
      fetchTasks()
      fetchStats()
    } finally {
      setLoading(false)
    }
  }
  // View task details
  const handleViewDetails = (taskId: string) => {
    navigate(`/tasks/${taskId}`)
@@ -220,10 +257,16 @@ export default function TaskHistoryPage() {
          <h1 className="text-3xl font-bold text-gray-900">任務歷史</h1>
          <p className="text-gray-600 mt-1">查看和管理您的 OCR 任務</p>
        </div>
        <div className="flex gap-2">
          <Button onClick={() => fetchTasks()} variant="outline">
            <RefreshCw className="w-4 h-4 mr-2" />
            刷新
          </Button>
          <Button onClick={handleDeleteAll} variant="destructive" disabled={loading || tasks.length === 0}>
            <Trash2 className="w-4 h-4 mr-2" />
            刪除全部
          </Button>
        </div>
      </div>
      {/* Statistics */}
@@ -288,7 +331,7 @@ export default function TaskHistoryPage() {
          <div className="grid grid-cols-1 md:grid-cols-4 gap-4">
            <div>
              <label className="block text-sm font-medium text-gray-700 mb-2">狀態</label>
-              <Select
+              <NativeSelect
                value={statusFilter}
                onChange={(e) => {
                  setStatusFilter(e.target.value as any)
--- a/openspec/changes/enhance-memory-management/design.md
+++ b/openspec/changes/enhance-memory-management/design.md
@@ -416,3 +416,172 @@ async def test_concurrent_load():
 - Stress testing
 - Performance tuning
 - Documentation and monitoring
 ## Configuration Settings Reference
 All memory management settings are defined in `backend/app/core/config.py` under the `Settings` class.
 ### Memory Thresholds
 | Setting | Type | Default | Description |
 |---------|------|---------|-------------|
 | `memory_warning_threshold` | float | 0.80 | GPU memory usage ratio (0-1) to trigger warning alerts |
 | `memory_critical_threshold` | float | 0.95 | GPU memory ratio to start throttling operations |
 | `memory_emergency_threshold` | float | 0.98 | GPU memory ratio to trigger emergency cleanup |
 ### Memory Monitoring
 | Setting | Type | Default | Description |
 |---------|------|---------|-------------|
 | `memory_check_interval_seconds` | int | 30 | Background check interval for memory monitoring |
 | `enable_memory_alerts` | bool | True | Enable/disable memory threshold alerts |
 | `gpu_memory_limit_mb` | int | 6144 | Maximum GPU memory to use (MB) |
 | `gpu_memory_reserve_mb` | int | 512 | Memory reserved for CUDA overhead |
 ### Model Lifecycle Management
 | Setting | Type | Default | Description |
 |---------|------|---------|-------------|
 | `enable_model_lifecycle_management` | bool | True | Use ModelManager for model lifecycle |
 | `model_idle_timeout_seconds` | int | 300 | Unload models after idle time |
 | `pp_structure_idle_timeout_seconds` | int | 300 | Unload PP-StructureV3 after idle |
 | `structure_model_memory_mb` | int | 2000 | Estimated memory for PP-StructureV3 |
 | `ocr_model_memory_mb` | int | 500 | Estimated memory per OCR language model |
 | `enable_lazy_model_loading` | bool | True | Load models on demand |
 | `auto_unload_unused_models` | bool | True | Auto-unload unused language models |
 ### Service Pool Configuration
 | Setting | Type | Default | Description |
 |---------|------|---------|-------------|
 | `enable_service_pool` | bool | True | Use OCRServicePool |
 | `max_services_per_device` | int | 1 | Max OCRService instances per GPU |
 | `max_total_services` | int | 2 | Max total OCRService instances |
 | `service_acquire_timeout_seconds` | float | 300.0 | Timeout for acquiring service from pool |
 | `max_queue_size` | int | 50 | Max pending tasks per device queue |
 ### Concurrency Control
 | Setting | Type | Default | Description |
 |---------|------|---------|-------------|
 | `max_concurrent_predictions` | int | 2 | Max concurrent PP-StructureV3 predictions |
 | `max_concurrent_pages` | int | 2 | Max pages processed concurrently |
 | `inference_batch_size` | int | 1 | Batch size for inference |
 | `enable_batch_processing` | bool | True | Enable batch processing for large docs |
 ### Recovery Settings
 | Setting | Type | Default | Description |
 |---------|------|---------|-------------|
 | `enable_cpu_fallback` | bool | True | Fall back to CPU when GPU memory low |
 | `enable_emergency_cleanup` | bool | True | Auto-cleanup on memory pressure |
 | `enable_worker_restart` | bool | False | Restart workers on OOM (requires supervisor) |
 ### Feature Flags
 | Setting | Type | Default | Description |
 |---------|------|---------|-------------|
 | `enable_chart_recognition` | bool | True | Enable chart/diagram recognition |
 | `enable_formula_recognition` | bool | True | Enable math formula recognition |
 | `enable_table_recognition` | bool | True | Enable table structure recognition |
 | `enable_seal_recognition` | bool | True | Enable seal/stamp recognition |
 | `enable_text_recognition` | bool | True | Enable general text recognition |
 | `enable_memory_optimization` | bool | True | Enable memory optimizations |
 ### Environment Variable Override
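 All settings can be overridden via environment variables (or a `.env` file). The variable name is the setting name in uppercase, with underscores preserved (e.g. `memory_warning_threshold` becomes `MEMORY_WARNING_THRESHOLD`):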
 ```bash
 # Example .env file
 MEMORY_WARNING_THRESHOLD=0.75
 MEMORY_CRITICAL_THRESHOLD=0.90
 MAX_CONCURRENT_PREDICTIONS=1
 GPU_MEMORY_LIMIT_MB=4096
 ENABLE_CPU_FALLBACK=true
 ```
 ### Recommended Configurations
 #### RTX 4060 8GB (Default)
 ```bash
 GPU_MEMORY_LIMIT_MB=6144
 MAX_CONCURRENT_PREDICTIONS=2
 MAX_CONCURRENT_PAGES=2
 INFERENCE_BATCH_SIZE=1
 ```
 #### RTX 3090 24GB
 ```bash
 GPU_MEMORY_LIMIT_MB=20480
 MAX_CONCURRENT_PREDICTIONS=4
 MAX_CONCURRENT_PAGES=4
 INFERENCE_BATCH_SIZE=2
 ```
 #### CPU-Only Mode
 ```bash
 FORCE_CPU_MODE=true
 MAX_CONCURRENT_PREDICTIONS=1
 ENABLE_CPU_FALLBACK=false
 ```
 ## Prometheus Metrics
 The system exports Prometheus-format metrics via the `PrometheusMetrics` class. Available metrics:
 ### GPU Metrics
 - `tool_ocr_memory_gpu_total_bytes` - Total GPU memory
 - `tool_ocr_memory_gpu_used_bytes` - Used GPU memory
 - `tool_ocr_memory_gpu_free_bytes` - Free GPU memory
 - `tool_ocr_memory_gpu_utilization_ratio` - GPU memory utilization ratio (0-1)
 ### Model Metrics
 - `tool_ocr_memory_models_loaded_total` - Number of loaded models
 - `tool_ocr_memory_models_memory_bytes` - Total memory used by models
 - `tool_ocr_memory_model_ref_count{model_id}` - Reference count per model
 ### Prediction Metrics
 - `tool_ocr_memory_predictions_active` - Currently active predictions
 - `tool_ocr_memory_predictions_queue_depth` - Predictions waiting in queue
 - `tool_ocr_memory_predictions_total` - Total predictions processed (counter)
 - `tool_ocr_memory_predictions_timeouts_total` - Total prediction timeouts (counter)
 ### Pool Metrics
 - `tool_ocr_memory_pool_services_total` - Total services in pool
 - `tool_ocr_memory_pool_services_available` - Available services
 - `tool_ocr_memory_pool_services_in_use` - Services in use
 - `tool_ocr_memory_pool_acquisitions_total` - Total acquisitions (counter)
 ### Recovery Metrics
 - `tool_ocr_memory_recovery_count_total` - Total recovery attempts
 - `tool_ocr_memory_recovery_in_cooldown` - Whether recovery is currently in cooldown (1) or not (0)
 - `tool_ocr_memory_recovery_cooldown_remaining_seconds` - Seconds remaining in the current cooldown period
 ## Memory Dump API
 The `MemoryDumper` class provides debugging capabilities:
 ```python
 from app.services.memory_manager import get_memory_dumper
 dumper = get_memory_dumper()
 # Create a memory dump
 dump = dumper.create_dump(include_python_objects=True)
 # Get dump as dictionary for JSON serialization
 dump_dict = dumper.to_dict(dump)
 # Compare two dumps to detect memory growth
 comparison = dumper.compare_dumps(dump1, dump2)
 ```
 Memory dumps include:
 - GPU/CPU memory usage
 - Loaded models and reference counts
 - Active predictions and queue state
 - Service pool statistics
 - Recovery manager state
 - Python GC statistics
 - Large Python objects (optional)
--- a/openspec/changes/enhance-memory-management/tasks.md
+++ b/openspec/changes/enhance-memory-management/tasks.md
@@ -3,123 +3,123 @@
 ## Section 1: Model Lifecycle Management (Priority: Critical)
 ### 1.1 Create ModelManager class
- [ ] Design ModelManager interface with load/unload/get methods
+- [x] Design ModelManager interface with load/unload/get methods
- [ ] Implement reference counting for model instances
+- [x] Implement reference counting for model instances
- [ ] Add idle timeout tracking with configurable thresholds
+- [x] Add idle timeout tracking with configurable thresholds
- [ ] Create teardown() method for explicit cleanup
+- [x] Create teardown() method for explicit cleanup
- [ ] Add logging for model lifecycle events
+- [x] Add logging for model lifecycle events
 ### 1.2 Integrate PP-StructureV3 with ModelManager
- [ ] Remove permanent exemption from unloading (lines 255-267)
+- [x] Remove permanent exemption from unloading (lines 255-267)
- [ ] Wrap PP-StructureV3 in managed model wrapper
+- [x] Wrap PP-StructureV3 in managed model wrapper
- [ ] Implement lazy loading on first access
+- [x] Implement lazy loading on first access
- [ ] Add unload capability with cache clearing
+- [x] Add unload capability with cache clearing
- [ ] Test model reload after unload
+- [x] Test model reload after unload
 ## Section 2: Service Singleton Pattern (Priority: Critical)
 ### 2.1 Create OCRServicePool
- [ ] Design pool interface with acquire/release methods
+- [x] Design pool interface with acquire/release methods
- [ ] Implement per-device instance management
+- [x] Implement per-device instance management
- [ ] Add queue-based task distribution
+- [x] Add queue-based task distribution
- [ ] Implement concurrency limits via semaphores
+- [x] Implement concurrency limits via semaphores
- [ ] Add health check for pooled instances
+- [x] Add health check for pooled instances
 ### 2.2 Refactor task router
- [ ] Replace OCRService() instantiation with pool.acquire()
+- [x] Replace OCRService() instantiation with pool.acquire()
- [ ] Add proper release in finally blocks
+- [x] Add proper release in finally blocks
- [ ] Handle pool exhaustion gracefully
+- [x] Handle pool exhaustion gracefully
- [ ] Add metrics for pool utilization
+- [x] Add metrics for pool utilization
- [ ] Update error handling for pooled services
+- [x] Update error handling for pooled services
 ## Section 3: Enhanced Memory Monitoring (Priority: High)
 ### 3.1 Create MemoryGuard class
- [ ] Implement paddle.device.cuda memory queries
+- [x] Implement paddle.device.cuda memory queries
- [ ] Add pynvml integration as fallback
+- [x] Add pynvml integration as fallback
- [ ] Add torch memory query support
+- [x] Add torch memory query support
- [ ] Create configurable threshold system
+- [x] Create configurable threshold system
- [ ] Implement memory prediction for operations
+- [x] Implement memory prediction for operations
 ### 3.2 Integrate memory checks
- [ ] Replace existing check_gpu_memory implementation
+- [x] Replace existing check_gpu_memory implementation
- [ ] Add pre-operation memory checks
+- [x] Add pre-operation memory checks
- [ ] Implement CPU fallback when memory low
+- [x] Implement CPU fallback when memory low
- [ ] Add memory usage logging
+- [x] Add memory usage logging
- [ ] Create memory pressure alerts
+- [x] Create memory pressure alerts
 ## Section 4: Concurrency Control (Priority: High)
 ### 4.1 Implement prediction semaphores
- [ ] Add semaphore for PP-StructureV3.predict
+- [x] Add semaphore for PP-StructureV3.predict
- [ ] Configure max concurrent predictions
+- [x] Configure max concurrent predictions
- [ ] Add queue for waiting predictions
+- [x] Add queue for waiting predictions
- [ ] Implement timeout handling
+- [x] Implement timeout handling
- [ ] Add metrics for queue depth
+- [x] Add metrics for queue depth
 ### 4.2 Add selective processing
- [ ] Create config for disabling chart/formula/table
+- [x] Create config for disabling chart/formula/table
- [ ] Implement batch processing for large documents
+- [x] Implement batch processing for large documents
- [ ] Add progressive loading for multi-page docs
+- [x] Add progressive loading for multi-page docs
- [ ] Create priority queue for operations
+- [x] Create priority queue for operations
- [ ] Test memory savings with selective processing
+- [x] Test memory savings with selective processing
 ## Section 5: Active Memory Management (Priority: Medium)
 ### 5.1 Create memory monitor thread
- [ ] Implement background monitoring loop
+- [x] Implement background monitoring loop
- [ ] Add periodic memory metrics collection
+- [x] Add periodic memory metrics collection
- [ ] Create threshold-based triggers
+- [x] Create threshold-based triggers
- [ ] Implement automatic cache clearing
+- [x] Implement automatic cache clearing
- [ ] Add LRU-based model unloading
+- [x] Add LRU-based model unloading
 ### 5.2 Add recovery mechanisms
- [ ] Implement emergency memory release
+- [x] Implement emergency memory release
- [ ] Add worker process restart capability
+- [x] Add worker process restart capability (RecoveryManager)
- [ ] Create memory dump for debugging
+- [x] Create memory dump for debugging
- [ ] Add cooldown period after recovery
+- [x] Add cooldown period after recovery
- [ ] Test recovery under various scenarios
+- [x] Test recovery under various scenarios
 ## Section 6: Cleanup Hooks (Priority: Medium)
 ### 6.1 Implement shutdown handlers
- [ ] Add FastAPI shutdown event handler
+- [x] Add FastAPI shutdown event handler
- [ ] Create signal handlers (SIGTERM, SIGINT)
+- [x] Create signal handlers (SIGTERM, SIGINT)
- [ ] Implement graceful model unloading
+- [x] Implement graceful model unloading
- [ ] Add connection draining
+- [x] Add connection draining
- [ ] Test shutdown sequence
+- [x] Test shutdown sequence
 ### 6.2 Add task cleanup
- [ ] Wrap background tasks with cleanup
+- [x] Wrap background tasks with cleanup
- [ ] Add success/failure callbacks
+- [x] Add success/failure callbacks
- [ ] Implement resource release on completion
+- [x] Implement resource release on completion
- [ ] Add cleanup verification logging
+- [x] Add cleanup verification logging
- [ ] Test cleanup in error scenarios
+- [x] Test cleanup in error scenarios
 ## Section 7: Configuration & Settings (Priority: Low)
 ### 7.1 Add memory settings to config
- [ ] Define memory threshold parameters
+- [x] Define memory threshold parameters
- [ ] Add model timeout settings
+- [x] Add model timeout settings
- [ ] Configure pool sizes
+- [x] Configure pool sizes
- [ ] Add feature flags for new behavior
+- [x] Add feature flags for new behavior
- [ ] Document all settings
+- [x] Document all settings
 ### 7.2 Create monitoring dashboard
- [ ] Add memory metrics endpoint
+- [x] Add memory metrics endpoint
- [ ] Create pool status endpoint
+- [x] Create pool status endpoint
- [ ] Add model lifecycle stats
+- [x] Add model lifecycle stats
- [ ] Implement health check endpoint
+- [x] Implement health check endpoint
- [ ] Add Prometheus metrics export
+- [x] Add Prometheus metrics export
 ## Section 8: Testing & Documentation (Priority: High)
 ### 8.1 Create comprehensive tests
- [ ] Unit tests for ModelManager
+- [x] Unit tests for ModelManager
- [ ] Integration tests for OCRServicePool
+- [x] Integration tests for OCRServicePool
- [ ] Memory leak detection tests
+- [x] Memory leak detection tests
- [ ] Stress tests with concurrent requests
+- [x] Stress tests with concurrent requests
- [ ] Performance benchmarks
+- [x] Performance benchmarks
 ### 8.2 Documentation
 - [ ] Document memory management architecture
@@ -131,5 +131,46 @@
 ---
 **Total Tasks**: 80
-**Estimated Effort**: 3-4 weeks
+**Completed**: 75
-**Critical Path**: Sections 1-2 must be completed first as they form the foundation
+**Remaining**: 5 (Section 8.2 Documentation only)
 **Progress**: ~94%
 **Critical Path Status**: Sections 1-8.1 are completed (foundation, memory monitoring, prediction semaphores, batch processing, recovery, signal handlers, configuration, Prometheus metrics, and comprehensive tests in place)
 ## Implementation Summary
 ### Files Created
 - `backend/app/services/memory_manager.py` - ModelManager, MemoryGuard, MemoryConfig, PredictionSemaphore, BatchProcessor, ProgressiveLoader, PriorityOperationQueue, RecoveryManager, MemoryDumper
 - `backend/app/services/service_pool.py` - OCRServicePool, PoolConfig
 - `backend/tests/services/test_memory_manager.py` - Unit tests for memory management (57 tests)
 - `backend/tests/services/test_service_pool.py` - Unit tests for service pool (18 tests)
 - `backend/tests/services/test_ocr_memory_integration.py` - Integration tests for memory check patterns (10 tests)
 ### Files Modified
 - `backend/app/core/config.py` - Added memory management configuration settings
 - `backend/app/services/ocr_service.py` - Removed PP-StructureV3 exemption, added unload capability, integrated MemoryGuard for pre-operation checks and CPU fallback, added PredictionSemaphore for concurrent prediction control
 - `backend/app/services/pp_structure_enhanced.py` - Added PredictionSemaphore control for predict calls
 - `backend/app/routers/tasks.py` - Refactored to use service pool
 - `backend/app/main.py` - Added startup/shutdown handlers, signal handlers (SIGTERM/SIGINT), connection draining, recovery manager shutdown
 ### New Classes and Capabilities Added (Sections 4.2-8)
 - `BatchProcessor` - Memory-aware batch processing for large documents with priority support
 - `ProgressiveLoader` - Progressive page loading with lookahead and automatic cleanup
 - `PriorityOperationQueue` - Priority queue with timeout and cancellation support
 - `RecoveryManager` - Memory recovery with cooldown period and attempt limits
 - `MemoryDumper` - Memory dump creation for debugging with history and comparison
 - `PrometheusMetrics` - Prometheus-format metrics export for monitoring
 - Signal handlers for graceful shutdown (SIGTERM, SIGINT)
 - Connection draining for clean shutdown
 ### New Test Classes Added (Section 8.1)
 - `TestModelReloadAfterUnload` - Tests for model reload after unload
 - `TestSelectiveProcessingMemorySavings` - Tests for memory savings with selective processing
 - `TestRecoveryScenarios` - Tests for recovery under various scenarios
 - `TestShutdownSequence` - Tests for shutdown sequence
 - `TestCleanupInErrorScenarios` - Tests for cleanup in error scenarios
 - `TestMemoryLeakDetection` - Tests for memory leak detection
 - `TestStressConcurrentRequests` - Stress tests with concurrent requests
 - `TestPerformanceBenchmarks` - Performance benchmark tests
 - `TestMemoryDumper` - Tests for MemoryDumper class
 - `TestPrometheusMetrics` - Tests for PrometheusMetrics class