feat: implement hybrid image extraction and memory management

Backend:
- Add hybrid image extraction for Direct track (inline image blocks)
- Add render_inline_image_regions() fallback when OCR doesn't find images
- Add check_document_for_missing_images() for detecting missing images
- Add memory management system (MemoryGuard, ModelManager, ServicePool)
- Update pdf_generator_service to handle HYBRID processing track
- Add ElementType.LOGO for logo extraction

Frontend:
- Fix PDF viewer re-rendering issues with memoization
- Add TaskNotFound component and useTaskValidation hook
- Disable StrictMode due to react-pdf incompatibility
- Fix task detail and results page loading states

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-26 10:56:22 +08:00
parent ba8ddf2b68
commit 1afdb822c3
26 changed files with 8273 additions and 366 deletions
--- a/backend/app/services/pdf_generator_service.py
+++ b/backend/app/services/pdf_generator_service.py
@@ -572,8 +572,10 @@ class PDFGeneratorService:
                    processing_track = unified_doc.metadata.get('processing_track')

            # Route to track-specific rendering method
-            is_direct_track = (processing_track == 'direct' or
-                              processing_track == ProcessingTrack.DIRECT)
+            # ProcessingTrack is (str, Enum), so comparing with enum value works for both string and enum
+            # HYBRID track uses Direct track rendering (Direct text/tables + OCR images)
+            is_direct_track = (processing_track == ProcessingTrack.DIRECT or
+                               processing_track == ProcessingTrack.HYBRID)

            logger.info(f"Processing track: {processing_track}, using {'Direct' if is_direct_track else 'OCR'} track rendering")

@@ -675,8 +677,11 @@ class PDFGeneratorService:
            logger.info("=== Direct Track PDF Generation ===")
            logger.info(f"Total pages: {len(unified_doc.pages)}")

-            # Set current track for helper methods
-            self.current_processing_track = 'direct'
+            # Set current track for helper methods (may be DIRECT or HYBRID)
+            if hasattr(unified_doc, 'metadata') and unified_doc.metadata:
+                self.current_processing_track = unified_doc.metadata.processing_track
+            else:
+                self.current_processing_track = ProcessingTrack.DIRECT

            # Get page dimensions from first page (for canvas initialization)
            if not unified_doc.pages:
@@ -1074,11 +1079,16 @@ class PDFGeneratorService:
        # *** 優先級 1: 檢查 ocr_dimensions (UnifiedDocument 轉換來的) ***
        if 'ocr_dimensions' in ocr_data:
            dims = ocr_data['ocr_dimensions']
-            w = float(dims.get('width', 0))
-            h = float(dims.get('height', 0))
-            if w > 0 and h > 0:
-                logger.info(f"使用 ocr_dimensions 欄位的頁面尺寸: {w:.1f} x {h:.1f}")
-                return (w, h)
+            # Handle both dict format {'width': w, 'height': h} and
+            # list format [{'page': 1, 'width': w, 'height': h}, ...]
+            if isinstance(dims, list) and len(dims) > 0:
+                dims = dims[0]  # Use first page dimensions
+            if isinstance(dims, dict):
+                w = float(dims.get('width', 0))
+                h = float(dims.get('height', 0))
+                if w > 0 and h > 0:
+                    logger.info(f"使用 ocr_dimensions 欄位的頁面尺寸: {w:.1f} x {h:.1f}")
+                    return (w, h)

        # *** 優先級 2: 檢查原始 JSON 的 dimensions ***
        if 'dimensions' in ocr_data:
@@ -1418,8 +1428,8 @@ class PDFGeneratorService:
            # Set font with track-specific styling
            # Note: OCR track has no StyleInfo (extracted from images), so no advanced formatting
            style_info = region.get('style')
-            is_direct_track = (self.current_processing_track == 'direct' or
-                              self.current_processing_track == ProcessingTrack.DIRECT)
+            is_direct_track = (self.current_processing_track == ProcessingTrack.DIRECT or
+                               self.current_processing_track == ProcessingTrack.HYBRID)

            if style_info and is_direct_track:
                # Direct track: Apply rich styling from StyleInfo
@@ -1661,10 +1671,15 @@ class PDFGeneratorService:
                return

            # Construct full path to image
+            # saved_path is relative to result_dir (e.g., "imgs/element_id.png")
            image_path = result_dir / image_path_str

+            # Fallback for legacy data
            if not image_path.exists():
-                logger.warning(f"Image not found: {image_path}")
+                image_path = result_dir / Path(image_path_str).name
+
+            if not image_path.exists():
+                logger.warning(f"Image not found: {image_path_str} (in {result_dir})")
                return

            # Get bbox for positioning
@@ -2289,12 +2304,30 @@ class PDFGeneratorService:
                col_widths = element.metadata['column_widths']
                logger.debug(f"Using extracted column widths: {col_widths}")

-            # Create table without rowHeights (will use canvas scaling instead)
-            t = Table(table_content, colWidths=col_widths)
+            # Use original row heights from extraction if available
+            # Row heights must match the number of data rows exactly
+            row_heights_list = None
+            if element.metadata and 'row_heights' in element.metadata:
+                extracted_row_heights = element.metadata['row_heights']
+                num_data_rows = len(table_content)
+                num_height_rows = len(extracted_row_heights)
+
+                if num_height_rows == num_data_rows:
+                    row_heights_list = extracted_row_heights
+                    logger.debug(f"Using extracted row heights ({num_height_rows} rows): {row_heights_list}")
+                else:
+                    # Row counts don't match - this can happen with merged cells or empty rows
+                    logger.warning(f"Row height mismatch: {num_height_rows} heights for {num_data_rows} data rows, falling back to auto-sizing")
+
+            # Create table with both column widths and row heights for accurate sizing
+            t = Table(table_content, colWidths=col_widths, rowHeights=row_heights_list)

            # Apply style with minimal padding to reduce table extension
+            # Use Chinese font to support special characters (℃, μm, ≦, ×, Ω, etc.)
+            font_for_table = self.font_name if self.font_registered else 'Helvetica'
            style = TableStyle([
                ('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
+                ('FONTNAME', (0, 0), (-1, -1), font_for_table),
                ('FONTSIZE', (0, 0), (-1, -1), 8),
                ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
                ('VALIGN', (0, 0), (-1, -1), 'TOP'),
@@ -2307,8 +2340,8 @@ class PDFGeneratorService:
            ])
            t.setStyle(style)

-            # CRITICAL: Use canvas scaling to fit table within bbox
-            # This is more reliable than rowHeights which doesn't always work
+            # Use canvas scaling as fallback to fit table within bbox
+            # With proper row heights, scaling should be minimal (close to 1.0)

            # Step 1: Wrap to get actual rendered size
            actual_width, actual_height = t.wrapOn(pdf_canvas, table_width * 10, table_height * 10)
@@ -2358,11 +2391,16 @@ class PDFGeneratorService:
                logger.warning(f"No image path for element {element.element_id}")
                return

-            # Construct full path
+            # Construct full path to image
+            # saved_path is relative to result_dir (e.g., "document_id_p1_img0.png")
            image_path = result_dir / image_path_str

+            # Fallback for legacy data
            if not image_path.exists():
-                logger.warning(f"Image not found: {image_path}")
+                image_path = result_dir / Path(image_path_str).name
+
+            if not image_path.exists():
+                logger.warning(f"Image not found: {image_path_str} (in {result_dir})")
                return

            # Get bbox
@@ -2388,7 +2426,7 @@ class PDFGeneratorService:
                preserveAspectRatio=True
            )

-            logger.debug(f"Drew image: {image_path_str}")
+            logger.debug(f"Drew image: {image_path} (from: {original_path_str})")

        except Exception as e:
            logger.error(f"Failed to draw image element {element.element_id}: {e}")