feat: implement hybrid image extraction and memory management
Backend:
- Add hybrid image extraction for Direct track (inline image blocks)
- Add render_inline_image_regions() fallback when OCR doesn't find images
- Add check_document_for_missing_images() for detecting missing images
- Add memory management system (MemoryGuard, ModelManager, ServicePool)
- Update pdf_generator_service to handle HYBRID processing track
- Add ElementType.LOGO for logo extraction

Frontend:
- Fix PDF viewer re-rendering issues with memoization
- Add TaskNotFound component and useTaskValidation hook
- Disable StrictMode due to react-pdf incompatibility
- Fix task detail and results page loading states

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -572,8 +572,10 @@ class PDFGeneratorService:
|
||||
processing_track = unified_doc.metadata.get('processing_track')
|
||||
|
||||
# Route to track-specific rendering method
|
||||
is_direct_track = (processing_track == 'direct' or
|
||||
processing_track == ProcessingTrack.DIRECT)
|
||||
# ProcessingTrack is (str, Enum), so comparing with enum value works for both string and enum
|
||||
# HYBRID track uses Direct track rendering (Direct text/tables + OCR images)
|
||||
is_direct_track = (processing_track == ProcessingTrack.DIRECT or
|
||||
processing_track == ProcessingTrack.HYBRID)
|
||||
|
||||
logger.info(f"Processing track: {processing_track}, using {'Direct' if is_direct_track else 'OCR'} track rendering")
|
||||
|
||||
@@ -675,8 +677,11 @@ class PDFGeneratorService:
|
||||
logger.info("=== Direct Track PDF Generation ===")
|
||||
logger.info(f"Total pages: {len(unified_doc.pages)}")
|
||||
|
||||
# Set current track for helper methods
|
||||
self.current_processing_track = 'direct'
|
||||
# Set current track for helper methods (may be DIRECT or HYBRID)
|
||||
if hasattr(unified_doc, 'metadata') and unified_doc.metadata:
|
||||
self.current_processing_track = unified_doc.metadata.processing_track
|
||||
else:
|
||||
self.current_processing_track = ProcessingTrack.DIRECT
|
||||
|
||||
# Get page dimensions from first page (for canvas initialization)
|
||||
if not unified_doc.pages:
|
||||
@@ -1074,11 +1079,16 @@ class PDFGeneratorService:
|
||||
# *** 優先級 1: 檢查 ocr_dimensions (UnifiedDocument 轉換來的) ***
|
||||
if 'ocr_dimensions' in ocr_data:
|
||||
dims = ocr_data['ocr_dimensions']
|
||||
w = float(dims.get('width', 0))
|
||||
h = float(dims.get('height', 0))
|
||||
if w > 0 and h > 0:
|
||||
logger.info(f"使用 ocr_dimensions 欄位的頁面尺寸: {w:.1f} x {h:.1f}")
|
||||
return (w, h)
|
||||
# Handle both dict format {'width': w, 'height': h} and
|
||||
# list format [{'page': 1, 'width': w, 'height': h}, ...]
|
||||
if isinstance(dims, list) and len(dims) > 0:
|
||||
dims = dims[0] # Use first page dimensions
|
||||
if isinstance(dims, dict):
|
||||
w = float(dims.get('width', 0))
|
||||
h = float(dims.get('height', 0))
|
||||
if w > 0 and h > 0:
|
||||
logger.info(f"使用 ocr_dimensions 欄位的頁面尺寸: {w:.1f} x {h:.1f}")
|
||||
return (w, h)
|
||||
|
||||
# *** 優先級 2: 檢查原始 JSON 的 dimensions ***
|
||||
if 'dimensions' in ocr_data:
|
||||
@@ -1418,8 +1428,8 @@ class PDFGeneratorService:
|
||||
# Set font with track-specific styling
|
||||
# Note: OCR track has no StyleInfo (extracted from images), so no advanced formatting
|
||||
style_info = region.get('style')
|
||||
is_direct_track = (self.current_processing_track == 'direct' or
|
||||
self.current_processing_track == ProcessingTrack.DIRECT)
|
||||
is_direct_track = (self.current_processing_track == ProcessingTrack.DIRECT or
|
||||
self.current_processing_track == ProcessingTrack.HYBRID)
|
||||
|
||||
if style_info and is_direct_track:
|
||||
# Direct track: Apply rich styling from StyleInfo
|
||||
@@ -1661,10 +1671,15 @@ class PDFGeneratorService:
|
||||
return
|
||||
|
||||
# Construct full path to image
|
||||
# saved_path is relative to result_dir (e.g., "imgs/element_id.png")
|
||||
image_path = result_dir / image_path_str
|
||||
|
||||
# Fallback for legacy data
|
||||
if not image_path.exists():
|
||||
logger.warning(f"Image not found: {image_path}")
|
||||
image_path = result_dir / Path(image_path_str).name
|
||||
|
||||
if not image_path.exists():
|
||||
logger.warning(f"Image not found: {image_path_str} (in {result_dir})")
|
||||
return
|
||||
|
||||
# Get bbox for positioning
|
||||
@@ -2289,12 +2304,30 @@ class PDFGeneratorService:
|
||||
col_widths = element.metadata['column_widths']
|
||||
logger.debug(f"Using extracted column widths: {col_widths}")
|
||||
|
||||
# Create table without rowHeights (will use canvas scaling instead)
|
||||
t = Table(table_content, colWidths=col_widths)
|
||||
# Use original row heights from extraction if available
|
||||
# Row heights must match the number of data rows exactly
|
||||
row_heights_list = None
|
||||
if element.metadata and 'row_heights' in element.metadata:
|
||||
extracted_row_heights = element.metadata['row_heights']
|
||||
num_data_rows = len(table_content)
|
||||
num_height_rows = len(extracted_row_heights)
|
||||
|
||||
if num_height_rows == num_data_rows:
|
||||
row_heights_list = extracted_row_heights
|
||||
logger.debug(f"Using extracted row heights ({num_height_rows} rows): {row_heights_list}")
|
||||
else:
|
||||
# Row counts don't match - this can happen with merged cells or empty rows
|
||||
logger.warning(f"Row height mismatch: {num_height_rows} heights for {num_data_rows} data rows, falling back to auto-sizing")
|
||||
|
||||
# Create table with both column widths and row heights for accurate sizing
|
||||
t = Table(table_content, colWidths=col_widths, rowHeights=row_heights_list)
|
||||
|
||||
# Apply style with minimal padding to reduce table extension
|
||||
# Use Chinese font to support special characters (℃, μm, ≦, ×, Ω, etc.)
|
||||
font_for_table = self.font_name if self.font_registered else 'Helvetica'
|
||||
style = TableStyle([
|
||||
('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
|
||||
('FONTNAME', (0, 0), (-1, -1), font_for_table),
|
||||
('FONTSIZE', (0, 0), (-1, -1), 8),
|
||||
('ALIGN', (0, 0), (-1, -1), 'LEFT'),
|
||||
('VALIGN', (0, 0), (-1, -1), 'TOP'),
|
||||
@@ -2307,8 +2340,8 @@ class PDFGeneratorService:
|
||||
])
|
||||
t.setStyle(style)
|
||||
|
||||
# CRITICAL: Use canvas scaling to fit table within bbox
|
||||
# This is more reliable than rowHeights which doesn't always work
|
||||
# Use canvas scaling as fallback to fit table within bbox
|
||||
# With proper row heights, scaling should be minimal (close to 1.0)
|
||||
|
||||
# Step 1: Wrap to get actual rendered size
|
||||
actual_width, actual_height = t.wrapOn(pdf_canvas, table_width * 10, table_height * 10)
|
||||
@@ -2358,11 +2391,16 @@ class PDFGeneratorService:
|
||||
logger.warning(f"No image path for element {element.element_id}")
|
||||
return
|
||||
|
||||
# Construct full path
|
||||
# Construct full path to image
|
||||
# saved_path is relative to result_dir (e.g., "document_id_p1_img0.png")
|
||||
image_path = result_dir / image_path_str
|
||||
|
||||
# Fallback for legacy data
|
||||
if not image_path.exists():
|
||||
logger.warning(f"Image not found: {image_path}")
|
||||
image_path = result_dir / Path(image_path_str).name
|
||||
|
||||
if not image_path.exists():
|
||||
logger.warning(f"Image not found: {image_path_str} (in {result_dir})")
|
||||
return
|
||||
|
||||
# Get bbox
|
||||
@@ -2388,7 +2426,7 @@ class PDFGeneratorService:
|
||||
preserveAspectRatio=True
|
||||
)
|
||||
|
||||
logger.debug(f"Drew image: {image_path_str}")
|
||||
logger.debug(f"Drew image: {image_path} (from: {original_path_str})")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to draw image element {element.element_id}: {e}")
|
||||
|
||||
Reference in New Issue
Block a user