feat: implement hybrid image extraction and memory management

Backend:
- Add hybrid image extraction for Direct track (inline image blocks)
- Add render_inline_image_regions() fallback when OCR doesn't find images
- Add check_document_for_missing_images() for detecting missing images
- Add memory management system (MemoryGuard, ModelManager, ServicePool)
- Update pdf_generator_service to handle HYBRID processing track
- Add ElementType.LOGO for logo extraction

Frontend:
- Fix PDF viewer re-rendering issues with memoization
- Add TaskNotFound component and useTaskValidation hook
- Disable StrictMode due to react-pdf incompatibility
- Fix task detail and results page loading states

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
egg
2025-11-26 10:56:22 +08:00
parent ba8ddf2b68
commit 1afdb822c3
26 changed files with 8273 additions and 366 deletions

View File

@@ -572,8 +572,10 @@ class PDFGeneratorService:
processing_track = unified_doc.metadata.get('processing_track')
# Route to track-specific rendering method
is_direct_track = (processing_track == 'direct' or
processing_track == ProcessingTrack.DIRECT)
# ProcessingTrack is (str, Enum), so comparing with enum value works for both string and enum
# HYBRID track uses Direct track rendering (Direct text/tables + OCR images)
is_direct_track = (processing_track == ProcessingTrack.DIRECT or
processing_track == ProcessingTrack.HYBRID)
logger.info(f"Processing track: {processing_track}, using {'Direct' if is_direct_track else 'OCR'} track rendering")
@@ -675,8 +677,11 @@ class PDFGeneratorService:
logger.info("=== Direct Track PDF Generation ===")
logger.info(f"Total pages: {len(unified_doc.pages)}")
# Set current track for helper methods
self.current_processing_track = 'direct'
# Set current track for helper methods (may be DIRECT or HYBRID)
if hasattr(unified_doc, 'metadata') and unified_doc.metadata:
self.current_processing_track = unified_doc.metadata.processing_track
else:
self.current_processing_track = ProcessingTrack.DIRECT
# Get page dimensions from first page (for canvas initialization)
if not unified_doc.pages:
@@ -1074,11 +1079,16 @@ class PDFGeneratorService:
# *** 優先級 1: 檢查 ocr_dimensions (UnifiedDocument 轉換來的) ***
if 'ocr_dimensions' in ocr_data:
dims = ocr_data['ocr_dimensions']
w = float(dims.get('width', 0))
h = float(dims.get('height', 0))
if w > 0 and h > 0:
logger.info(f"使用 ocr_dimensions 欄位的頁面尺寸: {w:.1f} x {h:.1f}")
return (w, h)
# Handle both dict format {'width': w, 'height': h} and
# list format [{'page': 1, 'width': w, 'height': h}, ...]
if isinstance(dims, list) and len(dims) > 0:
dims = dims[0] # Use first page dimensions
if isinstance(dims, dict):
w = float(dims.get('width', 0))
h = float(dims.get('height', 0))
if w > 0 and h > 0:
logger.info(f"使用 ocr_dimensions 欄位的頁面尺寸: {w:.1f} x {h:.1f}")
return (w, h)
# *** 優先級 2: 檢查原始 JSON 的 dimensions ***
if 'dimensions' in ocr_data:
@@ -1418,8 +1428,8 @@ class PDFGeneratorService:
# Set font with track-specific styling
# Note: OCR track has no StyleInfo (extracted from images), so no advanced formatting
style_info = region.get('style')
is_direct_track = (self.current_processing_track == 'direct' or
self.current_processing_track == ProcessingTrack.DIRECT)
is_direct_track = (self.current_processing_track == ProcessingTrack.DIRECT or
self.current_processing_track == ProcessingTrack.HYBRID)
if style_info and is_direct_track:
# Direct track: Apply rich styling from StyleInfo
@@ -1661,10 +1671,15 @@ class PDFGeneratorService:
return
# Construct full path to image
# saved_path is relative to result_dir (e.g., "imgs/element_id.png")
image_path = result_dir / image_path_str
# Fallback for legacy data: if the relative path does not exist, retry with only the file name directly under result_dir
if not image_path.exists():
logger.warning(f"Image not found: {image_path}")
image_path = result_dir / Path(image_path_str).name
if not image_path.exists():
logger.warning(f"Image not found: {image_path_str} (in {result_dir})")
return
# Get bbox for positioning
@@ -2289,12 +2304,30 @@ class PDFGeneratorService:
col_widths = element.metadata['column_widths']
logger.debug(f"Using extracted column widths: {col_widths}")
# Create table without rowHeights (will use canvas scaling instead)
t = Table(table_content, colWidths=col_widths)
# Use original row heights from extraction if available
# Row heights must match the number of data rows exactly
row_heights_list = None
if element.metadata and 'row_heights' in element.metadata:
extracted_row_heights = element.metadata['row_heights']
num_data_rows = len(table_content)
num_height_rows = len(extracted_row_heights)
if num_height_rows == num_data_rows:
row_heights_list = extracted_row_heights
logger.debug(f"Using extracted row heights ({num_height_rows} rows): {row_heights_list}")
else:
# Row counts don't match - this can happen with merged cells or empty rows
logger.warning(f"Row height mismatch: {num_height_rows} heights for {num_data_rows} data rows, falling back to auto-sizing")
# Create table with both column widths and row heights for accurate sizing
t = Table(table_content, colWidths=col_widths, rowHeights=row_heights_list)
# Apply style with minimal padding to reduce table extension
# Use Chinese font to support special characters (℃, μm, ≦, ×, Ω, etc.)
font_for_table = self.font_name if self.font_registered else 'Helvetica'
style = TableStyle([
('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
('FONTNAME', (0, 0), (-1, -1), font_for_table),
('FONTSIZE', (0, 0), (-1, -1), 8),
('ALIGN', (0, 0), (-1, -1), 'LEFT'),
('VALIGN', (0, 0), (-1, -1), 'TOP'),
@@ -2307,8 +2340,8 @@ class PDFGeneratorService:
])
t.setStyle(style)
# CRITICAL: Use canvas scaling to fit table within bbox
# This is more reliable than rowHeights which doesn't always work
# Use canvas scaling as fallback to fit table within bbox
# With proper row heights, scaling should be minimal (close to 1.0)
# Step 1: Wrap to get actual rendered size
actual_width, actual_height = t.wrapOn(pdf_canvas, table_width * 10, table_height * 10)
@@ -2358,11 +2391,16 @@ class PDFGeneratorService:
logger.warning(f"No image path for element {element.element_id}")
return
# Construct full path
# Construct full path to image
# saved_path is relative to result_dir (e.g., "document_id_p1_img0.png")
image_path = result_dir / image_path_str
# Fallback for legacy data: if the relative path does not exist, retry with only the file name directly under result_dir
if not image_path.exists():
logger.warning(f"Image not found: {image_path}")
image_path = result_dir / Path(image_path_str).name
if not image_path.exists():
logger.warning(f"Image not found: {image_path_str} (in {result_dir})")
return
# Get bbox
@@ -2388,7 +2426,7 @@ class PDFGeneratorService:
preserveAspectRatio=True
)
logger.debug(f"Drew image: {image_path_str}")
logger.debug(f"Drew image: {image_path} (from: {original_path_str})")
except Exception as e:
logger.error(f"Failed to draw image element {element.element_id}: {e}")