feat: implement hybrid image extraction and memory management
Backend:
- Add hybrid image extraction for Direct track (inline image blocks)
- Add render_inline_image_regions() fallback when OCR doesn't find images
- Add check_document_for_missing_images() for detecting missing images
- Add memory management system (MemoryGuard, ModelManager, ServicePool)
- Update pdf_generator_service to handle HYBRID processing track
- Add ElementType.LOGO for logo extraction

Frontend:
- Fix PDF viewer re-rendering issues with memoization
- Add TaskNotFound component and useTaskValidation hook
- Disable StrictMode due to react-pdf incompatibility
- Fix task detail and results page loading states

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -21,6 +21,8 @@ except ImportError:
|
||||
import paddle
|
||||
from paddleocr import PPStructureV3
|
||||
from app.models.unified_document import ElementType
|
||||
from app.core.config import settings
|
||||
from app.services.memory_manager import prediction_context
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -96,8 +98,22 @@ class PPStructureEnhanced:
|
||||
try:
|
||||
logger.info(f"Enhanced PP-StructureV3 analysis on {image_path.name}")
|
||||
|
||||
# Perform structure analysis
|
||||
results = self.structure_engine.predict(str(image_path))
|
||||
# Perform structure analysis with semaphore control
|
||||
# This prevents OOM errors from multiple simultaneous predictions
|
||||
with prediction_context(timeout=settings.service_acquire_timeout_seconds) as acquired:
|
||||
if not acquired:
|
||||
logger.error("Failed to acquire prediction slot (timeout), returning empty result")
|
||||
return {
|
||||
'has_parsing_res_list': False,
|
||||
'elements': [],
|
||||
'total_elements': 0,
|
||||
'images': [],
|
||||
'tables': [],
|
||||
'element_types': {},
|
||||
'error': 'Prediction slot timeout'
|
||||
}
|
||||
|
||||
results = self.structure_engine.predict(str(image_path))
|
||||
|
||||
all_elements = []
|
||||
all_images = []
|
||||
|
||||
Reference in New Issue
Block a user