feat: implement hybrid image extraction and memory management

Backend:
- Add hybrid image extraction for Direct track (inline image blocks)
- Add render_inline_image_regions() fallback when OCR doesn't find images
- Add check_document_for_missing_images() for detecting missing images
- Add memory management system (MemoryGuard, ModelManager, ServicePool)
- Update pdf_generator_service to handle HYBRID processing track
- Add ElementType.LOGO for logo extraction

Frontend:
- Fix PDF viewer re-rendering issues with memoization
- Add TaskNotFound component and useTaskValidation hook
- Disable StrictMode due to react-pdf incompatibility
- Fix task detail and results page loading states

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
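
The memory-management pieces listed above center on a bounded service pool: instead of constructing a fresh OCRService per background task (each of which loads models onto the GPU), callers acquire a shared instance and release it when done. A minimal sketch of that idea, assuming a queue-backed pool; the real ServicePool / PoolConfig in app/services/service_pool.py and the MemoryGuard / ModelManager helpers may differ in detail:

import queue
from dataclasses import dataclass
from typing import Callable, Optional

@dataclass
class PooledService:
    service: object   # the wrapped OCR service instance
    device: str       # e.g. "GPU:0"

class BoundedServicePool:
    """Toy pool: at most max_size live service instances for one device."""

    def __init__(self, factory: Callable[[], object], max_size: int = 2, device: str = "GPU:0"):
        self._factory = factory
        self._device = device
        self._slots: queue.Queue = queue.Queue(maxsize=max_size)
        for _ in range(max_size):
            self._slots.put(PooledService(service=factory(), device=device))

    def acquire(self, device: str = "GPU:0", timeout: Optional[float] = None,
                task_id: str = "") -> Optional[PooledService]:
        """Return a pooled service, or None if nothing frees up within timeout."""
        try:
            return self._slots.get(timeout=timeout)
        except queue.Empty:
            return None

    def release(self, pooled: PooledService, error: Optional[Exception] = None) -> None:
        """Hand the instance back; on error, replace it with a fresh one."""
        if error is not None:
            pooled = PooledService(service=self._factory(), device=pooled.device)
        self._slots.put(pooled)

The endpoint code in the diff below relies only on this surface: acquire() returning either a wrapper exposing .service or None on timeout, and release() accepting the error (if any) from the work that used the instance.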
@@ -39,6 +39,7 @@ from app.schemas.task import (
 from app.services.task_service import task_service
 from app.services.file_access_service import file_access_service
 from app.services.ocr_service import OCRService
+from app.services.service_pool import get_service_pool, PoolConfig
 
 # Import dual-track components
 try:
@@ -47,6 +48,13 @@ try:
 except ImportError:
     DUAL_TRACK_AVAILABLE = False
 
+# Service pool availability
+SERVICE_POOL_AVAILABLE = True
+try:
+    from app.services.memory_manager import get_model_manager
+except ImportError:
+    SERVICE_POOL_AVAILABLE = False
+
 logger = logging.getLogger(__name__)
 
 router = APIRouter(prefix="/api/v2/tasks", tags=["Tasks"])
@@ -63,7 +71,10 @@ def process_task_ocr(
     pp_structure_params: Optional[dict] = None
 ):
     """
-    Background task to process OCR for a task with dual-track support
+    Background task to process OCR for a task with dual-track support.
+
+    Uses OCRServicePool to acquire a shared service instance instead of
+    creating a new one, preventing GPU memory proliferation.
 
     Args:
         task_id: Task UUID string
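
Condensed, the acquire/use/release flow that the hunks below add around the OCR work looks roughly like this. A sketch only, reusing names already imported in this module (settings, SERVICE_POOL_AVAILABLE, get_service_pool, OCRService); the actual endpoint splits the release across the success, error, and finally paths:

def _run_with_pooled_service(task_id: str, work) -> None:
    # Prefer a pooled instance; fall back to a private one when the pool
    # is disabled, times out, or raises during acquisition.
    pooled = None
    ocr_service = None
    if settings.enable_service_pool and SERVICE_POOL_AVAILABLE:
        try:
            pooled = get_service_pool().acquire(
                device="GPU:0",
                timeout=settings.service_acquire_timeout_seconds,
                task_id=task_id,
            )
            if pooled:
                ocr_service = pooled.service
        except Exception:
            pooled = None
    if ocr_service is None:
        ocr_service = OCRService()  # pool disabled or exhausted

    error = None
    try:
        work(ocr_service)
    except Exception as exc:
        error = exc
        raise
    finally:
        if pooled:
            # Always hand the instance back, passing along any failure so the
            # pool can decide whether to recycle or rebuild it.
            get_service_pool().release(pooled, error=error)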
@@ -80,6 +91,7 @@ def process_task_ocr(
 
     db = SessionLocal()
     start_time = datetime.now()
+    pooled_service = None
 
     try:
         logger.info(f"Starting OCR processing for task {task_id}, file: {filename}")
@@ -91,16 +103,39 @@ def process_task_ocr(
             logger.error(f"Task {task_id} not found in database")
             return
 
-        # Initialize OCR service
-        ocr_service = OCRService()
+        # Acquire OCR service from pool (or create new if pool disabled)
+        ocr_service = None
+        if settings.enable_service_pool and SERVICE_POOL_AVAILABLE:
+            try:
+                service_pool = get_service_pool()
+                pooled_service = service_pool.acquire(
+                    device="GPU:0",
+                    timeout=settings.service_acquire_timeout_seconds,
+                    task_id=task_id
+                )
+                if pooled_service:
+                    ocr_service = pooled_service.service
+                    logger.info(f"Acquired OCR service from pool for task {task_id}")
+                else:
+                    logger.warning(f"Timeout acquiring service from pool, creating new instance")
+            except Exception as e:
+                logger.warning(f"Failed to acquire service from pool: {e}, creating new instance")
+
+        # Fallback: create new instance if pool acquisition failed
+        if ocr_service is None:
+            logger.info("Creating new OCRService instance (pool disabled or unavailable)")
+            ocr_service = OCRService()
 
         # Create result directory before OCR processing (needed for saving extracted images)
         result_dir = Path(settings.result_dir) / task_id
         result_dir.mkdir(parents=True, exist_ok=True)
 
-        # Process the file with OCR (use dual-track if available)
-        if use_dual_track and hasattr(ocr_service, 'process'):
-            # Use new dual-track processing
+        # Process the file with OCR
+        # Use dual-track processing if:
+        # 1. use_dual_track is True (auto-detection)
+        # 2. OR force_track is specified (explicit track selection)
+        if (use_dual_track or force_track) and hasattr(ocr_service, 'process'):
+            # Use new dual-track processing (or forced track)
             ocr_result = ocr_service.process(
                 file_path=Path(file_path),
                 lang=language,
@@ -111,7 +146,7 @@ def process_task_ocr(
                 pp_structure_params=pp_structure_params
             )
         else:
-            # Fall back to traditional processing
+            # Fall back to traditional processing (no force_track support)
             ocr_result = ocr_service.process_image(
                 image_path=Path(file_path),
                 lang=language,
@@ -131,6 +166,16 @@ def process_task_ocr(
             source_file_path=Path(file_path)
         )
 
+        # Release service back to pool (success case)
+        if pooled_service:
+            try:
+                service_pool = get_service_pool()
+                service_pool.release(pooled_service, error=None)
+                logger.info(f"Released OCR service back to pool for task {task_id}")
+                pooled_service = None  # Prevent double release in finally
+            except Exception as e:
+                logger.warning(f"Failed to release service to pool: {e}")
+
         # Close old session and create fresh one to avoid MySQL timeout
         # (long OCR processing may cause connection to become stale)
         db.close()
@@ -158,6 +203,15 @@ def process_task_ocr(
     except Exception as e:
         logger.exception(f"OCR processing failed for task {task_id}")
 
+        # Release service back to pool with error
+        if pooled_service:
+            try:
+                service_pool = get_service_pool()
+                service_pool.release(pooled_service, error=e)
+                pooled_service = None
+            except Exception as release_error:
+                logger.warning(f"Failed to release service to pool: {release_error}")
+
         # Update task status to failed (direct database update)
         try:
             task = db.query(Task).filter(Task.id == task_db_id).first()
@@ -170,6 +224,13 @@ def process_task_ocr(
             logger.error(f"Failed to update task status: {update_error}")
 
     finally:
+        # Ensure service is released in case of any missed release
+        if pooled_service:
+            try:
+                service_pool = get_service_pool()
+                service_pool.release(pooled_service, error=None)
+            except Exception:
+                pass
         db.close()
 
 
@@ -330,7 +391,13 @@ async def get_task(
                 with open(result_path) as f:
                     result_data = json.load(f)
                 metadata = result_data.get("metadata", {})
-                processing_track = metadata.get("processing_track")
+                track_str = metadata.get("processing_track")
+                # Convert string to enum to avoid Pydantic serialization warning
+                if track_str:
+                    try:
+                        processing_track = ProcessingTrackEnum(track_str)
+                    except ValueError:
+                        processing_track = None
             except Exception:
                 pass  # Silently ignore errors reading the result file
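
The string-to-enum conversion above assumes ProcessingTrackEnum is a str-backed Enum whose values match the strings written into the result metadata; handing Pydantic the enum member rather than a raw string is what avoids the serialization warning when the response model types the field as that enum. A sketch of that assumption, with member names that are illustrative rather than taken from app.schemas.task:

from enum import Enum

class ProcessingTrackEnum(str, Enum):
    # Hypothetical members; the real definition lives in app.schemas.task
    DIRECT = "direct"
    OCR = "ocr"
    HYBRID = "hybrid"

track_str = "hybrid"                                   # value read from metadata
try:
    processing_track = ProcessingTrackEnum(track_str)  # -> ProcessingTrackEnum.HYBRID
except ValueError:
    processing_track = None                            # unknown/legacy value: omit instead of failing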