feat: implement kanban real-time sync and fix workload cache
## Kanban Real-time Sync (NEW-002)
- Backend:
  - WebSocket endpoint: `/ws/projects/{project_id}` (see the client sketch after this section)
  - Project room management in ConnectionManager
  - Redis Pub/Sub: `project:{project_id}:tasks` channel
  - Task CRUD event publishing (5 event types)
  - Redis connection retry with exponential backoff
  - Race condition fix in `broadcast_to_project`
- Frontend:
  - ProjectSyncContext for WebSocket management
  - Reconnection with exponential backoff (max 5 attempts)
  - Multi-tab event deduplication via `event_id`
  - Live/Offline connection indicator
  - Optimistic updates with rollback
- Spec:
  - collaboration spec: +1 requirement (Project Real-time Sync)
  - 7 new scenarios for real-time sync
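For reference, a minimal client for the new endpoint might look like the sketch below. This is illustrative only: the `websockets` package, host, and placeholder values are assumptions; the endpoint path and ping/pong protocol come from the diff further down.

```python
# Hypothetical smoke-test client for the sync endpoint (not part of this commit).
import asyncio
import json

import websockets  # assumption: pip install websockets

async def watch_project(project_id: str, token: str) -> None:
    url = f"ws://localhost:8000/ws/projects/{project_id}?token={token}"
    async with websockets.connect(url) as ws:
        async for raw in ws:
            event = json.loads(raw)
            if event.get("type") == "ping":
                # Answer the server keepalive so we aren't disconnected
                await ws.send(json.dumps({"type": "pong"}))
                continue
            print(event["type"], event.get("data", {}).get("task_id"))

asyncio.run(watch_project("demo-project-id", "<jwt>"))
```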
## Workload Cache Fix (NEW-001)
- Added cache invalidation to all task endpoints:
  - create_task, update_task, update_task_status
  - delete_task, restore_task, assign_task
- Extended the invalidation to also clear the heatmap cache (a worked sketch follows this list)
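The helper's new two-pattern behavior can be exercised in isolation. A hedged sketch using `fakeredis` (an assumption, not a project dependency) so it runs without a server; key names are illustrative, only the prefixes match the diff below:

```python
import fakeredis

r = fakeredis.FakeStrictRedis()
r.set("workload:user:42:summary", "...")   # user-specific cache entry
r.set("workload:heatmap:2026-W02", "...")  # aggregate heatmap entry

# Same SCAN-and-delete pattern the fix introduces, for both key families
for pattern in ("workload:user:42:*", "workload:heatmap:*"):
    for key in r.scan_iter(match=pattern):
        r.delete(key)

assert not list(r.scan_iter(match="workload:*"))  # both families cleared
```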
## OpenSpec Archive
- 2026-01-05-add-kanban-realtime-sync
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
@@ -1,3 +1,4 @@
import logging
import uuid
from datetime import datetime, timezone
from typing import List, Optional
@@ -5,6 +6,7 @@ from fastapi import APIRouter, Depends, HTTPException, status, Query, Request
from sqlalchemy.orm import Session

from app.core.database import get_db
from app.core.redis_pubsub import publish_task_event
from app.models import User, Project, Task, TaskStatus, AuditAction, Blocker
from app.schemas.task import (
    TaskCreate, TaskUpdate, TaskResponse, TaskWithDetails, TaskListResponse,
@@ -16,6 +18,9 @@ from app.middleware.auth import (
from app.middleware.audit import get_audit_metadata
from app.services.audit_service import AuditService
from app.services.trigger_service import TriggerService
from app.services.workload_cache import invalidate_user_workload_cache

logger = logging.getLogger(__name__)

router = APIRouter(tags=["tasks"])

@@ -231,6 +236,40 @@ async def create_task(
    db.commit()
    db.refresh(task)

    # Invalidate workload cache if task has an assignee
    if task.assignee_id:
        invalidate_user_workload_cache(task.assignee_id)

    # Publish real-time event
    try:
        await publish_task_event(
            project_id=str(task.project_id),
            event_type="task_created",
            task_data={
                "task_id": str(task.id),
                "project_id": str(task.project_id),
                "title": task.title,
                "description": task.description,
                "status_id": str(task.status_id) if task.status_id else None,
                "status_name": task.status.name if task.status else None,
                "status_color": task.status.color if task.status else None,
                "assignee_id": str(task.assignee_id) if task.assignee_id else None,
                "assignee_name": task.assignee.name if task.assignee else None,
                "priority": task.priority,
                "due_date": str(task.due_date) if task.due_date else None,
                "time_estimate": task.original_estimate,
                "original_estimate": task.original_estimate,
                "parent_task_id": str(task.parent_task_id) if task.parent_task_id else None,
                "position": task.position,
                "created_by": str(task.created_by),
                "creator_name": task.creator.name if task.creator else None,
                "created_at": str(task.created_at),
            },
            triggered_by=str(current_user.id)
        )
    except Exception as e:
        logger.warning(f"Failed to publish task_created event: {e}")

    return task


@@ -341,6 +380,40 @@ async def update_task(
    db.commit()
    db.refresh(task)

    # Invalidate workload cache if original_estimate changed and task has an assignee
    if "original_estimate" in update_data and task.assignee_id:
        invalidate_user_workload_cache(task.assignee_id)

    # Publish real-time event
    try:
        await publish_task_event(
            project_id=str(task.project_id),
            event_type="task_updated",
            task_data={
                "task_id": str(task.id),
                "project_id": str(task.project_id),
                "title": task.title,
                "description": task.description,
                "status_id": str(task.status_id) if task.status_id else None,
                "status_name": task.status.name if task.status else None,
                "status_color": task.status.color if task.status else None,
                "assignee_id": str(task.assignee_id) if task.assignee_id else None,
                "assignee_name": task.assignee.name if task.assignee else None,
                "priority": task.priority,
                "due_date": str(task.due_date) if task.due_date else None,
                "time_estimate": task.original_estimate,
                "original_estimate": task.original_estimate,
                "time_spent": task.time_spent,
                "parent_task_id": str(task.parent_task_id) if task.parent_task_id else None,
                "position": task.position,
                "updated_at": str(task.updated_at),
                "updated_fields": list(update_data.keys()),
            },
            triggered_by=str(current_user.id)
        )
    except Exception as e:
        logger.warning(f"Failed to publish task_updated event: {e}")

    return task


@@ -408,6 +481,26 @@ async def delete_task(
    db.commit()
    db.refresh(task)

    # Invalidate workload cache for assignee
    if task.assignee_id:
        invalidate_user_workload_cache(task.assignee_id)

    # Publish real-time event
    try:
        await publish_task_event(
            project_id=str(task.project_id),
            event_type="task_deleted",
            task_data={
                "task_id": str(task.id),
                "project_id": str(task.project_id),
                "title": task.title,
                "parent_task_id": str(task.parent_task_id) if task.parent_task_id else None,
            },
            triggered_by=str(current_user.id)
        )
    except Exception as e:
        logger.warning(f"Failed to publish task_deleted event: {e}")

    return task


@@ -461,6 +554,10 @@ async def restore_task(
    db.commit()
    db.refresh(task)

    # Invalidate workload cache for assignee
    if task.assignee_id:
        invalidate_user_workload_cache(task.assignee_id)

    return task


@@ -500,8 +597,9 @@ async def update_task_status(
            detail="Status not found in this project",
        )

    # Capture old status for triggers
    # Capture old status for triggers and event publishing
    old_status_id = task.status_id
    old_status_name = task.status.name if task.status else None

    task.status_id = status_data.status_id

@@ -530,6 +628,32 @@ async def update_task_status(
    db.commit()
    db.refresh(task)

    # Invalidate workload cache when status changes (affects completed/incomplete task calculations)
    if old_status_id != status_data.status_id and task.assignee_id:
        invalidate_user_workload_cache(task.assignee_id)

    # Publish real-time event
    try:
        await publish_task_event(
            project_id=str(task.project_id),
            event_type="task_status_changed",
            task_data={
                "task_id": str(task.id),
                "project_id": str(task.project_id),
                "title": task.title,
                "old_status_id": str(old_status_id) if old_status_id else None,
                "old_status_name": old_status_name,
                "new_status_id": str(task.status_id) if task.status_id else None,
                "new_status_name": task.status.name if task.status else None,
                "new_status_color": task.status.color if task.status else None,
                "assignee_id": str(task.assignee_id) if task.assignee_id else None,
                "blocker_flag": task.blocker_flag,
            },
            triggered_by=str(current_user.id)
        )
    except Exception as e:
        logger.warning(f"Failed to publish task_status_changed event: {e}")

    return task


@@ -568,6 +692,7 @@ async def assign_task(
        )

    old_assignee_id = task.assignee_id
    old_assignee_name = task.assignee.name if task.assignee else None
    task.assignee_id = assign_data.assignee_id

    # Audit log
@@ -594,6 +719,34 @@ async def assign_task(
    db.commit()
    db.refresh(task)

    # Invalidate workload cache for both old and new assignees
    if old_assignee_id != assign_data.assignee_id:
        if old_assignee_id:
            invalidate_user_workload_cache(old_assignee_id)
        if assign_data.assignee_id:
            invalidate_user_workload_cache(assign_data.assignee_id)

    # Publish real-time event
    try:
        await publish_task_event(
            project_id=str(task.project_id),
            event_type="task_assigned",
            task_data={
                "task_id": str(task.id),
                "project_id": str(task.project_id),
                "title": task.title,
                "old_assignee_id": str(old_assignee_id) if old_assignee_id else None,
                "old_assignee_name": old_assignee_name,
                "new_assignee_id": str(task.assignee_id) if task.assignee_id else None,
                "new_assignee_name": task.assignee.name if task.assignee else None,
                "status_id": str(task.status_id) if task.status_id else None,
                "status_name": task.status.name if task.status else None,
            },
            triggered_by=str(current_user.id)
        )
    except Exception as e:
        logger.warning(f"Failed to publish task_assigned event: {e}")

    return task

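The hunks above wire every task endpoint to the same publish pattern: commit, invalidate the workload cache, then publish, with publish failures logged but never failing the request. For reference, the envelope a subscriber ultimately receives looks like this (shape taken from `publish_task_event` in the pub/sub hunks below; the values are placeholders):

```python
# Envelope constructed by publish_task_event (see below); illustrative values.
event = {
    "type": "task_created",               # one of the 5 event types
    "event_id": "8e6f...",                # uuid4 per event, used for multi-tab dedup
    "data": {"task_id": "...", "project_id": "...", "title": "..."},  # endpoint-specific payload
    "triggered_by": "<user_id>",          # who caused the change
    "timestamp": "2026-01-05T12:00:00",   # datetime.utcnow().isoformat()
}
```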
@@ -7,9 +7,10 @@ from sqlalchemy.orm import Session
from app.core.database import SessionLocal
from app.core.security import decode_access_token
from app.core.redis import get_redis_sync
from app.models import User, Notification
from app.models import User, Notification, Project
from app.services.websocket_manager import manager
from app.core.redis_pubsub import NotificationSubscriber
from app.core.redis_pubsub import NotificationSubscriber, ProjectTaskSubscriber
from app.middleware.auth import check_project_access

logger = logging.getLogger(__name__)
router = APIRouter(tags=["websocket"])
@@ -226,3 +227,182 @@ async def websocket_notifications(
                pass
        await subscriber.stop()
        await manager.disconnect(websocket, user_id)


async def verify_project_access(user_id: str, project_id: str) -> tuple[bool, Project | None]:
    """
    Check if user has access to the project.

    Args:
        user_id: The user's ID
        project_id: The project's ID

    Returns:
        Tuple of (has_access: bool, project: Project | None)
    """
    db = SessionLocal()
    try:
        # Get the user
        user = db.query(User).filter(User.id == user_id).first()
        if user is None or not user.is_active:
            return False, None

        # Get the project
        project = db.query(Project).filter(Project.id == project_id).first()
        if project is None:
            return False, None

        # Check access using existing middleware function
        has_access = check_project_access(user, project)
        return has_access, project
    finally:
        db.close()


@router.websocket("/ws/projects/{project_id}")
async def websocket_project_sync(
    websocket: WebSocket,
    project_id: str,
    token: str = Query(..., description="JWT token for authentication"),
):
    """
    WebSocket endpoint for project task real-time sync.

    Connect with: ws://host/ws/projects/{project_id}?token=<jwt_token>

    Messages sent by server:
    - {"type": "connected", "data": {"project_id": "...", "user_id": "..."}}
    - {"type": "task_created", "data": {...}, "triggered_by": "..."}
    - {"type": "task_updated", "data": {...}, "triggered_by": "..."}
    - {"type": "task_status_changed", "data": {...}, "triggered_by": "..."}
    - {"type": "task_deleted", "data": {...}, "triggered_by": "..."}
    - {"type": "task_assigned", "data": {...}, "triggered_by": "..."}
    - {"type": "ping"} / {"type": "pong"}

    Messages accepted from client:
    - {"type": "ping"} - Client keepalive ping
    """
    # Authenticate user
    user_id, user = await get_user_from_token(token)

    if user_id is None:
        await websocket.close(code=4001, reason="Invalid or expired token")
        return

    # Verify user has access to the project
    has_access, project = await verify_project_access(user_id, project_id)

    if not has_access:
        await websocket.close(code=4003, reason="Access denied to this project")
        return

    if project is None:
        await websocket.close(code=4004, reason="Project not found")
        return

    # Accept connection and join project room
    await websocket.accept()
    await manager.join_project(websocket, user_id, project_id)

    # Create Redis subscriber for project task events
    subscriber = ProjectTaskSubscriber(project_id)

    async def handle_redis_message(event_data: dict):
        """Forward Redis pub/sub task events to WebSocket."""
        try:
            # Forward the event directly (it already contains type, data, triggered_by)
            await websocket.send_json(event_data)
        except Exception as e:
            logger.error(f"Error forwarding task event to WebSocket: {e}")

    redis_task = None

    try:
        # Send initial connection success message
        await websocket.send_json({
            "type": "connected",
            "data": {
                "project_id": project_id,
                "user_id": user_id,
                "project_title": project.title if project else None,
            },
        })

        logger.info(f"User {user_id} connected to project {project_id} WebSocket")

        # Start Redis pub/sub subscription in background
        await subscriber.start()
        redis_task = asyncio.create_task(subscriber.listen(handle_redis_message))

        # Heartbeat tracking (reuse same configuration as notifications)
        waiting_for_pong = False
        ping_sent_at = 0.0
        last_activity = time.time()

        while True:
            # Calculate appropriate timeout based on state
            if waiting_for_pong:
                # When waiting for pong, use remaining pong timeout
                remaining = PONG_TIMEOUT - (time.time() - ping_sent_at)
                if remaining <= 0:
                    logger.warning(f"Pong timeout for user {user_id} in project {project_id}, disconnecting")
                    break
                timeout = remaining
            else:
                # When not waiting, use remaining ping interval
                remaining = PING_INTERVAL - (time.time() - last_activity)
                if remaining <= 0:
                    # Time to send ping immediately
                    try:
                        await websocket.send_json({"type": "ping"})
                        waiting_for_pong = True
                        ping_sent_at = time.time()
                        last_activity = ping_sent_at
                        timeout = PONG_TIMEOUT
                    except Exception:
                        break
                else:
                    timeout = remaining

            try:
                # Wait for messages from client
                data = await asyncio.wait_for(
                    websocket.receive_json(),
                    timeout=timeout
                )

                last_activity = time.time()
                msg_type = data.get("type")

                # Handle ping message from client
                if msg_type == "ping":
                    await websocket.send_json({"type": "pong"})

                # Handle pong message from client (response to our ping)
                elif msg_type == "pong":
                    waiting_for_pong = False
                    logger.debug(f"Pong received from user {user_id} in project {project_id}")

            except asyncio.TimeoutError:
                if waiting_for_pong:
                    # Strict timeout check
                    if time.time() - ping_sent_at >= PONG_TIMEOUT:
                        logger.warning(f"Pong timeout for user {user_id} in project {project_id}, disconnecting")
                        break
                # If not waiting_for_pong, loop will handle sending ping at top

    except WebSocketDisconnect:
        logger.info(f"User {user_id} disconnected from project {project_id} WebSocket")
    except Exception as e:
        logger.error(f"WebSocket error for project {project_id}: {e}")
    finally:
        # Clean up Redis subscription
        if redis_task:
            redis_task.cancel()
            try:
                await redis_task
            except asyncio.CancelledError:
                pass
        await subscriber.stop()
        await manager.leave_project(websocket, user_id, project_id)
        logger.info(f"User {user_id} left project {project_id} room")

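The heartbeat loop above interleaves server pings with client traffic by recomputing the receive timeout each iteration. A standalone sketch of that arithmetic follows; note that `PING_INTERVAL` and `PONG_TIMEOUT` live elsewhere in this module and are not part of the diff, so the values below are assumptions.

```python
import time

PING_INTERVAL = 30.0  # assumed: seconds of idle before the server pings
PONG_TIMEOUT = 10.0   # assumed: seconds to wait for a pong before dropping

def next_timeout(waiting_for_pong: bool, ping_sent_at: float, last_activity: float):
    """Mirror of the loop's timeout logic: None means the pong is overdue."""
    now = time.time()
    if waiting_for_pong:
        remaining = PONG_TIMEOUT - (now - ping_sent_at)
        return None if remaining <= 0 else remaining
    remaining = PING_INTERVAL - (now - last_activity)
    # Once the idle interval elapses, the endpoint sends a ping and waits PONG_TIMEOUT.
    return PONG_TIMEOUT if remaining <= 0 else remaining
```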
@@ -1,7 +1,10 @@
"""Redis Pub/Sub service for cross-process notification broadcasting."""

import asyncio
import json
import logging
import uuid
from datetime import datetime
from typing import Optional, Callable, Any
import redis.asyncio as aioredis

@@ -9,6 +12,10 @@ from app.core.config import settings

logger = logging.getLogger(__name__)

# Redis retry configuration
MAX_REDIS_RETRIES = 3
REDIS_RETRY_DELAY = 0.5  # seconds (base delay for exponential backoff)

# Global async Redis client for pub/sub
_pubsub_redis: Optional[aioredis.Redis] = None

@@ -18,6 +25,11 @@ def get_channel_name(user_id: str) -> str:
    return f"notifications:{user_id}"


def get_project_channel_name(project_id: str) -> str:
    """Get the Redis channel name for project task events."""
    return f"project:{project_id}:tasks"


async def get_pubsub_redis() -> aioredis.Redis:
    """Get or create the async Redis client for pub/sub."""
    global _pubsub_redis
@@ -120,3 +132,187 @@ class NotificationSubscriber:
    @property
    def is_running(self) -> bool:
        return self._running


async def _reset_pubsub_redis() -> None:
    """Reset the Redis connection on failure."""
    global _pubsub_redis
    if _pubsub_redis is not None:
        try:
            await _pubsub_redis.close()
        except Exception:
            pass
    _pubsub_redis = None


async def publish_task_event(
    project_id: str,
    event_type: str,
    task_data: dict,
    triggered_by: str
) -> bool:
    """
    Publish a task event to a project's channel with retry logic.

    Args:
        project_id: The project ID
        event_type: Event type (task_created, task_updated, task_status_changed, task_deleted, task_assigned)
        task_data: The task data to include in the event
        triggered_by: User ID who triggered this event

    Returns:
        True if published successfully, False otherwise
    """
    channel = get_project_channel_name(project_id)
    message = json.dumps({
        "type": event_type,
        "event_id": str(uuid.uuid4()),  # Unique event ID for multi-tab deduplication
        "data": task_data,
        "triggered_by": triggered_by,
        "timestamp": datetime.utcnow().isoformat(),
    }, default=str)

    for attempt in range(MAX_REDIS_RETRIES):
        try:
            redis_client = await get_pubsub_redis()
            # Test connection with ping before publishing
            await redis_client.ping()
            await redis_client.publish(channel, message)
            logger.debug(f"Published task event '{event_type}' to channel {channel}")
            return True
        except Exception as e:
            logger.warning(f"Redis publish attempt {attempt + 1}/{MAX_REDIS_RETRIES} failed: {e}")
            if attempt < MAX_REDIS_RETRIES - 1:
                # Exponential backoff
                await asyncio.sleep(REDIS_RETRY_DELAY * (attempt + 1))
                # Reset connection on failure
                await _reset_pubsub_redis()
            else:
                logger.error(f"Failed to publish task event '{event_type}' after {MAX_REDIS_RETRIES} attempts")
                return False
    return False


class ProjectTaskSubscriber:
    """
    Subscriber for project task events via Redis Pub/Sub.
    Used by WebSocket connections to receive real-time task updates.
    Includes automatic reconnection handling.
    """

    def __init__(self, project_id: str):
        self.project_id = project_id
        self.channel = get_project_channel_name(project_id)
        self.pubsub: Optional[aioredis.client.PubSub] = None
        self._running = False
        self._reconnect_attempts = 0

    async def start(self) -> None:
        """Start subscribing to the project's task channel with retry logic."""
        for attempt in range(MAX_REDIS_RETRIES):
            try:
                redis_client = await get_pubsub_redis()
                # Test connection health
                await redis_client.ping()
                self.pubsub = redis_client.pubsub()
                await self.pubsub.subscribe(self.channel)
                self._running = True
                self._reconnect_attempts = 0
                logger.debug(f"Subscribed to project task channel {self.channel}")
                return
            except Exception as e:
                logger.warning(f"Redis subscribe attempt {attempt + 1}/{MAX_REDIS_RETRIES} failed: {e}")
                if attempt < MAX_REDIS_RETRIES - 1:
                    await asyncio.sleep(REDIS_RETRY_DELAY * (attempt + 1))
                    await _reset_pubsub_redis()
                else:
                    logger.error(f"Failed to subscribe to channel {self.channel} after {MAX_REDIS_RETRIES} attempts")
                    raise

    async def _reconnect(self) -> bool:
        """Attempt to reconnect to Redis and resubscribe."""
        self._reconnect_attempts += 1
        if self._reconnect_attempts > MAX_REDIS_RETRIES:
            logger.error(f"Max reconnection attempts reached for channel {self.channel}")
            return False

        logger.info(f"Attempting to reconnect to Redis (attempt {self._reconnect_attempts}/{MAX_REDIS_RETRIES})")

        # Clean up old pubsub
        if self.pubsub:
            try:
                await self.pubsub.close()
            except Exception:
                pass
            self.pubsub = None

        # Reset global connection
        await _reset_pubsub_redis()

        # Wait with exponential backoff
        await asyncio.sleep(REDIS_RETRY_DELAY * self._reconnect_attempts)

        try:
            redis_client = await get_pubsub_redis()
            await redis_client.ping()
            self.pubsub = redis_client.pubsub()
            await self.pubsub.subscribe(self.channel)
            self._reconnect_attempts = 0
            logger.info(f"Successfully reconnected to channel {self.channel}")
            return True
        except Exception as e:
            logger.warning(f"Reconnection attempt failed: {e}")
            return False

    async def stop(self) -> None:
        """Stop subscribing and clean up."""
        self._running = False
        if self.pubsub:
            try:
                await self.pubsub.unsubscribe(self.channel)
                await self.pubsub.close()
            except Exception as e:
                logger.warning(f"Error during pubsub cleanup: {e}")
            self.pubsub = None
        logger.debug(f"Unsubscribed from project task channel {self.channel}")

    async def listen(self, callback: Callable[[dict], Any]) -> None:
        """
        Listen for task events and call the callback for each event.
        Includes automatic reconnection on connection failures.

        Args:
            callback: Async function to call with each task event dict.
                The dict contains: type, data, triggered_by
        """
        if not self.pubsub:
            raise RuntimeError("Subscriber not started. Call start() first.")

        while self._running:
            try:
                async for message in self.pubsub.listen():
                    if not self._running:
                        break

                    if message["type"] == "message":
                        try:
                            data = json.loads(message["data"])
                            await callback(data)
                        except json.JSONDecodeError:
                            logger.warning(f"Invalid JSON in task event: {message['data']}")
                        except Exception as e:
                            logger.error(f"Error processing task event: {e}")
            except Exception as e:
                if not self._running:
                    break
                logger.warning(f"Redis connection error in task listener: {e}")
                # Attempt to reconnect
                if await self._reconnect():
                    continue  # Resume listening after successful reconnection
                else:
                    logger.error(f"Failed to recover connection for channel {self.channel}")
                    break

    @property
    def is_running(self) -> bool:
        return self._running

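A hedged round-trip sketch for the publisher/subscriber pair above; it assumes a reachable Redis instance and that this module is importable as `app.core.redis_pubsub`:

```python
import asyncio

from app.core.redis_pubsub import ProjectTaskSubscriber, publish_task_event

async def demo() -> None:
    sub = ProjectTaskSubscriber("demo-project")
    await sub.start()

    async def on_event(event: dict) -> None:  # listen() awaits its callback
        print(event["type"], event["event_id"])

    listener = asyncio.create_task(sub.listen(on_event))
    await publish_task_event(
        project_id="demo-project",
        event_type="task_created",
        task_data={"task_id": "t-1"},
        triggered_by="u-1",
    )
    await asyncio.sleep(0.5)  # give the listener a moment to deliver
    await sub.stop()
    listener.cancel()

asyncio.run(demo())
```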
@@ -1,17 +1,23 @@
import json
import asyncio
from typing import Dict, Set, Optional
import logging
from typing import Dict, Set, Optional, Tuple
from fastapi import WebSocket
from app.core.redis import get_redis_sync

logger = logging.getLogger(__name__)


class ConnectionManager:
    """Manager for WebSocket connections."""

    def __init__(self):
        # user_id -> set of WebSocket connections
        # user_id -> set of WebSocket connections (for notifications)
        self.active_connections: Dict[str, Set[WebSocket]] = {}
        # project_id -> set of (user_id, WebSocket) tuples (for project sync)
        self.project_connections: Dict[str, Set[Tuple[str, WebSocket]]] = {}
        self._lock = asyncio.Lock()
        self._project_lock = asyncio.Lock()

    async def connect(self, websocket: WebSocket, user_id: str):
        """Accept and track a new WebSocket connection."""
@@ -52,6 +58,93 @@ class ConnectionManager:
        """Check if a user has any active connections."""
        return user_id in self.active_connections and len(self.active_connections[user_id]) > 0

    # Project room management methods

    async def join_project(self, websocket: WebSocket, user_id: str, project_id: str):
        """
        Add user to a project room for real-time task sync.

        Args:
            websocket: The WebSocket connection
            user_id: The user's ID
            project_id: The project to join
        """
        async with self._project_lock:
            if project_id not in self.project_connections:
                self.project_connections[project_id] = set()
            self.project_connections[project_id].add((user_id, websocket))
        logger.debug(f"User {user_id} joined project room {project_id}")

    async def leave_project(self, websocket: WebSocket, user_id: str, project_id: str):
        """
        Remove user from a project room.

        Args:
            websocket: The WebSocket connection
            user_id: The user's ID
            project_id: The project to leave
        """
        async with self._project_lock:
            if project_id in self.project_connections:
                self.project_connections[project_id].discard((user_id, websocket))
                if not self.project_connections[project_id]:
                    del self.project_connections[project_id]
        logger.debug(f"User {user_id} left project room {project_id}")

    async def broadcast_to_project(
        self,
        project_id: str,
        message: dict,
        exclude_user_id: Optional[str] = None
    ):
        """
        Broadcast message to all users in a project room.

        Args:
            project_id: The project room to broadcast to
            message: The message to send
            exclude_user_id: Optional user ID to exclude from broadcast (e.g., the sender)
        """
        # Create snapshot while holding lock to prevent race condition
        async with self._project_lock:
            if project_id not in self.project_connections:
                return
            connections_snapshot = list(self.project_connections[project_id])

        disconnected = set()
        for user_id, websocket in connections_snapshot:
            # Skip excluded user (sender)
            if exclude_user_id and user_id == exclude_user_id:
                continue

            try:
                await websocket.send_json(message)
            except Exception as e:
                logger.warning(f"Failed to send message to user {user_id} in project {project_id}: {e}")
                disconnected.add((user_id, websocket))

        # Clean up disconnected connections
        if disconnected:
            async with self._project_lock:
                for conn in disconnected:
                    if project_id in self.project_connections:
                        self.project_connections[project_id].discard(conn)
                        if not self.project_connections[project_id]:
                            del self.project_connections[project_id]

    def get_project_user_count(self, project_id: str) -> int:
        """Get the number of unique users in a project room."""
        if project_id not in self.project_connections:
            return 0
        unique_users = set(user_id for user_id, _ in self.project_connections[project_id])
        return len(unique_users)

    def is_user_in_project(self, user_id: str, project_id: str) -> bool:
        """Check if a user has any active connections to a project room."""
        if project_id not in self.project_connections:
            return False
        return any(uid == user_id for uid, _ in self.project_connections[project_id])


# Global connection manager instance
manager = ConnectionManager()

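The snapshot-under-lock pattern in `broadcast_to_project` is what the commit message calls the race condition fix: room membership can change while sends are in flight without invalidating the iteration. A minimal illustration with fake websockets (a sketch, not part of the commit; it assumes the module is importable as `app.services.websocket_manager`):

```python
import asyncio

from app.services.websocket_manager import ConnectionManager

class FakeWS:
    """Stands in for a starlette WebSocket; only send_json is needed here."""
    async def send_json(self, message: dict) -> None:
        await asyncio.sleep(0)  # yield so a concurrent leave_project can run

async def main() -> None:
    mgr = ConnectionManager()
    ws_a, ws_b = FakeWS(), FakeWS()
    await mgr.join_project(ws_a, "alice", "p1")
    await mgr.join_project(ws_b, "bob", "p1")
    # Safe even though the room mutates mid-broadcast: sends iterate a snapshot.
    await asyncio.gather(
        mgr.broadcast_to_project("p1", {"type": "task_updated"}, exclude_user_id="alice"),
        mgr.leave_project(ws_b, "bob", "p1"),
    )
    print(mgr.get_project_user_count("p1"))  # -> 1 (alice remains)

asyncio.run(main())
```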
@@ -155,9 +155,19 @@ def invalidate_user_workload_cache(user_id: str) -> None:
    """
    Invalidate all cached workload data for a user.

    This clears:
    1. User-specific workload cache (workload:user:{user_id}:*)
    2. All heatmap caches (workload:heatmap:*) since heatmap aggregates may include this user

    Note: This uses pattern matching which may be slow for large datasets.
    For Phase 1, we rely on TTL expiration instead of active invalidation.
    """
    pattern = f"workload:*:{user_id}:*"
    for key in redis_client.scan_iter(match=pattern):
    # Clear user-specific workload cache
    user_pattern = f"workload:user:{user_id}:*"
    for key in redis_client.scan_iter(match=user_pattern):
        redis_client.delete(key)

    # Clear all heatmap caches since they aggregate user data
    heatmap_pattern = "workload:heatmap:*"
    for key in redis_client.scan_iter(match=heatmap_pattern):
        redis_client.delete(key)