feat: implement kanban real-time sync and fix workload cache

## Kanban Real-time Sync (NEW-002)
- Backend:
  - WebSocket endpoint: /ws/projects/{project_id} (see the client sketch at the end of this section)
  - Project room management in ConnectionManager
  - Redis Pub/Sub: project:{project_id}:tasks channel
  - Task CRUD event publishing (5 event types)
  - Redis connection retry with exponential backoff
  - Race condition fix in broadcast_to_project

- Frontend:
  - ProjectSyncContext for WebSocket management
  - Reconnection with exponential backoff (max 5 attempts)
  - Multi-tab event deduplication via event_id (see the sketch after this list)
  - Live/Offline connection indicator
  - Optimistic updates with rollback
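
A minimal sketch of the event_id deduplication idea (illustrative only: the real implementation lives in the frontend ProjectSyncContext, and the class name and cache size here are invented):

```python
from collections import OrderedDict


class EventDeduper:
    """Remember recently seen event ids so duplicate deliveries are dropped."""

    def __init__(self, max_remembered: int = 512):
        self._seen: OrderedDict = OrderedDict()  # insertion-ordered set of ids
        self._max = max_remembered

    def is_duplicate(self, event_id: str) -> bool:
        if event_id in self._seen:
            return True
        self._seen[event_id] = None
        if len(self._seen) > self._max:
            self._seen.popitem(last=False)  # evict the oldest remembered id
        return False
```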

- Spec:
  - collaboration spec: +1 requirement (Project Real-time Sync)
  - 7 new scenarios for real-time sync
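
For reference, a minimal client sketch for the new endpoint (assumes the third-party `websockets` package; host, project id, and token are placeholders):

```python
import asyncio
import json

import websockets  # third-party package, assumed installed


async def watch_project(project_id: str, token: str):
    # The endpoint authenticates via a JWT passed as a query parameter.
    url = f"ws://localhost:8000/ws/projects/{project_id}?token={token}"
    async with websockets.connect(url) as ws:
        async for raw in ws:
            msg = json.loads(raw)
            if msg.get("type") == "ping":
                # Answer the server keepalive so the connection stays open.
                await ws.send(json.dumps({"type": "pong"}))
            elif msg.get("type") == "connected":
                print("joined project", msg["data"]["project_id"])
            else:
                print("event:", msg.get("type"), "from", msg.get("triggered_by"))


asyncio.run(watch_project("<project-id>", "<jwt-token>"))
```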

## Workload Cache Fix (NEW-001)
- Added cache invalidation to all task endpoints:
  - create_task, update_task, update_task_status
  - delete_task, restore_task, assign_task
- Extended `invalidate_user_workload_cache` to also clear heatmap caches (see the illustration below)
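
A worked illustration of the two key patterns involved (hypothetical key names that follow the patterns in the workload cache diff at the bottom of this commit):

```python
# Hypothetical keys cleared when invalidating user "42":
#   workload:user:42:2026-01-05   matched by workload:user:{user_id}:*
#   workload:heatmap:2026-W02     matched by workload:heatmap:* (cleared for
#                                 every user, since heatmaps aggregate users)
from app.services.workload_cache import invalidate_user_workload_cache

invalidate_user_workload_cache("42")  # called from each task mutation endpoint
```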

## OpenSpec Archive
- 2026-01-05-add-kanban-realtime-sync

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
beabigegg
2026-01-05 20:28:42 +08:00
parent 9b220523ff
commit 69b81d9241
13 changed files with 1470 additions and 31 deletions


@@ -1,3 +1,4 @@
import logging
import uuid
from datetime import datetime, timezone
from typing import List, Optional
@@ -5,6 +6,7 @@ from fastapi import APIRouter, Depends, HTTPException, status, Query, Request
from sqlalchemy.orm import Session
from app.core.database import get_db
from app.core.redis_pubsub import publish_task_event
from app.models import User, Project, Task, TaskStatus, AuditAction, Blocker
from app.schemas.task import (
    TaskCreate, TaskUpdate, TaskResponse, TaskWithDetails, TaskListResponse,
@@ -16,6 +18,9 @@ from app.middleware.auth import (
from app.middleware.audit import get_audit_metadata
from app.services.audit_service import AuditService
from app.services.trigger_service import TriggerService
from app.services.workload_cache import invalidate_user_workload_cache
logger = logging.getLogger(__name__)
router = APIRouter(tags=["tasks"])
@@ -231,6 +236,40 @@ async def create_task(
    db.commit()
    db.refresh(task)

    # Invalidate workload cache if task has an assignee
    if task.assignee_id:
        invalidate_user_workload_cache(task.assignee_id)

    # Publish real-time event
    try:
        await publish_task_event(
            project_id=str(task.project_id),
            event_type="task_created",
            task_data={
                "task_id": str(task.id),
                "project_id": str(task.project_id),
                "title": task.title,
                "description": task.description,
                "status_id": str(task.status_id) if task.status_id else None,
                "status_name": task.status.name if task.status else None,
                "status_color": task.status.color if task.status else None,
                "assignee_id": str(task.assignee_id) if task.assignee_id else None,
                "assignee_name": task.assignee.name if task.assignee else None,
                "priority": task.priority,
                "due_date": str(task.due_date) if task.due_date else None,
                "time_estimate": task.original_estimate,
                "original_estimate": task.original_estimate,
                "parent_task_id": str(task.parent_task_id) if task.parent_task_id else None,
                "position": task.position,
                "created_by": str(task.created_by),
                "creator_name": task.creator.name if task.creator else None,
                "created_at": str(task.created_at),
            },
            triggered_by=str(current_user.id)
        )
    except Exception as e:
        logger.warning(f"Failed to publish task_created event: {e}")

    return task
@@ -341,6 +380,40 @@ async def update_task(
    db.commit()
    db.refresh(task)

    # Invalidate workload cache if original_estimate changed and task has an assignee
    if "original_estimate" in update_data and task.assignee_id:
        invalidate_user_workload_cache(task.assignee_id)

    # Publish real-time event
    try:
        await publish_task_event(
            project_id=str(task.project_id),
            event_type="task_updated",
            task_data={
                "task_id": str(task.id),
                "project_id": str(task.project_id),
                "title": task.title,
                "description": task.description,
                "status_id": str(task.status_id) if task.status_id else None,
                "status_name": task.status.name if task.status else None,
                "status_color": task.status.color if task.status else None,
                "assignee_id": str(task.assignee_id) if task.assignee_id else None,
                "assignee_name": task.assignee.name if task.assignee else None,
                "priority": task.priority,
                "due_date": str(task.due_date) if task.due_date else None,
                "time_estimate": task.original_estimate,
                "original_estimate": task.original_estimate,
                "time_spent": task.time_spent,
                "parent_task_id": str(task.parent_task_id) if task.parent_task_id else None,
                "position": task.position,
                "updated_at": str(task.updated_at),
                "updated_fields": list(update_data.keys()),
            },
            triggered_by=str(current_user.id)
        )
    except Exception as e:
        logger.warning(f"Failed to publish task_updated event: {e}")

    return task
@@ -408,6 +481,26 @@ async def delete_task(
    db.commit()
    db.refresh(task)

    # Invalidate workload cache for assignee
    if task.assignee_id:
        invalidate_user_workload_cache(task.assignee_id)

    # Publish real-time event
    try:
        await publish_task_event(
            project_id=str(task.project_id),
            event_type="task_deleted",
            task_data={
                "task_id": str(task.id),
                "project_id": str(task.project_id),
                "title": task.title,
                "parent_task_id": str(task.parent_task_id) if task.parent_task_id else None,
            },
            triggered_by=str(current_user.id)
        )
    except Exception as e:
        logger.warning(f"Failed to publish task_deleted event: {e}")

    return task
@@ -461,6 +554,10 @@ async def restore_task(
    db.commit()
    db.refresh(task)

    # Invalidate workload cache for assignee
    if task.assignee_id:
        invalidate_user_workload_cache(task.assignee_id)

    return task
@@ -500,8 +597,9 @@ async def update_task_status(
            detail="Status not found in this project",
        )

    # Capture old status for triggers
    # Capture old status for triggers and event publishing
    old_status_id = task.status_id
    old_status_name = task.status.name if task.status else None
    task.status_id = status_data.status_id
@@ -530,6 +628,32 @@ async def update_task_status(
    db.commit()
    db.refresh(task)

    # Invalidate workload cache when status changes (affects completed/incomplete task calculations)
    if old_status_id != status_data.status_id and task.assignee_id:
        invalidate_user_workload_cache(task.assignee_id)

    # Publish real-time event
    try:
        await publish_task_event(
            project_id=str(task.project_id),
            event_type="task_status_changed",
            task_data={
                "task_id": str(task.id),
                "project_id": str(task.project_id),
                "title": task.title,
                "old_status_id": str(old_status_id) if old_status_id else None,
                "old_status_name": old_status_name,
                "new_status_id": str(task.status_id) if task.status_id else None,
                "new_status_name": task.status.name if task.status else None,
                "new_status_color": task.status.color if task.status else None,
                "assignee_id": str(task.assignee_id) if task.assignee_id else None,
                "blocker_flag": task.blocker_flag,
            },
            triggered_by=str(current_user.id)
        )
    except Exception as e:
        logger.warning(f"Failed to publish task_status_changed event: {e}")

    return task
@@ -568,6 +692,7 @@ async def assign_task(
        )

    old_assignee_id = task.assignee_id
    old_assignee_name = task.assignee.name if task.assignee else None
    task.assignee_id = assign_data.assignee_id

    # Audit log
@@ -594,6 +719,34 @@ async def assign_task(
    db.commit()
    db.refresh(task)

    # Invalidate workload cache for both old and new assignees
    if old_assignee_id != assign_data.assignee_id:
        if old_assignee_id:
            invalidate_user_workload_cache(old_assignee_id)
        if assign_data.assignee_id:
            invalidate_user_workload_cache(assign_data.assignee_id)

    # Publish real-time event
    try:
        await publish_task_event(
            project_id=str(task.project_id),
            event_type="task_assigned",
            task_data={
                "task_id": str(task.id),
                "project_id": str(task.project_id),
                "title": task.title,
                "old_assignee_id": str(old_assignee_id) if old_assignee_id else None,
                "old_assignee_name": old_assignee_name,
                "new_assignee_id": str(task.assignee_id) if task.assignee_id else None,
                "new_assignee_name": task.assignee.name if task.assignee else None,
                "status_id": str(task.status_id) if task.status_id else None,
                "status_name": task.status.name if task.status else None,
            },
            triggered_by=str(current_user.id)
        )
    except Exception as e:
        logger.warning(f"Failed to publish task_assigned event: {e}")

    return task
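
For illustration, the envelope a subscriber ultimately receives for one of these publishes (placeholder values; `publish_task_event`, shown in the Redis pub/sub diff below, wraps `task_data` and adds `event_id` and `timestamp`):

```python
# Illustrative envelope with placeholder values; the real "data" dict carries
# the full field set built above.
example_event = {
    "type": "task_status_changed",
    "event_id": "7f9c2c1e-0b2d-4c6a-9a1e-3d5f8b2a4c6d",  # uuid4, for multi-tab dedup
    "data": {
        "task_id": "c0ffee00-1111-2222-3333-444455556666",
        "old_status_name": "In Progress",
        "new_status_name": "Done",
    },
    "triggered_by": "user-uuid",
    "timestamp": "2026-01-05T12:28:42.123456",
}
```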


@@ -7,9 +7,10 @@ from sqlalchemy.orm import Session
from app.core.database import SessionLocal
from app.core.security import decode_access_token
from app.core.redis import get_redis_sync
from app.models import User, Notification
from app.models import User, Notification, Project
from app.services.websocket_manager import manager
from app.core.redis_pubsub import NotificationSubscriber
from app.core.redis_pubsub import NotificationSubscriber, ProjectTaskSubscriber
from app.middleware.auth import check_project_access
logger = logging.getLogger(__name__)
router = APIRouter(tags=["websocket"])
@@ -226,3 +227,182 @@ async def websocket_notifications(
                pass
        await subscriber.stop()
        await manager.disconnect(websocket, user_id)
async def verify_project_access(user_id: str, project_id: str) -> tuple[bool, Project | None]:
    """
    Check if user has access to the project.

    Args:
        user_id: The user's ID
        project_id: The project's ID

    Returns:
        Tuple of (has_access: bool, project: Project | None)
    """
    db = SessionLocal()
    try:
        # Get the user
        user = db.query(User).filter(User.id == user_id).first()
        if user is None or not user.is_active:
            return False, None

        # Get the project
        project = db.query(Project).filter(Project.id == project_id).first()
        if project is None:
            return False, None

        # Check access using existing middleware function
        has_access = check_project_access(user, project)
        return has_access, project
    finally:
        db.close()
@router.websocket("/ws/projects/{project_id}")
async def websocket_project_sync(
    websocket: WebSocket,
    project_id: str,
    token: str = Query(..., description="JWT token for authentication"),
):
    """
    WebSocket endpoint for project task real-time sync.

    Connect with: ws://host/ws/projects/{project_id}?token=<jwt_token>

    Messages sent by server:
    - {"type": "connected", "data": {"project_id": "...", "user_id": "..."}}
    - {"type": "task_created", "data": {...}, "triggered_by": "..."}
    - {"type": "task_updated", "data": {...}, "triggered_by": "..."}
    - {"type": "task_status_changed", "data": {...}, "triggered_by": "..."}
    - {"type": "task_deleted", "data": {...}, "triggered_by": "..."}
    - {"type": "task_assigned", "data": {...}, "triggered_by": "..."}
    - {"type": "ping"} / {"type": "pong"}

    Messages accepted from client:
    - {"type": "ping"} - Client keepalive ping
    """
    # Authenticate user
    user_id, user = await get_user_from_token(token)
    if user_id is None:
        await websocket.close(code=4001, reason="Invalid or expired token")
        return

    # Verify the project exists and the user has access (existence is checked
    # first so a missing project closes with 4004 rather than the generic 4003)
    has_access, project = await verify_project_access(user_id, project_id)
    if project is None:
        await websocket.close(code=4004, reason="Project not found")
        return
    if not has_access:
        await websocket.close(code=4003, reason="Access denied to this project")
        return
    # Accept connection and join project room
    await websocket.accept()
    await manager.join_project(websocket, user_id, project_id)

    # Create Redis subscriber for project task events
    subscriber = ProjectTaskSubscriber(project_id)

    async def handle_redis_message(event_data: dict):
        """Forward Redis pub/sub task events to WebSocket."""
        try:
            # Forward the event directly (it already contains type, data, triggered_by)
            await websocket.send_json(event_data)
        except Exception as e:
            logger.error(f"Error forwarding task event to WebSocket: {e}")

    redis_task = None
    try:
        # Send initial connection success message
        await websocket.send_json({
            "type": "connected",
            "data": {
                "project_id": project_id,
                "user_id": user_id,
                "project_title": project.title if project else None,
            },
        })
        logger.info(f"User {user_id} connected to project {project_id} WebSocket")

        # Start Redis pub/sub subscription in background
        await subscriber.start()
        redis_task = asyncio.create_task(subscriber.listen(handle_redis_message))

        # Heartbeat tracking (reuse same configuration as notifications)
        waiting_for_pong = False
        ping_sent_at = 0.0
        last_activity = time.time()

        while True:
            # Calculate appropriate timeout based on state
            if waiting_for_pong:
                # When waiting for pong, use remaining pong timeout
                remaining = PONG_TIMEOUT - (time.time() - ping_sent_at)
                if remaining <= 0:
                    logger.warning(f"Pong timeout for user {user_id} in project {project_id}, disconnecting")
                    break
                timeout = remaining
            else:
                # When not waiting, use remaining ping interval
                remaining = PING_INTERVAL - (time.time() - last_activity)
                if remaining <= 0:
                    # Time to send ping immediately
                    try:
                        await websocket.send_json({"type": "ping"})
                        waiting_for_pong = True
                        ping_sent_at = time.time()
                        last_activity = ping_sent_at
                        timeout = PONG_TIMEOUT
                    except Exception:
                        break
                else:
                    timeout = remaining

            try:
                # Wait for messages from client
                data = await asyncio.wait_for(
                    websocket.receive_json(),
                    timeout=timeout
                )
                last_activity = time.time()
                msg_type = data.get("type")

                # Handle ping message from client
                if msg_type == "ping":
                    await websocket.send_json({"type": "pong"})
                # Handle pong message from client (response to our ping)
                elif msg_type == "pong":
                    waiting_for_pong = False
                    logger.debug(f"Pong received from user {user_id} in project {project_id}")
            except asyncio.TimeoutError:
                if waiting_for_pong:
                    # Strict timeout check
                    if time.time() - ping_sent_at >= PONG_TIMEOUT:
                        logger.warning(f"Pong timeout for user {user_id} in project {project_id}, disconnecting")
                        break
                # If not waiting_for_pong, loop will handle sending ping at top
    except WebSocketDisconnect:
        logger.info(f"User {user_id} disconnected from project {project_id} WebSocket")
    except Exception as e:
        logger.error(f"WebSocket error for project {project_id}: {e}")
    finally:
        # Clean up Redis subscription
        if redis_task:
            redis_task.cancel()
            try:
                await redis_task
            except asyncio.CancelledError:
                pass
        await subscriber.stop()
        await manager.leave_project(websocket, user_id, project_id)
        logger.info(f"User {user_id} left project {project_id} room")


@@ -1,7 +1,10 @@
"""Redis Pub/Sub service for cross-process notification broadcasting."""
import asyncio
import json
import logging
import uuid
from datetime import datetime
from typing import Optional, Callable, Any
import redis.asyncio as aioredis
@@ -9,6 +12,10 @@ from app.core.config import settings
logger = logging.getLogger(__name__)
# Redis retry configuration
MAX_REDIS_RETRIES = 3
REDIS_RETRY_DELAY = 0.5 # seconds (base delay for exponential backoff)
# Global async Redis client for pub/sub
_pubsub_redis: Optional[aioredis.Redis] = None
@@ -18,6 +25,11 @@ def get_channel_name(user_id: str) -> str:
return f"notifications:{user_id}"
def get_project_channel_name(project_id: str) -> str:
"""Get the Redis channel name for project task events."""
return f"project:{project_id}:tasks"
async def get_pubsub_redis() -> aioredis.Redis:
"""Get or create the async Redis client for pub/sub."""
global _pubsub_redis
@@ -120,3 +132,187 @@ class NotificationSubscriber:
    @property
    def is_running(self) -> bool:
        return self._running


async def _reset_pubsub_redis() -> None:
    """Reset the Redis connection on failure."""
    global _pubsub_redis
    if _pubsub_redis is not None:
        try:
            await _pubsub_redis.close()
        except Exception:
            pass
        _pubsub_redis = None
async def publish_task_event(
    project_id: str,
    event_type: str,
    task_data: dict,
    triggered_by: str
) -> bool:
    """
    Publish a task event to a project's channel with retry logic.

    Args:
        project_id: The project ID
        event_type: Event type (task_created, task_updated, task_status_changed, task_deleted, task_assigned)
        task_data: The task data to include in the event
        triggered_by: User ID who triggered this event

    Returns:
        True if published successfully, False otherwise
    """
    channel = get_project_channel_name(project_id)
    message = json.dumps({
        "type": event_type,
        "event_id": str(uuid.uuid4()),  # Unique event ID for multi-tab deduplication
        "data": task_data,
        "triggered_by": triggered_by,
        "timestamp": datetime.utcnow().isoformat(),
    }, default=str)

    for attempt in range(MAX_REDIS_RETRIES):
        try:
            redis_client = await get_pubsub_redis()
            # Test connection with ping before publishing
            await redis_client.ping()
            await redis_client.publish(channel, message)
            logger.debug(f"Published task event '{event_type}' to channel {channel}")
            return True
        except Exception as e:
            logger.warning(f"Redis publish attempt {attempt + 1}/{MAX_REDIS_RETRIES} failed: {e}")
            if attempt < MAX_REDIS_RETRIES - 1:
                # Exponential backoff (0.5s, 1s, 2s, ...)
                await asyncio.sleep(REDIS_RETRY_DELAY * (2 ** attempt))
                # Reset connection on failure
                await _reset_pubsub_redis()
            else:
                logger.error(f"Failed to publish task event '{event_type}' after {MAX_REDIS_RETRIES} attempts")
                return False
    return False
class ProjectTaskSubscriber:
    """
    Subscriber for project task events via Redis Pub/Sub.
    Used by WebSocket connections to receive real-time task updates.
    Includes automatic reconnection handling.
    """

    def __init__(self, project_id: str):
        self.project_id = project_id
        self.channel = get_project_channel_name(project_id)
        self.pubsub: Optional[aioredis.client.PubSub] = None
        self._running = False
        self._reconnect_attempts = 0

    async def start(self) -> None:
        """Start subscribing to the project's task channel with retry logic."""
        for attempt in range(MAX_REDIS_RETRIES):
            try:
                redis_client = await get_pubsub_redis()
                # Test connection health
                await redis_client.ping()
                self.pubsub = redis_client.pubsub()
                await self.pubsub.subscribe(self.channel)
                self._running = True
                self._reconnect_attempts = 0
                logger.debug(f"Subscribed to project task channel {self.channel}")
                return
            except Exception as e:
                logger.warning(f"Redis subscribe attempt {attempt + 1}/{MAX_REDIS_RETRIES} failed: {e}")
                if attempt < MAX_REDIS_RETRIES - 1:
                    # Exponential backoff before retrying
                    await asyncio.sleep(REDIS_RETRY_DELAY * (2 ** attempt))
                    await _reset_pubsub_redis()
                else:
                    logger.error(f"Failed to subscribe to channel {self.channel} after {MAX_REDIS_RETRIES} attempts")
                    raise
    async def _reconnect(self) -> bool:
        """Attempt to reconnect to Redis and resubscribe."""
        self._reconnect_attempts += 1
        if self._reconnect_attempts > MAX_REDIS_RETRIES:
            logger.error(f"Max reconnection attempts reached for channel {self.channel}")
            return False

        logger.info(f"Attempting to reconnect to Redis (attempt {self._reconnect_attempts}/{MAX_REDIS_RETRIES})")

        # Clean up old pubsub
        if self.pubsub:
            try:
                await self.pubsub.close()
            except Exception:
                pass
            self.pubsub = None

        # Reset global connection
        await _reset_pubsub_redis()

        # Wait with exponential backoff
        await asyncio.sleep(REDIS_RETRY_DELAY * (2 ** (self._reconnect_attempts - 1)))

        try:
            redis_client = await get_pubsub_redis()
            await redis_client.ping()
            self.pubsub = redis_client.pubsub()
            await self.pubsub.subscribe(self.channel)
            self._reconnect_attempts = 0
            logger.info(f"Successfully reconnected to channel {self.channel}")
            return True
        except Exception as e:
            logger.warning(f"Reconnection attempt failed: {e}")
            return False
    async def stop(self) -> None:
        """Stop subscribing and clean up."""
        self._running = False
        if self.pubsub:
            try:
                await self.pubsub.unsubscribe(self.channel)
                await self.pubsub.close()
            except Exception as e:
                logger.warning(f"Error during pubsub cleanup: {e}")
            self.pubsub = None
        logger.debug(f"Unsubscribed from project task channel {self.channel}")

    async def listen(self, callback: Callable[[dict], Any]) -> None:
        """
        Listen for task events and call the callback for each event.
        Includes automatic reconnection on connection failures.

        Args:
            callback: Async function to call with each task event dict.
                The dict contains: type, data, triggered_by
        """
        if not self.pubsub:
            raise RuntimeError("Subscriber not started. Call start() first.")

        while self._running:
            try:
                async for message in self.pubsub.listen():
                    if not self._running:
                        break
                    if message["type"] == "message":
                        try:
                            data = json.loads(message["data"])
                            await callback(data)
                        except json.JSONDecodeError:
                            logger.warning(f"Invalid JSON in task event: {message['data']}")
                        except Exception as e:
                            logger.error(f"Error processing task event: {e}")
            except Exception as e:
                if not self._running:
                    break
                logger.warning(f"Redis connection error in task listener: {e}")
                # Attempt to reconnect
                if await self._reconnect():
                    continue  # Resume listening after successful reconnection
                else:
                    logger.error(f"Failed to recover connection for channel {self.channel}")
                    break

    @property
    def is_running(self) -> bool:
        return self._running
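
A round-trip usage sketch for these primitives (assumes a reachable Redis instance per `app.core.config.settings`; ids are placeholders):

```python
import asyncio

from app.core.redis_pubsub import ProjectTaskSubscriber, publish_task_event


async def demo(project_id: str):
    subscriber = ProjectTaskSubscriber(project_id)
    await subscriber.start()

    async def on_event(event: dict):
        print("received", event["type"], event["event_id"])

    listener = asyncio.create_task(subscriber.listen(on_event))

    await publish_task_event(
        project_id=project_id,
        event_type="task_updated",
        task_data={"task_id": "demo-task"},
        triggered_by="demo-user",
    )

    await asyncio.sleep(0.5)  # give the listener a moment to deliver
    await subscriber.stop()
    listener.cancel()


asyncio.run(demo("demo-project"))
```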


@@ -1,17 +1,23 @@
import json
import asyncio
from typing import Dict, Set, Optional
import logging
from typing import Dict, Set, Optional, Tuple
from fastapi import WebSocket
from app.core.redis import get_redis_sync
logger = logging.getLogger(__name__)
class ConnectionManager:
    """Manager for WebSocket connections."""

    def __init__(self):
        # user_id -> set of WebSocket connections
        # user_id -> set of WebSocket connections (for notifications)
        self.active_connections: Dict[str, Set[WebSocket]] = {}
        # project_id -> set of (user_id, WebSocket) tuples (for project sync)
        self.project_connections: Dict[str, Set[Tuple[str, WebSocket]]] = {}
        self._lock = asyncio.Lock()
        self._project_lock = asyncio.Lock()

    async def connect(self, websocket: WebSocket, user_id: str):
        """Accept and track a new WebSocket connection."""
@@ -52,6 +58,93 @@ class ConnectionManager:
        """Check if a user has any active connections."""
        return user_id in self.active_connections and len(self.active_connections[user_id]) > 0
    # Project room management methods

    async def join_project(self, websocket: WebSocket, user_id: str, project_id: str):
        """
        Add user to a project room for real-time task sync.

        Args:
            websocket: The WebSocket connection
            user_id: The user's ID
            project_id: The project to join
        """
        async with self._project_lock:
            if project_id not in self.project_connections:
                self.project_connections[project_id] = set()
            self.project_connections[project_id].add((user_id, websocket))
            logger.debug(f"User {user_id} joined project room {project_id}")

    async def leave_project(self, websocket: WebSocket, user_id: str, project_id: str):
        """
        Remove user from a project room.

        Args:
            websocket: The WebSocket connection
            user_id: The user's ID
            project_id: The project to leave
        """
        async with self._project_lock:
            if project_id in self.project_connections:
                self.project_connections[project_id].discard((user_id, websocket))
                if not self.project_connections[project_id]:
                    del self.project_connections[project_id]
            logger.debug(f"User {user_id} left project room {project_id}")

    async def broadcast_to_project(
        self,
        project_id: str,
        message: dict,
        exclude_user_id: Optional[str] = None
    ):
        """
        Broadcast message to all users in a project room.

        Args:
            project_id: The project room to broadcast to
            message: The message to send
            exclude_user_id: Optional user ID to exclude from broadcast (e.g., the sender)
        """
        # Create snapshot while holding lock to prevent race condition
        async with self._project_lock:
            if project_id not in self.project_connections:
                return
            connections_snapshot = list(self.project_connections[project_id])

        disconnected = set()
        for user_id, websocket in connections_snapshot:
            # Skip excluded user (sender)
            if exclude_user_id and user_id == exclude_user_id:
                continue
            try:
                await websocket.send_json(message)
            except Exception as e:
                logger.warning(f"Failed to send message to user {user_id} in project {project_id}: {e}")
                disconnected.add((user_id, websocket))

        # Clean up disconnected connections
        if disconnected:
            async with self._project_lock:
                for conn in disconnected:
                    if project_id in self.project_connections:
                        self.project_connections[project_id].discard(conn)
                        if not self.project_connections[project_id]:
                            del self.project_connections[project_id]

    def get_project_user_count(self, project_id: str) -> int:
        """Get the number of unique users in a project room."""
        if project_id not in self.project_connections:
            return 0
        unique_users = set(user_id for user_id, _ in self.project_connections[project_id])
        return len(unique_users)

    def is_user_in_project(self, user_id: str, project_id: str) -> bool:
        """Check if a user has any active connections to a project room."""
        if project_id not in self.project_connections:
            return False
        return any(uid == user_id for uid, _ in self.project_connections[project_id])


# Global connection manager instance
manager = ConnectionManager()
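
A hypothetical call site for `broadcast_to_project` (ids are placeholders; note the WebSocket endpoint in this commit forwards Redis events per-socket instead, so this only illustrates the manager API):

```python
import asyncio

from app.services.websocket_manager import manager


async def notify_viewers():
    # Push a message to everyone viewing the project, excluding the sender.
    await manager.broadcast_to_project(
        project_id="project-uuid",
        message={"type": "task_updated", "data": {"task_id": "task-uuid"}},
        exclude_user_id="acting-user-uuid",
    )


asyncio.run(notify_viewers())
```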


@@ -155,9 +155,19 @@ def invalidate_user_workload_cache(user_id: str) -> None:
    """
    Invalidate all cached workload data for a user.

    This clears:
    1. User-specific workload cache (workload:user:{user_id}:*)
    2. All heatmap caches (workload:heatmap:*) since heatmap aggregates may include this user

    Note: This uses pattern matching which may be slow for large datasets.
    For Phase 1, we rely on TTL expiration instead of active invalidation.
    """
    pattern = f"workload:*:{user_id}:*"
    for key in redis_client.scan_iter(match=pattern):
    # Clear user-specific workload cache
    user_pattern = f"workload:user:{user_id}:*"
    for key in redis_client.scan_iter(match=user_pattern):
        redis_client.delete(key)

    # Clear all heatmap caches since they aggregate user data
    heatmap_pattern = "workload:heatmap:*"
    for key in redis_client.scan_iter(match=heatmap_pattern):
        redis_client.delete(key)