feat: implement 8 OpenSpec proposals for security, reliability, and UX improvements

## Security Enhancements (P0)
- Add input validation with max_length and numeric range constraints
- Implement WebSocket token authentication via first message
- Add path traversal prevention in file storage service

## Permission Enhancements (P0)
- Add project member management for cross-department access
- Implement is_department_manager flag for workload visibility

## Cycle Detection (P0)
- Add DFS-based cycle detection for task dependencies
- Add formula field circular reference detection
- Display user-friendly cycle path visualization

## Concurrency & Reliability (P1)
- Implement optimistic locking with version field (409 Conflict on mismatch)
- Add trigger retry mechanism with exponential backoff (1s, 2s, 4s)
- Implement cascade restore for soft-deleted tasks

## Rate Limiting (P1)
- Add tiered rate limits: standard (60/min), sensitive (20/min), heavy (5/min)
- Apply rate limits to tasks, reports, attachments, and comments

## Frontend Improvements (P1)
- Add responsive sidebar with hamburger menu for mobile
- Improve touch-friendly UI with proper tap target sizes
- Complete i18n translations for all components

## Backend Reliability (P2)
- Configure database connection pool (size=10, overflow=20)
- Add Redis fallback mechanism with message queue
- Add blocker check before task deletion

## API Enhancements (P3)
- Add standardized response wrapper utility
- Add /health/ready and /health/live endpoints
- Implement project templates with status/field copying

## Tests Added
- test_input_validation.py - Schema and path traversal tests
- test_concurrency_reliability.py - Optimistic locking and retry tests
- test_backend_reliability.py - Connection pool and Redis tests
- test_api_enhancements.py - Health check and template tests

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
beabigegg
2026-01-10 22:13:43 +08:00
parent 96210c7ad4
commit 3bdc6ff1c9
106 changed files with 9704 additions and 429 deletions

View File

@@ -4,8 +4,10 @@ import re
import asyncio
import logging
import threading
import os
from datetime import datetime, timezone
from typing import List, Optional, Dict, Set
from collections import deque
from sqlalchemy.orm import Session
from sqlalchemy import event
@@ -22,9 +24,152 @@ _pending_publish: Dict[int, List[dict]] = {}
# Track which sessions have handlers registered
_registered_sessions: Set[int] = set()
# Redis fallback queue configuration
# All three values are env-overridable so deployments can tune them without code changes.
REDIS_FALLBACK_MAX_QUEUE_SIZE = int(os.getenv("REDIS_FALLBACK_MAX_QUEUE_SIZE", "1000"))
REDIS_FALLBACK_RETRY_INTERVAL = int(os.getenv("REDIS_FALLBACK_RETRY_INTERVAL", "5"))  # seconds between retry passes
REDIS_FALLBACK_MAX_RETRIES = int(os.getenv("REDIS_FALLBACK_MAX_RETRIES", "10"))
# Redis fallback queue for failed publishes
# _redis_fallback_lock guards every piece of mutable fallback state below.
_redis_fallback_lock = threading.Lock()
# maxlen is a safety net only: _add_to_fallback_queue checks the size explicitly
# and drops NEW items when full (maxlen alone would silently drop the OLDEST).
_redis_fallback_queue: deque = deque(maxlen=REDIS_FALLBACK_MAX_QUEUE_SIZE)
# Single pending/running retry timer; None when no retry pass is scheduled.
_redis_retry_timer: Optional[threading.Timer] = None
# Flipped to False on publish failure; restored once the queue fully drains.
_redis_available: bool = True
# Count of publishes queued since the last successful retry pass.
_redis_consecutive_failures: int = 0
def _add_to_fallback_queue(user_id: str, data: dict, retry_count: int = 0) -> bool:
    """Queue a notification whose Redis publish failed, for later retry.

    Args:
        user_id: Target user for the notification.
        data: Notification payload to re-publish.
        retry_count: How many delivery attempts have already been made.

    Returns:
        True when the item was enqueued; False when the queue is at
        capacity and the notification had to be dropped.
    """
    global _redis_consecutive_failures
    with _redis_fallback_lock:
        # Drop the NEW item when full rather than letting the deque's
        # maxlen silently evict the oldest queued notification.
        if len(_redis_fallback_queue) >= REDIS_FALLBACK_MAX_QUEUE_SIZE:
            logger.warning(
                "Redis fallback queue is full (%d items), dropping notification for user %s",
                REDIS_FALLBACK_MAX_QUEUE_SIZE, user_id
            )
            return False
        entry = {
            "user_id": user_id,
            "data": data,
            "retry_count": retry_count,
            "queued_at": datetime.now(timezone.utc).isoformat(),
        }
        _redis_fallback_queue.append(entry)
        _redis_consecutive_failures += 1
        logger.debug("Added notification to fallback queue (size: %d)", len(_redis_fallback_queue))
        # Start retry mechanism if not already running. Safe to call while
        # holding the lock: _ensure_retry_timer_running never re-acquires it.
        _ensure_retry_timer_running()
        return True
def _ensure_retry_timer_running():
    """Start the fallback retry timer unless one is already pending or running."""
    global _redis_retry_timer
    current = _redis_retry_timer
    if current is not None and current.is_alive():
        # A retry pass is already scheduled/in flight; nothing to do.
        return
    timer = threading.Timer(REDIS_FALLBACK_RETRY_INTERVAL, _process_fallback_queue)
    timer.daemon = True  # never block interpreter shutdown on a pending retry
    _redis_retry_timer = timer
    timer.start()
def _process_fallback_queue():
    """Retry every queued notification once, then reschedule if work remains.

    Runs on a daemon Timer thread. One pass: drain the queue under the lock,
    attempt a sync Redis publish for each item, re-queue failures with an
    incremented retry_count, and schedule the next pass only if the queue is
    still non-empty afterwards.
    """
    global _redis_available, _redis_consecutive_failures, _redis_retry_timer
    items_to_retry = []
    with _redis_fallback_lock:
        # Get all items from queue
        while _redis_fallback_queue:
            items_to_retry.append(_redis_fallback_queue.popleft())
        if not items_to_retry:
            # Clear the timer handle while still holding the lock so a
            # concurrent _add_to_fallback_queue sees None and starts a new one.
            _redis_retry_timer = None
            return
    logger.info("Processing %d items from Redis fallback queue", len(items_to_retry))
    failed_items = []
    success_count = 0
    for item in items_to_retry:
        user_id = item["user_id"]
        data = item["data"]
        retry_count = item["retry_count"]
        if retry_count >= REDIS_FALLBACK_MAX_RETRIES:
            # Give up on this item entirely; it is dropped, not re-queued.
            logger.warning(
                "Notification for user %s exceeded max retries (%d), dropping",
                user_id, REDIS_FALLBACK_MAX_RETRIES
            )
            continue
        try:
            redis_client = get_redis_sync()
            channel = get_channel_name(user_id)
            message = json.dumps(data, default=str)
            redis_client.publish(channel, message)
            success_count += 1
        except Exception as e:
            # debug (not error) level: failures here are expected while Redis is down
            logger.debug("Retry failed for user %s: %s", user_id, e)
            failed_items.append({
                **item,
                "retry_count": retry_count + 1,
            })
    # Re-queue failed items
    if failed_items:
        with _redis_fallback_lock:
            for item in failed_items:
                # Re-check capacity: new items may have arrived while we retried.
                if len(_redis_fallback_queue) < REDIS_FALLBACK_MAX_QUEUE_SIZE:
                    _redis_fallback_queue.append(item)
    # Log recovery if we had successes
    if success_count > 0:
        with _redis_fallback_lock:
            _redis_consecutive_failures = 0
            # Only declare Redis healthy once the backlog is fully drained.
            if not _redis_fallback_queue:
                _redis_available = True
                logger.info(
                    "Redis connection recovered. Successfully processed %d notifications from fallback queue",
                    success_count
                )
    # Schedule next retry if queue is not empty
    with _redis_fallback_lock:
        if _redis_fallback_queue:
            _redis_retry_timer = threading.Timer(REDIS_FALLBACK_RETRY_INTERVAL, _process_fallback_queue)
            _redis_retry_timer.daemon = True
            _redis_retry_timer.start()
        else:
            _redis_retry_timer = None
def get_redis_fallback_status() -> dict:
    """Return a point-in-time snapshot of the Redis fallback queue state.

    Intended for health-check endpoints; all reads happen under the
    fallback lock so the numbers are mutually consistent.
    """
    with _redis_fallback_lock:
        snapshot = {
            "queue_size": len(_redis_fallback_queue),
            "max_queue_size": REDIS_FALLBACK_MAX_QUEUE_SIZE,
            "redis_available": _redis_available,
            "consecutive_failures": _redis_consecutive_failures,
            "retry_interval_seconds": REDIS_FALLBACK_RETRY_INTERVAL,
            "max_retries": REDIS_FALLBACK_MAX_RETRIES,
        }
    return snapshot
def _sync_publish(user_id: str, data: dict):
"""Sync fallback to publish notification via Redis when no event loop available."""
global _redis_available
try:
redis_client = get_redis_sync()
channel = get_channel_name(user_id)
@@ -33,6 +178,10 @@ def _sync_publish(user_id: str, data: dict):
logger.debug(f"Sync published notification to channel {channel}")
except Exception as e:
logger.error(f"Failed to sync publish notification to Redis: {e}")
# Add to fallback queue for retry
with _redis_fallback_lock:
_redis_available = False
_add_to_fallback_queue(user_id, data)
def _cleanup_session(session_id: int, remove_registration: bool = True):
@@ -86,10 +235,16 @@ def _register_session_handlers(db: Session, session_id: int):
async def _async_publish(user_id: str, data: dict):
    """Async helper to publish notification to Redis.

    On failure, marks Redis unavailable and hands the payload to the
    fallback queue so the retry timer can re-deliver it later.
    """
    global _redis_available
    try:
        await redis_publish(user_id, data)
    except Exception as e:
        logger.error(f"Failed to publish notification to Redis: {e}")
        # Add to fallback queue for retry.
        with _redis_fallback_lock:
            _redis_available = False
        # NOTE: this call must stay OUTSIDE the `with` above —
        # _add_to_fallback_queue re-acquires _redis_fallback_lock, and
        # threading.Lock is non-reentrant (holding it here would deadlock).
        _add_to_fallback_queue(user_id, data)
class NotificationService: