feat: implement 8 OpenSpec proposals for security, reliability, and UX improvements

## Security Enhancements (P0)
- Add input validation with max_length and numeric range constraints
- Implement WebSocket token authentication via first message
- Add path traversal prevention in file storage service

## Permission Enhancements (P0)
- Add project member management for cross-department access
- Implement is_department_manager flag for workload visibility

## Cycle Detection (P0)
- Add DFS-based cycle detection for task dependencies
- Add formula field circular reference detection
- Display user-friendly cycle path visualization

## Concurrency & Reliability (P1)
- Implement optimistic locking with version field (409 Conflict on mismatch)
- Add trigger retry mechanism with exponential backoff (1s, 2s, 4s)
- Implement cascade restore for soft-deleted tasks

## Rate Limiting (P1)
- Add tiered rate limits: standard (60/min), sensitive (20/min), heavy (5/min)
- Apply rate limits to tasks, reports, attachments, and comments

## Frontend Improvements (P1)
- Add responsive sidebar with hamburger menu for mobile
- Improve touch-friendly UI with proper tap target sizes
- Complete i18n translations for all components

## Backend Reliability (P2)
- Configure database connection pool (size=10, overflow=20)
- Add Redis fallback mechanism with message queue
- Add blocker check before task deletion

## API Enhancements (P3)
- Add standardized response wrapper utility
- Add /health/ready and /health/live endpoints
- Implement project templates with status/field copying

## Tests Added
- test_input_validation.py - Schema and path traversal tests
- test_concurrency_reliability.py - Optimistic locking and retry tests
- test_backend_reliability.py - Connection pool and Redis tests
- test_api_enhancements.py - Health check and template tests

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
beabigegg
2026-01-10 22:13:43 +08:00
parent 96210c7ad4
commit 3bdc6ff1c9
106 changed files with 9704 additions and 429 deletions

View File

@@ -4,8 +4,10 @@ import re
import asyncio
import logging
import threading
import os
from datetime import datetime, timezone
from typing import List, Optional, Dict, Set
from collections import deque
from sqlalchemy.orm import Session
from sqlalchemy import event
@@ -22,9 +24,152 @@ _pending_publish: Dict[int, List[dict]] = {}
# Track which sessions have handlers registered
_registered_sessions: Set[int] = set()
# Redis fallback queue configuration
# All three values are env-overridable so deployments can tune them without code changes.
REDIS_FALLBACK_MAX_QUEUE_SIZE = int(os.getenv("REDIS_FALLBACK_MAX_QUEUE_SIZE", "1000"))
REDIS_FALLBACK_RETRY_INTERVAL = int(os.getenv("REDIS_FALLBACK_RETRY_INTERVAL", "5"))  # seconds between retry passes
REDIS_FALLBACK_MAX_RETRIES = int(os.getenv("REDIS_FALLBACK_MAX_RETRIES", "10"))
# Redis fallback queue for failed publishes
# _redis_fallback_lock guards every piece of mutable fallback state below.
_redis_fallback_lock = threading.Lock()
# maxlen is a safety net only: _add_to_fallback_queue checks the size explicitly
# and drops NEW items when full (maxlen alone would silently drop the OLDEST).
_redis_fallback_queue: deque = deque(maxlen=REDIS_FALLBACK_MAX_QUEUE_SIZE)
# Single pending/running retry timer; None when no retry pass is scheduled.
_redis_retry_timer: Optional[threading.Timer] = None
# Flipped to False on publish failure; restored once the queue fully drains.
_redis_available: bool = True
# Count of publishes queued since the last successful retry pass.
_redis_consecutive_failures: int = 0
def _add_to_fallback_queue(user_id: str, data: dict, retry_count: int = 0) -> bool:
    """Queue a notification whose Redis publish failed, for later retry.

    Args:
        user_id: Target user for the notification.
        data: Notification payload to re-publish.
        retry_count: How many delivery attempts have already been made.

    Returns:
        True when the item was enqueued; False when the queue is at
        capacity and the notification had to be dropped.
    """
    global _redis_consecutive_failures
    with _redis_fallback_lock:
        # Drop the NEW item when full rather than letting the deque's
        # maxlen silently evict the oldest queued notification.
        if len(_redis_fallback_queue) >= REDIS_FALLBACK_MAX_QUEUE_SIZE:
            logger.warning(
                "Redis fallback queue is full (%d items), dropping notification for user %s",
                REDIS_FALLBACK_MAX_QUEUE_SIZE, user_id
            )
            return False
        entry = {
            "user_id": user_id,
            "data": data,
            "retry_count": retry_count,
            "queued_at": datetime.now(timezone.utc).isoformat(),
        }
        _redis_fallback_queue.append(entry)
        _redis_consecutive_failures += 1
        logger.debug("Added notification to fallback queue (size: %d)", len(_redis_fallback_queue))
        # Start retry mechanism if not already running. Safe to call while
        # holding the lock: _ensure_retry_timer_running never re-acquires it.
        _ensure_retry_timer_running()
        return True
def _ensure_retry_timer_running():
    """Start the fallback retry timer unless one is already pending or running."""
    global _redis_retry_timer
    current = _redis_retry_timer
    if current is not None and current.is_alive():
        # A retry pass is already scheduled/in flight; nothing to do.
        return
    timer = threading.Timer(REDIS_FALLBACK_RETRY_INTERVAL, _process_fallback_queue)
    timer.daemon = True  # never block interpreter shutdown on a pending retry
    _redis_retry_timer = timer
    timer.start()
def _process_fallback_queue():
    """Retry every queued notification once, then reschedule if work remains.

    Runs on a daemon Timer thread. One pass: drain the queue under the lock,
    attempt a sync Redis publish for each item, re-queue failures with an
    incremented retry_count, and schedule the next pass only if the queue is
    still non-empty afterwards.
    """
    global _redis_available, _redis_consecutive_failures, _redis_retry_timer
    items_to_retry = []
    with _redis_fallback_lock:
        # Get all items from queue
        while _redis_fallback_queue:
            items_to_retry.append(_redis_fallback_queue.popleft())
        if not items_to_retry:
            # Clear the timer handle while still holding the lock so a
            # concurrent _add_to_fallback_queue sees None and starts a new one.
            _redis_retry_timer = None
            return
    logger.info("Processing %d items from Redis fallback queue", len(items_to_retry))
    failed_items = []
    success_count = 0
    for item in items_to_retry:
        user_id = item["user_id"]
        data = item["data"]
        retry_count = item["retry_count"]
        if retry_count >= REDIS_FALLBACK_MAX_RETRIES:
            # Give up on this item entirely; it is dropped, not re-queued.
            logger.warning(
                "Notification for user %s exceeded max retries (%d), dropping",
                user_id, REDIS_FALLBACK_MAX_RETRIES
            )
            continue
        try:
            redis_client = get_redis_sync()
            channel = get_channel_name(user_id)
            message = json.dumps(data, default=str)
            redis_client.publish(channel, message)
            success_count += 1
        except Exception as e:
            # debug (not error) level: failures here are expected while Redis is down
            logger.debug("Retry failed for user %s: %s", user_id, e)
            failed_items.append({
                **item,
                "retry_count": retry_count + 1,
            })
    # Re-queue failed items
    if failed_items:
        with _redis_fallback_lock:
            for item in failed_items:
                # Re-check capacity: new items may have arrived while we retried.
                if len(_redis_fallback_queue) < REDIS_FALLBACK_MAX_QUEUE_SIZE:
                    _redis_fallback_queue.append(item)
    # Log recovery if we had successes
    if success_count > 0:
        with _redis_fallback_lock:
            _redis_consecutive_failures = 0
            # Only declare Redis healthy once the backlog is fully drained.
            if not _redis_fallback_queue:
                _redis_available = True
                logger.info(
                    "Redis connection recovered. Successfully processed %d notifications from fallback queue",
                    success_count
                )
    # Schedule next retry if queue is not empty
    with _redis_fallback_lock:
        if _redis_fallback_queue:
            _redis_retry_timer = threading.Timer(REDIS_FALLBACK_RETRY_INTERVAL, _process_fallback_queue)
            _redis_retry_timer.daemon = True
            _redis_retry_timer.start()
        else:
            _redis_retry_timer = None
def get_redis_fallback_status() -> dict:
    """Return a point-in-time snapshot of the Redis fallback queue state.

    Intended for health-check endpoints; all reads happen under the
    fallback lock so the numbers are mutually consistent.
    """
    with _redis_fallback_lock:
        snapshot = {
            "queue_size": len(_redis_fallback_queue),
            "max_queue_size": REDIS_FALLBACK_MAX_QUEUE_SIZE,
            "redis_available": _redis_available,
            "consecutive_failures": _redis_consecutive_failures,
            "retry_interval_seconds": REDIS_FALLBACK_RETRY_INTERVAL,
            "max_retries": REDIS_FALLBACK_MAX_RETRIES,
        }
    return snapshot
def _sync_publish(user_id: str, data: dict):
"""Sync fallback to publish notification via Redis when no event loop available."""
global _redis_available
try:
redis_client = get_redis_sync()
channel = get_channel_name(user_id)
@@ -33,6 +178,10 @@ def _sync_publish(user_id: str, data: dict):
logger.debug(f"Sync published notification to channel {channel}")
except Exception as e:
logger.error(f"Failed to sync publish notification to Redis: {e}")
# Add to fallback queue for retry
with _redis_fallback_lock:
_redis_available = False
_add_to_fallback_queue(user_id, data)
def _cleanup_session(session_id: int, remove_registration: bool = True):
@@ -86,10 +235,16 @@ def _register_session_handlers(db: Session, session_id: int):
async def _async_publish(user_id: str, data: dict):
    """Async helper to publish notification to Redis.

    On failure, marks Redis unavailable and hands the payload to the
    fallback queue so the retry timer can re-deliver it later.
    """
    global _redis_available
    try:
        await redis_publish(user_id, data)
    except Exception as e:
        logger.error(f"Failed to publish notification to Redis: {e}")
        # Add to fallback queue for retry.
        with _redis_fallback_lock:
            _redis_available = False
        # NOTE: this call must stay OUTSIDE the `with` above —
        # _add_to_fallback_queue re-acquires _redis_fallback_lock, and
        # threading.Lock is non-reentrant (holding it here would deadlock).
        _add_to_fallback_queue(user_id, data)
class NotificationService: