feat: implement 5 QA-driven security and quality proposals

Implemented proposals from comprehensive QA review: 1. extend-csrf-protection - Add POST to CSRF protected methods in frontend - Global CSRF middleware for all state-changing operations - Update tests with CSRF token fixtures 2. tighten-cors-websocket-security - Replace wildcard CORS with explicit method/header lists - Disable query parameter auth in production (code 4002) - Add per-user WebSocket connection limit (max 5, code 4005) 3. shorten-jwt-expiry - Reduce JWT expiry from 7 days to 60 minutes - Add refresh token support with 7-day expiry - Implement token rotation on refresh - Frontend auto-refresh when token near expiry (<5 min) 4. fix-frontend-quality - Add React.lazy() code splitting for all pages - Fix useCallback dependency arrays (Dashboard, Comments) - Add localStorage data validation in AuthContext - Complete i18n for AttachmentUpload component 5. enhance-backend-validation - Add SecurityAuditMiddleware for access denied logging - Add ErrorSanitizerMiddleware for production error messages - Protect /health/detailed with admin authentication - Add input length validation (comment 5000, desc 10000) All 521 backend tests passing. Frontend builds successfully. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-12 23:19:05 +08:00
parent df50d5e7f8
commit 35c90fe76b
48 changed files with 2132 additions and 403 deletions
--- a/backend/app/middleware/error_sanitizer.py
+++ b/backend/app/middleware/error_sanitizer.py
@@ -0,0 +1,187 @@
+"""Error message sanitization middleware for production environments.
+
+This middleware intercepts error responses and sanitizes them to prevent
+information disclosure in production environments. Detailed error messages
+are only shown when DEBUG mode is enabled.
+"""
+import json
+import logging
+from typing import Optional
+from starlette.middleware.base import BaseHTTPMiddleware
+from starlette.requests import Request
+from starlette.responses import Response, JSONResponse
+
+from app.core.config import settings
+
+logger = logging.getLogger(__name__)
+
+# Generic error messages for production
+GENERIC_ERROR_MESSAGES = {
+    400: "Bad Request",
+    401: "Authentication required",
+    403: "Access denied",
+    404: "Resource not found",
+    405: "Method not allowed",
+    409: "Request conflict",
+    422: "Validation error",
+    429: "Too many requests",
+    500: "Internal server error",
+    502: "Service unavailable",
+    503: "Service temporarily unavailable",
+    504: "Request timeout",
+}
+
+# Status codes that should preserve their original message even in production
+# These are typically user-facing validation errors that don't leak sensitive info
+PRESERVE_MESSAGE_CODES = {
+    400,  # Bad request - users need to know what's wrong with their request
+    401,  # Unauthorized - users need to know why auth failed
+    403,  # Forbidden - users need to know what permission they lack
+    404,  # Not found - usually safe to preserve
+    409,  # Conflict - users need to know about conflicts
+    422,  # Validation errors - users need to know what to fix
+}
+
+# Patterns that indicate sensitive information in error messages
+SENSITIVE_PATTERNS = [
+    "traceback",
+    "stack trace",
+    "file path",
+    "/usr/",
+    "/var/",
+    "/home/",
+    "connection refused",
+    "connection error",
+    "timeout connecting",
+    "database error",
+    "sql",
+    "query failed",
+    "password",
+    "secret",
+    "token",
+    "key=",
+    "credentials",
+    ".py line",
+    "exception in",
+]
+
+
+def _contains_sensitive_info(message: str) -> bool:
+    """Check if an error message contains potentially sensitive information."""
+    if not message:
+        return False
+    message_lower = message.lower()
+    return any(pattern.lower() in message_lower for pattern in SENSITIVE_PATTERNS)
+
+
+def _sanitize_detail(detail: any, status_code: int) -> any:
+    """Sanitize error detail, removing sensitive information in production.
+
+    Args:
+        detail: The error detail (can be string, list, or dict)
+        status_code: The HTTP status code
+
+    Returns:
+        Sanitized detail for production, or original detail for debug mode
+    """
+    # In debug mode, return original detail
+    if settings.DEBUG:
+        return detail
+
+    # For preserved status codes, keep the detail if it doesn't contain sensitive info
+    if status_code in PRESERVE_MESSAGE_CODES:
+        if isinstance(detail, str) and not _contains_sensitive_info(detail):
+            return detail
+        if isinstance(detail, list):
+            # For validation errors (list of dicts), keep the structure but sanitize
+            sanitized = []
+            for item in detail:
+                if isinstance(item, dict):
+                    # Keep loc, msg, type for pydantic validation errors
+                    sanitized_item = {}
+                    if 'loc' in item:
+                        sanitized_item['loc'] = item['loc']
+                    if 'msg' in item and not _contains_sensitive_info(str(item['msg'])):
+                        sanitized_item['msg'] = item['msg']
+                    else:
+                        sanitized_item['msg'] = 'Validation failed'
+                    if 'type' in item:
+                        sanitized_item['type'] = item['type']
+                    sanitized.append(sanitized_item)
+                else:
+                    sanitized.append(item if not _contains_sensitive_info(str(item)) else 'Invalid value')
+            return sanitized
+        return detail
+
+    # For other status codes, use generic message
+    return GENERIC_ERROR_MESSAGES.get(status_code, "An error occurred")
+
+
+class ErrorSanitizerMiddleware(BaseHTTPMiddleware):
+    """Middleware to sanitize error responses in production.
+
+    This middleware:
+    1. Intercepts error responses (4xx and 5xx status codes)
+    2. Parses JSON response bodies
+    3. Sanitizes the 'detail' field to remove sensitive information
+    4. Returns the sanitized response
+
+    In DEBUG mode, original error messages are preserved for development.
+    """
+
+    async def dispatch(self, request: Request, call_next) -> Response:
+        response = await call_next(request)
+
+        # Only process error responses with JSON content
+        if response.status_code < 400:
+            return response
+
+        content_type = response.headers.get("content-type", "")
+        if "application/json" not in content_type:
+            return response
+
+        # Read the response body
+        body = b""
+        async for chunk in response.body_iterator:
+            body += chunk
+
+        if not body:
+            return response
+
+        try:
+            data = json.loads(body)
+        except (json.JSONDecodeError, UnicodeDecodeError):
+            # Not valid JSON, return as-is
+            return Response(
+                content=body,
+                status_code=response.status_code,
+                headers=dict(response.headers),
+                media_type=response.media_type,
+            )
+
+        # Sanitize the detail field if present
+        if "detail" in data:
+            original_detail = data["detail"]
+            data["detail"] = _sanitize_detail(original_detail, response.status_code)
+
+            # Log the original error in production for debugging
+            if not settings.DEBUG and original_detail != data["detail"]:
+                logger.warning(
+                    "Sanitized error response",
+                    extra={
+                        "status_code": response.status_code,
+                        "path": str(request.url.path),
+                        "method": request.method,
+                        "original_detail_length": len(str(original_detail)),
+                    }
+                )
+
+        # Return the sanitized response
+        return JSONResponse(
+            content=data,
+            status_code=response.status_code,
+            headers={
+                k: v for k, v in response.headers.items()
+                if k.lower() not in ("content-length", "content-type")
+            },
+        )