Implemented proposals from comprehensive QA review: 1. extend-csrf-protection - Add POST to CSRF protected methods in frontend - Global CSRF middleware for all state-changing operations - Update tests with CSRF token fixtures 2. tighten-cors-websocket-security - Replace wildcard CORS with explicit method/header lists - Disable query parameter auth in production (code 4002) - Add per-user WebSocket connection limit (max 5, code 4005) 3. shorten-jwt-expiry - Reduce JWT expiry from 7 days to 60 minutes - Add refresh token support with 7-day expiry - Implement token rotation on refresh - Frontend auto-refresh when token near expiry (<5 min) 4. fix-frontend-quality - Add React.lazy() code splitting for all pages - Fix useCallback dependency arrays (Dashboard, Comments) - Add localStorage data validation in AuthContext - Complete i18n for AttachmentUpload component 5. enhance-backend-validation - Add SecurityAuditMiddleware for access denied logging - Add ErrorSanitizerMiddleware for production error messages - Protect /health/detailed with admin authentication - Add input length validation (comment 5000, desc 10000) All 521 backend tests passing. Frontend builds successfully. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
188 lines
6.4 KiB
Python
188 lines
6.4 KiB
Python
"""Error message sanitization middleware for production environments.
|
|
|
|
This middleware intercepts error responses and sanitizes them to prevent
|
|
information disclosure in production environments. Detailed error messages
|
|
are only shown when DEBUG mode is enabled.
|
|
"""
|
|
import json
|
|
import logging
|
|
from typing import Optional
|
|
from starlette.middleware.base import BaseHTTPMiddleware
|
|
from starlette.requests import Request
|
|
from starlette.responses import Response, JSONResponse
|
|
|
|
from app.core.config import settings
|
|
|
|
# Module-level logger; the middleware below uses it to emit a warning whenever
# a response's "detail" field was changed by sanitization.
logger = logging.getLogger(__name__)
|
|
|
|
# Fallback messages, keyed by HTTP status code, that replace the original
# error detail when sanitization decides the detail must not be exposed.
GENERIC_ERROR_MESSAGES = {
    # Client errors (4xx)
    400: "Bad Request",
    401: "Authentication required",
    403: "Access denied",
    404: "Resource not found",
    405: "Method not allowed",
    409: "Request conflict",
    422: "Validation error",
    429: "Too many requests",
    # Server errors (5xx)
    500: "Internal server error",
    502: "Service unavailable",
    503: "Service temporarily unavailable",
    504: "Request timeout",
}
|
|
|
|
# Status codes whose original detail may be kept outside DEBUG mode.
# These are user-facing errors where the caller needs the specifics:
#   400 - what is wrong with the request
#   401 - why authentication failed
#   403 - which permission is missing
#   404 - usually safe to preserve
#   409 - what the conflict is
#   422 - which fields failed validation
PRESERVE_MESSAGE_CODES = {400, 401, 403, 404, 409, 422}
|
|
|
|
# Substrings whose presence in an error message marks it as potentially
# sensitive. They are compared against the lower-cased message.
SENSITIVE_PATTERNS = [
    # Stack traces and filesystem locations
    "traceback",
    "stack trace",
    "file path",
    "/usr/",
    "/var/",
    "/home/",
    # Connectivity failures
    "connection refused",
    "connection error",
    "timeout connecting",
    # Database internals
    "database error",
    "sql",
    "query failed",
    # Credentials and secrets
    "password",
    "secret",
    "token",
    "key=",
    "credentials",
    # Python exception text
    ".py line",
    "exception in",
]
|
|
|
|
|
|
def _contains_sensitive_info(message: str) -> bool:
|
|
"""Check if an error message contains potentially sensitive information."""
|
|
if not message:
|
|
return False
|
|
message_lower = message.lower()
|
|
return any(pattern.lower() in message_lower for pattern in SENSITIVE_PATTERNS)
|
|
|
|
|
|
def _sanitize_validation_item(item: object) -> object:
    """Sanitize one entry of a list-style (validation error) detail.

    Dict entries are reduced to their ``loc``, ``msg`` and ``type`` keys; a
    missing or sensitive ``msg`` becomes ``'Validation failed'``. Any other
    entry is kept unless its string form looks sensitive, in which case it is
    replaced by ``'Invalid value'``.
    """
    if not isinstance(item, dict):
        return 'Invalid value' if _contains_sensitive_info(str(item)) else item

    cleaned = {}
    if 'loc' in item:
        cleaned['loc'] = item['loc']
    if 'msg' in item and not _contains_sensitive_info(str(item['msg'])):
        cleaned['msg'] = item['msg']
    else:
        cleaned['msg'] = 'Validation failed'
    if 'type' in item:
        cleaned['type'] = item['type']
    return cleaned


def _sanitize_detail(detail: object, status_code: int) -> object:
    """Sanitize an error ``detail`` value, removing sensitive information.

    Args:
        detail: The error detail (string, list, dict or any other JSON value).
        status_code: The HTTP status code of the response.

    Returns:
        The original ``detail`` when ``settings.DEBUG`` is enabled. Otherwise:

        * for status codes outside ``PRESERVE_MESSAGE_CODES``, the generic
          message for that code (``"An error occurred"`` if none is defined);
        * for preserved codes with a list detail, a new list in which every
          entry has been sanitized individually;
        * for preserved codes with any other detail, the detail itself if its
          string form contains no sensitive pattern, else the generic message.
    """
    # In debug mode the original detail is always returned untouched.
    if settings.DEBUG:
        return detail

    generic_message = GENERIC_ERROR_MESSAGES.get(status_code, "An error occurred")

    if status_code not in PRESERVE_MESSAGE_CODES:
        return generic_message

    if isinstance(detail, list):
        # Validation errors: keep the list structure, sanitize each entry.
        return [_sanitize_validation_item(item) for item in detail]

    # Previously a detail that failed the sensitivity check fell through to an
    # unconditional `return detail`, so sensitive strings (and unchecked dicts)
    # were still sent to the client. Fall back to the generic message instead.
    text = detail if isinstance(detail, str) else str(detail)
    if _contains_sensitive_info(text):
        return generic_message
    return detail
|
|
|
|
|
|
class ErrorSanitizerMiddleware(BaseHTTPMiddleware):
    """Middleware to sanitize error responses in production.

    This middleware:
    1. Intercepts error responses (status code 400 and above)
    2. Parses JSON response bodies
    3. Sanitizes the 'detail' field to remove sensitive information
    4. Returns the sanitized response

    In DEBUG mode, original error messages are preserved for development.
    """

    @staticmethod
    def _copy_headers(source: Response, target: Response, skip: tuple = ()) -> None:
        """Append every header of *source* to *target*, except names in *skip*.

        Headers are appended one by one rather than passed through a ``dict``
        so that repeated headers (for example several ``Set-Cookie`` lines)
        are all carried over. Names in *skip* must be lower-case.
        """
        for name, value in source.headers.items():
            if name.lower() not in skip:
                target.headers.append(name, value)

    async def dispatch(self, request: Request, call_next) -> Response:
        """Run the downstream handler and sanitize JSON error bodies.

        Args:
            request: The incoming request.
            call_next: Callable that forwards the request downstream and
                returns its response.

        Returns:
            The downstream response unchanged when it is not an error, is not
            declared as JSON, or has an empty body. Otherwise a rebuilt
            response carrying the same status code and headers, whose
            ``detail`` field (if the body is a JSON object) has been passed
            through ``_sanitize_detail``.
        """
        response = await call_next(request)

        # Only process error responses with JSON content
        if response.status_code < 400:
            return response

        content_type = response.headers.get("content-type", "")
        if "application/json" not in content_type:
            return response

        # Read the whole response body; the iterator can only be consumed
        # once, so from here on a new response object has to be returned.
        body = b"".join([chunk async for chunk in response.body_iterator])

        if not body:
            return response

        try:
            data = json.loads(body)
        except (json.JSONDecodeError, UnicodeDecodeError):
            # Not valid JSON: replay the bytes as-is. Content-Length is
            # recomputed by Response; every other header is copied over.
            passthrough = Response(content=body, status_code=response.status_code)
            self._copy_headers(response, passthrough, skip=("content-length",))
            return passthrough

        # Sanitize the detail field if present. The body may be any JSON value
        # (list, string, number), so only look for the key on objects.
        if isinstance(data, dict) and "detail" in data:
            original_detail = data["detail"]
            data["detail"] = _sanitize_detail(original_detail, response.status_code)

            # Record that sanitization happened, logging only the length of
            # the original detail and never its content.
            if not settings.DEBUG and original_detail != data["detail"]:
                logger.warning(
                    "Sanitized error response",
                    extra={
                        "status_code": response.status_code,
                        "path": str(request.url.path),
                        "method": request.method,
                        "original_detail_length": len(str(original_detail)),
                    },
                )

        # Return the sanitized response. JSONResponse sets its own
        # Content-Length and Content-Type for the re-encoded body.
        sanitized = JSONResponse(content=data, status_code=response.status_code)
        self._copy_headers(
            response, sanitized, skip=("content-length", "content-type")
        )
        return sanitized
|