Error Handling & Monitoring

Comprehensive error handling strategies, monitoring approaches, and observability patterns for system reliability.

Error Handling & Monitoring

Robust error handling and comprehensive monitoring are essential for maintaining system reliability and providing an excellent user experience.

Error Handling Architecture

Error Classification

System Errors

  • Database Errors: Connection failures, query errors, constraint violations
  • Network Errors: API timeouts, connection refused, DNS failures
  • Infrastructure Errors: Memory exhaustion, disk space, service unavailability
  • Configuration Errors: Missing environment variables, invalid settings

Business Errors

  • Validation Errors: Invalid input data, business rule violations
  • Authorization Errors: Insufficient permissions, expired tokens
  • Resource Errors: Not found, already exists, conflict states
  • Workflow Errors: Invalid state transitions, dependency failures

User Errors

  • Input Errors: Malformed data, missing required fields
  • Authentication Errors: Invalid credentials, expired sessions
  • Permission Errors: Access denied, insufficient privileges
  • Usage Errors: Rate limiting, quota exceeded

Error Types and Classes

/**
 * Base class for every application-defined error.
 *
 * Concrete subclasses supply three discriminators:
 *  - `statusCode`: used as the HTTP status of the error response,
 *  - `errorCode`: stable machine-readable identifier sent to clients,
 *  - `isOperational`: NOTE(review): presumably separates expected failures
 *    from programming errors — confirm the intended meaning with the
 *    consumers of this flag.
 */
export abstract class AppError extends Error {
  abstract readonly statusCode: number
  abstract readonly isOperational: boolean
  abstract readonly errorCode: string
  
  /**
   * @param message Human-readable description of the failure.
   * @param cause   Optional underlying error; its stack trace is appended to
   *                this error's stack after a "Caused by:" marker.
   */
  constructor(message: string, public readonly cause?: Error) {
    super(message)
    // When compiled to ES5, `extends Error` loses the subclass prototype;
    // restoring it keeps `instanceof` and `constructor.name` correct.
    Object.setPrototypeOf(this, new.target.prototype)
    this.name = this.constructor.name
    
    if (cause) {
      // `stack` is optional on Error; fall back to "Name: message" so the
      // trace never ends in "Caused by: undefined".
      const causeTrace = cause.stack ?? `${cause.name}: ${cause.message}`
      this.stack = `${this.stack}\nCaused by: ${causeTrace}`
    }
  }
}

// Business logic errors
/**
 * Input failed validation; reported with HTTP status 400 and the error code
 * `VALIDATION_ERROR`.
 */
export class ValidationError extends AppError {
  readonly statusCode = 400
  readonly isOperational = true
  readonly errorCode = 'VALIDATION_ERROR'
  
  /**
   * @param message Description of what is wrong with the input.
   * @param field   Optional name of the offending input field.
   * @param cause   Optional underlying error, forwarded to the base class so
   *                its stack trace is preserved (previously it could not be
   *                supplied at all).
   */
  constructor(message: string, public readonly field?: string, cause?: Error) {
    super(message, cause)
  }
}

/**
 * A business rule rejected the request.
 * Reported with HTTP status 422 and the error code `BUSINESS_ERROR`.
 */
export class BusinessError extends AppError {
  readonly statusCode = 422 as const
  readonly isOperational = true as const
  readonly errorCode = 'BUSINESS_ERROR' as const
}

/**
 * The requested resource does not exist.
 * Reported with HTTP status 404 and the error code `NOT_FOUND`.
 */
export class NotFoundError extends AppError {
  readonly statusCode = 404 as const
  readonly isOperational = true as const
  readonly errorCode = 'NOT_FOUND' as const
}

/**
 * The caller is not authenticated.
 * Reported with HTTP status 401 and the error code `UNAUTHORIZED`.
 */
export class UnauthorizedError extends AppError {
  readonly statusCode = 401 as const
  readonly isOperational = true as const
  readonly errorCode = 'UNAUTHORIZED' as const
}

/**
 * The caller is not allowed to perform the action.
 * Reported with HTTP status 403 and the error code `FORBIDDEN`.
 */
export class ForbiddenError extends AppError {
  readonly statusCode = 403 as const
  readonly isOperational = true as const
  readonly errorCode = 'FORBIDDEN' as const
}

// System errors
/**
 * A database operation failed.
 * Reported with HTTP status 500 and the error code `DATABASE_ERROR`.
 */
export class DatabaseError extends AppError {
  readonly statusCode = 500 as const
  readonly isOperational = true as const
  readonly errorCode = 'DATABASE_ERROR' as const
}

/**
 * A call to an external service failed.
 * Reported with HTTP status 502 and the error code `EXTERNAL_SERVICE_ERROR`.
 */
export class ExternalServiceError extends AppError {
  readonly statusCode = 502 as const
  readonly isOperational = true as const
  readonly errorCode = 'EXTERNAL_SERVICE_ERROR' as const
  
  /**
   * @param message Description of the failure.
   * @param service Identifier of the external service that failed.
   * @param cause   Optional underlying error, forwarded to the base class.
   */
  constructor(
    message: string,
    public readonly service: string,
    cause?: Error
  ) {
    super(message, cause)
  }
}

/**
 * An internal fault; the only error class here that is flagged as
 * non-operational. Reported with HTTP status 500 and the error code
 * `INTERNAL_ERROR`.
 */
export class InternalError extends AppError {
  readonly statusCode = 500 as const
  readonly isOperational = false as const
  readonly errorCode = 'INTERNAL_ERROR' as const
}

Client-Side Error Handling

React Error Boundaries

// Error boundary component
/** State held by `ErrorBoundary`. */
interface ErrorBoundaryState {
  /** True once a descendant has thrown; switches rendering to the fallback. */
  hasError: boolean
  /** The error that was caught, if any. */
  error?: Error
  /** Component-stack information recorded in `componentDidCatch`. */
  errorInfo?: ErrorInfo
}

/**
 * React error boundary: when a descendant throws during rendering it logs the
 * error, reports it to the error tracker and renders a fallback component in
 * place of `children`.
 *
 * Props:
 *  - `children`: the subtree to protect.
 *  - `fallback`: optional component rendered after an error; defaults to
 *    `DefaultErrorFallback`.
 */
export class ErrorBoundary extends Component<
  { children: ReactNode; fallback?: ComponentType<ErrorBoundaryProps> },
  ErrorBoundaryState
> {
  // A class field replaces the former `constructor(props: any)`, which
  // disabled type checking of the props.
  state: ErrorBoundaryState = { hasError: false }
  
  /** Derives the "failed" state from the thrown error. */
  static getDerivedStateFromError(error: Error): ErrorBoundaryState {
    return {
      hasError: true,
      error
    }
  }
  
  /** Logs and reports the error, then stores the component stack in state. */
  componentDidCatch(error: Error, errorInfo: ErrorInfo) {
    // Log error to monitoring service
    logger.error('React Error Boundary caught error', {
      error: error.message,
      stack: error.stack,
      componentStack: errorInfo.componentStack,
      errorBoundary: this.constructor.name
    })
    
    // Report to error tracking service
    errorTracker.captureException(error, {
      tags: { component: 'error-boundary' },
      extra: { errorInfo }
    })
    
    this.setState({
      hasError: true,
      error,
      errorInfo
    })
  }
  
  /**
   * Clears the failure so `children` are rendered again. The stored error and
   * component stack are cleared too, so no stale details remain in state
   * after a reset.
   */
  private readonly resetError = (): void => {
    this.setState({ hasError: false, error: undefined, errorInfo: undefined })
  }
  
  render() {
    if (this.state.hasError) {
      const FallbackComponent = this.props.fallback || DefaultErrorFallback
      
      return (
        <FallbackComponent
          error={this.state.error}
          errorInfo={this.state.errorInfo}
          resetError={this.resetError}
        />
      )
    }
    
    return this.props.children
  }
}

// Error fallback component
/** Props passed by `ErrorBoundary` to its fallback component. */
interface ErrorBoundaryProps {
  /** The error that triggered the fallback, when known. */
  error?: Error
  /** Component-stack information for the error, when available. */
  errorInfo?: ErrorInfo
  /** Callback that asks the boundary to leave the error state. */
  resetError: () => void
}

/**
 * Default fallback UI for `ErrorBoundary`.
 *
 * Shows an apology, the error message and stack when `NODE_ENV` is
 * `development`, and two actions: "Try Again" (calls `resetError`) and
 * "Refresh Page" (reloads the window).
 *
 * @param error      The caught error, shown only in development.
 * @param resetError Callback invoked by the "Try Again" button.
 */
export function DefaultErrorFallback({ error, resetError }: ErrorBoundaryProps) {
  return (
    <div className="error-boundary">
      <h2>Something went wrong</h2>
      <p>We apologize for the inconvenience. The error has been reported.</p>
      
      {process.env.NODE_ENV === 'development' && (
        <details className="error-details">
          <summary>Error Details (Development Only)</summary>
          <pre>{error?.message}</pre>
          <pre>{error?.stack}</pre>
        </details>
      )}
      
      <div className="error-actions">
        {/* type="button": a <button> defaults to "submit", which would submit
            an enclosing <form> if the boundary is rendered inside one. */}
        <button type="button" onClick={resetError}>Try Again</button>
        <button type="button" onClick={() => window.location.reload()}>Refresh Page</button>
      </div>
    </div>
  )
}

Global Error Handling

/**
 * Installs browser-wide handlers that log and report errors nothing else
 * caught: unhandled promise rejections, uncaught JavaScript errors, and React
 * hydration warnings written through `console.error`.
 */
export class GlobalErrorHandler {
  /** Set once the handlers are installed, so repeated calls are no-ops. */
  private static initialized = false
  
  /**
   * Registers the global handlers.
   *
   * Does nothing when there is no `window` (for example during server-side
   * rendering) or when the handlers are already installed; without that guard
   * every extra call would add duplicate listeners and wrap `console.error`
   * one more time.
   */
  static initialize() {
    if (typeof window === 'undefined' || GlobalErrorHandler.initialized) {
      return
    }
    GlobalErrorHandler.initialized = true
    
    // Handle unhandled promise rejections
    window.addEventListener('unhandledrejection', (event) => {
      logger.error('Unhandled promise rejection', {
        reason: event.reason,
        stack: event.reason?.stack
      })
      
      errorTracker.captureException(event.reason, {
        tags: { type: 'unhandled-promise' }
      })
      
      // Prevent console error
      event.preventDefault()
    })
    
    // Handle global JavaScript errors
    window.addEventListener('error', (event) => {
      logger.error('Global JavaScript error', {
        message: event.message,
        filename: event.filename,
        lineno: event.lineno,
        colno: event.colno,
        stack: event.error?.stack
      })
      
      // `event.error` may be absent; report a synthetic Error built from the
      // event message instead of an empty value.
      errorTracker.captureException(event.error ?? new Error(event.message), {
        tags: { type: 'global-error' }
      })
    })
    
    // Handle React hydration errors
    const originalConsoleError = console.error
    // Re-entrancy guard: if the logger itself writes through console.error,
    // its serialized payload contains the original "Hydration" text and would
    // otherwise re-trigger this hook without bound.
    let reporting = false
    console.error = (...args: unknown[]) => {
      const first = args[0]
      if (!reporting && typeof first === 'string' && first.includes('Hydration')) {
        reporting = true
        try {
          logger.error('React hydration error', { args })
          errorTracker.captureMessage('Hydration error detected', {
            level: 'error',
            extra: { args }
          })
        } finally {
          reporting = false
        }
      }
      originalConsoleError.apply(console, args)
    }
  }
}

API Error Handling

/**
 * Centralized HTTP client: issues JSON requests against a base URL, enforces
 * a timeout, tags each request with an `X-Request-ID` header, and converts
 * failures into typed errors.
 */
export class ApiClient {
  private baseURL: string
  private timeout: number
  
  /**
   * @param baseURL Prefix prepended verbatim to every endpoint.
   * @param timeout Per-request timeout in milliseconds (default 30000).
   */
  constructor(baseURL: string, timeout = 30000) {
    this.baseURL = baseURL
    this.timeout = timeout
  }
  
  /**
   * Performs a request and returns the parsed JSON body.
   *
   * @param endpoint Path appended to the base URL.
   * @param options  Standard fetch options. Caller-supplied headers override
   *                 the default `Content-Type` and `X-Request-ID`; a
   *                 caller-supplied `signal` is replaced by the timeout signal.
   * @returns The parsed JSON body, or `undefined` for 204/205 responses, which
   *          carry no body to parse.
   * @throws ApiTimeoutError when the request, including reading the body,
   *         exceeds the timeout.
   * @throws NetworkError when `fetch` itself fails with a `TypeError`.
   * @throws The typed error chosen by `handleErrorResponse` for non-2xx
   *         responses.
   */
  async request<T>(endpoint: string, options: RequestInit = {}): Promise<T> {
    const url = `${this.baseURL}${endpoint}`
    const requestId = generateRequestId()
    
    // Merge through Headers rather than object spread: spreading a Headers
    // instance or an array of pairs silently drops the caller's headers.
    // Built before the timer starts so a malformed header cannot leak it.
    const headers = new Headers({
      'Content-Type': 'application/json',
      'X-Request-ID': requestId
    })
    new Headers(options.headers).forEach((value, name) => headers.set(name, value))
    
    const controller = new AbortController()
    const timeoutId = setTimeout(() => controller.abort(), this.timeout)
    
    try {
      const response = await fetch(url, {
        ...options,
        headers,
        signal: controller.signal
      })
      
      if (!response.ok) {
        await this.handleErrorResponse(response, requestId)
      }
      
      // No Content / Reset Content: response.json() would throw on the empty body.
      if (response.status === 204 || response.status === 205) {
        return undefined as T
      }
      
      return (await response.json()) as T
      
    } catch (error) {
      if (error instanceof Error && error.name === 'AbortError') {
        throw new ApiTimeoutError(`Request timeout after ${this.timeout}ms`)
      }
      
      // fetch reports network failures as TypeError, but the message text
      // differs between browsers, so match on the type only.
      if (error instanceof TypeError) {
        throw new NetworkError('Network request failed')
      }
      
      throw error
    } finally {
      // Always release the timer, including when fetch rejects.
      clearTimeout(timeoutId)
    }
  }
  
  /**
   * Logs a failed response and throws the error class matching its status.
   * Never returns normally.
   *
   * @param response  The non-OK response.
   * @param requestId Identifier sent with the request, included in the log.
   */
  private async handleErrorResponse(response: Response, requestId: string): Promise<never> {
    let errorData: Record<string, unknown> = { message: response.statusText }
    
    try {
      const parsed: unknown = await response.json()
      // A JSON body of `null`, a string or a number has no `message` to read.
      if (parsed !== null && typeof parsed === 'object') {
        errorData = parsed as Record<string, unknown>
      }
    } catch {
      // Body is not JSON; keep the status-text fallback.
    }
    
    const serverMessage =
      typeof errorData.message === 'string' && errorData.message !== ''
        ? errorData.message
        : undefined
    
    // Body fields go first so they cannot overwrite status/requestId/url.
    const errorInfo = {
      ...errorData,
      status: response.status,
      statusText: response.statusText,
      requestId,
      url: response.url
    }
    
    logger.error('API request failed', errorInfo)
    
    switch (response.status) {
      case 400:
        throw new ValidationError(serverMessage || 'Bad Request')
      case 401:
        throw new UnauthorizedError(serverMessage || 'Unauthorized')
      case 403:
        throw new ForbiddenError(serverMessage || 'Forbidden')
      case 404:
        throw new NotFoundError(serverMessage || 'Not Found')
      case 422:
        throw new BusinessError(serverMessage || 'Unprocessable Entity')
      case 429:
        throw new RateLimitError(serverMessage || 'Too Many Requests')
      case 500:
        throw new InternalServerError(serverMessage || 'Internal Server Error')
      case 502:
        throw new BadGatewayError(serverMessage || 'Bad Gateway')
      case 503:
        throw new ServiceUnavailableError(serverMessage || 'Service Unavailable')
      default:
        throw new ApiError(`HTTP ${response.status}: ${serverMessage ?? response.statusText}`)
    }
  }
}

Server-Side Error Handling

API Route Error Handling

/**
 * Wraps an API route handler so that anything it throws or rejects with is
 * converted into a response by `handleApiError`.
 *
 * @param handler The route handler to protect.
 * @returns A function with the same parameters that always resolves to a
 *          `Response`.
 */
export function withErrorHandler<T extends any[]>(
  handler: (...args: T) => Promise<Response>
) {
  const guarded = async (...args: T): Promise<Response> => {
    let response: Response
    try {
      response = await handler(...args)
    } catch (failure) {
      response = handleApiError(failure)
    }
    return response
  }
  
  return guarded
}

/**
 * Converts any thrown value into a JSON error response.
 *
 * - `AppError`: responds with its `statusCode` and `errorCode`. For server
 *   faults (status >= 500 or non-operational) the message is replaced by a
 *   generic one outside development, matching how database details are hidden
 *   below, and the error is sent to the error tracker.
 * - Objects with a `code` property (treated as Supabase errors): generic 500
 *   `DATABASE_ERROR`; the original message is included only in development.
 * - Anything else: generic 500 `INTERNAL_ERROR`.
 *
 * @param error The value caught by the route wrapper.
 * @returns A `NextResponse` with `{ success: false, error, requestId }`.
 */
export function handleApiError(error: unknown): Response {
  const requestId = generateRequestId()
  const isDevelopment = process.env.NODE_ENV === 'development'
  
  // Log error
  logger.error('API route error', {
    error: error instanceof Error ? error.message : String(error),
    stack: error instanceof Error ? error.stack : undefined,
    requestId
  })
  
  // Handle known error types
  if (error instanceof AppError) {
    const isServerFault = error.statusCode >= 500 || !error.isOperational
    
    if (isServerFault) {
      // Server-side faults need the same visibility as unknown errors.
      errorTracker.captureException(error, {
        tags: { component: 'api-route' },
        extra: { requestId, errorCode: error.errorCode }
      })
    }
    
    // Client-caused errors carry messages meant for the caller; server faults
    // may embed internal details such as raw database error text.
    const exposeMessage = !isServerFault || isDevelopment
    
    return NextResponse.json({
      success: false,
      error: {
        code: error.errorCode,
        message: exposeMessage ? error.message : 'An unexpected error occurred',
        ...(error instanceof ValidationError && error.field ? { field: error.field } : {})
      },
      requestId
    }, { status: error.statusCode })
  }
  
  // Handle Supabase errors
  if (error && typeof error === 'object' && 'code' in error) {
    const supabaseError = error as { code?: unknown; message?: unknown }
    
    errorTracker.captureException(error as Error, {
      tags: { component: 'api-route', type: 'database' },
      extra: { requestId, code: supabaseError.code }
    })
    
    return NextResponse.json({
      success: false,
      error: {
        code: 'DATABASE_ERROR',
        message: 'Database operation failed',
        details: isDevelopment ? supabaseError.message : undefined
      },
      requestId
    }, { status: 500 })
  }
  
  // Handle unknown errors
  errorTracker.captureException(error as Error, {
    tags: { component: 'api-route' },
    extra: { requestId }
  })
  
  return NextResponse.json({
    success: false,
    error: {
      code: 'INTERNAL_ERROR',
      message: 'An unexpected error occurred'
    },
    requestId
  }, { status: 500 })
}

/**
 * Example route: returns the product whose id is given in the `id` query
 * parameter. Throws `ValidationError` when the parameter is missing or empty;
 * `withErrorHandler` converts thrown errors into responses.
 */
export const GET = withErrorHandler(async (request: Request) => {
  const productId = new URL(request.url).searchParams.get('id')
  
  // Guard clause: a null or empty id is rejected before any lookup.
  if (!productId) {
    throw new ValidationError('Product ID is required')
  }
  
  const data = await productService.getProduct(productId)
  
  return NextResponse.json({ success: true, data })
})

Service Layer Error Handling

/**
 * Resilience helpers for service calls: retry with exponential backoff and
 * circuit-breaker gating.
 */
export class ServiceErrorHandler {
  /**
   * Runs `operation`, retrying it after failures that satisfy `retryCondition`.
   *
   * @param operation The async work to perform.
   * @param options.maxRetries     Retries after the first attempt (default 3).
   *                               Negative or NaN values are treated as 0, so
   *                               the operation always runs at least once.
   * @param options.backoffMs      Base delay; attempt k waits backoffMs * 2^k
   *                               milliseconds (default 1000).
   * @param options.retryCondition Decides whether a failure is retryable;
   *                               defaults to "is an ExternalServiceError".
   *                               It receives the thrown value unchanged.
   * @returns The operation's result.
   * @throws The last thrown value, unchanged, once retries are exhausted or
   *         the failure is not retryable.
   */
  static async withRetry<T>(
    operation: () => Promise<T>,
    options: {
      maxRetries?: number
      backoffMs?: number
      retryCondition?: (error: Error) => boolean
    } = {}
  ): Promise<T> {
    const {
      maxRetries = 3,
      backoffMs = 1000,
      retryCondition = (error) => error instanceof ExternalServiceError
    } = options
    
    // Previously a negative maxRetries skipped the loop entirely and the
    // method threw `undefined` without ever calling the operation.
    const retryLimit = Number.isNaN(maxRetries) ? 0 : Math.max(0, Math.floor(maxRetries))
    
    for (let attempt = 0; ; attempt++) {
      try {
        return await operation()
      } catch (error) {
        if (attempt >= retryLimit || !retryCondition(error as Error)) {
          throw error
        }
        
        const delay = backoffMs * Math.pow(2, attempt)
        logger.warn('Operation failed, retrying', {
          attempt: attempt + 1,
          maxRetries: retryLimit,
          delay,
          // Thrown values are not guaranteed to be Error instances.
          error: error instanceof Error ? error.message : String(error)
        })
        
        await new Promise(resolve => setTimeout(resolve, delay))
      }
    }
  }
  
  /**
   * Runs `operation` unless the circuit breaker is open, and records the
   * outcome on the breaker.
   *
   * @param operation      The async work to perform.
   * @param circuitBreaker Breaker guarding the downstream dependency.
   * @returns The operation's result.
   * @throws ServiceUnavailableError when the breaker is open; otherwise
   *         whatever the operation throws, after recording the failure.
   */
  static async withCircuitBreaker<T>(
    operation: () => Promise<T>,
    circuitBreaker: CircuitBreaker
  ): Promise<T> {
    if (circuitBreaker.isOpen()) {
      throw new ServiceUnavailableError('Service temporarily unavailable')
    }
    
    try {
      const result = await operation()
      circuitBreaker.recordSuccess()
      return result
    } catch (error) {
      circuitBreaker.recordFailure()
      throw error
    }
  }
}

/**
 * Minimal circuit breaker with three states:
 *  - `closed`: calls are allowed; failures are counted.
 *  - `open`: calls are rejected until `timeoutMs` has elapsed since the most
 *    recent failure.
 *  - `half-open`: entered by the first `isOpen()` check after the timeout;
 *    calls are allowed, a success closes the breaker and a failure re-opens it.
 */
export class CircuitBreaker {
  private failures = 0
  private lastFailureTime = 0
  private state: 'closed' | 'open' | 'half-open' = 'closed'
  
  /**
   * @param failureThreshold Failure count at which the breaker opens (default 5).
   * @param timeoutMs        Time the breaker stays open after the most recent
   *                         failure, in milliseconds (default 60000).
   */
  constructor(
    private failureThreshold = 5,
    private timeoutMs = 60000
  ) {}
  
  /**
   * Reports whether calls should currently be rejected. As a side effect,
   * moves an open breaker to `half-open` once the timeout has elapsed.
   */
  isOpen(): boolean {
    if (this.state !== 'open') {
      return false
    }
    
    const elapsedMs = Date.now() - this.lastFailureTime
    if (elapsedMs > this.timeoutMs) {
      this.state = 'half-open'
      return false
    }
    
    return true
  }
  
  /** Closes the breaker and resets the failure count. */
  recordSuccess(): void {
    this.state = 'closed'
    this.failures = 0
  }
  
  /** Counts a failure, timestamps it, and opens the breaker at the threshold. */
  recordFailure(): void {
    this.failures += 1
    this.lastFailureTime = Date.now()
    
    const thresholdReached = this.failures >= this.failureThreshold
    if (thresholdReached) {
      this.state = 'open'
    }
  }
}

Monitoring and Observability

Structured Logging

// Structured logging implementation
/**
 * Fields attached to log entries. The named properties are optional
 * well-known keys; the index signature allows any additional key.
 */
export interface LogContext {
  userId?: string
  requestId?: string
  sessionId?: string
  operation?: string
  // NOTE(review): unit is not fixed by this type — callers in this file pass
  // rounded milliseconds; confirm before relying on it.
  duration?: number
  [key: string]: any
}

/**
 * Structured JSON logger.
 *
 * Each entry contains `timestamp`, `level`, `service` and `message`, followed
 * by the logger's context and the per-call metadata. In development the entry
 * is pretty-printed through the matching console method; otherwise it is
 * written as a single line via `console.log`. When `LOG_ENDPOINT` is set the
 * entry is also POSTed there on a best-effort basis.
 */
export class Logger {
  private context: LogContext = {}
  
  /** @param service Name written into every entry's `service` field. */
  constructor(private service: string) {}
  
  /**
   * Returns a new logger for the same service whose context is this logger's
   * context extended (and overridden) by `context`. This logger is unchanged.
   */
  withContext(context: LogContext): Logger {
    const newLogger = new Logger(this.service)
    newLogger.context = { ...this.context, ...context }
    return newLogger
  }
  
  debug(message: string, meta: any = {}) {
    this.log('debug', message, meta)
  }
  
  info(message: string, meta: any = {}) {
    this.log('info', message, meta)
  }
  
  warn(message: string, meta: any = {}) {
    this.log('warn', message, meta)
  }
  
  error(message: string, meta: any = {}) {
    this.log('error', message, meta)
  }
  
  /**
   * Builds, serializes and emits one entry. Never throws because of
   * unserializable metadata.
   */
  private log(level: 'debug' | 'info' | 'warn' | 'error', message: string, meta: any) {
    const core = {
      timestamp: new Date().toISOString(),
      level,
      service: this.service,
      message
    }
    
    // `core` is spread first to keep the field order and last so that context
    // or metadata keys named `message`, `level`, `service` or `timestamp`
    // cannot overwrite the entry's own fields.
    const logEntry = { ...core, ...this.context, ...meta, ...core }
    
    const serialize = (indent?: number): string => {
      try {
        return JSON.stringify(logEntry, null, indent)
      } catch (failure) {
        // Circular references or BigInt values must not turn a log call into
        // a crash; emit the core fields plus the reason instead.
        return JSON.stringify({ ...core, serializationError: String(failure) }, null, indent)
      }
    }
    
    // Console logging for development
    if (process.env.NODE_ENV === 'development') {
      console[level](serialize(2))
    } else {
      // Structured logging for production
      console.log(serialize())
    }
    
    // Send to external logging service
    if (process.env.LOG_ENDPOINT) {
      // Fire-and-forget: failures are handled inside sendToExternalLogger.
      void this.sendToExternalLogger(serialize())
    }
  }
  
  /**
   * POSTs an already-serialized entry to `LOG_ENDPOINT`; on failure writes a
   * note to `console.error` instead of throwing.
   */
  private async sendToExternalLogger(body: string) {
    try {
      await fetch(process.env.LOG_ENDPOINT!, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body
      })
    } catch (error) {
      // Fallback to console if external logging fails
      console.error('Failed to send log to external service', error)
    }
  }
}

// Usage examples
// Module-level logger; every entry it emits carries service 'product-service'.
const logger = new Logger('product-service')

// Basic logging: the second argument is merged into the emitted entry.
logger.info('Product created', { productId: '123', name: 'Widget' })

// Contextual logging: withContext returns a new Logger whose entries also
// include userId and requestId; `logger` itself is left unchanged.
const contextLogger = logger.withContext({
  userId: 'user-456',
  requestId: 'req-789'
})

// Emits an error-level entry carrying both the context and these fields.
contextLogger.error('Failed to update product', {
  productId: '123',
  error: 'Validation failed'
})

Performance Monitoring

/**
 * Times async operations, logs their duration and heap delta, and keeps the
 * last 100 durations per operation name for summary statistics.
 */
export class PerformanceMonitor {
  /**
   * Process-wide instance behind the static helpers. The static `measure`
   * used to create a fresh monitor per call, so every duration it recorded
   * was discarded and could never be read back.
   */
  private static readonly shared = new PerformanceMonitor()
  
  private metrics: Map<string, number[]> = new Map()
  
  /** Measures `fn` on the shared monitor; see `measureAsync`. */
  static measure<T>(operation: string, fn: () => Promise<T>): Promise<T> {
    return PerformanceMonitor.shared.measureAsync(operation, fn)
  }
  
  /** Statistics for durations recorded through the static `measure`. */
  static getMetrics(operation: string) {
    return PerformanceMonitor.shared.getMetrics(operation)
  }
  
  /**
   * Runs `fn`, records its duration under `operation` on success, and logs
   * the outcome.
   *
   * @param operation Name under which the duration is recorded and logged.
   * @param fn        The async work to time.
   * @returns Whatever `fn` resolves to.
   * @throws Whatever `fn` throws, after logging the failure. Failed runs are
   *         logged but not added to the recorded durations.
   */
  async measureAsync<T>(operation: string, fn: () => Promise<T>): Promise<T> {
    // process.memoryUsage may be missing outside Node; skip the memory delta
    // rather than crash the measured call.
    const readMemory = () =>
      typeof process !== 'undefined' && typeof process.memoryUsage === 'function'
        ? process.memoryUsage()
        : undefined
    
    const startTime = performance.now()
    const startMemory = readMemory()
    
    try {
      const result = await fn()
      const duration = performance.now() - startTime
      const endMemory = readMemory()
      
      this.recordMetric(operation, duration)
      
      logger.info('Operation completed', {
        operation,
        duration: Math.round(duration),
        memoryDelta: startMemory && endMemory
          ? {
              heapUsed: endMemory.heapUsed - startMemory.heapUsed,
              external: endMemory.external - startMemory.external
            }
          : undefined
      })
      
      return result
    } catch (error) {
      const duration = performance.now() - startTime
      
      logger.error('Operation failed', {
        operation,
        duration: Math.round(duration),
        error: error instanceof Error ? error.message : String(error)
      })
      
      throw error
    }
  }
  
  /** Appends `value` to the operation's series, keeping the newest 100. */
  recordMetric(operation: string, value: number) {
    const existing = this.metrics.get(operation) || []
    existing.push(value)
    
    // Keep only last 100 measurements
    if (existing.length > 100) {
      existing.shift()
    }
    
    this.metrics.set(operation, existing)
  }
  
  /**
   * Summary statistics for an operation's recorded values.
   *
   * @returns `null` when nothing has been recorded; otherwise count, average,
   *          min, max, and the values at sorted index floor(n * p) for
   *          p = 0.5, 0.95 and 0.99.
   */
  getMetrics(operation: string) {
    const values = this.metrics.get(operation) || []
    
    if (values.length === 0) {
      return null
    }
    
    // Sort a copy so the insertion-ordered series stays intact.
    const sorted = [...values].sort((a, b) => a - b)
    
    return {
      count: values.length,
      avg: values.reduce((sum, val) => sum + val, 0) / values.length,
      min: sorted[0],
      max: sorted[sorted.length - 1],
      p50: sorted[Math.floor(sorted.length * 0.5)],
      p95: sorted[Math.floor(sorted.length * 0.95)],
      p99: sorted[Math.floor(sorted.length * 0.99)]
    }
  }
}

/** Example service showing how data access is wrapped in performance measurement. */
export class ProductService {
  /**
   * Loads one product by id, timed under the operation name 'getProduct'.
   *
   * @param id Product identifier matched against the `id` column.
   * @returns The matching product row.
   * @throws NotFoundError when no row has that id.
   * @throws DatabaseError when the query itself fails.
   */
  async getProduct(id: string): Promise<Product> {
    return PerformanceMonitor.measure('getProduct', async () => {
      // maybeSingle() yields `data: null` for zero rows, whereas single()
      // reports a missing row as an error — which surfaced a missing product
      // as a 500 DatabaseError instead of a 404.
      const { data, error } = await supabase
        .from('products')
        .select('*')
        .eq('id', id)
        .maybeSingle()
      
      if (error) {
        throw new DatabaseError(error.message)
      }
      
      if (!data) {
        throw new NotFoundError(`Product ${id} not found`)
      }
      
      return data
    })
  }
}

Health Checks and Status Monitoring

// System health monitoring
/** Result of one health probe. */
export interface HealthCheck {
  /** Name of the probed dependency. */
  name: string
  /** Outcome; the overall status is the worst status among all checks. */
  status: 'healthy' | 'degraded' | 'unhealthy'
  /** Probe duration; the checks in this file fill it from performance.now() deltas. */
  responseTime: number
  /** Optional explanation, used by the checks in this file for failure reasons. */
  message?: string
  /** Optional free-form extra data. */
  details?: any
}

/**
 * Registry of named health probes that can be run together and summarized
 * into one overall status.
 */
export class HealthMonitor {
  private checks: Map<string, () => Promise<HealthCheck>> = new Map()
  
  /**
   * Registers a probe, replacing any probe already registered under `name`.
   *
   * @param name  Key for the probe; also used as the reported name when the
   *              probe throws.
   * @param check Function producing the probe's result.
   */
  addCheck(name: string, check: () => Promise<HealthCheck>) {
    this.checks.set(name, check)
  }
  
  /**
   * Runs every registered probe concurrently.
   *
   * A probe that throws or rejects is reported as `unhealthy` with the error
   * message and the time that elapsed before it failed.
   *
   * @returns One result per probe, in registration order.
   */
  async runAllChecks(): Promise<HealthCheck[]> {
    // Each probe handles its own failure, so no promise here can reject and
    // Promise.all is sufficient. The former allSettled "rejected" branch was
    // unreachable and rebuilt the key array for every result.
    return Promise.all(
      Array.from(this.checks, async ([name, check]): Promise<HealthCheck> => {
        const startedAt = performance.now()
        try {
          return await check()
        } catch (error) {
          return {
            name,
            status: 'unhealthy',
            // Real elapsed time instead of a hard-coded 0.
            responseTime: performance.now() - startedAt,
            message: error instanceof Error ? error.message : 'Unknown error'
          }
        }
      })
    )
  }
  
  /**
   * Runs all probes and reduces them to one status: `unhealthy` if any probe
   * is unhealthy, otherwise `degraded` if any is degraded, otherwise `healthy`.
   *
   * @returns The overall status, the individual results, and an ISO-8601
   *          timestamp taken after the probes finished.
   */
  async getOverallHealth(): Promise<{
    status: 'healthy' | 'degraded' | 'unhealthy'
    checks: HealthCheck[]
    timestamp: string
  }> {
    const checks = await this.runAllChecks()
    
    let status: 'healthy' | 'degraded' | 'unhealthy' = 'healthy'
    
    if (checks.some(check => check.status === 'unhealthy')) {
      status = 'unhealthy'
    } else if (checks.some(check => check.status === 'degraded')) {
      status = 'degraded'
    }
    
    return {
      status,
      checks,
      timestamp: new Date().toISOString()
    }
  }
}

// Setup health checks
const healthMonitor = new HealthMonitor()

/**
 * Database probe: runs a one-row query against the `health_check` table and
 * reports `healthy` on success, or `unhealthy` with the failure message.
 */
healthMonitor.addCheck('database', async () => {
  const startedAt = performance.now()
  
  try {
    const queryResult = await supabase.from('health_check').select('1').limit(1)
    
    // Re-throw the query error so both failure kinds share the catch path.
    if (queryResult.error) {
      throw new Error(queryResult.error.message)
    }
    
    return {
      name: 'database',
      status: 'healthy' as const,
      responseTime: performance.now() - startedAt
    }
  } catch (failure) {
    const elapsed = performance.now() - startedAt
    const reason = failure instanceof Error ? failure.message : 'Database connection failed'
    
    return {
      name: 'database',
      status: 'unhealthy' as const,
      responseTime: elapsed,
      message: reason
    }
  }
})

/**
 * External API probe: issues a GET to `EXTERNAL_API_HEALTH_URL` with a
 * 5-second timeout. Reports `healthy` for a 2xx response; otherwise
 * `unhealthy` with the reason (missing configuration, timeout, HTTP status,
 * or the underlying error message).
 */
healthMonitor.addCheck('external-api', async () => {
  const startTime = performance.now()
  
  // `fetch` has no `timeout` option — the previous `timeout: 5000` was
  // ignored, so a hung endpoint could stall the health check. Abort via
  // AbortController instead.
  const controller = new AbortController()
  const timeoutId = setTimeout(() => controller.abort(), 5000)
  
  try {
    const healthUrl = process.env.EXTERNAL_API_HEALTH_URL
    if (!healthUrl) {
      throw new Error('EXTERNAL_API_HEALTH_URL is not configured')
    }
    
    const response = await fetch(healthUrl, {
      method: 'GET',
      signal: controller.signal
    })
    
    if (!response.ok) {
      throw new Error(`HTTP ${response.status}`)
    }
    
    return {
      name: 'external-api',
      status: 'healthy' as const,
      responseTime: performance.now() - startTime
    }
  } catch (error) {
    // Only the timer aborts this controller, so an aborted signal means timeout.
    const reason = controller.signal.aborted
      ? 'Request timed out after 5000ms'
      : error instanceof Error ? error.message : 'External API unavailable'
    
    return {
      name: 'external-api',
      status: 'unhealthy' as const,
      responseTime: performance.now() - startTime,
      message: reason
    }
  } finally {
    clearTimeout(timeoutId)
  }
})

/**
 * Health check API endpoint.
 *
 * Responds with the overall health report as JSON. The HTTP status is 503
 * when the overall status is `unhealthy`; `healthy` and `degraded` both
 * answer 200.
 */
export async function GET() {
  const health = await healthMonitor.getOverallHealth()
  
  // The status is one of exactly three values, so "not unhealthy" covers
  // both `healthy` and `degraded`.
  const statusCode = health.status === 'unhealthy' ? 503 : 200
  
  return NextResponse.json(health, { status: statusCode })
}

This comprehensive error handling and monitoring system ensures robust application behavior and provides visibility into system health and performance.