Monitoring and Observability

Implementing circuit breakers is only half the battle—you also need to monitor their behavior to ensure they’re working correctly and to tune their parameters.

Circuit Breaker Metrics

Collecting metrics from circuit breakers provides valuable insights:

package metrics

import (
    "sync"
    "time"
    
    "example.com/circuitbreaker"
)

// CircuitBreakerMetrics accumulates request-outcome counters and state
// history for a single named circuit breaker. Its methods take mutex before
// touching any other field.
type CircuitBreakerMetrics struct {
    // mutex guards every field below.
    mutex sync.RWMutex

    // name is the label reported alongside the collected values.
    name string

    // Request outcome counters.
    successCount  int64
    failureCount  int64
    rejectedCount int64

    // currentState is the most recently recorded breaker state.
    currentState circuitbreaker.State
    // lastStateTransition is when currentState last changed (or when the
    // collector was created, if it has never changed).
    lastStateTransition time.Time
    // stateTransitions counts recorded transitions, keyed by the state entered.
    stateTransitions map[circuitbreaker.State]int64
}

// NewCircuitBreakerMetrics returns a metrics collector for the breaker called
// name. The collector starts in the Closed state with zeroed counters, an
// empty transition table, and the last-transition timestamp set to now.
func NewCircuitBreakerMetrics(name string) *CircuitBreakerMetrics {
    collector := new(CircuitBreakerMetrics)
    collector.name = name
    collector.stateTransitions = make(map[circuitbreaker.State]int64)
    collector.currentState = circuitbreaker.Closed
    collector.lastStateTransition = time.Now()
    return collector
}

// RecordSuccess adds one to the successful-request counter while holding the
// write lock.
func (m *CircuitBreakerMetrics) RecordSuccess() {
    m.mutex.Lock()
    m.successCount += 1
    m.mutex.Unlock()
}

// RecordFailure adds one to the failed-request counter while holding the
// write lock.
func (m *CircuitBreakerMetrics) RecordFailure() {
    m.mutex.Lock()
    m.failureCount += 1
    m.mutex.Unlock()
}

// RecordRejected adds one to the counter of requests that were turned away
// (the wrapper calls this when the breaker does not allow a request).
func (m *CircuitBreakerMetrics) RecordRejected() {
    m.mutex.Lock()
    m.rejectedCount += 1
    m.mutex.Unlock()
}

// RecordStateTransition notes that the breaker is now in newState. Calls that
// repeat the already-recorded state are ignored; otherwise the per-state
// transition count for newState is incremented, the transition timestamp is
// refreshed, and newState becomes the current state.
func (m *CircuitBreakerMetrics) RecordStateTransition(newState circuitbreaker.State) {
    m.mutex.Lock()
    defer m.mutex.Unlock()

    // Nothing changed: leave the counters and timestamp untouched.
    if m.currentState == newState {
        return
    }

    m.stateTransitions[newState] += 1
    m.currentState = newState
    m.lastStateTransition = time.Now()
}

// GetMetrics returns a point-in-time snapshot of the collector, taken under
// the read lock. The result holds the keys "name", "success_count",
// "failure_count", "rejected_count" (int64 values), "current_state" (string),
// "last_state_transition" (time.Time) and "state_transitions"
// (map[string]int64 keyed by each state's String() form). The returned maps
// are freshly allocated, so callers may modify them freely.
func (m *CircuitBreakerMetrics) GetMetrics() map[string]interface{} {
    m.mutex.RLock()
    defer m.mutex.RUnlock()

    // Re-key the transition counts by state name so the snapshot does not
    // expose the internal map.
    transitions := make(map[string]int64)
    for state, count := range m.stateTransitions {
        transitions[state.String()] = count
    }

    return map[string]interface{}{
        "name":                  m.name,
        "success_count":         m.successCount,
        "failure_count":         m.failureCount,
        "rejected_count":        m.rejectedCount,
        "current_state":         m.currentState.String(),
        "last_state_transition": m.lastStateTransition,
        "state_transitions":     transitions,
    }
}

// CalculateErrorRate returns failures divided by (successes + failures) over
// the lifetime of the collector. Rejected requests are not part of the
// denominator. It returns 0 when no results have been recorded.
func (m *CircuitBreakerMetrics) CalculateErrorRate() float64 {
    m.mutex.RLock()
    failures, successes := m.failureCount, m.successCount
    m.mutex.RUnlock()

    completed := successes + failures
    if completed == 0 {
        // Avoid dividing by zero before any result has been recorded.
        return 0.0
    }

    return float64(failures) / float64(completed)
}

// InstrumentedCircuitBreaker wraps a circuitbreaker.CircuitBreaker and mirrors
// its activity (rejections, request results, state changes) into a
// CircuitBreakerMetrics collector.
type InstrumentedCircuitBreaker struct {
    // circuitBreaker is the wrapped breaker; every decision is delegated to it.
    circuitBreaker circuitbreaker.CircuitBreaker
    // metrics is the collector that the wrapper's methods report to.
    metrics        *CircuitBreakerMetrics
}

// NewInstrumentedCircuitBreaker returns a wrapper that delegates to cb and
// reports what it observes to metrics. Neither argument is copied or
// validated; both are stored as given.
func NewInstrumentedCircuitBreaker(
    cb circuitbreaker.CircuitBreaker,
    metrics *CircuitBreakerMetrics,
) *InstrumentedCircuitBreaker {
    wrapped := new(InstrumentedCircuitBreaker)
    wrapped.circuitBreaker = cb
    wrapped.metrics = metrics
    return wrapped
}

// AllowRequest asks the wrapped breaker whether a request may proceed and
// returns its answer. A refusal is counted as a rejected request. The
// breaker's state is then sampled and passed to the metrics collector so that
// any state change made during the check is recorded.
func (i *InstrumentedCircuitBreaker) AllowRequest() bool {
    allowed := i.circuitBreaker.AllowRequest()
    if !allowed {
        i.metrics.RecordRejected()
    }

    // NOTE(review): presumably the wrapped breaker can change state inside
    // AllowRequest (e.g. open -> half-open once its timeout elapses) — confirm
    // against the circuitbreaker package. Sampling the state here keeps the
    // metrics from missing such a transition until the next RecordResult;
    // RecordStateTransition ignores the call when the state is unchanged.
    i.metrics.RecordStateTransition(i.circuitBreaker.State())

    return allowed
}

// RecordResult forwards the outcome of a request to the wrapped breaker,
// counts it as a success or a failure in the metrics, and then reports the
// breaker's resulting state to the metrics collector.
func (i *InstrumentedCircuitBreaker) RecordResult(success bool) {
    i.circuitBreaker.RecordResult(success)

    switch {
    case success:
        i.metrics.RecordSuccess()
    default:
        i.metrics.RecordFailure()
    }

    // The result may have moved the breaker to a different state; sample it
    // after the breaker has processed the outcome.
    stateAfter := i.circuitBreaker.State()
    i.metrics.RecordStateTransition(stateAfter)
}

// Execute calls fn if the breaker currently allows requests. When the request
// is not allowed, fn is not called and circuitbreaker.ErrCircuitOpen is
// returned. Otherwise fn's error is returned unchanged, after the outcome
// (nil error = success) has been recorded.
func (i *InstrumentedCircuitBreaker) Execute(fn func() error) error {
    if allowed := i.AllowRequest(); !allowed {
        return circuitbreaker.ErrCircuitOpen
    }

    result := fn()
    succeeded := result == nil
    i.RecordResult(succeeded)
    return result
}

// State reports the state of the wrapped circuit breaker at the time of the
// call. It does not touch the metrics collector.
func (i *InstrumentedCircuitBreaker) State() circuitbreaker.State {
    current := i.circuitBreaker.State()
    return current
}

Prometheus Integration

Exposing circuit breaker metrics to Prometheus enables powerful monitoring and alerting:

package metrics

import (
    "net/http"
    
    "github.com/prometheus/client_golang/prometheus"
    "github.com/prometheus/client_golang/prometheus/promhttp"
)

// counterSnapshot holds the cumulative counts that were last exported to
// Prometheus for one circuit breaker.
type counterSnapshot struct {
    success  int64
    failure  int64
    rejected int64
}

// counterDelta returns how much a Prometheus counter must grow to move from
// the previously exported cumulative value to the current one. If the source
// value went backwards (the source was reset or replaced), the current value
// is treated as the full increment. The result is never negative, because
// prometheus.Counter.Add panics on negative values.
func counterDelta(current, previous int64) float64 {
    delta := current - previous
    if current < previous {
        delta = current
    }
    if delta < 0 {
        return 0
    }
    return float64(delta)
}

// PrometheusCircuitBreakerCollector exports the values of registered
// CircuitBreakerMetrics as Prometheus counters and gauges, labelled by
// breaker name. It is not safe for concurrent use: RegisterMetrics and
// UpdateMetrics access its maps without locking.
type PrometheusCircuitBreakerCollector struct {
    // metrics maps a breaker name to the collector it is read from.
    metrics map[string]*CircuitBreakerMetrics

    // lastExported remembers, per breaker name, the cumulative counts already
    // added to the Prometheus counters, so each update adds only the growth.
    lastExported map[string]counterSnapshot

    successCounter  *prometheus.CounterVec
    failureCounter  *prometheus.CounterVec
    rejectedCounter *prometheus.CounterVec
    stateGauge      *prometheus.GaugeVec
    errorRateGauge  *prometheus.GaugeVec
}

// NewPrometheusCircuitBreakerCollector creates the Prometheus vectors and
// registers them with the default Prometheus registry.
//
// Registration uses prometheus.MustRegister, which panics if a collector with
// the same metric names is already registered, so this constructor should be
// called once per process.
func NewPrometheusCircuitBreakerCollector() *PrometheusCircuitBreakerCollector {
    collector := &PrometheusCircuitBreakerCollector{
        metrics:      make(map[string]*CircuitBreakerMetrics),
        lastExported: make(map[string]counterSnapshot),

        successCounter: prometheus.NewCounterVec(
            prometheus.CounterOpts{
                Name: "circuit_breaker_success_total",
                Help: "Total number of successful requests",
            },
            []string{"name"},
        ),

        failureCounter: prometheus.NewCounterVec(
            prometheus.CounterOpts{
                Name: "circuit_breaker_failure_total",
                Help: "Total number of failed requests",
            },
            []string{"name"},
        ),

        rejectedCounter: prometheus.NewCounterVec(
            prometheus.CounterOpts{
                Name: "circuit_breaker_rejected_total",
                Help: "Total number of rejected requests due to open circuit",
            },
            []string{"name"},
        ),

        stateGauge: prometheus.NewGaugeVec(
            prometheus.GaugeOpts{
                Name: "circuit_breaker_state",
                Help: "Current state of the circuit breaker (0=closed, 1=half-open, 2=open)",
            },
            []string{"name"},
        ),

        errorRateGauge: prometheus.NewGaugeVec(
            prometheus.GaugeOpts{
                Name: "circuit_breaker_error_rate",
                Help: "Current error rate of the circuit breaker",
            },
            []string{"name"},
        ),
    }

    // Register metrics with Prometheus
    prometheus.MustRegister(
        collector.successCounter,
        collector.failureCounter,
        collector.rejectedCounter,
        collector.stateGauge,
        collector.errorRateGauge,
    )

    return collector
}

// RegisterMetrics makes metrics the source for the breaker labelled name,
// replacing any source previously registered under that name. When the source
// actually changes, the remembered export baseline for name is dropped so the
// new source's counts are exported from zero.
func (c *PrometheusCircuitBreakerCollector) RegisterMetrics(name string, metrics *CircuitBreakerMetrics) {
    if existing, ok := c.metrics[name]; !ok || existing != metrics {
        // delete is a no-op on a nil map, so this is safe on any collector.
        delete(c.lastExported, name)
    }
    c.metrics[name] = metrics
}

// UpdateMetrics copies the current values of every registered
// CircuitBreakerMetrics into the Prometheus vectors.
//
// The source counts are cumulative, while a Prometheus counter is advanced
// with Add. Only the growth since the previous call is therefore added;
// adding the full count on every call would inflate the exported totals each
// time UpdateMetrics runs. The state and error-rate gauges are overwritten
// with the current values.
func (c *PrometheusCircuitBreakerCollector) UpdateMetrics() {
    if c.lastExported == nil {
        c.lastExported = make(map[string]counterSnapshot)
    }

    for name, metrics := range c.metrics {
        m := metrics.GetMetrics()

        current := counterSnapshot{
            success:  m["success_count"].(int64),
            failure:  m["failure_count"].(int64),
            rejected: m["rejected_count"].(int64),
        }
        previous := c.lastExported[name] // zero snapshot on first export

        c.successCounter.WithLabelValues(name).Add(counterDelta(current.success, previous.success))
        c.failureCounter.WithLabelValues(name).Add(counterDelta(current.failure, previous.failure))
        c.rejectedCounter.WithLabelValues(name).Add(counterDelta(current.rejected, previous.rejected))
        c.lastExported[name] = current

        // Map state to numeric value. Any string not listed below leaves the
        // gauge at 0.
        // NOTE(review): assumes circuitbreaker.State.String() yields exactly
        // "Closed", "HalfOpen" and "Open" — confirm against that package.
        var stateValue float64
        switch m["current_state"].(string) {
        case "Closed":
            stateValue = 0
        case "HalfOpen":
            stateValue = 1
        case "Open":
            stateValue = 2
        }
        c.stateGauge.WithLabelValues(name).Set(stateValue)

        // Set error rate
        c.errorRateGauge.WithLabelValues(name).Set(metrics.CalculateErrorRate())
    }
}

// SetupPrometheusHandler mounts the Prometheus scrape handler at "/metrics"
// on http.DefaultServeMux. It does not start a server.
func SetupPrometheusHandler() {
    scrapeHandler := promhttp.Handler()
    http.Handle("/metrics", scrapeHandler)
}

Health Checks and Circuit Breaker Status

Exposing circuit breaker status through health checks helps with operational visibility:

package health

import (
    "encoding/json"
    "net/http"
    
    "example.com/circuitbreaker"
)

// CircuitBreakerHealth is an http.Handler that reports the state of a set of
// named circuit breakers as JSON.
type CircuitBreakerHealth struct {
    // breakers maps a display name to the circuit breaker whose state is
    // reported under that name.
    breakers map[string]circuitbreaker.CircuitBreaker
}

// NewCircuitBreakerHealth returns a health handler with no circuit breakers
// registered yet.
func NewCircuitBreakerHealth() *CircuitBreakerHealth {
    registry := make(map[string]circuitbreaker.CircuitBreaker)
    return &CircuitBreakerHealth{breakers: registry}
}

// RegisterCircuitBreaker adds cb to the set reported by the health handler
// under the given name, replacing any breaker already registered with that
// name. The registry is allocated on first use, so a zero-value
// CircuitBreakerHealth is usable without going through the constructor.
func (h *CircuitBreakerHealth) RegisterCircuitBreaker(name string, cb circuitbreaker.CircuitBreaker) {
    // Writing to a nil map panics; allocate lazily for zero-value handlers.
    if h.breakers == nil {
        h.breakers = make(map[string]circuitbreaker.CircuitBreaker)
    }
    h.breakers[name] = cb
}

// ServeHTTP implements the http.Handler interface. It responds with a JSON
// document of the form
//
//	{"healthy": <bool>, "status": {"<name>": {"state": "<state>", "healthy": <bool>}, ...}}
//
// A breaker counts as healthy only while it is in the Closed state. The
// response status is 200 when every registered breaker is healthy (including
// when none are registered) and 503 Service Unavailable otherwise. If the
// document cannot be encoded, a 500 response is sent instead.
func (h *CircuitBreakerHealth) ServeHTTP(w http.ResponseWriter, r *http.Request) {
    status := make(map[string]interface{}, len(h.breakers))

    allHealthy := true
    for name, cb := range h.breakers {
        state := cb.State()
        healthy := state == circuitbreaker.Closed

        status[name] = map[string]interface{}{
            "state":   state.String(),
            "healthy": healthy,
        }

        if !healthy {
            allHealthy = false
        }
    }

    // Encode before touching the response so that an encoding failure can
    // still be reported with a proper error status instead of being dropped
    // after the status line has been sent.
    body, err := json.Marshal(map[string]interface{}{
        "status":  status,
        "healthy": allHealthy,
    })
    if err != nil {
        http.Error(w, "failed to encode circuit breaker health", http.StatusInternalServerError)
        return
    }

    w.Header().Set("Content-Type", "application/json")

    if !allHealthy {
        w.WriteHeader(http.StatusServiceUnavailable)
    }

    // The trailing newline matches what json.Encoder.Encode emits. A write
    // error here means the client went away; there is nothing left to do.
    _, _ = w.Write(append(body, '\n'))
}

// SetupHealthCheckHandler mounts health at "/health/circuit-breakers" on
// http.DefaultServeMux. It does not start a server.
func SetupHealthCheckHandler(health *CircuitBreakerHealth) {
    const route = "/health/circuit-breakers"
    http.Handle(route, health)
}