Monitoring and Observability
Implementing circuit breakers is only half the battle—you also need to monitor their behavior to ensure they’re working correctly and to tune their parameters.
Circuit Breaker Metrics
Collecting metrics from circuit breakers shows how many requests succeeded, failed, or were rejected, and how often the breaker changes state:
package metrics
import (
"sync"
"time"
"example.com/circuitbreaker"
)
// CircuitBreakerMetrics accumulates counters and state-transition history
// for a single named circuit breaker. The methods defined on it take mutex
// before touching any other field.
type CircuitBreakerMetrics struct {
	// name identifies the breaker these metrics belong to.
	name string

	// Outcome counters: completed successfully, completed with failure,
	// and refused by the breaker.
	successCount, failureCount, rejectedCount int64

	// stateTransitions counts how many times each state has been entered.
	stateTransitions map[circuitbreaker.State]int64
	// lastStateTransition is when currentState was last changed (or when
	// the collector was created, if it has never changed).
	lastStateTransition time.Time
	// currentState is the most recently recorded breaker state.
	currentState circuitbreaker.State

	mutex sync.RWMutex
}
// NewCircuitBreakerMetrics returns a metrics collector for the breaker with
// the given name. The collector starts in the Closed state with all counters
// at zero and the last-transition timestamp set to the creation time.
func NewCircuitBreakerMetrics(name string) *CircuitBreakerMetrics {
	m := new(CircuitBreakerMetrics)
	m.name = name
	m.currentState = circuitbreaker.Closed
	m.stateTransitions = map[circuitbreaker.State]int64{}
	m.lastStateTransition = time.Now()
	return m
}
// RecordSuccess increments the successful-request counter under the write
// lock.
func (m *CircuitBreakerMetrics) RecordSuccess() {
	m.mutex.Lock()
	m.successCount++
	m.mutex.Unlock()
}
// RecordFailure increments the failed-request counter under the write lock.
func (m *CircuitBreakerMetrics) RecordFailure() {
	m.mutex.Lock()
	m.failureCount++
	m.mutex.Unlock()
}
// RecordRejected increments the counter of requests that were refused
// because the circuit was open.
func (m *CircuitBreakerMetrics) RecordRejected() {
	m.mutex.Lock()
	m.rejectedCount++
	m.mutex.Unlock()
}
// RecordStateTransition notes that the breaker is now in newState. When
// newState equals the state already recorded the call is a no-op; otherwise
// the entry count for newState is incremented, the transition timestamp is
// refreshed, and newState becomes the current state.
func (m *CircuitBreakerMetrics) RecordStateTransition(newState circuitbreaker.State) {
	m.mutex.Lock()
	defer m.mutex.Unlock()

	if newState == m.currentState {
		return
	}

	m.currentState = newState
	m.lastStateTransition = time.Now()
	m.stateTransitions[newState]++
}
// GetMetrics returns a snapshot of the collector taken under the read lock.
//
// The result contains the keys "name" (string), "success_count",
// "failure_count" and "rejected_count" (int64), "current_state" (the state's
// String form), "last_state_transition" (time.Time) and "state_transitions"
// (a map[string]int64 keyed by each state's String form). The transitions
// map is freshly built, so callers may modify the returned maps freely.
func (m *CircuitBreakerMetrics) GetMetrics() map[string]interface{} {
	m.mutex.RLock()
	defer m.mutex.RUnlock()

	// Re-key the per-state counts by name so the snapshot is independent of
	// the circuitbreaker.State type.
	transitions := make(map[string]int64)
	for state, count := range m.stateTransitions {
		transitions[state.String()] = count
	}

	snapshot := make(map[string]interface{})
	snapshot["name"] = m.name
	snapshot["success_count"] = m.successCount
	snapshot["failure_count"] = m.failureCount
	snapshot["rejected_count"] = m.rejectedCount
	snapshot["current_state"] = m.currentState.String()
	snapshot["last_state_transition"] = m.lastStateTransition
	snapshot["state_transitions"] = transitions
	return snapshot
}
// CalculateErrorRate returns failures divided by (successes + failures)
// over the lifetime of the collector. Rejected requests are not part of the
// denominator. It returns 0 when no request outcome has been recorded yet.
func (m *CircuitBreakerMetrics) CalculateErrorRate() float64 {
	m.mutex.RLock()
	defer m.mutex.RUnlock()

	failures := m.failureCount
	attempts := m.successCount + failures
	if attempts != 0 {
		return float64(failures) / float64(attempts)
	}
	return 0.0
}
// InstrumentedCircuitBreaker wraps a circuit breaker with metrics.
// Its methods forward to the wrapped breaker and report what happened to the
// attached metrics collector.
type InstrumentedCircuitBreaker struct {
// circuitBreaker is the wrapped breaker that makes the allow/reject decisions.
circuitBreaker circuitbreaker.CircuitBreaker
// metrics receives success, failure, rejection and state-transition events.
metrics *CircuitBreakerMetrics
}
// NewInstrumentedCircuitBreaker wraps cb so that its activity is reported
// to metrics. Both arguments are stored as given; neither is copied.
func NewInstrumentedCircuitBreaker(
	cb circuitbreaker.CircuitBreaker,
	metrics *CircuitBreakerMetrics,
) *InstrumentedCircuitBreaker {
	wrapped := new(InstrumentedCircuitBreaker)
	wrapped.circuitBreaker = cb
	wrapped.metrics = metrics
	return wrapped
}
// AllowRequest asks the wrapped breaker whether a request may proceed and
// returns its answer. A refusal is counted as a rejected request.
//
// The breaker's state is sampled after the check and passed to the metrics
// collector. Without this, a transition made during the admission check
// would only be noticed on the next RecordResult call, by which time the
// breaker may have moved on again and the intermediate state would never be
// counted.
// NOTE(review): this presumes the admission check is where an open breaker
// moves to half-open once its timeout elapses; confirm against the
// circuitbreaker package.
func (i *InstrumentedCircuitBreaker) AllowRequest() bool {
	allowed := i.circuitBreaker.AllowRequest()
	if !allowed {
		i.metrics.RecordRejected()
	}

	// RecordStateTransition is a no-op when the state has not changed.
	i.metrics.RecordStateTransition(i.circuitBreaker.State())
	return allowed
}
// RecordResult reports the outcome of a request. The wrapped breaker is
// informed first, then the matching success or failure counter is bumped,
// and finally the breaker's state is sampled so that any transition caused
// by this result is captured in the metrics.
func (i *InstrumentedCircuitBreaker) RecordResult(success bool) {
	i.circuitBreaker.RecordResult(success)

	switch success {
	case true:
		i.metrics.RecordSuccess()
	default:
		i.metrics.RecordFailure()
	}

	// Sample the state after the breaker has processed the result.
	currentState := i.circuitBreaker.State()
	i.metrics.RecordStateTransition(currentState)
}
// Execute runs fn under circuit breaker protection.
//
// If the breaker refuses the request, fn is not called and
// circuitbreaker.ErrCircuitOpen is returned. Otherwise fn is called, its
// outcome (err == nil means success) is reported through RecordResult, and
// its error is returned unchanged.
//
// If fn panics, the request is reported as a failure and the panic keeps
// propagating to the caller. Without this, an admitted request that panics
// would never report a result to the breaker or the metrics.
func (i *InstrumentedCircuitBreaker) Execute(fn func() error) error {
	if !i.AllowRequest() {
		return circuitbreaker.ErrCircuitOpen
	}

	// reported is set just before the normal-path RecordResult call, so the
	// deferred guard fires only when fn itself did not return (panic or
	// runtime.Goexit) and never reports the same request twice.
	reported := false
	defer func() {
		if !reported {
			i.RecordResult(false)
		}
	}()

	err := fn()
	reported = true
	i.RecordResult(err == nil)
	return err
}
// State reports the wrapped breaker's current state. It only reads the
// state; nothing is recorded in the metrics.
func (i *InstrumentedCircuitBreaker) State() circuitbreaker.State {
	current := i.circuitBreaker.State()
	return current
}
Prometheus Integration
Exposing circuit breaker metrics to Prometheus enables powerful monitoring and alerting:
package metrics
import (
"net/http"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
)
// exportedCounts holds the cumulative counter values of one circuit breaker
// as they were last pushed to Prometheus.
type exportedCounts struct {
	success, failure, rejected int64
}

// counterDelta returns how much a Prometheus counter must be advanced to
// move from the previously exported cumulative value to the current one.
// If the source value went backwards it is treated as having restarted from
// zero, so the whole current value is returned; this also ensures a negative
// value is never passed to Counter.Add, which panics on negative input.
func counterDelta(previous, current int64) float64 {
	if current < previous {
		return float64(current)
	}
	return float64(current - previous)
}

// PrometheusCircuitBreakerCollector collects circuit breaker metrics for
// Prometheus. It mirrors registered CircuitBreakerMetrics into Prometheus
// vectors labelled by breaker name each time UpdateMetrics is called.
//
// The collector does no locking of its own; RegisterMetrics and
// UpdateMetrics must not be called concurrently.
type PrometheusCircuitBreakerCollector struct {
	// metrics maps breaker name to the in-process collector it is read from.
	metrics map[string]*CircuitBreakerMetrics
	// exported remembers, per breaker name, the cumulative counts already
	// added to the Prometheus counters so each update adds only the delta.
	exported map[string]exportedCounts

	successCounter  *prometheus.CounterVec
	failureCounter  *prometheus.CounterVec
	rejectedCounter *prometheus.CounterVec
	stateGauge      *prometheus.GaugeVec
	errorRateGauge  *prometheus.GaugeVec
}

// NewPrometheusCircuitBreakerCollector creates a new Prometheus collector
// and registers its five metric vectors with the default Prometheus
// registry.
//
// Registration uses prometheus.MustRegister, which panics if registration
// fails. Calling this constructor more than once in the same process will
// therefore panic on the duplicate registration.
func NewPrometheusCircuitBreakerCollector() *PrometheusCircuitBreakerCollector {
	collector := &PrometheusCircuitBreakerCollector{
		metrics:  make(map[string]*CircuitBreakerMetrics),
		exported: make(map[string]exportedCounts),
		successCounter: prometheus.NewCounterVec(
			prometheus.CounterOpts{
				Name: "circuit_breaker_success_total",
				Help: "Total number of successful requests",
			},
			[]string{"name"},
		),
		failureCounter: prometheus.NewCounterVec(
			prometheus.CounterOpts{
				Name: "circuit_breaker_failure_total",
				Help: "Total number of failed requests",
			},
			[]string{"name"},
		),
		rejectedCounter: prometheus.NewCounterVec(
			prometheus.CounterOpts{
				Name: "circuit_breaker_rejected_total",
				Help: "Total number of rejected requests due to open circuit",
			},
			[]string{"name"},
		),
		stateGauge: prometheus.NewGaugeVec(
			prometheus.GaugeOpts{
				Name: "circuit_breaker_state",
				Help: "Current state of the circuit breaker (0=closed, 1=half-open, 2=open)",
			},
			[]string{"name"},
		),
		errorRateGauge: prometheus.NewGaugeVec(
			prometheus.GaugeOpts{
				Name: "circuit_breaker_error_rate",
				Help: "Current error rate of the circuit breaker",
			},
			[]string{"name"},
		),
	}

	// Register metrics with Prometheus
	prometheus.MustRegister(
		collector.successCounter,
		collector.failureCounter,
		collector.rejectedCounter,
		collector.stateGauge,
		collector.errorRateGauge,
	)

	return collector
}

// RegisterMetrics registers circuit breaker metrics under name so that
// UpdateMetrics exports them. Registering a different collector under an
// existing name replaces the old one and discards its export baseline, so
// the new collector's counts are exported from zero.
func (c *PrometheusCircuitBreakerCollector) RegisterMetrics(name string, metrics *CircuitBreakerMetrics) {
	// Allow use of a collector that was not built by the constructor.
	if c.metrics == nil {
		c.metrics = make(map[string]*CircuitBreakerMetrics)
	}

	if existing, ok := c.metrics[name]; ok && existing != metrics {
		// delete is a no-op on a nil map, so no guard is needed here.
		delete(c.exported, name)
	}
	c.metrics[name] = metrics
}

// UpdateMetrics updates Prometheus metrics from circuit breaker metrics.
//
// The source counters are cumulative, and so are Prometheus counters, so
// only the increase since the previous call is added. Adding the full
// cumulative value on every call would inflate the exported totals with
// each update. The state and error-rate gauges are simply overwritten.
func (c *PrometheusCircuitBreakerCollector) UpdateMetrics() {
	if c.exported == nil {
		c.exported = make(map[string]exportedCounts)
	}

	for name, metrics := range c.metrics {
		m := metrics.GetMetrics()

		current := exportedCounts{
			success:  m["success_count"].(int64),
			failure:  m["failure_count"].(int64),
			rejected: m["rejected_count"].(int64),
		}
		previous := c.exported[name] // zero value on first export

		c.successCounter.WithLabelValues(name).Add(counterDelta(previous.success, current.success))
		c.failureCounter.WithLabelValues(name).Add(counterDelta(previous.failure, current.failure))
		c.rejectedCounter.WithLabelValues(name).Add(counterDelta(previous.rejected, current.rejected))
		c.exported[name] = current

		// Map state to numeric value.
		// NOTE(review): the case labels are assumed to match what
		// circuitbreaker.State.String() returns; confirm. Any other string
		// leaves stateValue at 0, which reads as "closed".
		var stateValue float64
		switch m["current_state"].(string) {
		case "Closed":
			stateValue = 0
		case "HalfOpen":
			stateValue = 1
		case "Open":
			stateValue = 2
		}
		c.stateGauge.WithLabelValues(name).Set(stateValue)

		// Set error rate
		c.errorRateGauge.WithLabelValues(name).Set(metrics.CalculateErrorRate())
	}
}
// SetupPrometheusHandler mounts the Prometheus scrape handler at "/metrics"
// on the default net/http serve mux.
func SetupPrometheusHandler() {
	scrapeHandler := promhttp.Handler()
	http.Handle("/metrics", scrapeHandler)
}
Health Checks and Circuit Breaker Status
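Exposing circuit breaker status through a health-check endpoint shows operators the state of every breaker; the handler below responds with 503 Service Unavailable whenever any registered breaker is not closed: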
package health
import (
"encoding/json"
"net/http"
"example.com/circuitbreaker"
)
// CircuitBreakerHealth represents the health status of circuit breakers.
// It serves as an http.Handler that reports the state of every registered
// breaker.
type CircuitBreakerHealth struct {
// breakers maps a display name to the breaker whose state is reported.
breakers map[string]circuitbreaker.CircuitBreaker
}
// NewCircuitBreakerHealth returns a health check handler with no circuit
// breakers registered yet.
func NewCircuitBreakerHealth() *CircuitBreakerHealth {
	h := new(CircuitBreakerHealth)
	h.breakers = map[string]circuitbreaker.CircuitBreaker{}
	return h
}
// RegisterCircuitBreaker registers a circuit breaker for health checks
// under name. Registering the same name again replaces the earlier breaker.
//
// The breaker map is created on first use, so a zero-value
// CircuitBreakerHealth works here instead of panicking on a write to a nil
// map.
func (h *CircuitBreakerHealth) RegisterCircuitBreaker(name string, cb circuitbreaker.CircuitBreaker) {
	if h.breakers == nil {
		h.breakers = make(map[string]circuitbreaker.CircuitBreaker)
	}
	h.breakers[name] = cb
}
// ServeHTTP implements the http.Handler interface.
//
// It writes a JSON object with two keys: "status", mapping each registered
// breaker name to its {"state", "healthy"} pair, and "healthy", which is
// true only when every breaker is in the Closed state. The response code is
// 200 when all breakers are healthy and 503 Service Unavailable otherwise.
//
// The body is encoded before any header is written. If encoding fails, the
// client gets a 500 instead of a success or 503 status with a truncated
// body.
func (h *CircuitBreakerHealth) ServeHTTP(w http.ResponseWriter, r *http.Request) {
	status := make(map[string]interface{}, len(h.breakers))
	allHealthy := true

	for name, cb := range h.breakers {
		state := cb.State()
		healthy := state == circuitbreaker.Closed
		status[name] = map[string]interface{}{
			"state":   state.String(),
			"healthy": healthy,
		}
		if !healthy {
			allHealthy = false
		}
	}

	body, err := json.Marshal(map[string]interface{}{
		"status":  status,
		"healthy": allHealthy,
	})
	if err != nil {
		http.Error(w, "failed to encode circuit breaker health", http.StatusInternalServerError)
		return
	}

	w.Header().Set("Content-Type", "application/json")
	if !allHealthy {
		w.WriteHeader(http.StatusServiceUnavailable)
	}

	// json.Encoder.Encode terminates its output with a newline; keep that so
	// the response bytes are the same as before. A write error here means
	// the client has gone away and there is nothing useful left to do.
	_, _ = w.Write(append(body, '\n'))
}
// SetupHealthCheckHandler mounts health at "/health/circuit-breakers" on the
// default net/http serve mux.
func SetupHealthCheckHandler(health *CircuitBreakerHealth) {
	const pattern = "/health/circuit-breakers"
	http.Handle(pattern, health)
}