Production Best Practices | Andrew Odendaal

Context in the Real World

Everything we’ve covered so far works great in development, but production is where context patterns either shine or fall apart spectacularly. I’ve learned this the hard way—context issues that never show up during testing can bring down entire systems under load.

The biggest production challenges with context aren’t about correctness—they’re about performance, observability, and operational complexity. You need to monitor context usage, prevent resource leaks, and debug issues across distributed systems.

Monitoring Context Performance

Context operations can become bottlenecks under high load. Here’s how I monitor context performance in production:

// ContextMetrics groups the Prometheus collectors used to observe context
// lifecycle events: creations, cancellations, timeout durations and the number
// of contexts currently considered active.
type ContextMetrics struct {
    creationCounter     *prometheus.CounterVec   // labels: type, operation
    cancellationCounter *prometheus.CounterVec   // labels: reason, operation
    timeoutHistogram    *prometheus.HistogramVec // label: operation
    activeContexts      prometheus.Gauge
}

// NewContextMetrics constructs a ContextMetrics with all four collectors
// initialised.
//
// NOTE(review): the collectors are only constructed here; nothing in this
// function registers them with a Prometheus registry — confirm that
// registration happens elsewhere before relying on them being scraped.
func NewContextMetrics() *ContextMetrics {
    creations := prometheus.NewCounterVec(
        prometheus.CounterOpts{
            Name: "context_creations_total",
            Help: "Total context creations by type",
        },
        []string{"type", "operation"},
    )

    cancellations := prometheus.NewCounterVec(
        prometheus.CounterOpts{
            Name: "context_cancellations_total",
            Help: "Context cancellations by reason",
        },
        []string{"reason", "operation"},
    )

    // Bucket boundaries are in seconds and span 1ms through 60s.
    timeouts := prometheus.NewHistogramVec(
        prometheus.HistogramOpts{
            Name:    "context_timeout_duration_seconds",
            Help:    "Context timeout durations",
            Buckets: []float64{0.001, 0.01, 0.1, 1, 5, 10, 30, 60},
        },
        []string{"operation"},
    )

    active := prometheus.NewGauge(
        prometheus.GaugeOpts{
            Name: "active_contexts_current",
            Help: "Currently active contexts",
        },
    )

    return &ContextMetrics{
        creationCounter:     creations,
        cancellationCounter: cancellations,
        timeoutHistogram:    timeouts,
        activeContexts:      active,
    }
}

// MonitoredContext decorates a context.Context so that its cancellation is
// reported to a ContextMetrics instance. All context.Context methods other
// than Done are provided by the embedded context.
type MonitoredContext struct {
    context.Context
    metrics   *ContextMetrics
    operation string
    startTime time.Time

    // watchOnce ensures that at most one watcher goroutine is started and that
    // the cancellation is recorded exactly once, however often Done is called.
    watchOnce sync.Once
}

// WrapContext wraps ctx in a MonitoredContext labelled with operation. It
// increments the creation counter (type "wrapped") and the active-context
// gauge, and records the wrap time so timeout durations can be measured.
func (cm *ContextMetrics) WrapContext(ctx context.Context, operation string) *MonitoredContext {
    cm.creationCounter.WithLabelValues("wrapped", operation).Inc()
    cm.activeContexts.Inc()

    return &MonitoredContext{
        Context:   ctx,
        metrics:   cm,
        operation: operation,
        startTime: time.Now(),
    }
}

// Done returns the embedded context's Done channel. The first call on a
// cancellable context starts a single background watcher that records the
// cancellation once the channel is closed.
//
// Done is typically called many times (every select on the context, and by
// the context package when deriving children), so the watcher is guarded by
// a sync.Once: starting one goroutine per call would count the same
// cancellation repeatedly and decrement the active gauge more than once.
func (mc *MonitoredContext) Done() <-chan struct{} {
    done := mc.Context.Done()
    if done == nil {
        // A nil channel means the context can never be cancelled; a watcher
        // receiving from it would block forever.
        return nil
    }

    mc.watchOnce.Do(func() {
        go func() {
            <-done
            mc.recordCancellation()
        }()
    })

    return done
}

// recordCancellation decrements the active-context gauge and increments the
// cancellation counter with a reason derived from Err: "cancelled" for
// context.Canceled, "timeout" for context.DeadlineExceeded (which also
// observes the time since wrapping in the timeout histogram), and "unknown"
// otherwise.
func (mc *MonitoredContext) recordCancellation() {
    mc.metrics.activeContexts.Dec()

    reason := "unknown"
    switch err := mc.Err(); {
    case errors.Is(err, context.Canceled):
        reason = "cancelled"
    case errors.Is(err, context.DeadlineExceeded):
        reason = "timeout"
        elapsed := time.Since(mc.startTime)
        mc.metrics.timeoutHistogram.WithLabelValues(mc.operation).Observe(elapsed.Seconds())
    }

    mc.metrics.cancellationCounter.WithLabelValues(reason, mc.operation).Inc()
}

This monitoring gives you visibility into context usage patterns and helps identify performance issues.

Context Leak Detection

Context leaks are silent killers in production. Here’s my leak detection system:

// ContextLeakDetector tracks registered contexts and periodically logs those
// that have stayed registered for longer than five minutes.
type ContextLeakDetector struct {
    activeContexts map[uintptr]*ContextInfo // keyed by ContextInfo.ID; guarded by mu
    mu             sync.RWMutex
    alertThreshold int           // suspicious-context count above which an ALERT line is logged
    checkInterval  time.Duration // period between leak checks
    stopChan       chan struct{} // closed by Stop to end the monitor goroutine
}

// ContextInfo is the bookkeeping record kept for each registered context.
type ContextInfo struct {
    ID         uintptr
    CreatedAt  time.Time
    Operation  string
    StackTrace string
    AccessCount int64
}

// NewContextLeakDetector creates a detector and starts its background monitor
// goroutine, which runs a leak check every interval until Stop is called.
//
// threshold is the number of suspicious contexts above which an ALERT line is
// logged. A non-positive interval is replaced by 30 seconds, because
// time.NewTicker panics on a non-positive duration.
func NewContextLeakDetector(threshold int, interval time.Duration) *ContextLeakDetector {
    const defaultInterval = 30 * time.Second
    if interval <= 0 {
        interval = defaultInterval
    }

    detector := &ContextLeakDetector{
        activeContexts: make(map[uintptr]*ContextInfo),
        alertThreshold: threshold,
        checkInterval:  interval,
        stopChan:       make(chan struct{}),
    }

    go detector.monitor()
    return detector
}

// Stop terminates the background monitor goroutine. It is safe to call Stop
// more than once and from multiple goroutines.
func (cld *ContextLeakDetector) Stop() {
    cld.mu.Lock()
    defer cld.mu.Unlock()

    select {
    case <-cld.stopChan:
        // Already stopped; closing again would panic.
    default:
        close(cld.stopChan)
    }
}

// contextID derives a map key from the identity of the value stored in ctx.
//
// Taking the address of a context.Context parameter yields the address of the
// callee's own local variable, which is unrelated to the context and differs
// (or collides) between calls. Instead this reads the data word of the
// interface value, which for the pointer-typed contexts produced by the
// context package is the pointer to the context itself.
//
// NOTE(review): this relies on interface values being laid out as a
// (type, data) pair of words, as in the standard gc toolchain. The returned
// uintptr does not keep the context alive, so an address may be reused after
// the context has been garbage collected.
func contextID(ctx context.Context) uintptr {
    type ifaceWords struct {
        typ  unsafe.Pointer
        data unsafe.Pointer
    }
    return uintptr((*ifaceWords)(unsafe.Pointer(&ctx)).data)
}

// RegisterContext starts tracking ctx under the given operation name,
// recording the registration time and the calling goroutine's stack trace
// (truncated to 2048 bytes). A nil ctx is ignored.
func (cld *ContextLeakDetector) RegisterContext(ctx context.Context, operation string) {
    if ctx == nil {
        return
    }
    id := contextID(ctx)

    // Capture stack trace for debugging. Done before taking the lock so the
    // critical section stays short.
    buf := make([]byte, 2048)
    n := runtime.Stack(buf, false)

    info := &ContextInfo{
        ID:          id,
        CreatedAt:   time.Now(),
        Operation:   operation,
        StackTrace:  string(buf[:n]),
        AccessCount: 1,
    }

    cld.mu.Lock()
    defer cld.mu.Unlock()
    cld.activeContexts[id] = info
}

// UnregisterContext stops tracking ctx. It must be passed the same context
// value that was given to RegisterContext (not a wrapper around it).
// Unregistering an unknown or nil context is a no-op.
func (cld *ContextLeakDetector) UnregisterContext(ctx context.Context) {
    if ctx == nil {
        return
    }
    id := contextID(ctx)

    cld.mu.Lock()
    defer cld.mu.Unlock()
    delete(cld.activeContexts, id)
}

// monitor runs checkForLeaks on every tick of checkInterval until stopChan is
// closed.
func (cld *ContextLeakDetector) monitor() {
    ticker := time.NewTicker(cld.checkInterval)
    defer ticker.Stop()

    for {
        select {
        case <-ticker.C:
            cld.checkForLeaks()
        case <-cld.stopChan:
            return
        }
    }
}

// checkForLeaks scans the registered contexts, logs up to five that are older
// than five minutes together with their registration stack trace, and logs an
// ALERT line when the number of such contexts exceeds alertThreshold.
func (cld *ContextLeakDetector) checkForLeaks() {
    cld.mu.RLock()
    defer cld.mu.RUnlock()

    now := time.Now()
    suspiciousContexts := 0

    for _, info := range cld.activeContexts {
        age := now.Sub(info.CreatedAt)

        // Flag contexts older than 5 minutes
        if age > 5*time.Minute {
            suspiciousContexts++
            if suspiciousContexts <= 5 { // Don't spam logs
                log.Printf("POTENTIAL LEAK: Context %s created %v ago at:\n%s",
                    info.Operation, age, info.StackTrace)
            }
        }
    }

    if suspiciousContexts > cld.alertThreshold {
        log.Printf("ALERT: %d potentially leaked contexts detected", suspiciousContexts)
    }
}

This detector helps catch context leaks before they cause memory issues.

High-Performance Context Pooling

In high-throughput systems, context creation overhead matters. Here’s my pooling approach:

// ContextPool recycles PooledContext wrappers through a sync.Pool and limits
// how many pooled wrappers may be checked out at once.
type ContextPool struct {
    pool        sync.Pool
    maxPoolSize int   // maximum number of pooled wrappers checked out concurrently
    currentSize int64 // pooled wrappers currently checked out; accessed atomically
    metrics     *ContextMetrics // optional; may be nil
}

// NewContextPool returns a pool that hands out at most maxSize pooled wrappers
// at a time. metrics may be nil, in which case no metrics are recorded.
func NewContextPool(maxSize int, metrics *ContextMetrics) *ContextPool {
    return &ContextPool{
        pool: sync.Pool{
            New: func() interface{} {
                return &PooledContext{}
            },
        },
        maxPoolSize: maxSize,
        metrics:     metrics,
    }
}

// PooledContext wraps a parent context so that the wrapper itself can be
// recycled. After Release the embedded Context is nil and the wrapper must
// not be used again by the caller.
type PooledContext struct {
    context.Context
    pool      *ContextPool // owning pool; nil for unpooled overflow wrappers
    inUse     bool         // true between Get and Put
    createdAt time.Time    // time of the most recent Get
}

// Get returns a wrapper around parent. While fewer than maxPoolSize pooled
// wrappers are checked out, the wrapper comes from the pool and occupies one
// slot until it is released. Otherwise a fresh, unpooled wrapper is returned
// whose Release is a no-op.
func (cp *ContextPool) Get(parent context.Context) *PooledContext {
    // Reserve the slot first and roll back on overflow so concurrent callers
    // cannot both pass a separate load-then-add check and exceed the limit.
    if atomic.AddInt64(&cp.currentSize, 1) > int64(cp.maxPoolSize) {
        atomic.AddInt64(&cp.currentSize, -1)
        // Pool full, create new
        return &PooledContext{
            Context:   parent,
            createdAt: time.Now(),
        }
    }

    pooled := cp.pool.Get().(*PooledContext)
    pooled.Context = parent
    pooled.pool = cp
    pooled.inUse = true
    pooled.createdAt = time.Now()

    if cp.metrics != nil {
        cp.metrics.creationCounter.WithLabelValues("pooled", "get").Inc()
    }

    return pooled
}

// Put returns ctx to the pool and frees the slot it occupied. Wrappers that
// are nil, belong to another pool, or are not currently checked out are
// ignored, which makes a repeated Put harmless.
//
// NOTE(review): inUse is not synchronised; Put must not be called
// concurrently for the same wrapper.
func (cp *ContextPool) Put(ctx *PooledContext) {
    if ctx == nil || ctx.pool != cp || !ctx.inUse {
        return
    }

    ctx.inUse = false
    ctx.Context = nil

    // Every checked-out wrapper occupied a slot, so every return frees one.
    // Freeing it only for old wrappers would leave the counter stuck at the
    // limit and permanently disable pooling.
    atomic.AddInt64(&cp.currentSize, -1)

    // Don't pool old contexts
    if time.Since(ctx.createdAt) > time.Hour {
        return
    }

    cp.pool.Put(ctx)
}

// Release returns the wrapper to its owning pool, if it has one.
func (pc *PooledContext) Release() {
    if pc.pool != nil {
        pc.pool.Put(pc)
    }
}

This pooling reduces allocation overhead while preventing memory bloat.

Distributed Context Tracing

In microservices, you need to trace context across service boundaries:

// DistributedTracer copies request-scoped identifiers between a context and
// string headers so they can cross service boundaries.
type DistributedTracer struct {
    serviceName string
}

// NewDistributedTracer returns a tracer that reports serviceName in the
// X-Service-Path header.
func NewDistributedTracer(serviceName string) *DistributedTracer {
    return &DistributedTracer{serviceName: serviceName}
}

// InjectHeaders writes the request ID, trace ID and user ID found in ctx into
// headers (X-Request-ID, X-Trace-ID, X-User-ID) and always sets
// X-Service-Path to this tracer's service name. Identifiers that are absent
// from ctx are left untouched in headers. A nil headers map is ignored, since
// writing to it would panic.
func (dt *DistributedTracer) InjectHeaders(ctx context.Context, headers map[string]string) {
    if headers == nil {
        return
    }

    if requestID, ok := GetRequestID(ctx); ok {
        headers["X-Request-ID"] = requestID
    }

    if traceID, ok := GetTraceID(ctx); ok {
        headers["X-Trace-ID"] = traceID
    }

    if userID, ok := GetUserID(ctx); ok {
        headers["X-User-ID"] = userID
    }

    // Add service hop information
    headers["X-Service-Path"] = dt.serviceName
}

// ExtractContext builds a new context, rooted at context.Background, carrying
// whichever of X-Request-ID, X-Trace-ID and X-User-ID are present and
// non-empty in headers.
func (dt *DistributedTracer) ExtractContext(headers map[string]string) context.Context {
    ctx := context.Background()

    if requestID := headers["X-Request-ID"]; requestID != "" {
        ctx = WithRequestID(ctx, requestID)
    }

    if traceID := headers["X-Trace-ID"]; traceID != "" {
        ctx = WithTraceID(ctx, traceID)
    }

    if userID := headers["X-User-ID"]; userID != "" {
        ctx = WithUserID(ctx, userID)
    }

    return ctx
}

// DoRequest injects the tracing headers derived from ctx into req and sends
// it with http.DefaultClient. The request is bound to ctx, so cancelling ctx
// or exceeding its deadline aborts the call. On success the caller is
// responsible for closing the response body.
func (dt *DistributedTracer) DoRequest(ctx context.Context, req *http.Request) (*http.Response, error) {
    headers := make(map[string]string)
    dt.InjectHeaders(ctx, headers)

    for key, value := range headers {
        req.Header.Set(key, value)
    }

    // Without this the request keeps whatever context it was built with and
    // ctx would only influence the headers. WithContext returns a shallow
    // copy that shares req.Header, so the headers set above are sent.
    if ctx != nil {
        req = req.WithContext(ctx)
    }

    return http.DefaultClient.Do(req)
}

This ensures context information flows correctly across service boundaries.

Production Configuration Management

Production systems need configurable context behavior:

// ContextConfig holds the tunable settings of a ProductionContextManager.
type ContextConfig struct {
    DefaultTimeout      time.Duration `json:"default_timeout"`
    MaxTimeout         time.Duration `json:"max_timeout"`
    EnableLeakDetection bool          `json:"enable_leak_detection"`
    EnablePooling      bool          `json:"enable_pooling"`
    MaxPoolSize        int           `json:"max_pool_size"`
    EnableMetrics      bool          `json:"enable_metrics"`
}

// ProductionContextManager creates contexts with a default timeout and
// optional metrics wrapping and leak tracking, according to its ContextConfig.
type ProductionContextManager struct {
    config       *ContextConfig // guarded by mu
    pool         *ContextPool
    leakDetector *ContextLeakDetector
    metrics      *ContextMetrics
    mu           sync.RWMutex
}

// NewProductionContextManager builds a manager from config, creating the
// metrics, pool and leak detector components only when the corresponding
// Enable* flag is set. config must not be nil.
func NewProductionContextManager(config *ContextConfig) *ProductionContextManager {
    manager := &ProductionContextManager{config: config}

    if config.EnableMetrics {
        manager.metrics = NewContextMetrics()
    }

    if config.EnablePooling {
        manager.pool = NewContextPool(config.MaxPoolSize, manager.metrics)
    }

    if config.EnableLeakDetection {
        manager.leakDetector = NewContextLeakDetector(10, 30*time.Second)
    }

    return manager
}

// CreateContext derives a cancellable context from parent for the named
// operation.
//
// If parent has no deadline and the configured DefaultTimeout is positive,
// that timeout is applied. The derived context is registered with the leak
// detector and wrapped with metrics when those components are enabled. The
// returned cancel function cancels the context, releases the default-timeout
// timer and unregisters the context from the leak detector; callers must
// call it.
func (pcm *ProductionContextManager) CreateContext(parent context.Context, operation string) (context.Context, context.CancelFunc) {
    pcm.mu.RLock()
    config := pcm.config
    pcm.mu.RUnlock()

    // Apply default timeout if none exists. The timeout's cancel func is kept
    // so its timer can be released; discarding it would keep the timer alive
    // until the timeout fires. A non-positive timeout is treated as "no
    // default" because it would otherwise produce an already-expired context.
    cancelTimeout := context.CancelFunc(func() {})
    if _, hasDeadline := parent.Deadline(); !hasDeadline && config.DefaultTimeout > 0 {
        parent, cancelTimeout = context.WithTimeout(parent, config.DefaultTimeout)
    }

    base, cancel := context.WithCancel(parent)

    // Register with leak detector
    if pcm.leakDetector != nil {
        pcm.leakDetector.RegisterContext(base, operation)
    }

    // Wrap with metrics
    var ctx context.Context = base
    if pcm.metrics != nil {
        ctx = pcm.metrics.WrapContext(base, operation)
    }

    // Enhanced cancel with cleanup. It unregisters base, the value that was
    // registered, rather than the metrics wrapper handed to the caller.
    enhancedCancel := func() {
        cancel()
        cancelTimeout()
        if pcm.leakDetector != nil {
            pcm.leakDetector.UnregisterContext(base)
        }
    }

    return ctx, enhancedCancel
}

// UpdateConfig replaces the active configuration after validating it. It
// returns an error, leaving the current configuration in place, when
// newConfig is nil, when either timeout is non-positive, or when
// DefaultTimeout exceeds MaxTimeout.
func (pcm *ProductionContextManager) UpdateConfig(newConfig *ContextConfig) error {
    if newConfig == nil {
        return fmt.Errorf("nil configuration")
    }

    pcm.mu.Lock()
    defer pcm.mu.Unlock()

    if newConfig.DefaultTimeout <= 0 || newConfig.MaxTimeout <= 0 {
        return fmt.Errorf("invalid timeout configuration")
    }

    if newConfig.DefaultTimeout > newConfig.MaxTimeout {
        return fmt.Errorf("default timeout exceeds max timeout")
    }

    pcm.config = newConfig
    return nil
}

This manager provides runtime configuration of context behavior for production environments.

The key insight about production context patterns is that observability and operational control are just as important as functional correctness. The most successful context implementations provide comprehensive monitoring, efficient resource management, and operational flexibility that enable teams to maintain reliable service at scale.

By implementing these production best practices, you’ll have a robust foundation for context-aware applications that can handle the complexities of real-world distributed systems while providing the visibility and control needed for effective operations. The patterns we’ve covered throughout this guide give you a complete toolkit for building sophisticated request lifecycle management that scales from development to production.

Context Pool Management for High-Throughput Systems

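In high-throughput systems, context creation overhead can become significant. Here’s a variation on the pooling strategy shown earlier: it serializes Get and Put with a mutex and also records a metric each time a context is returned to the pool: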

// ContextPool recycles PooledContext wrappers through a sync.Pool and limits
// how many slot-holding wrappers may be checked out at once. Get and Put are
// serialised by mu.
type ContextPool struct {
    pool        sync.Pool
    metrics     *ContextMetrics // optional; may be nil
    maxPoolSize int             // maximum number of slot-holding wrappers checked out
    currentSize int64           // slot-holding wrappers currently checked out; guarded by mu
    mu          sync.RWMutex
}

// PooledContext wraps a parent context so that the wrapper itself can be
// recycled. After Release the embedded Context is nil and the wrapper must
// not be used again by the caller.
type PooledContext struct {
    context.Context
    pool      *ContextPool // owning pool
    inUse     bool         // true between Get and Put; guarded by pool.mu
    createdAt time.Time    // time of the most recent Get

    // counted reports whether this wrapper currently occupies one of the
    // pool's slots, so that Put frees a slot only for wrappers that took one.
    counted bool
}

// NewContextPool returns a pool that hands out at most maxSize slot-holding
// wrappers at a time. metrics may be nil, in which case no metrics are
// recorded.
func NewContextPool(maxSize int, metrics *ContextMetrics) *ContextPool {
    return &ContextPool{
        pool: sync.Pool{
            New: func() interface{} {
                return &PooledContext{
                    createdAt: time.Now(),
                }
            },
        },
        metrics:     metrics,
        maxPoolSize: maxSize,
    }
}

// recordEvent increments the pooled-context counter for the given operation
// label when metrics are configured.
func (cp *ContextPool) recordEvent(operation string) {
    if cp.metrics != nil {
        cp.metrics.creationCounter.WithLabelValues("pooled", operation).Inc()
    }
}

// Get returns a wrapper around parent. While fewer than maxPoolSize slots are
// taken, the wrapper comes from the pool, takes a slot and is counted in the
// "get" metric. Otherwise a freshly allocated wrapper is returned that takes
// no slot; it is still owned by the pool and may be recycled on Put.
func (cp *ContextPool) Get(parent context.Context) *PooledContext {
    cp.mu.Lock()
    defer cp.mu.Unlock()

    if cp.currentSize >= int64(cp.maxPoolSize) {
        // Pool is full, create new context
        return &PooledContext{
            Context:   parent,
            pool:      cp,
            inUse:     true,
            createdAt: time.Now(),
        }
    }

    pooled := cp.pool.Get().(*PooledContext)
    pooled.Context = parent
    pooled.pool = cp
    pooled.inUse = true
    pooled.counted = true
    pooled.createdAt = time.Now()

    cp.currentSize++
    cp.recordEvent("get")

    return pooled
}

// Put returns ctx to the pool. Wrappers that are nil, belong to another pool,
// or are not currently checked out are ignored, which makes a repeated Put
// harmless. Wrappers checked out for more than an hour are dropped instead of
// being recycled.
func (cp *ContextPool) Put(ctx *PooledContext) {
    if ctx == nil {
        return
    }

    cp.mu.Lock()
    defer cp.mu.Unlock()

    // Checked under the lock so two concurrent Puts of the same wrapper
    // cannot both pass.
    if ctx.pool != cp || !ctx.inUse {
        return
    }

    ctx.inUse = false
    ctx.Context = nil

    // Free the slot on every return of a slot-holding wrapper. Freeing it
    // only for old wrappers would leave the counter stuck at the limit.
    if ctx.counted {
        ctx.counted = false
        cp.currentSize--
    }

    // Don't pool contexts that are too old
    if time.Since(ctx.createdAt) > time.Hour {
        return
    }

    cp.pool.Put(ctx)
    cp.recordEvent("put")
}

// Release returns the wrapper to its owning pool, if it has one.
func (pc *PooledContext) Release() {
    if pc.pool != nil {
        pc.pool.Put(pc)
    }
}

This pooling approach reduces allocation overhead in high-throughput scenarios while preventing memory bloat.

Distributed Context Tracing

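In microservices architectures, tracing context propagation across service boundaries is crucial. This variation extends the earlier tracer with OpenTracing spans and a pluggable propagator interface: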

// DistributedContextTracer starts OpenTracing spans enriched with
// request-scoped identifiers and propagates those identifiers through a
// ContextPropagator.
type DistributedContextTracer struct {
    tracer     opentracing.Tracer
    propagator ContextPropagator
}

// ContextPropagator moves request-scoped identifiers between a context and a
// string header map.
type ContextPropagator interface {
    // Inject writes the identifiers carried by ctx into headers.
    Inject(ctx context.Context, headers map[string]string) error
    // Extract builds a context from the identifiers found in headers.
    Extract(headers map[string]string) (context.Context, error)
}

// HTTPContextPropagator is a ContextPropagator that uses the X-Request-ID,
// X-Trace-ID and X-User-ID header names.
type HTTPContextPropagator struct{}

// Inject copies the non-empty request, trace and user IDs from ctx into
// headers. It returns an error when headers is nil, since writing to a nil
// map would panic.
//
// NOTE(review): this variant calls GetRequestID/GetTraceID/GetUserID as
// single-value functions, whereas the earlier DistributedTracer in this guide
// uses a (value, ok) form — confirm which signature the helpers really have.
func (hcp *HTTPContextPropagator) Inject(ctx context.Context, headers map[string]string) error {
    if headers == nil {
        return errors.New("nil headers map")
    }

    if requestID := GetRequestID(ctx); requestID != "" {
        headers["X-Request-ID"] = requestID
    }

    if traceID := GetTraceID(ctx); traceID != "" {
        headers["X-Trace-ID"] = traceID
    }

    if userID := GetUserID(ctx); userID != "" {
        headers["X-User-ID"] = userID
    }

    return nil
}

// Extract builds a new context, rooted at context.Background, carrying
// whichever of X-Request-ID, X-Trace-ID and X-User-ID are present and
// non-empty in headers. The returned error is always nil.
func (hcp *HTTPContextPropagator) Extract(headers map[string]string) (context.Context, error) {
    ctx := context.Background()

    if requestID := headers["X-Request-ID"]; requestID != "" {
        ctx = WithRequestID(ctx, requestID)
    }

    if traceID := headers["X-Trace-ID"]; traceID != "" {
        ctx = WithTraceID(ctx, traceID)
    }

    if userID := headers["X-User-ID"]; userID != "" {
        ctx = WithUserID(ctx, userID)
    }

    return ctx, nil
}

// NewDistributedContextTracer returns a tracer that starts spans on the given
// OpenTracing tracer and propagates identifiers with an HTTPContextPropagator.
func NewDistributedContextTracer(tracer opentracing.Tracer) *DistributedContextTracer {
    return &DistributedContextTracer{
        tracer:     tracer,
        propagator: &HTTPContextPropagator{},
    }
}

// StartSpanFromContext starts a span named operationName as a child of any
// span already in ctx and returns it together with a context carrying the new
// span. The span is tagged with request.id and user.id when those values are
// present in ctx. The caller is responsible for finishing the span.
//
// The span is created on the tracer supplied to NewDistributedContextTracer;
// only when that tracer is nil does it fall back to the global tracer.
func (dct *DistributedContextTracer) StartSpanFromContext(ctx context.Context, operationName string) (opentracing.Span, context.Context) {
    var span opentracing.Span
    if dct.tracer != nil {
        span, ctx = opentracing.StartSpanFromContextWithTracer(ctx, dct.tracer, operationName)
    } else {
        span, ctx = opentracing.StartSpanFromContext(ctx, operationName)
    }

    // Enrich span with context values
    if requestID := GetRequestID(ctx); requestID != "" {
        span.SetTag("request.id", requestID)
    }

    if userID := GetUserID(ctx); userID != "" {
        span.SetTag("user.id", userID)
    }

    return span, ctx
}

// InjectIntoHTTPHeaders asks the propagator for the headers derived from ctx
// and sets each of them on req, replacing any existing values. It returns the
// propagator's error, if any, without modifying req.
func (dct *DistributedContextTracer) InjectIntoHTTPHeaders(ctx context.Context, req *http.Request) error {
    headers := make(map[string]string)

    if err := dct.propagator.Inject(ctx, headers); err != nil {
        return err
    }

    for key, value := range headers {
        req.Header.Set(key, value)
    }

    return nil
}

This tracing system ensures context information flows correctly across service boundaries with proper observability.

Context Configuration Management

Production systems need configurable context behavior that can be adjusted without code changes. This variation of the earlier manager also makes the leak-check interval configurable:

// ContextConfig holds the tunable settings of a ConfigurableContextManager.
type ContextConfig struct {
    DefaultTimeout        time.Duration `json:"default_timeout"`
    MaxTimeout           time.Duration `json:"max_timeout"`
    EnableLeakDetection  bool          `json:"enable_leak_detection"`
    LeakCheckInterval    time.Duration `json:"leak_check_interval"`
    EnablePooling        bool          `json:"enable_pooling"`
    MaxPoolSize          int           `json:"max_pool_size"`
    EnableMetrics        bool          `json:"enable_metrics"`
    ValueCacheSize       int           `json:"value_cache_size"`
}

// ConfigurableContextManager creates contexts with a default timeout and
// optional metrics wrapping and leak tracking, according to its ContextConfig.
type ConfigurableContextManager struct {
    config       *ContextConfig // guarded by mu
    pool         *ContextPool
    leakDetector *ContextLeakDetector
    metrics      *ContextMetrics
    mu           sync.RWMutex
}

// NewConfigurableContextManager builds a manager from config, creating the
// metrics, pool and leak detector components only when the corresponding
// Enable* flag is set. config must not be nil.
//
// A non-positive LeakCheckInterval is replaced by 30 seconds so that an unset
// field does not hand the detector an unusable interval.
func NewConfigurableContextManager(config *ContextConfig) *ConfigurableContextManager {
    const defaultLeakCheckInterval = 30 * time.Second

    manager := &ConfigurableContextManager{
        config: config,
    }

    if config.EnableMetrics {
        manager.metrics = NewContextMetrics()
    }

    if config.EnablePooling {
        manager.pool = NewContextPool(config.MaxPoolSize, manager.metrics)
    }

    if config.EnableLeakDetection {
        interval := config.LeakCheckInterval
        if interval <= 0 {
            interval = defaultLeakCheckInterval
        }
        manager.leakDetector = NewContextLeakDetector(10, interval)
    }

    return manager
}

// CreateContext derives a cancellable context from parent for the named
// operation.
//
// If parent has no deadline and the configured DefaultTimeout is positive,
// that timeout is applied. The derived context is registered with the leak
// detector and wrapped with metrics when those components are enabled. The
// returned cancel function cancels the context, releases the default-timeout
// timer and unregisters the context from the leak detector; callers must
// call it.
func (ccm *ConfigurableContextManager) CreateContext(parent context.Context, operation string) (context.Context, context.CancelFunc) {
    ccm.mu.RLock()
    config := ccm.config
    ccm.mu.RUnlock()

    // Apply default timeout if none exists. The timeout's cancel func is kept
    // so its timer can be released; discarding it would keep the timer alive
    // until the timeout fires. A non-positive timeout is treated as "no
    // default" because it would otherwise produce an already-expired context.
    cancelTimeout := context.CancelFunc(func() {})
    if _, hasDeadline := parent.Deadline(); !hasDeadline && config.DefaultTimeout > 0 {
        parent, cancelTimeout = context.WithTimeout(parent, config.DefaultTimeout)
    }

    base, cancel := context.WithCancel(parent)

    // Register with leak detector
    if ccm.leakDetector != nil {
        ccm.leakDetector.RegisterContext(base, operation)
    }

    // Wrap with metrics if enabled
    var ctx context.Context = base
    if ccm.metrics != nil {
        ctx = ccm.metrics.WrapContext(base, operation)
    }

    // Enhanced cancel function with cleanup. It unregisters base, the value
    // that was registered, rather than the metrics wrapper handed to the
    // caller.
    enhancedCancel := func() {
        cancel()
        cancelTimeout()
        if ccm.leakDetector != nil {
            ccm.leakDetector.UnregisterContext(base)
        }
    }

    return ctx, enhancedCancel
}

// UpdateConfig replaces the active configuration after validating it. It
// returns an error, leaving the current configuration in place, when
// newConfig is nil, when either timeout is non-positive, or when
// DefaultTimeout exceeds MaxTimeout.
func (ccm *ConfigurableContextManager) UpdateConfig(newConfig *ContextConfig) error {
    if newConfig == nil {
        return fmt.Errorf("nil configuration")
    }

    ccm.mu.Lock()
    defer ccm.mu.Unlock()

    // Validate configuration
    if newConfig.DefaultTimeout <= 0 || newConfig.MaxTimeout <= 0 {
        return fmt.Errorf("invalid timeout configuration")
    }

    if newConfig.DefaultTimeout > newConfig.MaxTimeout {
        return fmt.Errorf("default timeout cannot exceed max timeout")
    }

    ccm.config = newConfig
    return nil
}

This configurable manager allows runtime adjustment of context behavior based on operational requirements.

The key insight about production context patterns is that observability, performance, and operational flexibility are just as important as functional correctness. The most successful context implementations provide comprehensive monitoring, efficient resource management, and operational controls that enable teams to maintain reliable service in production environments.

By implementing these production best practices, you’ll have a robust foundation for context-aware applications that can scale reliably while providing the observability and control needed for effective operations. The patterns covered throughout this guide provide a comprehensive toolkit for building sophisticated request lifecycle management systems that handle the complexities of modern distributed applications.

Continue Your Learning

This is part 7 of 7 in the comprehensive guide.

← Previous: Error Handling and Recovery | Guide Overview: See all 7 parts

Guide Complete!

You've finished all 7 parts of this guide.

Explore More Browse other guides