Performance Benchmarking and Profiling
Performance testing is crucial for Go applications, especially those with high throughput requirements.
Writing Effective Benchmarks
Go’s testing package provides excellent support for benchmarking:
package performance
import (
"bytes"
"crypto/sha256"
"encoding/json"
"fmt"
"sync"
"testing"
)
// Item represents a data structure to benchmark
type Item struct {
ID string `json:"id"`
Name string `json:"name"`
Tags []string `json:"tags"`
Count int `json:"count"`
Value float64 `json:"value"`
IsEnabled bool `json:"is_enabled"`
}
// generateItem creates a test item
func generateItem(id string) Item {
return Item{
ID: id,
Name: "Test Item " + id,
Tags: []string{"tag1", "tag2", "tag3", "tag4", "tag5"},
Count: 42,
Value: 99.99,
IsEnabled: true,
}
}
// BenchmarkJSONMarshal benchmarks JSON marshaling performance
func BenchmarkJSONMarshal(b *testing.B) {
item := generateItem("test-1")
b.ResetTimer()
for i := 0; i < b.N; i++ {
_, err := json.Marshal(item)
if err != nil {
b.Fatal(err)
}
}
}
// BenchmarkJSONMarshalParallel benchmarks JSON marshaling in parallel
func BenchmarkJSONMarshalParallel(b *testing.B) {
item := generateItem("test-1")
b.ResetTimer()
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
_, err := json.Marshal(item)
if err != nil {
// Report with Error: Fatal must not be called from goroutines started by RunParallel
b.Error(err)
}
}
})
}
// ItemCache is a simple cache implementation to benchmark
type ItemCache struct {
items map[string]Item
mu sync.RWMutex
}
// NewItemCache creates a new item cache
func NewItemCache() *ItemCache {
return &ItemCache{
items: make(map[string]Item),
}
}
// Get retrieves an item from the cache
func (c *ItemCache) Get(id string) (Item, bool) {
c.mu.RLock()
defer c.mu.RUnlock()
item, ok := c.items[id]
return item, ok
}
// Set adds an item to the cache
func (c *ItemCache) Set(id string, item Item) {
c.mu.Lock()
defer c.mu.Unlock()
c.items[id] = item
}
// BenchmarkCacheGet benchmarks cache retrieval
func BenchmarkCacheGet(b *testing.B) {
// Benchmark different cache sizes
benchmarks := []struct {
name string
cacheSize int
}{
{"Small_10", 10},
{"Medium_100", 100},
{"Large_1000", 1000},
}
for _, bm := range benchmarks {
b.Run(bm.name, func(b *testing.B) {
// Create cache with specified size
cache := NewItemCache()
for i := 0; i < bm.cacheSize; i++ {
id := fmt.Sprintf("item-%d", i)
cache.Set(id, generateItem(id))
}
b.ResetTimer()
for i := 0; i < b.N; i++ {
// Cycle through the cached items
id := fmt.Sprintf("item-%d", i%bm.cacheSize)
_, found := cache.Get(id)
if !found {
b.Fatalf("Item %s not found", id)
}
}
})
}
}
// BenchmarkCacheGetParallel benchmarks parallel cache retrieval
func BenchmarkCacheGetParallel(b *testing.B) {
// Setup
cache := NewItemCache()
for i := 0; i < 1000; i++ {
id := fmt.Sprintf("item-%d", i)
cache.Set(id, generateItem(id))
}
b.ResetTimer()
b.RunParallel(func(pb *testing.PB) {
i := 0
for pb.Next() {
id := fmt.Sprintf("item-%d", i%1000)
_, found := cache.Get(id)
if !found {
// Report with Errorf: Fatalf must not be called from goroutines started by RunParallel
b.Errorf("Item %s not found", id)
}
i++
}
})
}
// HashItem hashes an item using SHA-256
func HashItem(item Item) []byte {
data, _ := json.Marshal(item)
hash := sha256.Sum256(data)
return hash[:]
}
// BenchmarkHashingComparison compares different hashing strategies
func BenchmarkHashingComparison(b *testing.B) {
item := generateItem("test-1")
b.Run("JSON_Marshal_Then_Hash", func(b *testing.B) {
for i := 0; i < b.N; i++ {
data, _ := json.Marshal(item)
hash := sha256.Sum256(data)
_ = hash
}
})
b.Run("Direct_Field_Concatenation", func(b *testing.B) {
for i := 0; i < b.N; i++ {
var buf bytes.Buffer
buf.WriteString(item.ID)
buf.WriteString(item.Name)
for _, tag := range item.Tags {
buf.WriteString(tag)
}
buf.WriteString(fmt.Sprintf("%d", item.Count))
buf.WriteString(fmt.Sprintf("%f", item.Value))
buf.WriteString(fmt.Sprintf("%t", item.IsEnabled))
hash := sha256.Sum256(buf.Bytes())
_ = hash
}
})
}
// BenchmarkWithMemoryTracking demonstrates memory allocation tracking
func BenchmarkWithMemoryTracking(b *testing.B) {
// Run with: go test -bench=BenchmarkWithMemoryTracking -benchmem
b.Run("WithPreallocation", func(b *testing.B) {
for i := 0; i < b.N; i++ {
// Preallocate slice with capacity
data := make([]Item, 0, 1000)
for j := 0; j < 1000; j++ {
id := fmt.Sprintf("item-%d", j)
data = append(data, generateItem(id))
}
_ = data
}
})
b.Run("WithoutPreallocation", func(b *testing.B) {
for i := 0; i < b.N; i++ {
// No preallocation
var data []Item
for j := 0; j < 1000; j++ {
id := fmt.Sprintf("item-%d", j)
data = append(data, generateItem(id))
}
_ = data
}
})
}
To run these benchmarks:
# Run all benchmarks
go test -bench=. ./performance
# Run specific benchmark
go test -bench=BenchmarkJSONMarshal ./performance
# Run benchmarks with memory allocation statistics
go test -bench=. -benchmem ./performance
# Run each benchmark for longer to reduce noise within a single run
go test -bench=. -benchtime=5s ./performance
# Compare before and after a change (benchstat needs several samples per benchmark, hence -count)
go test -bench=. -benchmem -count=10 ./performance > before.txt
# Make changes, then collect the same samples again
go test -bench=. -benchmem -count=10 ./performance > after.txt
benchstat before.txt after.txt
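A -benchmem run prints one line per benchmark. The figures below are illustrative (results vary by hardware and Go version), but the column layout is fixed:
BenchmarkJSONMarshal-8               2185804        548 ns/op      352 B/op        3 allocs/op
BenchmarkJSONMarshalParallel-8       7258498        165 ns/op      352 B/op        3 allocs/op
BenchmarkCacheGet/Medium_100-8       9571203        121 ns/op       16 B/op        1 allocs/op
The -8 suffix is GOMAXPROCS, the first number is how many iterations the framework chose, and the remaining columns report time, heap bytes, and heap allocations per operation.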
These benchmarks demonstrate:
- Basic benchmarking: Using testing.B to measure performance
- Parallel benchmarks: Testing concurrent performance with b.RunParallel
- Sub-benchmarks: Using b.Run to organize related benchmarks
- Memory tracking: Measuring allocations with -benchmem
- Comparison benchmarks: Comparing different implementations
CPU and Memory Profiling
Profiling helps identify performance bottlenecks in your code:
package main
import (
"flag"
"fmt"
"log"
"os"
"runtime"
"runtime/pprof"
"sync"
"time"
"crypto/sha256"
)
var cpuprofile = flag.String("cpuprofile", "", "write cpu profile to file")
var memprofile = flag.String("memprofile", "", "write memory profile to file")
// Worker represents a task processor
type Worker struct {
ID int
Tasks chan Task
Results chan Result
QuitChan chan bool
wg *sync.WaitGroup
}
// Task represents a unit of work
type Task struct {
ID int
Payload string
Strength int // Computational intensity
}
// Result represents the outcome of processing a task
type Result struct {
TaskID int
WorkerID int
Output string
TimeNanos int64
}
// NewWorker creates a new worker
func NewWorker(id int, tasks chan Task, results chan Result, wg *sync.WaitGroup) *Worker {
return &Worker{
ID: id,
Tasks: tasks,
Results: results,
QuitChan: make(chan bool),
wg: wg,
}
}
// Start begins the worker's processing loop
func (w *Worker) Start() {
go func() {
defer w.wg.Done()
for {
select {
case task, ok := <-w.Tasks:
if !ok {
// Task channel closed: no more work, return so the WaitGroup can finish
return
}
// Process the task
result := w.processTask(task)
w.Results <- result
case <-w.QuitChan:
return
}
}
}()
}
// Stop signals the worker to stop processing
func (w *Worker) Stop() {
go func() {
w.QuitChan <- true
}()
}
// processTask handles the actual work
func (w *Worker) processTask(task Task) Result {
// Simulate CPU-intensive work
start := time.Now()
// This is our "hot" function that will show up in CPU profiles
output := performComputation(task.Payload, task.Strength)
elapsed := time.Since(start)
return Result{
TaskID: task.ID,
WorkerID: w.ID,
Output: output,
TimeNanos: elapsed.Nanoseconds(),
}
}
// performComputation is a CPU-intensive function
func performComputation(input string, strength int) string {
// Create a large slice to show up in memory profiles
data := make([]byte, 0, strength*1000)
// Perform some CPU-intensive work
for i := 0; i < strength; i++ {
h := sha256.New()
h.Write([]byte(input))
hash := h.Sum(nil)
data = append(data, hash...)
input = fmt.Sprintf("%x", hash)
}
return fmt.Sprintf("%x", sha256.Sum256(data))
}
func main() {
flag.Parse()
// CPU profiling
if *cpuprofile != "" {
f, err := os.Create(*cpuprofile)
if err != nil {
log.Fatal("could not create CPU profile: ", err)
}
defer f.Close()
if err := pprof.StartCPUProfile(f); err != nil {
log.Fatal("could not start CPU profile: ", err)
}
defer pprof.StopCPUProfile()
}
// Run the workload
runWorkload()
// Memory profiling
if *memprofile != "" {
f, err := os.Create(*memprofile)
if err != nil {
log.Fatal("could not create memory profile: ", err)
}
defer f.Close()
runtime.GC() // Get up-to-date statistics
if err := pprof.WriteHeapProfile(f); err != nil {
log.Fatal("could not write memory profile: ", err)
}
}
}
func runWorkload() {
numWorkers := runtime.NumCPU()
numTasks := 100
// Create channels
tasks := make(chan Task, numTasks)
results := make(chan Result, numTasks)
// Create worker pool
var wg sync.WaitGroup
wg.Add(numWorkers)
workers := make([]*Worker, numWorkers)
for i := 0; i < numWorkers; i++ {
workers[i] = NewWorker(i, tasks, results, &wg)
workers[i].Start()
}
// Generate tasks
go func() {
for i := 0; i < numTasks; i++ {
tasks <- Task{
ID: i,
Payload: fmt.Sprintf("task-%d", i),
Strength: i % 10, // Vary computational intensity
}
}
close(tasks)
}()
// Collect results
go func() {
for i := 0; i < numTasks; i++ {
result := <-results
fmt.Printf("Task %d completed by Worker %d in %d ns\n",
result.TaskID, result.WorkerID, result.TimeNanos)
}
}()
// Wait for all workers to finish
wg.Wait()
}
To run with profiling:
# CPU profiling
go build -o app
./app -cpuprofile=cpu.prof
# Memory profiling
./app -memprofile=mem.prof
# Analyze profiles
go tool pprof -http=:8080 cpu.prof
go tool pprof -http=:8080 mem.prof
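Profiles can also be captured straight from the benchmarks in the previous section, and pprof offers an interactive console alongside the web UI:
# Capture profiles while running benchmarks
go test -bench=. -cpuprofile=cpu.prof -memprofile=mem.prof ./performance
# Explore a profile interactively
go tool pprof cpu.prof
(pprof) top10                     # hottest functions by CPU time
(pprof) list performComputation   # annotated source for one function
(pprof) web                       # call graph in the browser (requires Graphviz)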
This profiling example demonstrates:
- CPU profiling: Capturing CPU usage patterns
- Memory profiling: Tracking heap allocations
- Profile visualization: Using pprof’s web interface
- Hotspot identification: Finding performance bottlenecks
- Workload simulation: Creating realistic test scenarios
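For long-running services it is often more convenient to expose profiles over HTTP with the standard library's net/http/pprof package than to write them to files at exit. A minimal sketch (the loopback port is an arbitrary choice):
package main

import (
	"log"
	"net/http"
	_ "net/http/pprof" // registers /debug/pprof/* handlers on http.DefaultServeMux
	"time"
)

func main() {
	// Serve the pprof endpoints on a loopback-only port, separate from application traffic.
	go func() {
		log.Println(http.ListenAndServe("localhost:6060", nil))
	}()
	// Stand-in for the real application workload.
	for {
		time.Sleep(time.Second)
	}
}
With the service running, go tool pprof -http=:8080 http://localhost:6060/debug/pprof/profile?seconds=30 collects a 30-second CPU profile, and /debug/pprof/heap serves a heap snapshot.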
Benchmarking HTTP Handlers
For web services, benchmarking HTTP handlers is crucial:
package api
import (
"bytes"
"encoding/json"
"fmt"
"net/http"
"net/http/httptest"
"testing"
"time"
"github.com/gin-gonic/gin"
"github.com/stretchr/testify/mock"
)
// MockProductService, NewProductHandler, and the models package are assumed to be defined elsewhere in the project.
// BenchmarkProductListHandler benchmarks the product listing endpoint
func BenchmarkProductListHandler(b *testing.B) {
// Setup
gin.SetMode(gin.ReleaseMode) // Disable debug mode for benchmarking
// Benchmark with different dataset sizes
benchmarks := []struct {
name string
numItems int
setupMock func(*MockProductService, int)
}{
{
name: "small_10_items",
numItems: 10,
setupMock: func(m *MockProductService, n int) {
products := generateTestProducts(n)
m.On("ListProducts", mock.Anything, 100, 0).Return(products, nil)
},
},
{
name: "medium_100_items",
numItems: 100,
setupMock: func(m *MockProductService, n int) {
products := generateTestProducts(n)
m.On("ListProducts", mock.Anything, 100, 0).Return(products, nil)
},
},
{
name: "large_1000_items",
numItems: 1000,
setupMock: func(m *MockProductService, n int) {
products := generateTestProducts(n)
m.On("ListProducts", mock.Anything, 1000, 0).Return(products, nil)
},
},
}
for _, bm := range benchmarks {
b.Run(bm.name, func(b *testing.B) {
// Setup mock for this benchmark
mockService := new(MockProductService)
bm.setupMock(mockService, bm.numItems)
// Create handler with this mock
handler := NewProductHandler(mockService)
router := gin.New()
router.GET("/products", handler.ListProducts)
// Create request
req, _ := http.NewRequest(http.MethodGet, "/products", nil)
b.ResetTimer()
b.ReportAllocs()
for i := 0; i < b.N; i++ {
// Create a response recorder for each iteration
w := httptest.NewRecorder()
// Serve the request
router.ServeHTTP(w, req)
// Verify response code (but don't parse body in benchmark)
if w.Code != http.StatusOK {
b.Fatalf("Expected status code 200, got %d", w.Code)
}
}
})
}
}
// BenchmarkProductCreateHandler benchmarks the product creation endpoint
func BenchmarkProductCreateHandler(b *testing.B) {
// Setup
gin.SetMode(gin.ReleaseMode) // Disable debug mode for benchmarking
// Prepare test data
product := &models.Product{
Name: "Test Product",
Description: "A test product for benchmarking",
Price: 99.99,
CategoryID: "cat-123",
}
// Serialize once outside the benchmark loop
jsonData, _ := json.Marshal(product)
// Benchmark with different response scenarios
benchmarks := []struct {
name string
setupMock func(*MockProductService)
delay time.Duration // Simulate processing time
}{
{
name: "fast_response",
setupMock: func(m *MockProductService) {
m.On("CreateProduct", mock.Anything, mock.Anything).Return(nil)
},
delay: 0,
},
{
name: "medium_response_time",
setupMock: func(m *MockProductService) {
m.On("CreateProduct", mock.Anything, mock.Anything).
Run(func(args mock.Arguments) {
time.Sleep(10 * time.Millisecond)
}).
Return(nil)
},
delay: 10 * time.Millisecond,
},
{
name: "slow_response_time",
setupMock: func(m *MockProductService) {
m.On("CreateProduct", mock.Anything, mock.Anything).
Run(func(args mock.Arguments) {
time.Sleep(50 * time.Millisecond)
}).
Return(nil)
},
delay: 50 * time.Millisecond,
},
}
for _, bm := range benchmarks {
b.Run(bm.name, func(b *testing.B) {
// Setup mock for this benchmark
mockService := new(MockProductService)
bm.setupMock(mockService)
// Create handler with this mock
handler := NewProductHandler(mockService)
router := gin.New()
router.POST("/products", handler.CreateProduct)
b.ResetTimer()
b.ReportAllocs()
for i := 0; i < b.N; i++ {
// Create a new request for each iteration
req, _ := http.NewRequest(http.MethodPost, "/products", bytes.NewBuffer(jsonData))
req.Header.Set("Content-Type", "application/json")
// Create a response recorder
w := httptest.NewRecorder()
// Serve the request
router.ServeHTTP(w, req)
// Verify response code
if w.Code != http.StatusCreated {
b.Fatalf("Expected status code 201, got %d", w.Code)
}
}
})
}
}
// generateTestProducts creates a slice of test products
func generateTestProducts(n int) []*models.Product {
products := make([]*models.Product, n)
now := time.Now()
for i := 0; i < n; i++ {
products[i] = &models.Product{
ID: fmt.Sprintf("prod-%d", i),
Name: fmt.Sprintf("Product %d", i),
Description: fmt.Sprintf("Description for product %d", i),
Price: float64(10 + i%90),
CategoryID: fmt.Sprintf("cat-%d", i%5),
CreatedAt: now.Add(-time.Duration(i) * time.Hour),
UpdatedAt: now,
}
}
return products
}
This HTTP benchmarking demonstrates:
- Handler benchmarking: Measuring API endpoint performance
- Data size impact: Testing with different payload sizes
- Response time simulation: Measuring the impact of backend delays
- Memory allocation tracking: Using b.ReportAllocs() to monitor memory usage
- Realistic scenarios: Testing with representative data volumes
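The benchmarks above drive each handler with sequential requests. To approximate concurrent load, the same httptest pattern combines with b.RunParallel; the sketch below uses a plain net/http mux with a placeholder handler so it stands alone, but the gin router from the examples above drops in the same way:
package api

import (
	"net/http"
	"net/http/httptest"
	"testing"
)

// BenchmarkListProductsParallel issues requests from multiple goroutines.
func BenchmarkListProductsParallel(b *testing.B) {
	// Placeholder handler standing in for the real product listing endpoint.
	mux := http.NewServeMux()
	mux.HandleFunc("/products", func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		_, _ = w.Write([]byte(`[]`))
	})

	b.ReportAllocs()
	b.RunParallel(func(pb *testing.PB) {
		for pb.Next() {
			req := httptest.NewRequest(http.MethodGet, "/products", nil)
			w := httptest.NewRecorder()
			mux.ServeHTTP(w, req)
			if w.Code != http.StatusOK {
				// Error (not Fatal) is safe to call from RunParallel goroutines.
				b.Errorf("unexpected status %d", w.Code)
			}
		}
	})
}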
Analyzing Benchmark Results
Interpreting benchmark results is crucial for making informed optimizations:
package main
import (
"fmt"
"math"
"sort"
"strings"
)
// BenchmarkResult represents the outcome of a benchmark run
type BenchmarkResult struct {
Name string
NsPerOp float64
AllocsPerOp int64
BytesPerOp int64
MBPerSecond float64
Measurements []float64
}
// AnalyzeBenchmarks demonstrates how to analyze benchmark data
func AnalyzeBenchmarks(results []BenchmarkResult) {
// Sort by ns/op (fastest first)
sort.Slice(results, func(i, j int) bool {
return results[i].NsPerOp < results[j].NsPerOp
})
// Print summary table
fmt.Println("Performance Summary (sorted by ns/op):")
fmt.Printf("%-30s %-15s %-15s %-15s %-15s\n",
"Benchmark", "Time (ns/op)", "Allocs (count)", "Memory (B/op)", "Throughput (MB/s)")
fmt.Println(strings.Repeat("-", 90))
for _, r := range results {
fmt.Printf("%-30s %-15.2f %-15d %-15d %-15.2f\n",
r.Name, r.NsPerOp, r.AllocsPerOp, r.BytesPerOp, r.MBPerSecond)
}
// Statistical analysis for a specific benchmark
if len(results) > 0 {
result := results[0]
if len(result.Measurements) > 0 {
mean, stdDev := calculateStats(result.Measurements)
cv := (stdDev / mean) * 100 // Coefficient of variation
fmt.Printf("\nStatistical Analysis for %s:\n", result.Name)
fmt.Printf(" Mean: %.2f ns/op\n", mean)
fmt.Printf(" Standard Deviation: %.2f ns/op\n", stdDev)
fmt.Printf(" Coefficient of Var: %.2f%%\n", cv)
// Interpret the results
fmt.Println("\nInterpretation:")
if cv < 1.0 {
fmt.Println(" Excellent stability (CV < 1%)")
} else if cv < 5.0 {
fmt.Println(" Good stability (CV < 5%)")
} else if cv < 10.0 {
fmt.Println(" Moderate stability (CV < 10%)")
} else {
fmt.Println(" Poor stability (CV >= 10%) - Results may not be reliable")
}
// Performance comparison: results are sorted fastest-first, so compare the
// fastest entry against the runner-up
if len(results) > 1 {
fastest := results[0]
runnerUp := results[1]
slowdown := (runnerUp.NsPerOp - fastest.NsPerOp) / fastest.NsPerOp * 100
fmt.Printf("\nComparison (%s vs %s):\n", fastest.Name, runnerUp.Name)
fmt.Printf(" Time difference: %.2f ns/op (%.2f%% slower)\n",
runnerUp.NsPerOp-fastest.NsPerOp, slowdown)
fmt.Printf(" Memory difference: %d bytes/op\n",
runnerUp.BytesPerOp-fastest.BytesPerOp)
fmt.Printf(" Allocation difference: %d allocs/op\n",
runnerUp.AllocsPerOp-fastest.AllocsPerOp)
}
}
}
}
// calculateStats computes mean and standard deviation
func calculateStats(measurements []float64) (float64, float64) {
sum := 0.0
for _, m := range measurements {
sum += m
}
mean := sum / float64(len(measurements))
sumSquaredDiff := 0.0
for _, m := range measurements {
diff := m - mean
sumSquaredDiff += diff * diff
}
variance := sumSquaredDiff / float64(len(measurements))
stdDev := math.Sqrt(variance)
return mean, stdDev
}
This analysis approach demonstrates:
- Result sorting: Ranking implementations by performance
- Statistical analysis: Computing mean, standard deviation, and coefficient of variation
- Stability assessment: Evaluating the reliability of benchmark results
- Comparative analysis: Quantifying improvements between implementations
- Tabular reporting: Presenting time, memory, and throughput figures side by side for quick comparison