Trace Analysis
Go’s execution tracer provides detailed insights into runtime behavior:
package main
import (
"context"
"fmt"
"os"
"runtime/trace"
"sync"
"time"
)
// Function demonstrating various activities for tracing
func runTracedActivities() {
// Create a trace file
f, err := os.Create("trace.out")
if err != nil {
fmt.Fprintf(os.Stderr, "Failed to create trace file: %v\n", err)
return
}
defer f.Close()
// Start tracing
if err := trace.Start(f); err != nil {
fmt.Fprintf(os.Stderr, "Failed to start trace: %v\n", err)
return
}
defer trace.Stop()
// Create a context for tracing
ctx := context.Background()
// Trace a simple function
ctx, task := trace.NewTask(ctx, "main")
defer task.End()
// Trace a region within a function
trace.WithRegion(ctx, "initialization", func() {
fmt.Println("Initializing...")
time.Sleep(10 * time.Millisecond)
})
// Trace concurrent work
var wg sync.WaitGroup
// Start multiple goroutines with traced regions
for i := 0; i < 10; i++ {
wg.Add(1)
go func(id int) {
defer wg.Done()
// Create a task for this goroutine
_, goroutineTask := trace.NewTask(ctx, fmt.Sprintf("goroutine-%d", id))
defer goroutineTask.End()
// Log an event
trace.Log(ctx, "goroutine-start", fmt.Sprintf("id=%d", id))
// Simulate different phases of work
trace.WithRegion(ctx, "computation", func() {
// CPU-bound work
sum := 0
for j := 0; j < 1000000; j++ {
sum += j
}
trace.Log(ctx, "computation-result", fmt.Sprintf("sum=%d", sum))
})
trace.WithRegion(ctx, "io-simulation", func() {
// Simulate I/O
time.Sleep(time.Duration(id+1) * 10 * time.Millisecond)
})
trace.Log(ctx, "goroutine-end", fmt.Sprintf("id=%d", id))
}(i)
}
// Demonstrate blocking on channel communication
trace.WithRegion(ctx, "channel-communication", func() {
ch := make(chan int)
// Sender
go func() {
senderCtx, task := trace.NewTask(ctx, "sender")
defer task.End()
trace.Log(senderCtx, "send-start", "preparing to send")
time.Sleep(20 * time.Millisecond) // Simulate preparation
trace.WithRegion(senderCtx, "send-operation", func() {
ch <- 42 // This will block until receiver is ready
})
trace.Log(senderCtx, "send-complete", "value sent")
}()
// Receiver (intentionally delayed)
go func() {
receiverCtx, task := trace.NewTask(ctx, "receiver")
defer task.End()
trace.Log(receiverCtx, "receive-delay", "waiting before receiving")
time.Sleep(100 * time.Millisecond) // Delay to demonstrate blocking
trace.WithRegion(receiverCtx, "receive-operation", func() {
val := <-ch
trace.Log(receiverCtx, "received-value", fmt.Sprintf("%d", val))
})
}()
})
// Wait for all goroutines to complete
wg.Wait()
}
func main() {
runTracedActivities()
fmt.Println("Trace complete. Analyze with:")
fmt.Println("go tool trace trace.out")
fmt.Println("\nTrace tool features:")
fmt.Println("1. Timeline view of goroutine execution")
fmt.Println("2. Synchronization events (channel operations, mutex locks)")
fmt.Println("3. System events (GC, scheduler)")
fmt.Println("4. User-defined regions and events")
}
Code-Level Optimization Strategies
Beyond profiling, there are numerous code-level optimizations that can significantly improve Go application performance.
Memory Allocation Optimization
Reducing memory allocations is one of the most effective ways to improve performance:
package main
import (
"fmt"
"runtime"
"strings"
"testing"
"time"
)
// BEFORE: Creates a new slice for each call
func inefficientAppend(slice []int, value int) []int {
return append(slice, value)
}
// AFTER: Preallocates capacity to avoid reallocations
func efficientAppend(slice []int, values ...int) []int {
if cap(slice)-len(slice) < len(values) {
// Need to reallocate
newSlice := make([]int, len(slice), len(slice)+len(values)+100) // Extra capacity
copy(newSlice, slice)
slice = newSlice
}
return append(slice, values...)
}
// BEFORE: Creates many small allocations
func inefficientStringJoin(items []string) string {
result := ""
for _, item := range items {
result += item + ","
}
return result
}
// AFTER: Uses strings.Builder to minimize allocations
func efficientStringJoin(items []string) string {
var builder strings.Builder
builder.Grow(len(items) * 8) // Estimate size
for i, item := range items {
builder.WriteString(item)
if i < len(items)-1 {
builder.WriteByte(',')
}
}
return builder.String()
}
// BEFORE: Allocates a map for each call
func inefficientCounter(text string) map[rune]int {
counts := make(map[rune]int)
for _, char := range text {
counts[char]++
}
return counts
}
// AFTER: Reuses a map to avoid allocations
func efficientCounter(text string, counts map[rune]int) {
// Clear the map
for k := range counts {
delete(counts, k)
}
// Count characters
for _, char := range text {
counts[char]++
}
}
// Benchmark helper
func benchmarkAllocation(b *testing.B, f func()) {
// Warm up
for i := 0; i < 5; i++ {
f()
}
// Reset memory stats
runtime.GC()
var stats runtime.MemStats
runtime.ReadMemStats(&stats)
allocsBefore := stats.TotalAlloc
// Run benchmark
start := time.Now()
for i := 0; i < b.N; i++ {
f()
}
elapsed := time.Since(start)
// Get memory stats
runtime.ReadMemStats(&stats)
allocsAfter := stats.TotalAlloc
// Print results
fmt.Printf("Time: %v, Allocations: %v bytes\n", elapsed, allocsAfter-allocsBefore)
}
func main() {
// Benchmark slice append
fmt.Println("Slice append benchmark:")
benchmarkAllocation(testing.B{N: 10000}, func() {
slice := make([]int, 0)
for i := 0; i < 1000; i++ {
slice = inefficientAppend(slice, i)
}
})
benchmarkAllocation(testing.B{N: 10000}, func() {
slice := make([]int, 0, 1000) // Preallocate
for i := 0; i < 1000; i++ {
slice = append(slice, i) // Direct append is better
}
})
// Benchmark string join
fmt.Println("\nString join benchmark:")
items := make([]string, 1000)
for i := range items {
items[i] = fmt.Sprintf("item-%d", i)
}
benchmarkAllocation(testing.B{N: 100}, func() {
_ = inefficientStringJoin(items)
})
benchmarkAllocation(testing.B{N: 100}, func() {
_ = efficientStringJoin(items)
})
benchmarkAllocation(testing.B{N: 100}, func() {
_ = strings.Join(items, ",") // Built-in is even better
})
// Benchmark counter
fmt.Println("\nCounter benchmark:")
text := strings.Repeat("Go is a great language! ", 100)
benchmarkAllocation(testing.B{N: 1000}, func() {
_ = inefficientCounter(text)
})
benchmarkAllocation(testing.B{N: 1000}, func() {
counts := make(map[rune]int)
efficientCounter(text, counts)
})
}
Compiler Optimizations and Build Flags
Understanding Go’s compiler optimizations can help you write more efficient code:
package main
import (
"fmt"
"os"
"runtime"
"strings"
)
// Function that may be inlined by the compiler
func add(a, b int) int {
return a + b
}
// Function that's too complex to inline
func complexFunction(a, b int) int {
result := 0
for i := 0; i < a; i++ {
if i%2 == 0 {
result += i * b
} else {
result -= i * b
}
if result > 1000 {
result = 1000
}
}
return result
}
// Function with bounds check elimination opportunity
func sumArray(arr []int) int {
sum := 0
for i := 0; i < len(arr); i++ {
sum += arr[i]
}
return sum
}
// Function that demonstrates escape analysis
func createOnStack() int {
x := 42 // Will be allocated on stack
return x
}
func createOnHeap() *int {
x := 42 // Will escape to heap
return &x
}
func main() {
// Print Go version and compiler info
fmt.Printf("Go version: %s\n", runtime.Version())
fmt.Printf("GOOS: %s, GOARCH: %s\n", runtime.GOOS, runtime.GOARCH)
fmt.Printf("GOMAXPROCS: %d\n", runtime.GOMAXPROCS(0))
// Print build flags
fmt.Println("\nBuild flags:")
fmt.Println("To enable inlining and bounds check elimination:")
fmt.Println("go build -gcflags=\"-m\" main.go")
fmt.Println("\nTo disable optimizations (for debugging):")
fmt.Println("go build -gcflags=\"-N -l\" main.go")
fmt.Println("\nTo see assembly output:")
fmt.Println("go build -gcflags=\"-S\" main.go")
// Demonstrate compiler optimizations
fmt.Println("\nCompiler optimization examples:")
// Function inlining
result := 0
for i := 0; i < 1000000; i++ {
result = add(result, i) // Likely to be inlined
}
fmt.Printf("Inlined function result: %d\n", result)
// Complex function (not inlined)
result = complexFunction(100, 5)
fmt.Printf("Complex function result: %d\n", result)
// Bounds check elimination
arr := make([]int, 1000)
for i := range arr {
arr[i] = i
}
sum := sumArray(arr)
fmt.Printf("Array sum: %d\n", sum)
// Escape analysis
stackVal := createOnStack()
heapVal := createOnHeap()
fmt.Printf("Stack value: %d, Heap value: %d\n", stackVal, *heapVal)
// Print instructions for viewing escape analysis
fmt.Println("\nTo view escape analysis decisions:")
fmt.Println("go build -gcflags=\"-m -m\" main.go")
// Print instructions for benchmarking with different flags
fmt.Println("\nTo benchmark with different compiler flags:")
fmt.Println("go test -bench=. -benchmem -gcflags=\"-N -l\" ./...")
fmt.Println("go test -bench=. -benchmem ./...")
}