Optimizing sync.Pool Usage
While sync.Pool is useful for reducing allocations, it has nuances worth understanding, in particular that pooled objects must be reset before reuse and that the pool's contents are reclaimed across garbage-collection cycles:
package main

import (
	"fmt"
	"runtime"
	"sync"
	"time"
)

// Object we want to pool
type LargeObject struct {
	data [8192]byte
}

func main() {
	// Create a pool
	pool := &sync.Pool{
		New: func() interface{} {
			return &LargeObject{}
		},
	}

	// Measure allocations before using the pool
	var stats runtime.MemStats
	runtime.ReadMemStats(&stats)
	allocsBefore := stats.TotalAlloc

	// Use objects from the pool
	for i := 0; i < 100000; i++ {
		obj := pool.Get().(*LargeObject)

		// Simulate work with the object
		obj.data[0] = byte(i)

		// Important: reset the object before returning it to the pool
		// so the next consumer never sees stale data
		for j := range obj.data {
			obj.data[j] = 0
		}
		pool.Put(obj)

		// Every 10000 iterations, trigger a GC to demonstrate pool behavior
		if i > 0 && i%10000 == 0 {
			runtime.GC()
		}
	}
	// Measure allocations after using the pool
	runtime.ReadMemStats(&stats)
	allocsAfter := stats.TotalAlloc

	fmt.Printf("Total allocations: %d bytes\n", allocsAfter-allocsBefore)
	fmt.Printf("Average allocation per iteration: %.2f bytes\n",
		float64(allocsAfter-allocsBefore)/100000)
	// Demonstrate that pooled objects are eventually dropped by the GC
	obj := pool.Get().(*LargeObject)
	pool.Put(obj)

	// Force GC. Since Go 1.13 a pooled object survives one collection in a
	// "victim cache" and is only discarded on the second, so run GC twice.
	fmt.Println("Forcing GC...")
	runtime.GC()
	runtime.GC()
	time.Sleep(time.Millisecond) // Give background cleanup time to run

	// Try to get the object back - a new one will be created
	// because the pool's contents were reclaimed
	newObj := pool.Get().(*LargeObject)
	fmt.Printf("Object address before GC: %p\n", obj)
	fmt.Printf("Object address after GC: %p\n", newObj)
	fmt.Printf("Same object: %v\n", obj == newObj)
}
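In practice, the reset-before-Put step is usually wrapped so that call sites cannot forget it. Below is a minimal sketch of the common pattern of pooling *bytes.Buffer values; renderGreeting is a hypothetical hot-path function used only for illustration:
package main

import (
	"bytes"
	"fmt"
	"sync"
)

// bufPool hands out reusable *bytes.Buffer values.
var bufPool = sync.Pool{
	New: func() interface{} {
		return new(bytes.Buffer)
	},
}

// renderGreeting borrows a buffer, uses it, and returns it. Reset is
// called before Put so the next borrower never sees stale contents.
func renderGreeting(name string) string {
	buf := bufPool.Get().(*bytes.Buffer)
	defer func() {
		buf.Reset()
		bufPool.Put(buf)
	}()

	buf.WriteString("Hello, ")
	buf.WriteString(name)
	buf.WriteString("!")
	return buf.String() // copies the bytes, so returning the buffer is safe
}

func main() {
	fmt.Println(renderGreeting("gopher"))
}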
Zero-Allocation Techniques
For latency-critical applications, driving allocations in hot paths toward zero can be crucial. The comparison below shows how pre-sizing a strings.Builder collapses the many intermediate allocations of naive string concatenation into a single one; a truly zero-allocation variant that reuses a caller-owned buffer follows the benchmark:
package main

import (
	"fmt"
	"strings"
	"testing"
)

// Allocating version - creates new strings
func concatWithAlloc(strs []string) string {
	result := ""
	for _, s := range strs {
		result += s
	}
	return result
}

// Pre-allocating version - more efficient
func concatWithPrealloc(strs []string) string {
	// Calculate total length needed
	totalLen := 0
	for _, s := range strs {
		totalLen += len(s)
	}

	// Pre-allocate the exact size needed
	var builder strings.Builder
	builder.Grow(totalLen)

	// Build the string
	for _, s := range strs {
		builder.WriteString(s)
	}
	return builder.String()
}
func main() {
	testStrings := []string{
		"This", " is", " a", " test", " of", " string", " concatenation",
		" to", " demonstrate", " allocation", " differences", ".",
	}

	// Benchmark allocating version
	allocResult := testing.Benchmark(func(b *testing.B) {
		b.ReportAllocs()
		for i := 0; i < b.N; i++ {
			_ = concatWithAlloc(testStrings)
		}
	})

	// Benchmark pre-allocating version
	preallocResult := testing.Benchmark(func(b *testing.B) {
		b.ReportAllocs()
		for i := 0; i < b.N; i++ {
			_ = concatWithPrealloc(testStrings)
		}
	})

	fmt.Printf("Allocating version:\n")
	fmt.Printf("  Operations: %d\n", allocResult.N)
	fmt.Printf("  Allocations per op: %d\n", allocResult.AllocsPerOp())
	fmt.Printf("  Bytes per op: %d\n", allocResult.AllocedBytesPerOp())
	fmt.Printf("  Nanoseconds per op: %d\n\n", allocResult.NsPerOp())

	fmt.Printf("Pre-allocating version:\n")
	fmt.Printf("  Operations: %d\n", preallocResult.N)
	fmt.Printf("  Allocations per op: %d\n", preallocResult.AllocsPerOp())
	fmt.Printf("  Bytes per op: %d\n", preallocResult.AllocedBytesPerOp())
	fmt.Printf("  Nanoseconds per op: %d\n", preallocResult.NsPerOp())
}
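Note that the pre-allocating version still performs one allocation per call, for the builder's backing array. When a hot path can own and reuse a byte buffer across calls, appending into that buffer brings the per-call allocation count to zero. A minimal sketch, where appendStrings is a hypothetical helper name:
package main

import "fmt"

// appendStrings appends every string in strs to dst and returns the
// extended slice. If dst already has enough capacity, no allocation
// occurs.
func appendStrings(dst []byte, strs []string) []byte {
	for _, s := range strs {
		dst = append(dst, s...)
	}
	return dst
}

func main() {
	parts := []string{"zero", "-", "allocation", " ", "append"}

	// Reused across iterations; once the capacity is large enough,
	// the hot loop itself allocates nothing.
	buf := make([]byte, 0, 64)
	for i := 0; i < 3; i++ {
		buf = appendStrings(buf[:0], parts) // buf[:0] keeps capacity, drops length
		fmt.Println(string(buf))            // note: string(buf) itself allocates
	}
}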
Custom Allocators
For specialized use cases, a custom allocator can outperform the general-purpose runtime allocator, at the cost of relying on the unsafe package and managing object lifetimes by hand:
package main

import (
	"fmt"
	"sync"
	"time"
	"unsafe"
)

// A fixed-size block allocator for a specific object size. The blocks
// are plain bytes, so this approach is only safe for types that contain
// no Go pointers (the GC does not scan the backing slice for them).
type FixedSizeAllocator struct {
	blockSize   int
	blocks      []byte
	freeList    []int
	mu          sync.Mutex
	allocations int
}
// Create a new fixed-size allocator
func NewFixedSizeAllocator(blockSize, capacity int) *FixedSizeAllocator {
	blocks := make([]byte, blockSize*capacity)
	freeList := make([]int, capacity)

	// Initialize free list
	for i := 0; i < capacity; i++ {
		freeList[i] = i
	}

	return &FixedSizeAllocator{
		blockSize: blockSize,
		blocks:    blocks,
		freeList:  freeList,
	}
}

// Allocate a block
func (a *FixedSizeAllocator) Allocate() unsafe.Pointer {
	a.mu.Lock()
	defer a.mu.Unlock()

	if len(a.freeList) == 0 {
		panic("allocator out of memory")
	}

	// Get index from free list
	blockIndex := a.freeList[len(a.freeList)-1]
	a.freeList = a.freeList[:len(a.freeList)-1]

	// Calculate block address
	blockOffset := blockIndex * a.blockSize
	blockPtr := unsafe.Pointer(&a.blocks[blockOffset])

	a.allocations++
	return blockPtr
}

// Free a block
func (a *FixedSizeAllocator) Free(ptr unsafe.Pointer) {
	a.mu.Lock()
	defer a.mu.Unlock()

	// Calculate block index
	blockAddr := uintptr(ptr)
	baseAddr := uintptr(unsafe.Pointer(&a.blocks[0]))
	offset := blockAddr - baseAddr
	blockIndex := int(offset) / a.blockSize

	// Add back to free list
	a.freeList = append(a.freeList, blockIndex)
	a.allocations--
}

// Example object to allocate
type MyObject struct {
	id   int
	data [128]byte
}
func main() {
	// Create allocator for MyObject
	objectSize := int(unsafe.Sizeof(MyObject{}))
	allocator := NewFixedSizeAllocator(objectSize, 10000)
	fmt.Printf("Object size: %d bytes\n", objectSize)

	// Benchmark standard allocation
	startStandard := time.Now()
	standardObjects := make([]*MyObject, 10000)
	for i := 0; i < 10000; i++ {
		obj := &MyObject{id: i}
		standardObjects[i] = obj
	}
	standardDuration := time.Since(startStandard)

	// Free standard objects to allow GC
	for i := range standardObjects {
		standardObjects[i] = nil
	}

	// Benchmark custom allocator
	startCustom := time.Now()
	customObjects := make([]unsafe.Pointer, 10000)
	for i := 0; i < 10000; i++ {
		ptr := allocator.Allocate()
		obj := (*MyObject)(ptr)
		obj.id = i
		customObjects[i] = ptr
	}
	customDuration := time.Since(startCustom)

	// Free custom objects
	for _, ptr := range customObjects {
		allocator.Free(ptr)
	}

	fmt.Printf("Standard allocation: %v\n", standardDuration)
	fmt.Printf("Custom allocation: %v\n", customDuration)
	fmt.Printf("Performance improvement: %.2fx\n",
		float64(standardDuration)/float64(customDuration))
}
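The same fixed-block technique can also be expressed without unsafe by handing out typed pointers into a single backing slice and recording each slot's index inside the object itself. A minimal sketch follows; NodePool, Node, and their fields are illustrative names, not part of the example above:
package main

import (
	"fmt"
	"sync"
)

// Node is a hypothetical pointer-free payload type.
type Node struct {
	idx  int32 // slot index inside the pool's backing slice
	id   int
	data [64]byte
}

// NodePool is a typed free list: one contiguous backing slice plus a
// stack of free slot indices, so call sites never touch unsafe.Pointer.
type NodePool struct {
	mu      sync.Mutex
	backing []Node
	free    []int32
}

func NewNodePool(capacity int) *NodePool {
	p := &NodePool{
		backing: make([]Node, capacity),
		free:    make([]int32, capacity),
	}
	for i := range p.free {
		p.free[i] = int32(i)
		p.backing[i].idx = int32(i)
	}
	return p
}

// Get hands out a pointer into the backing slice, or nil when exhausted.
func (p *NodePool) Get() *Node {
	p.mu.Lock()
	defer p.mu.Unlock()
	if len(p.free) == 0 {
		return nil
	}
	i := p.free[len(p.free)-1]
	p.free = p.free[:len(p.free)-1]
	return &p.backing[i]
}

// Put returns a node to the pool using the slot index stored in it.
func (p *NodePool) Put(n *Node) {
	p.mu.Lock()
	defer p.mu.Unlock()
	p.free = append(p.free, n.idx)
}

func main() {
	pool := NewNodePool(4)
	n := pool.Get()
	n.id = 42
	fmt.Println("allocated node with id", n.id)
	pool.Put(n)
}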