Memory Reuse
// Inefficient: Creates a new vector for each batch
fn process_batches_inefficient(data: &[i32], batch_size: usize) -> Vec<i32> {
    let mut results = Vec::new();
    for batch in data.chunks(batch_size) {
        let processed = batch.iter().map(|&x| x * x).collect::<Vec<_>>();
        results.extend_from_slice(&processed);
    }
    results
}
// Efficient: Reuses a buffer for each batch
fn process_batches_efficient(data: &[i32], batch_size: usize) -> Vec<i32> {
    let mut results = Vec::with_capacity(data.len());
    let mut buffer = Vec::with_capacity(batch_size);
    for batch in data.chunks(batch_size) {
        buffer.clear(); // drops the elements but keeps the allocation
        buffer.extend(batch.iter().map(|&x| x * x));
        results.extend_from_slice(&buffer);
    }
    results
}
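The reuse pays off because `Vec::clear` keeps the existing capacity, so after the first iteration the loop performs no further allocations. As a quick sanity check, a minimal sketch (this `main` is not part of the example above) confirms that both versions produce identical output:

fn main() {
    let data: Vec<i32> = (1..=10).collect();
    // Both functions square each element; only their allocation behavior differs
    assert_eq!(
        process_batches_inefficient(&data, 4),
        process_batches_efficient(&data, 4)
    );
    println!("{:?}", process_batches_efficient(&data, 4));
}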
Stack vs. Heap Allocation
// Heap allocation (potentially slower)
fn process_with_heap(size: usize) -> usize {
    let data = vec![0; size]; // Allocates on the heap
    data.iter().sum()
}

// Stack allocation (potentially faster for small arrays)
fn process_with_stack() -> usize {
    let data = [0; 1024]; // Allocates on the stack (fixed size)
    data.iter().sum()
}
// Using Box for data too large for the stack
fn process_large_data() -> usize {
    // This would overflow the stack if allocated directly
    let data = Box::new([0; 1_000_000]);
    data.iter().sum()
}
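One caveat: `Box::new([0; 1_000_000])` still constructs the array as a temporary before moving it to the heap. Optimized builds usually elide that copy, but it is not guaranteed, and in debug builds the temporary lives on the stack and can itself overflow. A heap-first alternative, sketched below (`process_large_data_heap_first` is a name introduced here), builds the data as a `Vec` and converts it:

// Heap-first alternative: vec! writes directly into a heap allocation,
// so no large temporary is ever placed on the stack, even in debug builds
fn process_large_data_heap_first() -> usize {
    let data: Box<[usize]> = vec![0; 1_000_000].into_boxed_slice();
    data.iter().sum()
}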
Algorithmic Optimizations
Often, the biggest performance gains come from algorithmic improvements:
Choosing the Right Algorithm
use std::collections::HashSet;
use std::time::Instant;
// O(n²) approach (the contains() scan over duplicates adds even more work)
fn find_duplicates_quadratic(data: &[i32]) -> Vec<i32> {
    let mut duplicates = Vec::new();
    for i in 0..data.len() {
        for j in i + 1..data.len() {
            if data[i] == data[j] && !duplicates.contains(&data[i]) {
                duplicates.push(data[i]);
            }
        }
    }
    duplicates
}
// O(n) approach (expected time, using hashing)
fn find_duplicates_linear(data: &[i32]) -> Vec<i32> {
    let mut seen = HashSet::new();
    let mut duplicates = HashSet::new();
    for &item in data {
        // insert() returns false if the value was already present
        if !seen.insert(item) {
            duplicates.insert(item);
        }
    }
    duplicates.into_iter().collect()
}
fn main() {
    // Generate test data with guaranteed duplicates
    let mut data = Vec::with_capacity(10_000);
    for i in 0..10_000 {
        data.push(i % 1000); // values repeat, so duplicates exist
    }

    // Measure the quadratic approach (run with --release for representative timings)
    let start = Instant::now();
    let duplicates1 = find_duplicates_quadratic(&data);
    let duration1 = start.elapsed();

    // Measure the linear approach
    let start = Instant::now();
    let duplicates2 = find_duplicates_linear(&data);
    let duration2 = start.elapsed();

    // Sanity check: both approaches must find the same number of duplicates
    assert_eq!(duplicates1.len(), duplicates2.len());
    println!("Quadratic approach: {:?}", duration1);
    println!("Linear approach: {:?}", duration2);
    println!("Speedup: {:.2}x", duration1.as_secs_f64() / duration2.as_secs_f64());
}
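Hashing is not the only way to beat the quadratic version. A sort-based alternative, sketched below (`find_duplicates_sorted` is a name introduced here), runs in O(n log n), avoids hashing entirely, and returns its results in sorted order:

// O(n log n) approach: sort a copy, then scan adjacent elements
fn find_duplicates_sorted(data: &[i32]) -> Vec<i32> {
    let mut sorted = data.to_vec();
    sorted.sort_unstable();
    let mut duplicates = Vec::new();
    for window in sorted.windows(2) {
        // Equal neighbours mean a duplicate; the last() check skips repeated runs
        if window[0] == window[1] && duplicates.last() != Some(&window[0]) {
            duplicates.push(window[0]);
        }
    }
    duplicates
}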
Memoization and Caching
use std::collections::HashMap;
// Without memoization (exponential time: subproblems are recomputed endlessly)
fn fibonacci(n: u64) -> u64 {
    match n {
        0 => 0,
        1 => 1,
        _ => fibonacci(n - 1) + fibonacci(n - 2),
    }
}
// With memoization
fn fibonacci_memoized(n: u64, cache: &mut HashMap<u64, u64>) -> u64 {
    if let Some(&result) = cache.get(&n) {
        return result;
    }
    let result = match n {
        0 => 0,
        1 => 1,
        _ => fibonacci_memoized(n - 1, cache) + fibonacci_memoized(n - 2, cache),
    };
    cache.insert(n, result);
    result
}
// Using a more efficient algorithm: iterative, O(n) time, O(1) space
fn fibonacci_iterative(n: u64) -> u64 {
    if n <= 1 {
        return n;
    }
    let mut a = 0;
    let mut b = 1;
    for _ in 2..=n {
        let temp = a + b;
        a = b;
        b = temp;
    }
    b
}
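A minimal usage sketch of the three variants above (6765 is the standard twentieth Fibonacci number; note that all three overflow `u64` once n exceeds 93):

fn main() {
    let mut cache = HashMap::new();
    assert_eq!(fibonacci(20), 6765);
    assert_eq!(fibonacci_memoized(20, &mut cache), 6765);
    assert_eq!(fibonacci_iterative(20), 6765);
    // The naive version would be impractical here; the other two are instant
    println!("fib(90) = {}", fibonacci_iterative(90));
}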
Lazy Evaluation
use std::iter;

// Eager evaluation (computes all values upfront)
fn sum_of_squares_eager(n: usize) -> u64 {
    let mut squares = Vec::with_capacity(n);
    // Generate all squares first
    for i in 0..n {
        squares.push((i as u64).pow(2));
    }
    // Then sum them
    squares.iter().sum()
}

// Lazy evaluation (computes values on demand)
fn sum_of_squares_lazy(n: usize) -> u64 {
    (0..n).map(|i| (i as u64).pow(2)).sum()
}
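The lazy version never materializes the intermediate vector: each square is produced only as the sum consumes it, so the computation runs in constant memory regardless of n. Laziness also makes infinite sequences practical, as the next example shows.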
// Infinite lazy sequence
fn fibonacci_sequence() -> impl Iterator<Item = u64> {
    let mut a = 0;
    let mut b = 1;
    // successors() yields the seed (0) first, then repeatedly calls the
    // closure, which advances the (a, b) pair and yields the new a
    iter::successors(Some(0), move |_| {
        let next = a + b;
        a = b;
        b = next;
        Some(a)
    })
}
fn main() {
    // Take the first 10 Fibonacci numbers; because the iterator is lazy,
    // only these 10 values of the infinite sequence are ever computed
    let fibs: Vec<_> = fibonacci_sequence().take(10).collect();
    println!("{:?}", fibs); // [0, 1, 1, 2, 3, 5, 8, 13, 21, 34]
}
Concurrency and Parallelism
Leveraging multiple cores can dramatically improve performance: