Memory Reuse

// Inefficient: Creates a new vector for each batch
fn process_batches_inefficient(data: &[i32], batch_size: usize) -> Vec<i32> {
    let mut results = Vec::new();
    
    for batch in data.chunks(batch_size) {
        let processed = batch.iter().map(|&x| x * x).collect::<Vec<_>>();
        results.extend_from_slice(&processed);
    }
    
    results
}

// Efficient: Reuses a buffer for each batch
fn process_batches_efficient(data: &[i32], batch_size: usize) -> Vec<i32> {
    let mut results = Vec::with_capacity(data.len());
    let mut buffer = Vec::with_capacity(batch_size);
    
    for batch in data.chunks(batch_size) {
        buffer.clear();
        buffer.extend(batch.iter().map(|&x| x * x));
        results.extend_from_slice(&buffer);
    }
    
    results
}

Stack vs. Heap Allocation

// Heap allocation (potentially slower)
fn process_with_heap(size: usize) -> usize {
    let data = vec![0; size]; // Allocates on the heap
    data.iter().sum()
}

// Stack allocation (potentially faster for small arrays)
fn process_with_stack() -> usize {
    let data = [0; 1024]; // Allocates on the stack (fixed size)
    data.iter().sum()
}

// Using Box for large stack allocations
fn process_large_data() -> usize {
    // This would overflow the stack if allocated directly
    let data = Box::new([0; 1_000_000]);
    data.iter().sum()
}

Algorithmic Optimizations

Often, the biggest performance gains come from algorithmic improvements:

Choosing the Right Algorithm

use std::collections::{HashMap, HashSet};
use std::time::Instant;

// O(n²) approach
fn find_duplicates_quadratic(data: &[i32]) -> Vec<i32> {
    let mut duplicates = Vec::new();
    
    for i in 0..data.len() {
        for j in i+1..data.len() {
            if data[i] == data[j] && !duplicates.contains(&data[i]) {
                duplicates.push(data[i]);
            }
        }
    }
    
    duplicates
}

// O(n) approach
fn find_duplicates_linear(data: &[i32]) -> Vec<i32> {
    let mut seen = HashSet::new();
    let mut duplicates = HashSet::new();
    
    for &item in data {
        if !seen.insert(item) {
            duplicates.insert(item);
        }
    }
    
    duplicates.into_iter().collect()
}

fn main() {
    // Generate test data
    let mut data = Vec::with_capacity(10_000);
    for i in 0..10_000 {
        data.push(i % 1000); // This ensures some duplicates
    }
    
    // Measure quadratic approach
    let start = Instant::now();
    let duplicates1 = find_duplicates_quadratic(&data);
    let duration1 = start.elapsed();
    
    // Measure linear approach
    let start = Instant::now();
    let duplicates2 = find_duplicates_linear(&data);
    let duration2 = start.elapsed();
    
    println!("Quadratic approach: {:?}", duration1);
    println!("Linear approach: {:?}", duration2);
    println!("Speedup: {:.2}x", duration1.as_secs_f64() / duration2.as_secs_f64());
}

Memoization and Caching

use std::collections::HashMap;

// Without memoization
fn fibonacci(n: u64) -> u64 {
    match n {
        0 => 0,
        1 => 1,
        _ => fibonacci(n - 1) + fibonacci(n - 2),
    }
}

// With memoization
fn fibonacci_memoized(n: u64, cache: &mut HashMap<u64, u64>) -> u64 {
    if let Some(&result) = cache.get(&n) {
        return result;
    }
    
    let result = match n {
        0 => 0,
        1 => 1,
        _ => fibonacci_memoized(n - 1, cache) + fibonacci_memoized(n - 2, cache),
    };
    
    cache.insert(n, result);
    result
}

// Using a more efficient algorithm
fn fibonacci_iterative(n: u64) -> u64 {
    if n <= 1 {
        return n;
    }
    
    let mut a = 0;
    let mut b = 1;
    
    for _ in 2..=n {
        let temp = a + b;
        a = b;
        b = temp;
    }
    
    b
}

Lazy Evaluation

use std::iter;

// Eager evaluation (computes all values upfront)
fn sum_of_squares_eager(n: usize) -> u64 {
    let mut squares = Vec::with_capacity(n);
    
    // Generate all squares first
    for i in 0..n {
        squares.push((i as u64).pow(2));
    }
    
    // Then sum them
    squares.iter().sum()
}

// Lazy evaluation (computes values on demand)
fn sum_of_squares_lazy(n: usize) -> u64 {
    (0..n).map(|i| (i as u64).pow(2)).sum()
}

// Infinite lazy sequence
fn fibonacci_sequence() -> impl Iterator<Item = u64> {
    let mut a = 0;
    let mut b = 1;
    
    iter::successors(Some(0), move |_| {
        let next = a + b;
        a = b;
        b = next;
        Some(a)
    })
}

fn main() {
    // Take the first 10 Fibonacci numbers
    let fibs: Vec<_> = fibonacci_sequence().take(10).collect();
    println!("{:?}", fibs);
}

Concurrency and Parallelism

Leveraging multiple cores can dramatically improve performance: