Parallel Iterators with Rayon
use rayon::prelude::*;
use std::time::Instant;
// Sequential implementation
/// Sums the squares of all elements on the calling thread.
///
/// Each `i32` is widened to `i64` before squaring so the per-element
/// square cannot overflow; the running total is also an `i64`.
fn sum_of_squares_sequential(data: &[i32]) -> i64 {
    let mut total: i64 = 0;
    for &value in data {
        let wide = i64::from(value);
        total += wide * wide;
    }
    total
}
// Parallel implementation
/// Sums the squares of all elements using rayon's work-stealing pool.
///
/// `par_iter` splits the slice across worker threads; each thread
/// computes partial sums of squares (as `i64`, to avoid overflow) and
/// the partials are reduced into a single total by `sum`.
fn sum_of_squares_parallel(data: &[i32]) -> i64 {
    data.par_iter()
        .map(|&value| {
            let wide = i64::from(value);
            wide * wide
        })
        .sum()
}
/// Benchmarks the sequential implementation against the parallel one
/// on ten million integers and reports the observed speedup.
fn main() {
    // A large input gives the parallel version enough work per element
    // to amortize thread-scheduling overhead.
    let data: Vec<i32> = (0..10_000_000).collect();

    // Time the sequential baseline.
    let seq_start = Instant::now();
    let seq_sum = sum_of_squares_sequential(&data);
    let seq_time = seq_start.elapsed();

    // Time the rayon-based version on the same input.
    let par_start = Instant::now();
    let par_sum = sum_of_squares_parallel(&data);
    let par_time = par_start.elapsed();

    // Both strategies must produce the same answer.
    assert_eq!(seq_sum, par_sum);

    println!("Sequential: {:?}", seq_time);
    println!("Parallel: {:?}", par_time);
    println!("Speedup: {:.2}x", seq_time.as_secs_f64() / par_time.as_secs_f64());
}
Thread Pools
use std::sync::mpsc;
use std::thread;
use threadpool::ThreadPool;
/// Squares every element one at a time on the calling thread.
///
/// Each element incurs a 1 ms sleep to stand in for an expensive
/// per-item computation, so total time grows linearly with input size.
fn process_data_sequential(data: &[i32]) -> Vec<i32> {
    // Preallocate: the output length is known up front.
    let mut squared = Vec::with_capacity(data.len());
    for &value in data {
        // Simulate expensive computation
        thread::sleep(std::time::Duration::from_millis(1));
        squared.push(value * value);
    }
    squared
}
/// Squares every element using a fixed-size thread pool.
///
/// Work items are fanned out to `num_threads` workers; each result is
/// sent back over a channel tagged with its original index so the
/// output vector can be assembled in input order.
fn process_data_parallel(data: &[i32], num_threads: usize) -> Vec<i32> {
    let pool = ThreadPool::new(num_threads);
    let (sender, receiver) = mpsc::channel();

    for (idx, &value) in data.iter().enumerate() {
        let worker_tx = sender.clone();
        pool.execute(move || {
            // Simulate expensive computation
            thread::sleep(std::time::Duration::from_millis(1));
            worker_tx.send((idx, value * value)).expect("Channel send failed");
        });
    }

    // Drop the last sender handle so the receiver's iterator ends once
    // every worker has sent its result and hung up.
    drop(sender);

    // Results arrive in completion order; place each one back at its
    // original position.
    let mut ordered = vec![0; data.len()];
    for (idx, square) in receiver.iter() {
        ordered[idx] = square;
    }
    ordered
}
Async/Await for I/O-Bound Tasks
use tokio::fs::File;
use tokio::io::{AsyncReadExt, AsyncWriteExt};
use futures::stream::{self, StreamExt};
/// Reads a file, increments every byte (wrapping on overflow), and
/// writes the result to `<path>.processed`.
///
/// Returns the number of bytes written, or the underlying I/O error
/// from any of the open/read/create/write steps.
async fn process_file(path: &str) -> Result<usize, std::io::Error> {
    // Slurp the whole input file into memory.
    let mut input = File::open(path).await?;
    let mut raw = Vec::new();
    input.read_to_end(&mut raw).await?;

    // Transform: add one to each byte, wrapping 0xFF back to 0x00.
    let transformed: Vec<u8> = raw.iter().map(|&b| b.wrapping_add(1)).collect();

    // Persist next to the original, with a ".processed" suffix.
    let mut sink = File::create(format!("{}.processed", path)).await?;
    sink.write_all(&transformed).await?;

    Ok(transformed.len())
}
#[tokio::main]
/// Processes a fixed list of files concurrently, printing a status
/// line per file (success with byte count, or the error).
///
/// Fix: the collected `Vec<()>` was previously bound to an unused
/// `results` variable, which triggers an unused-variable warning;
/// the stream is now driven to completion without binding it.
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let files = vec!["file1.txt", "file2.txt", "file3.txt"];

    // Process files concurrently: `buffer_unordered(10)` caps the
    // number of in-flight `process_file` futures at ten.
    stream::iter(files)
        .map(|path| async move {
            match process_file(path).await {
                Ok(size) => println!("Processed {} ({} bytes)", path, size),
                Err(e) => eprintln!("Error processing {}: {}", path, e),
            }
        })
        .buffer_unordered(10) // Process up to 10 files concurrently
        .collect::<Vec<_>>()
        .await;

    Ok(())
}
Compiler and Build Optimizations
Rust’s compiler, configured through Cargo profiles and `RUSTFLAGS`, offers several optimization options:
Optimization Levels
# Cargo.toml
# Debug profile with some optimizations
[profile.dev]
opt-level = 1 # Basic optimizations (keeps debug builds fast to compile but less sluggish to run)
debug = true # Include debug info
# Release profile with maximum optimizations
[profile.release]
opt-level = 3 # Maximum optimizations
lto = "fat" # Link-time optimization
codegen-units = 1 # Optimize across the entire codebase
# NOTE: with panic = "abort", std::panic::catch_unwind cannot catch panics.
panic = "abort" # Smaller binary size by not unwinding on panic
strip = true # Strip symbols from binary (requires Cargo 1.59+)
Link-Time Optimization (LTO)
# Cargo.toml
[profile.release]
# Enable LTO for better cross-module optimizations
# (trade-off: longer link times and higher memory use during the build)
lto = true # Default LTO
# lto = "thin" # Faster compilation, slightly less optimization
# lto = "fat" # Maximum optimization, slower compilation
Profile-Guided Optimization (PGO)
# Step 1: Compile with instrumentation
# (the instrumented binary writes .profraw files into /tmp/pgo-data at runtime)
RUSTFLAGS="-Cprofile-generate=/tmp/pgo-data" cargo build --release
# Step 2: Run the program to collect profile data
# (run it on workloads representative of real usage for best results)
./target/release/my_program --typical-workload
# Step 3: Merge the profile data
# (llvm-profdata accepts the directory of .profraw files and emits one .profdata)
llvm-profdata merge -o /tmp/pgo-data/merged.profdata /tmp/pgo-data
# Step 4: Compile with the profile data
RUSTFLAGS="-Cprofile-use=/tmp/pgo-data/merged.profdata" cargo build --release