
Benchmarking

Benchmarking is essential for writing high-performance Rust code. The standard library's #[bench] harness is still nightly-only, so on stable Rust the de facto standard is Criterion.rs, which layers statistical analysis, HTML reports, and baseline comparisons on top of simple timing loops.

Getting Started with Criterion.rs

Basic Setup

Add to Cargo.toml:

[dev-dependencies]
criterion = { version = "0.5", features = ["html_reports"] }

[[bench]]
name = "my_benchmark"
harness = false
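
Criterion replaces the default libtest harness, so every file under benches/ needs its own [[bench]] entry with harness = false. Later sections on this page assume additional targets along these lines (matching the names used further down):

[[bench]]
name = "memory_benchmark"
harness = false

[[bench]]
name = "main_benchmark"
harness = false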

Your First Benchmark

benches/my_benchmark.rs:

use criterion::{black_box, criterion_group, criterion_main, Criterion};

fn fibonacci_recursive(n: u64) -> u64 {
    match n {
        0 => 0,
        1 => 1,
        n => fibonacci_recursive(n - 1) + fibonacci_recursive(n - 2),
    }
}

fn fibonacci_iterative(n: u64) -> u64 {
    let mut a = 0;
    let mut b = 1;
    
    for _ in 0..n {
        let temp = a;
        a = b;
        b = temp + b;
    }
    
    a
}

fn criterion_benchmark(c: &mut Criterion) {
    c.bench_function("fibonacci recursive 20", |b| {
        b.iter(|| fibonacci_recursive(black_box(20)))
    });
    
    c.bench_function("fibonacci iterative 20", |b| {
        b.iter(|| fibonacci_iterative(black_box(20)))
    });
}

criterion_group!(benches, criterion_benchmark);
criterion_main!(benches);

Running Benchmarks

# Run all benchmarks
cargo bench

# Run specific benchmark
cargo bench fibonacci

# HTML reports (html_reports feature) are generated automatically
# on every run at target/criterion/report/index.html

# Save a baseline for later comparison
cargo bench -- --save-baseline my_baseline

# Compare a new run against the saved baseline
cargo bench -- --baseline my_baseline

Advanced Benchmarking Techniques

Parameterized Benchmarks

use criterion::{black_box, criterion_group, criterion_main, Criterion, BenchmarkId};

fn sort_algorithms_benchmark(c: &mut Criterion) {
    let mut group = c.benchmark_group("sorting_algorithms");
    
    // Test different input sizes
    for size in [100, 1000, 10000].iter() {
        let data: Vec<i32> = (0..*size).rev().collect(); // Worst case: reverse sorted
        
        group.bench_with_input(BenchmarkId::new("bubble_sort", size), size, |b, _| {
            b.iter_with_setup(
                || data.clone(),
                |mut data| bubble_sort(black_box(&mut data))
            );
        });
        
        group.bench_with_input(BenchmarkId::new("quick_sort", size), size, |b, _| {
            b.iter_with_setup(
                || data.clone(),
                |mut data| quick_sort(black_box(&mut data))
            );
        });
        
        group.bench_with_input(BenchmarkId::new("rust_sort", size), size, |b, _| {
            b.iter_with_setup(
                || data.clone(),
                |mut data| data.sort_unstable()
            );
        });
    }
    
    group.finish();
}

fn data_structure_benchmark(c: &mut Criterion) {
    let mut group = c.benchmark_group("data_structures");
    
    // Configure benchmark parameters
    group.sample_size(1000);
    group.measurement_time(std::time::Duration::from_secs(10));
    
    // Test different data structures for lookups
    for size in [1000, 10000, 100000].iter() {
        let vec_data: Vec<i32> = (0..*size).collect();
        let mut hash_map = std::collections::HashMap::new();
        let mut btree_map = std::collections::BTreeMap::new();
        
        for &item in &vec_data {
            hash_map.insert(item, item);
            btree_map.insert(item, item);
        }
        
        let search_key = size / 2;
        
        group.bench_with_input(BenchmarkId::new("vec_linear_search", size), size, |b, _| {
            b.iter(|| vec_data.iter().find(|&&x| x == black_box(search_key)))
        });
        
        group.bench_with_input(BenchmarkId::new("vec_binary_search", size), size, |b, _| {
            b.iter(|| vec_data.binary_search(&black_box(search_key)))
        });
        
        group.bench_with_input(BenchmarkId::new("hashmap_lookup", size), size, |b, _| {
            b.iter(|| hash_map.get(&black_box(search_key)))
        });
        
        group.bench_with_input(BenchmarkId::new("btreemap_lookup", size), size, |b, _| {
            b.iter(|| btree_map.get(&black_box(search_key)))
        });
    }
    
    group.finish();
}

fn bubble_sort(arr: &mut [i32]) {
    let len = arr.len();
    for i in 0..len {
        for j in 0..len - 1 - i {
            if arr[j] > arr[j + 1] {
                arr.swap(j, j + 1);
            }
        }
    }
}

fn quick_sort(arr: &mut [i32]) {
    if arr.len() <= 1 {
        return;
    }
    
    let pivot = partition(arr);
    quick_sort(&mut arr[0..pivot]);
    quick_sort(&mut arr[pivot + 1..]);
}

fn partition(arr: &mut [i32]) -> usize {
    let len = arr.len();
    let pivot = len - 1;
    let mut i = 0;
    
    for j in 0..len - 1 {
        if arr[j] <= arr[pivot] {
            arr.swap(i, j);
            i += 1;
        }
    }
    
    arr.swap(i, pivot);
    i
}

criterion_group!(
    benches,
    sort_algorithms_benchmark,
    data_structure_benchmark
);
criterion_main!(benches);
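
Note: iter_with_setup still works, but Criterion's documentation steers new code toward iter_batched, which adds explicit control over how many inputs are built per batch. Inside the loop above, the rust_sort case could be written as (a sketch; BatchSize::SmallInput is a reasonable default for cheap setup):

use criterion::BatchSize;

group.bench_with_input(BenchmarkId::new("rust_sort", size), size, |b, _| {
    b.iter_batched(
        || data.clone(),                 // setup runs outside the timed region
        |mut data| data.sort_unstable(), // only this closure is measured
        BatchSize::SmallInput,
    );
});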

Memory Usage Benchmarks

use criterion::{black_box, criterion_group, criterion_main, Criterion, BenchmarkId};

fn memory_allocation_benchmark(c: &mut Criterion) {
    let mut group = c.benchmark_group("memory_allocation");
    
    // Pre-allocate vs reallocate
    group.bench_function("vec_push_reallocate", |b| {
        b.iter(|| {
            let mut vec = Vec::new();
            for i in 0..1000 {
                vec.push(black_box(i));
            }
            vec
        })
    });
    
    group.bench_function("vec_with_capacity", |b| {
        b.iter(|| {
            let mut vec = Vec::with_capacity(1000);
            for i in 0..1000 {
                vec.push(black_box(i));
            }
            vec
        })
    });
    
    // String building strategies
    group.bench_function("string_concatenation", |b| {
        b.iter(|| {
            let mut result = String::new();
            for i in 0..100 {
                result = result + &black_box(i).to_string();
            }
            result
        })
    });
    
    group.bench_function("string_push_str", |b| {
        b.iter(|| {
            let mut result = String::new();
            for i in 0..100 {
                result.push_str(&black_box(i).to_string());
            }
            result
        })
    });
    
    group.bench_function("string_with_capacity", |b| {
        b.iter(|| {
            let mut result = String::with_capacity(1000);
            for i in 0..100 {
                result.push_str(&black_box(i).to_string());
            }
            result
        })
    });
    
    group.finish();
}

// Object pooling benchmark
struct Pool<T> {
    objects: std::sync::Mutex<Vec<T>>,
    factory: fn() -> T,
}

impl<T> Pool<T> {
    fn new(factory: fn() -> T) -> Self {
        Pool {
            objects: std::sync::Mutex::new(Vec::new()),
            factory,
        }
    }
    
    fn get(&self) -> T {
        let mut objects = self.objects.lock().unwrap();
        objects.pop().unwrap_or_else(|| (self.factory)())
    }
    
    fn put(&self, obj: T) {
        let mut objects = self.objects.lock().unwrap();
        if objects.len() < 100 { // Limit pool size
            objects.push(obj);
        }
    }
}

fn object_pooling_benchmark(c: &mut Criterion) {
    let mut group = c.benchmark_group("object_pooling");
    
    let pool = Pool::new(|| Vec::<i32>::with_capacity(1000));
    
    group.bench_function("no_pooling", |b| {
        b.iter(|| {
            let mut vec = Vec::with_capacity(1000);
            for i in 0..1000 {
                vec.push(black_box(i));
            }
            // Vec is dropped here
        })
    });
    
    group.bench_function("with_pooling", |b| {
        b.iter(|| {
            let mut vec = pool.get();
            vec.clear();
            for i in 0..1000 {
                vec.push(black_box(i));
            }
            pool.put(vec);
        })
    });
    
    group.finish();
}

criterion_group!(
    benches,
    memory_allocation_benchmark,
    object_pooling_benchmark
);
criterion_main!(benches);

Async Benchmarking

Tokio Runtime Benchmarks
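
Criterion's to_async support is behind a feature flag, so these examples assume dev-dependencies along these lines (versions are illustrative):

[dev-dependencies]
criterion = { version = "0.5", features = ["html_reports", "async_tokio"] }
tokio = { version = "1", features = ["full"] }
reqwest = "0.11"
futures = "0.3"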

use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
use tokio::runtime::Runtime;

async fn async_computation(n: u64) -> u64 {
    // Simulate async work
    tokio::time::sleep(tokio::time::Duration::from_nanos(n * 100)).await;
    n * 2
}

async fn parallel_async_work(tasks: usize) -> Vec<u64> {
    let handles: Vec<_> = (0..tasks)
        .map(|i| tokio::spawn(async_computation(i as u64)))
        .collect();
    
    let mut results = Vec::new();
    for handle in handles {
        results.push(handle.await.unwrap());
    }
    results
}

fn async_benchmark(c: &mut Criterion) {
    let rt = Runtime::new().unwrap();
    
    let mut group = c.benchmark_group("async_operations");
    
    group.bench_function("single_async_task", |b| {
        b.to_async(&rt).iter(|| async_computation(black_box(10)))
    });
    
    for task_count in [1, 10, 100].iter() {
        group.bench_with_input(
            BenchmarkId::new("parallel_tasks", task_count),
            task_count,
            |b, &task_count| {
                b.to_async(&rt).iter(|| parallel_async_work(black_box(task_count)))
            },
        );
    }
    
    group.finish();
}

// HTTP client benchmarks
async fn make_http_request(client: &reqwest::Client, url: &str) -> Result<String, reqwest::Error> {
    let response = client.get(url).send().await?;
    response.text().await
}

fn http_benchmark(c: &mut Criterion) {
    let rt = Runtime::new().unwrap();
    let client = reqwest::Client::new();
    
    let mut group = c.benchmark_group("http_requests");
    
    // Note: This would require a test server or mock service
    group.bench_function("single_request", |b| {
        b.to_async(&rt).iter(|| {
            make_http_request(&client, black_box("http://httpbin.org/json"))
        })
    });
    
    group.bench_function("concurrent_requests", |b| {
        b.to_async(&rt).iter(|| async {
            let handles: Vec<_> = (0..10)
                .map(|_| make_http_request(&client, "http://httpbin.org/json"))
                .collect();
            
            futures::future::try_join_all(handles).await
        })
    });
    
    group.finish();
}

criterion_group!(benches, async_benchmark, http_benchmark);
criterion_main!(benches);

Micro vs Macro Benchmarks

Micro-benchmarks

use criterion::{black_box, criterion_group, criterion_main, Criterion};

// Micro-benchmark: Focus on small, isolated operations
fn string_operations_micro(c: &mut Criterion) {
    let mut group = c.benchmark_group("string_micro");
    
    let test_string = "Hello, World! This is a test string for benchmarking.";
    
    group.bench_function("string_clone", |b| {
        b.iter(|| black_box(test_string).to_owned())
    });
    
    group.bench_function("string_chars_count", |b| {
        b.iter(|| black_box(test_string).chars().count())
    });
    
    group.bench_function("string_len", |b| {
        b.iter(|| black_box(test_string).len())
    });
    
    group.bench_function("string_contains", |b| {
        b.iter(|| black_box(test_string).contains(black_box("test")))
    });
    
    group.bench_function("string_split", |b| {
        b.iter(|| black_box(test_string).split(' ').collect::<Vec<_>>())
    });
    
    group.finish();
}

// Math operations micro-benchmarks
fn math_operations_micro(c: &mut Criterion) {
    let mut group = c.benchmark_group("math_micro");
    
    let x = 12345.6789f64;
    let y = 9876.5432f64;
    
    group.bench_function("float_add", |b| {
        b.iter(|| black_box(x) + black_box(y))
    });
    
    group.bench_function("float_mul", |b| {
        b.iter(|| black_box(x) * black_box(y))
    });
    
    group.bench_function("float_div", |b| {
        b.iter(|| black_box(x) / black_box(y))
    });
    
    group.bench_function("float_sqrt", |b| {
        b.iter(|| black_box(x).sqrt())
    });
    
    group.bench_function("float_sin", |b| {
        b.iter(|| black_box(x).sin())
    });
    
    group.finish();
}

criterion_group!(
    micro_benches,
    string_operations_micro,
    math_operations_micro
);

Macro-benchmarks

// Macro-benchmark: Test complete workflows or larger operations
fn json_processing_macro(c: &mut Criterion) {
    let mut group = c.benchmark_group("json_macro");
    
    let sample_data = r#"
    {
        "users": [
            {"id": 1, "name": "Alice", "posts": [{"title": "Hello", "content": "World"}]},
            {"id": 2, "name": "Bob", "posts": [{"title": "Test", "content": "Data"}]}
        ],
        "metadata": {
            "version": "1.0",
            "created": "2023-01-01"
        }
    }
    "#;
    
    group.bench_function("parse_and_process_json", |b| {
        b.iter(|| {
            let data: serde_json::Value = serde_json::from_str(black_box(sample_data)).unwrap();
            
            // Process the data
            let mut user_count = 0;
            let mut post_count = 0;
            
            if let Some(users) = data["users"].as_array() {
                user_count = users.len();
                for user in users {
                    if let Some(posts) = user["posts"].as_array() {
                        post_count += posts.len();
                    }
                }
            }
            
            (user_count, post_count)
        })
    });
    
    group.finish();
}

// File I/O macro-benchmark
fn file_io_macro(c: &mut Criterion) {
    use std::io::{Write, Read};
    use tempfile::NamedTempFile;
    
    let mut group = c.benchmark_group("file_io_macro");
    
    let test_data = "x".repeat(10000); // 10KB of data
    
    group.bench_function("write_read_file", |b| {
        b.iter(|| {
            // Write data to temporary file
            let mut temp_file = NamedTempFile::new().unwrap();
            temp_file.write_all(black_box(test_data.as_bytes())).unwrap();
            temp_file.flush().unwrap();
            
            // Read data back
            let mut file = std::fs::File::open(temp_file.path()).unwrap();
            let mut buffer = String::new();
            file.read_to_string(&mut buffer).unwrap();
            
            buffer.len()
        })
    });
    
    group.finish();
}

criterion_group!(
    macro_benches,
    json_processing_macro,
    file_io_macro
);

criterion_main!(micro_benches, macro_benches);

Statistical Analysis and Reporting

Custom Measurement and Analysis

use criterion::{black_box, criterion_group, criterion_main, Criterion, BenchmarkId, Throughput};
use std::time::Duration;

fn throughput_benchmark(c: &mut Criterion) {
    let mut group = c.benchmark_group("throughput");
    
    for size in [1_000, 10_000, 100_000].iter() {
        // Set throughput for bytes processed
        group.throughput(Throughput::Bytes(*size as u64));
        
        group.bench_with_input(BenchmarkId::new("process_bytes", size), size, |b, &size| {
            let data = vec![0u8; size];
            b.iter(|| {
                // Simulate processing each byte
                let mut sum = 0u64;
                for &byte in black_box(&data) {
                    sum = sum.wrapping_add(byte as u64);
                }
                sum
            });
        });
    }
    
    group.finish();
}

fn statistical_analysis_benchmark(c: &mut Criterion) {
    let mut group = c.benchmark_group("statistical_analysis");
    
    // Configure measurement parameters
    group.sample_size(1000);                                    // Number of samples
    group.measurement_time(Duration::from_secs(30));            // Total measurement time
    group.warm_up_time(Duration::from_secs(3));                 // Warm-up time
    group.confidence_level(0.95);                               // Confidence level
    group.significance_level(0.05);                             // Significance level
    group.noise_threshold(0.02);                                // Noise threshold (2%)
    
    group.bench_function("algorithm_with_variance", |b| {
        use rand::Rng;
        let mut rng = rand::thread_rng();
        
        b.iter(|| {
            // Simulate algorithm with some variance
            let work_amount = rng.gen_range(1000..2000);
            let mut sum = 0;
            for i in 0..work_amount {
                sum += black_box(i * i);
            }
            sum
        });
    });
    
    group.finish();
}

criterion_group!(
    benches,
    throughput_benchmark,
    statistical_analysis_benchmark
);
criterion_main!(benches);

Custom Measurement

use criterion::measurement::{Measurement, ValueFormatter};
use criterion::{black_box, criterion_group, criterion_main, Criterion, Throughput};
use std::time::Instant;

// Custom measurement that tracks wall time and (as a placeholder)
// allocation counts
struct CustomMeasurement;

impl Measurement for CustomMeasurement {
    // What start() hands to end(): the start instant plus allocation count
    type Intermediate = (Instant, usize);
    // What one measurement yields: (nanoseconds, allocations)
    type Value = (f64, f64);

    fn start(&self) -> Self::Intermediate {
        // In a real implementation, you'd hook into the allocator here
        let start_allocs = 0; // Placeholder for allocation count
        (Instant::now(), start_allocs)
    }

    fn end(&self, i: Self::Intermediate) -> Self::Value {
        let end_allocs = 0; // Placeholder for allocation count
        let elapsed = i.0.elapsed();
        let allocs = end_allocs - i.1;
        (elapsed.as_nanos() as f64, allocs as f64)
    }

    fn add(&self, v1: &Self::Value, v2: &Self::Value) -> Self::Value {
        (v1.0 + v2.0, v1.1 + v2.1)
    }

    fn zero(&self) -> Self::Value {
        (0.0, 0.0)
    }

    fn to_f64(&self, value: &Self::Value) -> f64 {
        value.0 // Report time in nanoseconds
    }

    fn formatter(&self) -> &dyn ValueFormatter {
        &NanosFormatter
    }
}

// Criterion needs a formatter for the custom value; this minimal one
// labels everything in nanoseconds without rescaling
struct NanosFormatter;

impl ValueFormatter for NanosFormatter {
    fn scale_values(&self, _typical: f64, _values: &mut [f64]) -> &'static str {
        "ns"
    }

    fn scale_throughputs(
        &self,
        _typical: f64,
        _throughput: &Throughput,
        _values: &mut [f64],
    ) -> &'static str {
        "ns"
    }

    fn scale_for_machines(&self, _values: &mut [f64]) -> &'static str {
        "ns"
    }
}

// Benchmarks using the custom measurement take Criterion<CustomMeasurement>
fn custom_measurement_benchmark(c: &mut Criterion<CustomMeasurement>) {
    let mut group = c.benchmark_group("custom_measurement");
    
    group.bench_function("allocation_heavy", |b| {
        b.iter(|| {
            let mut vecs = Vec::new();
            for i in 0..100 {
                let mut vec = Vec::with_capacity(i);
                for j in 0..i {
                    vec.push(black_box(j));
                }
                vecs.push(vec);
            }
            vecs
        });
    });
    
    group.finish();
}

// Wire the custom measurement in through the group's config
criterion_group!(
    name = benches;
    config = Criterion::default().with_measurement(CustomMeasurement);
    targets = custom_measurement_benchmark
);
criterion_main!(benches);
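
The allocation counts above are placeholders. One way to make them real is to wrap the global allocator in a counter; a minimal sketch (CountingAllocator is illustrative, not a Criterion API):

use std::alloc::{GlobalAlloc, Layout, System};
use std::sync::atomic::{AtomicUsize, Ordering};

// Incremented on every heap allocation in the process
static ALLOCATIONS: AtomicUsize = AtomicUsize::new(0);

struct CountingAllocator;

unsafe impl GlobalAlloc for CountingAllocator {
    unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
        ALLOCATIONS.fetch_add(1, Ordering::Relaxed);
        System.alloc(layout)
    }

    unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
        System.dealloc(ptr, layout)
    }
}

#[global_allocator]
static GLOBAL: CountingAllocator = CountingAllocator;

// start()/end() can then sample this instead of the placeholder zeros
fn current_allocations() -> usize {
    ALLOCATIONS.load(Ordering::Relaxed)
}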

Profiling and Optimization

CPU Profiling Integration

// Cargo.toml additions for profiling
/*
[profile.release]
debug = true  # Enable debug symbols for profiling

[profile.bench]
debug = true

[dev-dependencies]
pprof = { version = "0.13", features = ["flamegraph", "criterion"] }  # version illustrative
*/

use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
use pprof::criterion::{Output, PProfProfiler};

// CPU-intensive algorithm for profiling
fn matrix_multiplication(a: &[Vec<f64>], b: &[Vec<f64>]) -> Vec<Vec<f64>> {
    let rows_a = a.len();
    let cols_a = a[0].len();
    let cols_b = b[0].len();
    
    let mut result = vec![vec![0.0; cols_b]; rows_a];
    
    for i in 0..rows_a {
        for j in 0..cols_b {
            for k in 0..cols_a {
                result[i][j] += a[i][k] * b[k][j];
            }
        }
    }
    
    result
}

fn cpu_intensive_benchmark(c: &mut Criterion) {
    let mut group = c.benchmark_group("cpu_intensive");
    
    for size in [10, 50, 100].iter() {
        let matrix_a: Vec<Vec<f64>> = (0..*size)
            .map(|i| (0..*size).map(|j| (i * j) as f64).collect())
            .collect();
        
        let matrix_b: Vec<Vec<f64>> = (0..*size)
            .map(|i| (0..*size).map(|j| (i + j) as f64).collect())
            .collect();
        
        group.bench_with_input(
            BenchmarkId::new("matrix_mult", size),
            size,
            |b, _| {
                b.iter(|| matrix_multiplication(black_box(&matrix_a), black_box(&matrix_b)))
            },
        );
    }
    
    group.finish();
}

// The profiler is attached through the group's config and activated
// with: cargo bench --bench cpu_benchmark -- --profile-time 5
criterion_group!(
    name = benches;
    config = Criterion::default().with_profiler(PProfProfiler::new(100, Output::Flamegraph(None)));
    targets = cpu_intensive_benchmark
);
criterion_main!(benches);

Memory Profiling

# Run a benchmark in Criterion's profiling mode; this hands each bench
# to the configured profiler (e.g. pprof above) for the given seconds
cargo bench --bench memory_benchmark -- --profile-time=5

# Using heaptrack (point it at the compiled bench binary in
# target/release/deps, not at cargo itself)
heaptrack ./target/release/deps/memory_benchmark-<hash> --bench

# Using massif (valgrind tool)
valgrind --tool=massif ./target/release/deps/memory_benchmark-<hash> --bench

# Generate flamegraphs
cargo install flamegraph
cargo flamegraph --bench cpu_benchmark

Optimization Techniques

use criterion::{black_box, criterion_group, criterion_main, Criterion};

// Compare different optimization approaches
fn optimization_comparison(c: &mut Criterion) {
    let mut group = c.benchmark_group("optimization_comparison");
    
    let data: Vec<i32> = (0..10000).collect();
    
    // Naive approach
    group.bench_function("naive_sum", |b| {
        b.iter(|| {
            let mut sum = 0;
            for item in black_box(&data) {
                sum += item;
            }
            sum
        });
    });
    
    // Iterator approach
    group.bench_function("iterator_sum", |b| {
        b.iter(|| black_box(&data).iter().sum::<i32>())
    });
    
    // Parallel processing with rayon
    group.bench_function("parallel_sum", |b| {
        b.iter(|| {
            use rayon::prelude::*;
            black_box(&data).par_iter().sum::<i32>()
        });
    });
    
    // Manual four-way unrolling; the optimizer can often auto-vectorize
    // this (explicit SIMD lives in std::arch, or std::simd on nightly)
    group.bench_function("unrolled_sum", |b| {
        b.iter(|| {
            // Sum four elements per chunk, then handle the tail
            let mut sum = 0;
            let chunks = black_box(&data).chunks_exact(4);
            let remainder = chunks.remainder();
            
            for chunk in chunks {
                sum += chunk[0] + chunk[1] + chunk[2] + chunk[3];
            }
            
            for &item in remainder {
                sum += item;
            }
            
            sum
        });
    });
    
    group.finish();
}

// Cache-friendly vs cache-unfriendly access patterns
fn cache_benchmark(c: &mut Criterion) {
    let mut group = c.benchmark_group("cache_access");
    
    const SIZE: usize = 1024 * 1024; // 1MB of data
    let data: Vec<i32> = (0..SIZE).map(|i| i as i32).collect();
    
    // Sequential access (cache-friendly)
    group.bench_function("sequential_access", |b| {
        b.iter(|| {
            let mut sum = 0;
            for i in 0..SIZE {
                sum += black_box(data[i]);
            }
            sum
        });
    });
    
    // Random access (cache-unfriendly)
    group.bench_function("random_access", |b| {
        let indices: Vec<usize> = {
            use rand::seq::SliceRandom;
            let mut indices: Vec<usize> = (0..SIZE).collect();
            indices.shuffle(&mut rand::thread_rng());
            indices
        };
        
        b.iter(|| {
            let mut sum = 0i64;
            for &i in &indices {
                sum += black_box(data[i]) as i64;
            }
            sum
        });
    });
    
    // Strided access
    group.bench_function("strided_access", |b| {
        b.iter(|| {
            let mut sum = 0;
            let stride = 64; // Every 64th element
            for i in (0..SIZE).step_by(stride) {
                sum += black_box(data[i]);
            }
            sum
        });
    });
    
    group.finish();
}

criterion_group!(
    benches,
    optimization_comparison,
    cache_benchmark
);
criterion_main!(benches);

Real-World Benchmarking Examples

Database Performance

use criterion::{black_box, criterion_group, criterion_main, Criterion, BenchmarkId};
use std::collections::HashMap;

// Simulate different database-like operations
#[derive(Clone)]
struct InMemoryDb {
    data: HashMap<String, String>,
}

impl InMemoryDb {
    fn new() -> Self {
        InMemoryDb {
            data: HashMap::new(),
        }
    }
    
    fn insert(&mut self, key: String, value: String) {
        self.data.insert(key, value);
    }
    
    fn get(&self, key: &str) -> Option<&String> {
        self.data.get(key)
    }
    
    fn scan(&self, prefix: &str) -> Vec<(&String, &String)> {
        self.data
            .iter()
            .filter(|(k, _)| k.starts_with(prefix))
            .collect()
    }
}

fn database_benchmark(c: &mut Criterion) {
    let mut group = c.benchmark_group("database_operations");
    
    for size in [1_000, 10_000, 100_000].iter() {
        let mut db = InMemoryDb::new();
        
        // Pre-populate database
        for i in 0..*size {
            db.insert(format!("key_{:06}", i), format!("value_{}", i));
        }
        
        group.bench_with_input(
            BenchmarkId::new("insert", size),
            size,
            |b, _| {
                let mut db = db.clone();
                let mut counter = *size;
                b.iter(|| {
                    db.insert(
                        format!("new_key_{}", counter),
                        black_box(format!("new_value_{}", counter))
                    );
                    counter += 1;
                });
            },
        );
        
        group.bench_with_input(
            BenchmarkId::new("lookup", size),
            size,
            |b, &size| {
                b.iter(|| {
                    let key = format!("key_{:06}", black_box(size / 2));
                    db.get(&key)
                });
            },
        );
        
        group.bench_with_input(
            BenchmarkId::new("scan", size),
            size,
            |b, _| {
                b.iter(|| db.scan(black_box("key_0001")));
            },
        );
    }
    
    group.finish();
}

criterion_group!(benches, database_benchmark);
criterion_main!(benches);

Serialization Performance

use criterion::{black_box, criterion_group, criterion_main, Criterion};
use serde::{Deserialize, Serialize};

#[derive(Serialize, Deserialize, Clone)]
struct TestData {
    id: u64,
    name: String,
    values: Vec<f64>,
    metadata: std::collections::HashMap<String, String>,
}

impl TestData {
    fn generate(size: usize) -> Self {
        let mut metadata = std::collections::HashMap::new();
        for i in 0..10 {
            metadata.insert(format!("key_{}", i), format!("value_{}", i));
        }
        
        TestData {
            id: 12345,
            name: "Test Object".to_string(),
            values: (0..size).map(|i| i as f64 * 0.1).collect(),
            metadata,
        }
    }
}

fn serialization_benchmark(c: &mut Criterion) {
    let mut group = c.benchmark_group("serialization");
    
    let test_data = TestData::generate(1000);
    
    // JSON serialization
    group.bench_function("json_serialize", |b| {
        b.iter(|| serde_json::to_string(black_box(&test_data)).unwrap())
    });
    
    let json_data = serde_json::to_string(&test_data).unwrap();
    group.bench_function("json_deserialize", |b| {
        b.iter(|| {
            let _: TestData = serde_json::from_str(black_box(&json_data)).unwrap();
        })
    });
    
    // Binary serialization with bincode
    group.bench_function("bincode_serialize", |b| {
        b.iter(|| bincode::serialize(black_box(&test_data)).unwrap())
    });
    
    let binary_data = bincode::serialize(&test_data).unwrap();
    group.bench_function("bincode_deserialize", |b| {
        b.iter(|| {
            let _: TestData = bincode::deserialize(black_box(&binary_data)).unwrap();
        })
    });
    
    // MessagePack serialization
    group.bench_function("msgpack_serialize", |b| {
        b.iter(|| rmp_serde::to_vec(black_box(&test_data)).unwrap())
    });
    
    let msgpack_data = rmp_serde::to_vec(&test_data).unwrap();
    group.bench_function("msgpack_deserialize", |b| {
        b.iter(|| {
            let _: TestData = rmp_serde::from_slice(black_box(&msgpack_data)).unwrap();
        })
    });
    
    group.finish();
}

criterion_group!(benches, serialization_benchmark);
criterion_main!(benches);

Continuous Integration and Performance Regression

CI Integration

.github/workflows/benchmark.yml:

name: Benchmark

on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]

jobs:
  benchmark:
    runs-on: ubuntu-latest
    
    steps:
    - uses: actions/checkout@v3
    
    - name: Install Rust
      uses: dtolnay/rust-toolchain@stable
        
    - name: Cache Cargo
      uses: actions/cache@v3
      with:
        path: |
          ~/.cargo/bin/
          ~/.cargo/registry/index/
          ~/.cargo/registry/cache/
          ~/.cargo/git/db/
          target/
        key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }}
        
    - name: Run benchmarks
      run: cargo bench --bench main_benchmark -- --output-format bencher | tee benchmark_output.txt
      
    - name: Store benchmark result
      uses: benchmark-action/github-action-benchmark@v1
      with:
        tool: 'cargo'
        output-file-path: benchmark_output.txt
        github-token: ${{ secrets.GITHUB_TOKEN }}
        auto-push: true
        # Show alert when performance degrades
        alert-threshold: '200%'
        comment-on-alert: true
        fail-on-alert: true
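
For local comparisons outside CI, the critcmp tool (cargo install critcmp) reads Criterion's saved baselines directly:

# On the main branch
cargo bench -- --save-baseline main

# After your changes
cargo bench -- --save-baseline feature

# Compare the two baselines
critcmp main feature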

Performance Regression Detection

// Cargo.toml
/*
[dev-dependencies]
criterion = { version = "0.5", features = ["html_reports"] }
iai = "0.1"  # Instruction-level benchmarking

[[bench]]
name = "critical_path"
harness = false

[[bench]]
name = "iai_benchmark"
harness = false
*/

use criterion::{black_box, criterion_group, criterion_main, Criterion};

// Critical path benchmarks that should not regress
fn critical_path_benchmarks(c: &mut Criterion) {
    let mut group = c.benchmark_group("critical_path");
    
    // Set strict performance thresholds
    group.noise_threshold(0.01); // 1% noise threshold
    group.confidence_level(0.99); // 99% confidence
    
    // Core algorithm that must maintain performance
    group.bench_function("core_algorithm", |b| {
        let data: Vec<i32> = (0..10000).collect();
        b.iter(|| {
            // This is a critical algorithm - any regression should be caught
            data.iter()
                .filter(|&&x| x % 2 == 0)
                .map(|&x| x * x)
                .sum::<i32>()
        });
    });
    
    // Memory allocation patterns that shouldn't regress
    group.bench_function("allocation_pattern", |b| {
        b.iter(|| {
            let mut vecs = Vec::with_capacity(100);
            for i in 0..100 {
                let mut vec = Vec::with_capacity(i);
                for j in 0..i {
                    vec.push(black_box(j));
                }
                vecs.push(vec);
            }
            vecs
        });
    });
    
    group.finish();
}

criterion_group!(benches, critical_path_benchmarks);
criterion_main!(benches);

Instruction-level benchmarking with iai counts CPU instructions (via Cachegrind) instead of wall time, so its results are deterministic and well suited to CI. Because iai::main! generates its own main function, these benchmarks must live in their own bench target, matching the [[bench]] entry above.

benches/iai_benchmark.rs:

use iai::black_box;

fn iai_fibonacci(n: u64) -> u64 {
    match n {
        0 => 0,
        1 => 1,
        n => iai_fibonacci(n - 1) + iai_fibonacci(n - 2),
    }
}

fn iai_fibonacci_20() -> u64 {
    iai_fibonacci(black_box(20))
}

fn iai_vector_sum() -> i32 {
    (0..10000).sum()
}

// IAI benchmarks count CPU instructions and are deterministic
iai::main!(iai_fibonacci_20, iai_vector_sum);

Best Practices for Benchmarking

Avoiding Common Pitfalls

use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};

fn benchmarking_pitfalls(c: &mut Criterion) {
    let mut group = c.benchmark_group("pitfalls");
    
    // PITFALL 1: Dead code elimination
    // BAD: Compiler might optimize away the computation
    group.bench_function("bad_dead_code", |b| {
        b.iter(|| {
            let result = expensive_computation(42);
            // Result is not used - might be optimized away!
        });
    });
    
    // GOOD: Use black_box to prevent optimization
    group.bench_function("good_black_box", |b| {
        b.iter(|| {
            let result = expensive_computation(black_box(42));
            black_box(result); // Prevent dead code elimination
        });
    });
    
    // PITFALL 2: Constant folding
    // BAD: Using constants that compiler can optimize
    group.bench_function("bad_constant_folding", |b| {
        b.iter(|| {
            expensive_computation(42) // Compiler might precompute this
        });
    });
    
    // GOOD: Use black_box for inputs
    group.bench_function("good_prevent_folding", |b| {
        b.iter(|| {
            expensive_computation(black_box(42))
        });
    });
    
    // PITFALL 3: Setup overhead in benchmark
    // BAD: Including setup time in measurement
    group.bench_function("bad_setup_overhead", |b| {
        b.iter(|| {
            let data = vec![1, 2, 3, 4, 5]; // Setup inside iter
            process_data(&data)
        });
    });
    
    // GOOD: Use iter_with_setup (or the newer iter_batched) to exclude setup
    group.bench_function("good_separate_setup", |b| {
        b.iter_with_setup(
            || vec![1, 2, 3, 4, 5], // Setup
            |data| process_data(&data) // Actual benchmark
        );
    });
    
    // PITFALL 4: Incorrect batching
    let mut shared_state = Vec::new();
    
    // BAD: State persists between iterations
    group.bench_function("bad_shared_state", |b| {
        b.iter(|| {
            shared_state.push(expensive_computation(black_box(42)));
            shared_state.len() // State grows, affecting later measurements
        });
    });
    
    // GOOD: Reset state between iterations
    group.bench_function("good_isolated_state", |b| {
        b.iter_with_setup(
            || Vec::new(), // Fresh state for each iteration
            |mut state| {
                state.push(expensive_computation(black_box(42)));
                state.len()
            }
        );
    });
    
    group.finish();
}

fn expensive_computation(n: i32) -> i32 {
    // Simulate some work
    (0..n).map(|i| i * i).sum()
}

fn process_data(data: &[i32]) -> i32 {
    data.iter().sum()
}

// Best practices for benchmark organization
fn well_organized_benchmark(c: &mut Criterion) {
    let mut group = c.benchmark_group("string_operations");
    
    // Configure group settings
    group.sample_size(1000);
    group.measurement_time(std::time::Duration::from_secs(10));
    group.warm_up_time(std::time::Duration::from_secs(2));
    
    // Test with multiple input sizes to understand scaling
    for size in [100, 1000, 10000].iter() {
        let test_string = "a".repeat(*size);
        
        group.bench_with_input(
            BenchmarkId::new("uppercase", size),
            size,
            |b, _| {
                b.iter(|| black_box(&test_string).to_uppercase())
            },
        );
        
        group.bench_with_input(
            BenchmarkId::new("reverse", size),
            size,
            |b, _| {
                b.iter(|| black_box(&test_string).chars().rev().collect::<String>())
            },
        );
    }
    
    group.finish();
}

criterion_group!(
    benches,
    benchmarking_pitfalls,
    well_organized_benchmark
);
criterion_main!(benches);

Benchmarking Guidelines

  1. Use black_box to prevent compiler optimizations
  2. Separate setup from measurement using iter_with_setup or iter_batched (see the sketch after this list)
  3. Test multiple input sizes to understand algorithmic complexity
  4. Run benchmarks on dedicated, otherwise idle hardware for consistent results
  5. Use sufficient sample sizes for statistical significance
  6. Compare against saved baselines to detect regressions
  7. Document benchmark assumptions and environment requirements
  8. Focus on representative workloads rather than synthetic tests
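
Putting the core guidelines together, a minimal skeleton might look like this (a sketch; names and sizes are illustrative):

use criterion::{black_box, criterion_group, criterion_main, BatchSize, BenchmarkId, Criterion};

fn guideline_skeleton(c: &mut Criterion) {
    let mut group = c.benchmark_group("guideline_skeleton");

    // Guideline 3: several input sizes expose scaling behavior
    for size in [100, 1_000, 10_000] {
        group.bench_with_input(BenchmarkId::new("sort", size), &size, |b, &size| {
            b.iter_batched(
                // Guideline 2: build the input in setup, outside the timed region
                || (0..size).rev().collect::<Vec<i32>>(),
                // Guideline 1: black_box keeps the work from being optimized away
                |mut data| black_box(&mut data).sort_unstable(),
                BatchSize::SmallInput,
            );
        });
    }

    group.finish();
}

criterion_group!(benches, guideline_skeleton);
criterion_main!(benches);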

Benchmarking in Rust provides powerful tools for measuring and optimizing performance. Use Criterion.rs for statistical rigor, combine micro and macro benchmarks for comprehensive coverage, and integrate performance testing into your development workflow to catch regressions early.