Benchmarking
Benchmarking is essential for writing high-performance Rust code. The ecosystem provides excellent tools for measuring and improving performance; Criterion.rs, which runs on stable Rust and applies statistical analysis to its measurements, is the de facto standard benchmarking harness.
Getting Started with Criterion.rs
Basic Setup
Add to Cargo.toml:
[dev-dependencies]
criterion = { version = "0.5", features = ["html_reports"] }
[[bench]]
name = "my_benchmark"
harness = false
Your First Benchmark
In benches/my_benchmark.rs:
use criterion::{black_box, criterion_group, criterion_main, Criterion};
// Deliberately naive, exponential-time recursion (note: this variant returns 1 for n = 0)
fn fibonacci_recursive(n: u64) -> u64 {
match n {
0 => 1,
1 => 1,
n => fibonacci_recursive(n - 1) + fibonacci_recursive(n - 2),
}
}
fn fibonacci_iterative(n: u64) -> u64 {
let mut a = 0;
let mut b = 1;
for _ in 0..n {
let temp = a;
a = b;
b = temp + b;
}
b
}
fn criterion_benchmark(c: &mut Criterion) {
c.bench_function("fibonacci recursive 20", |b| {
b.iter(|| fibonacci_recursive(black_box(20)))
});
c.bench_function("fibonacci iterative 20", |b| {
b.iter(|| fibonacci_iterative(black_box(20)))
});
}
criterion_group!(benches, criterion_benchmark);
criterion_main!(benches);
Running Benchmarks
# Run all benchmarks
cargo bench
# Run specific benchmark
cargo bench fibonacci
# With the html_reports feature enabled, detailed HTML reports are generated
# automatically under target/criterion/report/index.html on every run
# Save a named baseline
cargo bench -- --save-baseline my_baseline
# Compare against baseline
cargo bench -- --baseline my_baseline
Advanced Benchmarking Techniques
Parameterized Benchmarks
use criterion::{black_box, criterion_group, criterion_main, Criterion, BenchmarkId};
fn sort_algorithms_benchmark(c: &mut Criterion) {
let mut group = c.benchmark_group("sorting_algorithms");
// Test different input sizes
for size in [100, 1000, 10000].iter() {
let data: Vec<i32> = (0..*size).rev().collect(); // Worst case: reverse sorted
group.bench_with_input(BenchmarkId::new("bubble_sort", size), size, |b, _| {
b.iter_with_setup(
|| data.clone(),
|mut data| bubble_sort(black_box(&mut data))
);
});
group.bench_with_input(BenchmarkId::new("quick_sort", size), size, |b, &size| {
b.iter_with_setup(
|| data.clone(),
|mut data| quick_sort(black_box(&mut data))
);
});
group.bench_with_input(BenchmarkId::new("rust_sort", size), size, |b, &size| {
b.iter_with_setup(
|| data.clone(),
|mut data| data.sort_unstable()
);
});
}
group.finish();
}
fn data_structure_benchmark(c: &mut Criterion) {
let mut group = c.benchmark_group("data_structures");
// Configure benchmark parameters
group.sample_size(1000);
group.measurement_time(std::time::Duration::from_secs(10));
// Test different data structures for lookups
for size in [1000, 10000, 100000].iter() {
let vec_data: Vec<i32> = (0..*size).collect();
let mut hash_map = std::collections::HashMap::new();
let mut btree_map = std::collections::BTreeMap::new();
for &item in &vec_data {
hash_map.insert(item, item);
btree_map.insert(item, item);
}
let search_key = size / 2;
group.bench_with_input(BenchmarkId::new("vec_linear_search", size), size, |b, _| {
b.iter(|| vec_data.iter().find(|&&x| x == black_box(search_key)))
});
group.bench_with_input(BenchmarkId::new("vec_binary_search", size), size, |b, _| {
b.iter(|| vec_data.binary_search(&black_box(search_key)))
});
group.bench_with_input(BenchmarkId::new("hashmap_lookup", size), size, |b, _| {
b.iter(|| hash_map.get(&black_box(search_key)))
});
group.bench_with_input(BenchmarkId::new("btreemap_lookup", size), size, |b, _| {
b.iter(|| btree_map.get(&black_box(search_key)))
});
}
group.finish();
}
fn bubble_sort(arr: &mut [i32]) {
let len = arr.len();
for i in 0..len {
for j in 0..len.saturating_sub(1 + i) { // saturating_sub guards the empty-slice case
if arr[j] > arr[j + 1] {
arr.swap(j, j + 1);
}
}
}
}
fn quick_sort(arr: &mut [i32]) {
if arr.len() <= 1 {
return;
}
let pivot = partition(arr);
quick_sort(&mut arr[0..pivot]);
quick_sort(&mut arr[pivot + 1..]);
}
fn partition(arr: &mut [i32]) -> usize {
let len = arr.len();
let pivot = len - 1;
let mut i = 0;
for j in 0..len - 1 {
if arr[j] <= arr[pivot] {
arr.swap(i, j);
i += 1;
}
}
arr.swap(i, pivot);
i
}
criterion_group!(
benches,
sort_algorithms_benchmark,
data_structure_benchmark
);
criterion_main!(benches);
Memory Usage Benchmarks
use criterion::{black_box, criterion_group, criterion_main, Criterion};
fn memory_allocation_benchmark(c: &mut Criterion) {
let mut group = c.benchmark_group("memory_allocation");
// Pre-allocate vs reallocate
group.bench_function("vec_push_reallocate", |b| {
b.iter(|| {
let mut vec = Vec::new();
for i in 0..1000 {
vec.push(black_box(i));
}
vec
})
});
group.bench_function("vec_with_capacity", |b| {
b.iter(|| {
let mut vec = Vec::with_capacity(1000);
for i in 0..1000 {
vec.push(black_box(i));
}
vec
})
});
// String building strategies
group.bench_function("string_concatenation", |b| {
b.iter(|| {
let mut result = String::new();
for i in 0..100 {
result = result + &black_box(i).to_string();
}
result
})
});
group.bench_function("string_push_str", |b| {
b.iter(|| {
let mut result = String::new();
for i in 0..100 {
result.push_str(&black_box(i).to_string());
}
result
})
});
group.bench_function("string_with_capacity", |b| {
b.iter(|| {
let mut result = String::with_capacity(1000);
for i in 0..100 {
result.push_str(&black_box(i).to_string());
}
result
})
});
group.finish();
}
// Object pooling benchmark
struct Pool<T> {
objects: std::sync::Mutex<Vec<T>>,
factory: fn() -> T,
}
impl<T> Pool<T> {
fn new(factory: fn() -> T) -> Self {
Pool {
objects: std::sync::Mutex::new(Vec::new()),
factory,
}
}
fn get(&self) -> T {
let mut objects = self.objects.lock().unwrap();
objects.pop().unwrap_or_else(|| (self.factory)())
}
fn put(&self, obj: T) {
let mut objects = self.objects.lock().unwrap();
if objects.len() < 100 { // Limit pool size
objects.push(obj);
}
}
}
fn object_pooling_benchmark(c: &mut Criterion) {
let mut group = c.benchmark_group("object_pooling");
let pool = Pool::new(|| Vec::<i32>::with_capacity(1000));
group.bench_function("no_pooling", |b| {
b.iter(|| {
let mut vec = Vec::with_capacity(1000);
for i in 0..1000 {
vec.push(black_box(i));
}
// Vec is dropped here
})
});
group.bench_function("with_pooling", |b| {
b.iter(|| {
let mut vec = pool.get();
vec.clear();
for i in 0..1000 {
vec.push(black_box(i));
}
pool.put(vec);
})
});
group.finish();
}
criterion_group!(
benches,
memory_allocation_benchmark,
object_pooling_benchmark
);
criterion_main!(benches);
Async Benchmarking
Tokio Runtime Benchmarks
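Criterion's async support is gated behind a feature flag. A plausible dev-dependency setup for the examples below (versions are illustrative):
[dev-dependencies]
criterion = { version = "0.5", features = ["html_reports", "async_tokio"] }
tokio = { version = "1", features = ["full"] }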
// Requires criterion's `async_tokio` feature (see the snippet above)
use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
use tokio::runtime::Runtime;
async fn async_computation(n: u64) -> u64 {
// Simulate async work (tokio's timer has roughly millisecond granularity,
// so this sleep dominates the measured time)
tokio::time::sleep(tokio::time::Duration::from_nanos(n * 100)).await;
n * 2
}
async fn parallel_async_work(tasks: usize) -> Vec<u64> {
let handles: Vec<_> = (0..tasks)
.map(|i| tokio::spawn(async_computation(i as u64)))
.collect();
let mut results = Vec::new();
for handle in handles {
results.push(handle.await.unwrap());
}
results
}
fn async_benchmark(c: &mut Criterion) {
let rt = Runtime::new().unwrap();
let mut group = c.benchmark_group("async_operations");
group.bench_function("single_async_task", |b| {
b.to_async(&rt).iter(|| async_computation(black_box(10)))
});
for task_count in [1, 10, 100].iter() {
group.bench_with_input(
BenchmarkId::new("parallel_tasks", task_count),
task_count,
|b, &task_count| {
b.to_async(&rt).iter(|| parallel_async_work(black_box(task_count)))
},
);
}
group.finish();
}
// HTTP client benchmarks
async fn make_http_request(client: &reqwest::Client, url: &str) -> Result<String, reqwest::Error> {
let response = client.get(url).send().await?;
response.text().await
}
fn http_benchmark(c: &mut Criterion) {
let rt = Runtime::new().unwrap();
let client = reqwest::Client::new();
let mut group = c.benchmark_group("http_requests");
// Note: hitting a real remote service mostly measures network latency and is
// unreliable in CI; prefer a local test server or mock (see the sketch below)
group.bench_function("single_request", |b| {
b.to_async(&rt).iter(|| {
make_http_request(&client, black_box("http://httpbin.org/json"))
})
});
group.bench_function("concurrent_requests", |b| {
b.to_async(&rt).iter(|| async {
let handles: Vec<_> = (0..10)
.map(|_| make_http_request(&client, "http://httpbin.org/json"))
.collect();
futures::future::try_join_all(handles).await
})
});
group.finish();
}
criterion_group!(benches, async_benchmark, http_benchmark);
criterion_main!(benches);
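To make HTTP benchmarks reproducible, point the client at a local mock server instead of the network. A sketch using the wiremock crate (an assumed dev-dependency; the endpoint and response body are made up):
use criterion::{criterion_group, criterion_main, Criterion};
use tokio::runtime::Runtime;
use wiremock::matchers::{method, path};
use wiremock::{Mock, MockServer, ResponseTemplate};
fn mocked_http_benchmark(c: &mut Criterion) {
let rt = Runtime::new().unwrap();
// Start the mock server and register a canned response once, outside the hot loop
let (server, url) = rt.block_on(async {
let server = MockServer::start().await;
Mock::given(method("GET"))
.and(path("/json"))
.respond_with(ResponseTemplate::new(200).set_body_string(r#"{"ok":true}"#))
.mount(&server)
.await;
let url = format!("{}/json", server.uri());
(server, url)
});
let client = reqwest::Client::new();
c.bench_function("mocked_request", |b| {
b.to_async(&rt).iter(|| async {
client.get(&url).send().await.unwrap().text().await.unwrap()
})
});
drop(server); // keep the mock server alive until all iterations finish
}
criterion_group!(benches, mocked_http_benchmark);
criterion_main!(benches);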
Micro vs Macro Benchmarks
Micro-benchmarks
use criterion::{black_box, criterion_group, criterion_main, Criterion};
// Micro-benchmark: Focus on small, isolated operations
fn string_operations_micro(c: &mut Criterion) {
let mut group = c.benchmark_group("string_micro");
let test_string = "Hello, World! This is a test string for benchmarking.";
group.bench_function("string_clone", |b| {
b.iter(|| black_box(test_string).to_owned())
});
group.bench_function("string_chars_count", |b| {
b.iter(|| black_box(test_string).chars().count())
});
group.bench_function("string_len", |b| {
b.iter(|| black_box(test_string).len())
});
group.bench_function("string_contains", |b| {
b.iter(|| black_box(test_string).contains(black_box("test")))
});
group.bench_function("string_split", |b| {
b.iter(|| black_box(test_string).split(' ').collect::<Vec<_>>())
});
group.finish();
}
// Math operations micro-benchmarks
fn math_operations_micro(c: &mut Criterion) {
let mut group = c.benchmark_group("math_micro");
let x = 12345.6789f64;
let y = 9876.5432f64;
group.bench_function("float_add", |b| {
b.iter(|| black_box(x) + black_box(y))
});
group.bench_function("float_mul", |b| {
b.iter(|| black_box(x) * black_box(y))
});
group.bench_function("float_div", |b| {
b.iter(|| black_box(x) / black_box(y))
});
group.bench_function("float_sqrt", |b| {
b.iter(|| black_box(x).sqrt())
});
group.bench_function("float_sin", |b| {
b.iter(|| black_box(x).sin())
});
group.finish();
}
criterion_group!(
micro_benches,
string_operations_micro,
math_operations_micro
);
Macro-benchmarks
// Macro-benchmark: Test complete workflows or larger operations
fn json_processing_macro(c: &mut Criterion) {
let mut group = c.benchmark_group("json_macro");
let sample_data = r#"
{
"users": [
{"id": 1, "name": "Alice", "posts": [{"title": "Hello", "content": "World"}]},
{"id": 2, "name": "Bob", "posts": [{"title": "Test", "content": "Data"}]}
],
"metadata": {
"version": "1.0",
"created": "2023-01-01"
}
}
"#;
group.bench_function("parse_and_process_json", |b| {
b.iter(|| {
let data: serde_json::Value = serde_json::from_str(black_box(sample_data)).unwrap();
// Process the data
let mut user_count = 0;
let mut post_count = 0;
if let Some(users) = data["users"].as_array() {
user_count = users.len();
for user in users {
if let Some(posts) = user["posts"].as_array() {
post_count += posts.len();
}
}
}
(user_count, post_count)
})
});
group.finish();
}
// File I/O macro-benchmark
fn file_io_macro(c: &mut Criterion) {
use std::io::{Write, Read};
use tempfile::NamedTempFile;
let mut group = c.benchmark_group("file_io_macro");
let test_data = "x".repeat(10000); // 10KB of data
group.bench_function("write_read_file", |b| {
b.iter(|| {
// Write data to temporary file
let mut temp_file = NamedTempFile::new().unwrap();
temp_file.write_all(black_box(test_data.as_bytes())).unwrap();
temp_file.flush().unwrap();
// Read data back
let mut file = std::fs::File::open(temp_file.path()).unwrap();
let mut buffer = String::new();
file.read_to_string(&mut buffer).unwrap();
buffer.len()
})
});
group.finish();
}
criterion_group!(
macro_benches,
json_processing_macro,
file_io_macro
);
criterion_main!(micro_benches, macro_benches);
Statistical Analysis and Reporting
Custom Measurement and Analysis
use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
use std::time::Duration;
fn throughput_benchmark(c: &mut Criterion) {
let mut group = c.benchmark_group("throughput");
for size in [1_000, 10_000, 100_000].iter() {
// Set throughput for bytes processed
group.throughput(Throughput::Bytes(*size as u64));
group.bench_with_input(BenchmarkId::new("process_bytes", size), size, |b, &size| {
let data = vec![0u8; size];
b.iter(|| {
// Simulate processing each byte
let mut sum = 0u64;
for &byte in black_box(&data) {
sum = sum.wrapping_add(byte as u64);
}
sum
});
});
}
group.finish();
}
fn statistical_analysis_benchmark(c: &mut Criterion) {
let mut group = c.benchmark_group("statistical_analysis");
// Configure measurement parameters
group.sample_size(1000); // Number of samples
group.measurement_time(Duration::from_secs(30)); // Total measurement time
group.warm_up_time(Duration::from_secs(3)); // Warm-up time
group.confidence_level(0.95); // Confidence level
group.significance_level(0.05); // Significance level
group.noise_threshold(0.02); // Noise threshold (2%)
group.bench_function("algorithm_with_variance", |b| {
use rand::Rng;
let mut rng = rand::thread_rng();
b.iter(|| {
// Simulate algorithm with some variance
let work_amount = rng.gen_range(1000..2000);
let mut sum = 0;
for i in 0..work_amount {
sum += black_box(i * i);
}
sum
});
});
group.finish();
}
criterion_group!(
benches,
throughput_benchmark,
statistical_analysis_benchmark
);
criterion_main!(benches);
Custom Measurement
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use criterion::measurement::{Measurement, ValueFormatter, WallTime};
use std::time::Instant;
// Custom measurement that could track both time and memory allocations.
// The allocation counts are placeholders; a real implementation would read a
// counter maintained by a custom #[global_allocator] (see the sketch below).
struct CustomMeasurement;
impl Measurement for CustomMeasurement {
type Intermediate = (Instant, usize);
type Value = (f64, f64); // (nanoseconds, allocation count)
fn start(&self) -> Self::Intermediate {
let start_allocs: usize = 0; // Placeholder for an allocation counter
(Instant::now(), start_allocs)
}
fn end(&self, i: Self::Intermediate) -> Self::Value {
let end_allocs: usize = 0; // Placeholder for an allocation counter
let elapsed = i.0.elapsed();
let allocs = end_allocs - i.1;
(elapsed.as_nanos() as f64, allocs as f64)
}
fn add(&self, v1: &Self::Value, v2: &Self::Value) -> Self::Value {
(v1.0 + v2.0, v1.1 + v2.1)
}
fn zero(&self) -> Self::Value {
(0.0, 0.0)
}
fn to_f64(&self, value: &Self::Value) -> f64 {
value.0 // Report time in nanoseconds to Criterion's statistics
}
fn formatter(&self) -> &dyn ValueFormatter {
// Borrow WallTime's formatter so values are displayed as durations
static WALL_TIME: WallTime = WallTime;
WALL_TIME.formatter()
}
}
fn custom_measurement_benchmark(c: &mut Criterion<CustomMeasurement>) {
let mut group = c.benchmark_group("custom_measurement");
group.bench_function("allocation_heavy", |b| {
b.iter(|| {
let mut vecs = Vec::new();
for i in 0..100 {
let mut vec = Vec::with_capacity(i);
for j in 0..i {
vec.push(black_box(j));
}
vecs.push(vec);
}
vecs
});
});
group.finish();
}
// Wire the custom measurement into the group via a custom config
criterion_group! {
name = benches;
config = Criterion::default().with_measurement(CustomMeasurement);
targets = custom_measurement_benchmark
}
criterion_main!(benches);
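The allocation placeholders above could be backed by a counting global allocator. A minimal sketch wrapping the system allocator (note that the atomic counter adds a little overhead to every allocation):
use std::alloc::{GlobalAlloc, Layout, System};
use std::sync::atomic::{AtomicUsize, Ordering};
// Wraps the system allocator and counts every allocation
struct CountingAllocator;
static ALLOCATION_COUNT: AtomicUsize = AtomicUsize::new(0);
unsafe impl GlobalAlloc for CountingAllocator {
unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
ALLOCATION_COUNT.fetch_add(1, Ordering::Relaxed);
System.alloc(layout)
}
unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
System.dealloc(ptr, layout)
}
}
#[global_allocator]
static GLOBAL: CountingAllocator = CountingAllocator;
// CustomMeasurement::start/end could then read this counter:
fn current_allocation_count() -> usize {
ALLOCATION_COUNT.load(Ordering::Relaxed)
}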
Profiling and Optimization
CPU Profiling Integration
// Cargo.toml additions for profiling
/*
[profile.release]
debug = true # Enable debug symbols for profiling
[profile.bench]
debug = true
[dev-dependencies]
# The Criterion profiler hook lives in the pprof crate (version illustrative)
pprof = { version = "0.13", features = ["flamegraph", "criterion"] }
*/
use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
use pprof::criterion::{Output, PProfProfiler};
// CPU-intensive algorithm for profiling
fn matrix_multiplication(a: &[Vec<f64>], b: &[Vec<f64>]) -> Vec<Vec<f64>> {
let rows_a = a.len();
let cols_a = a[0].len();
let cols_b = b[0].len();
let mut result = vec![vec![0.0; cols_b]; rows_a];
for i in 0..rows_a {
for j in 0..cols_b {
for k in 0..cols_a {
result[i][j] += a[i][k] * b[k][j];
}
}
}
result
}
fn cpu_intensive_benchmark(c: &mut Criterion) {
let mut group = c.benchmark_group("cpu_intensive");
// Profiling is enabled on the Criterion config in criterion_group! below
for size in [10, 50, 100].iter() {
let matrix_a: Vec<Vec<f64>> = (0..*size)
.map(|i| (0..*size).map(|j| (i * j) as f64).collect())
.collect();
let matrix_b: Vec<Vec<f64>> = (0..*size)
.map(|i| (0..*size).map(|j| (i + j) as f64).collect())
.collect();
group.bench_with_input(
BenchmarkId::new("matrix_mult", size),
size,
|b, _| {
b.iter(|| matrix_multiplication(black_box(&matrix_a), black_box(&matrix_b)))
},
);
}
group.finish();
}
// Profilers attach to the Criterion config, not to a group. Running
// `cargo bench -- --profile-time=5` then writes a flamegraph under
// target/criterion/<benchmark>/profile/.
criterion_group! {
name = benches;
config = Criterion::default().with_profiler(PProfProfiler::new(100, Output::Flamegraph(None)));
targets = cpu_intensive_benchmark
}
criterion_main!(benches);
Memory Profiling
# Run benchmarks in profile mode: skips Criterion's statistical analysis and
# runs each benchmark for a fixed time so an external profiler can sample it
cargo bench --bench memory_benchmark -- --profile-time=5
# Using heaptrack: profile the compiled bench binary, not the cargo wrapper
# (replace <hash> with the actual file name under target/release/deps)
heaptrack ./target/release/deps/memory_benchmark-<hash> --bench --profile-time=5
# Using massif (valgrind's heap profiler)
valgrind --tool=massif ./target/release/deps/memory_benchmark-<hash> --bench --profile-time=5
# Generate flamegraphs
cargo install flamegraph
cargo flamegraph --bench cpu_benchmark
Optimization Techniques
use criterion::{black_box, criterion_group, criterion_main, Criterion};
// Compare different optimization approaches
fn optimization_comparison(c: &mut Criterion) {
let mut group = c.benchmark_group("optimization_comparison");
let data: Vec<i32> = (0..10000).collect();
// Naive approach
group.bench_function("naive_sum", |b| {
b.iter(|| {
let mut sum = 0;
for item in black_box(&data) {
sum += item;
}
sum
});
});
// Iterator approach
group.bench_function("iterator_sum", |b| {
b.iter(|| black_box(&data).iter().sum::<i32>())
});
// Parallel processing with rayon
group.bench_function("parallel_sum", |b| {
b.iter(|| {
use rayon::prelude::*;
black_box(&data).par_iter().sum::<i32>()
});
});
// Manual 4-way unrolling; true SIMD needs std::simd (nightly) or intrinsics,
// though the autovectorizer often handles this pattern on its own
group.bench_function("unrolled_sum", |b| {
b.iter(|| {
// Simplified SIMD example
let mut sum = 0;
let chunks = black_box(&data).chunks_exact(4);
let remainder = chunks.remainder();
for chunk in chunks {
sum += chunk[0] + chunk[1] + chunk[2] + chunk[3];
}
for &item in remainder {
sum += item;
}
sum
});
});
group.finish();
}
// Cache-friendly vs cache-unfriendly access patterns
fn cache_benchmark(c: &mut Criterion) {
let mut group = c.benchmark_group("cache_access");
const SIZE: usize = 1024 * 1024; // 1M elements (4 MB of i32 data)
let data: Vec<i32> = (0..SIZE).map(|i| i as i32).collect();
// Sequential access (cache-friendly)
group.bench_function("sequential_access", |b| {
b.iter(|| {
let mut sum = 0;
for i in 0..SIZE {
sum += black_box(data[i]);
}
sum
});
});
// Random access (cache-unfriendly)
group.bench_function("random_access", |b| {
let indices: Vec<usize> = {
use rand::seq::SliceRandom;
let mut indices: Vec<usize> = (0..SIZE).collect();
indices.shuffle(&mut rand::thread_rng());
indices
};
b.iter(|| {
let mut sum = 0;
for &i in &indices {
sum += black_box(data[i]);
}
sum
});
});
// Strided access
group.bench_function("strided_access", |b| {
b.iter(|| {
let mut sum = 0;
let stride = 64; // Every 64th element
for i in (0..SIZE).step_by(stride) {
sum += black_box(data[i]);
}
sum
});
});
group.finish();
}
criterion_group!(
benches,
optimization_comparison,
cache_benchmark
);
criterion_main!(benches);
Real-World Benchmarking Examples
Database Performance
use criterion::{black_box, criterion_group, criterion_main, Criterion, BenchmarkId};
use std::collections::HashMap;
// Simulate different database-like operations
#[derive(Clone)]
struct InMemoryDb {
data: HashMap<String, String>,
}
impl InMemoryDb {
fn new() -> Self {
InMemoryDb {
data: HashMap::new(),
}
}
fn insert(&mut self, key: String, value: String) {
self.data.insert(key, value);
}
fn get(&self, key: &str) -> Option<&String> {
self.data.get(key)
}
fn scan(&self, prefix: &str) -> Vec<(&String, &String)> {
self.data
.iter()
.filter(|(k, _)| k.starts_with(prefix))
.collect()
}
}
fn database_benchmark(c: &mut Criterion) {
let mut group = c.benchmark_group("database_operations");
for size in [1_000, 10_000, 100_000].iter() {
let mut db = InMemoryDb::new();
// Pre-populate database
for i in 0..*size {
db.insert(format!("key_{:06}", i), format!("value_{}", i));
}
group.bench_with_input(
BenchmarkId::new("insert", size),
size,
|b, _| {
// Note: this clone is reused across iterations, so the table grows as the
// benchmark runs; iter_batched with a fresh clone would fully isolate iterations
let mut db = db.clone();
let mut counter = *size;
b.iter(|| {
db.insert(
format!("new_key_{}", counter),
black_box(format!("new_value_{}", counter))
);
counter += 1;
});
},
);
group.bench_with_input(
BenchmarkId::new("lookup", size),
size,
|b, &size| {
b.iter(|| {
let key = format!("key_{:06}", black_box(size / 2));
db.get(&key)
});
},
);
group.bench_with_input(
BenchmarkId::new("scan", size),
size,
|b, _| {
b.iter(|| db.scan(black_box("key_0001")));
},
);
}
group.finish();
}
criterion_group!(benches, database_benchmark);
criterion_main!(benches);
Serialization Performance
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use serde::{Deserialize, Serialize};
#[derive(Serialize, Deserialize, Clone)]
struct TestData {
id: u64,
name: String,
values: Vec<f64>,
metadata: std::collections::HashMap<String, String>,
}
impl TestData {
fn generate(size: usize) -> Self {
let mut metadata = std::collections::HashMap::new();
for i in 0..10 {
metadata.insert(format!("key_{}", i), format!("value_{}", i));
}
TestData {
id: 12345,
name: "Test Object".to_string(),
values: (0..size).map(|i| i as f64 * 0.1).collect(),
metadata,
}
}
}
fn serialization_benchmark(c: &mut Criterion) {
let mut group = c.benchmark_group("serialization");
let test_data = TestData::generate(1000);
// JSON serialization
group.bench_function("json_serialize", |b| {
b.iter(|| serde_json::to_string(black_box(&test_data)).unwrap())
});
let json_data = serde_json::to_string(&test_data).unwrap();
group.bench_function("json_deserialize", |b| {
b.iter(|| {
let _: TestData = serde_json::from_str(black_box(&json_data)).unwrap();
})
});
// Binary serialization with bincode
group.bench_function("bincode_serialize", |b| {
b.iter(|| bincode::serialize(black_box(&test_data)).unwrap())
});
let binary_data = bincode::serialize(&test_data).unwrap();
group.bench_function("bincode_deserialize", |b| {
b.iter(|| {
let _: TestData = bincode::deserialize(black_box(&binary_data)).unwrap();
})
});
// MessagePack serialization
group.bench_function("msgpack_serialize", |b| {
b.iter(|| rmp_serde::to_vec(black_box(&test_data)).unwrap())
});
let msgpack_data = rmp_serde::to_vec(&test_data).unwrap();
group.bench_function("msgpack_deserialize", |b| {
b.iter(|| {
let _: TestData = rmp_serde::from_slice(black_box(&msgpack_data)).unwrap();
})
});
group.finish();
}
criterion_group!(benches, serialization_benchmark);
criterion_main!(benches);
Continuous Integration and Performance Regression
CI Integration
.github/workflows/benchmark.yml:
name: Benchmark
on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]
jobs:
  benchmark:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - name: Install Rust
        uses: actions-rs/toolchain@v1
        with:
          toolchain: stable
          override: true
      - name: Cache Cargo
        uses: actions/cache@v3
        with:
          path: |
            ~/.cargo/bin/
            ~/.cargo/registry/index/
            ~/.cargo/registry/cache/
            ~/.cargo/git/db/
            target/
          key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }}
      - name: Run benchmarks
        # Criterion's bencher-compatible output is what the action's 'cargo' tool parses
        run: cargo bench --bench main_benchmark -- --output-format bencher | tee benchmark_output.txt
      - name: Store benchmark result
        uses: benchmark-action/github-action-benchmark@v1
        with:
          tool: 'cargo'
          output-file-path: benchmark_output.txt
          github-token: ${{ secrets.GITHUB_TOKEN }}
          auto-push: true
          # Show alert when performance degrades
          alert-threshold: '200%'
          comment-on-alert: true
          fail-on-alert: true
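For local regression checks between branches, Criterion's saved baselines pair well with the critcmp tool (a separately installed cargo binary); a sketch of that workflow:
# Benchmark the baseline branch
git checkout main
cargo bench -- --save-baseline main
# Benchmark the changes
git checkout my-feature
cargo bench -- --save-baseline feature
# Compare the two saved baselines side by side
cargo install critcmp
critcmp main feature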
Performance Regression Detection
// Cargo.toml
/*
[dev-dependencies]
criterion = { version = "0.5", features = ["html_reports"] }
iai = "0.1" # Instruction-level benchmarking
*/
use criterion::{black_box, criterion_group, criterion_main, Criterion};
// Critical path benchmarks that should not regress
fn critical_path_benchmarks(c: &mut Criterion) {
let mut group = c.benchmark_group("critical_path");
// Set strict performance thresholds
group.noise_threshold(0.01); // 1% noise threshold
group.confidence_level(0.99); // 99% confidence
// Core algorithm that must maintain performance
group.bench_function("core_algorithm", |b| {
let data: Vec<i32> = (0..10000).collect();
b.iter(|| {
// This is a critical algorithm - any regression should be caught
data.iter()
.filter(|&&x| x % 2 == 0)
.map(|&x| x * x)
.sum::<i32>()
});
});
// Memory allocation patterns that shouldn't regress
group.bench_function("allocation_pattern", |b| {
b.iter(|| {
let mut vecs = Vec::with_capacity(100);
for i in 0..100 {
let mut vec = Vec::with_capacity(i);
for j in 0..i {
vec.push(black_box(j));
}
vecs.push(vec);
}
vecs
});
});
group.finish();
}
criterion_group!(benches, critical_path_benchmarks);
criterion_main!(benches);
// Instruction-level benchmarking for precise, deterministic measurement.
// iai provides its own harness (and its own main), so the following belongs
// in a separate bench target, e.g. benches/iai_benchmark.rs:
fn iai_fibonacci(n: u64) -> u64 {
match n {
0 => 1,
1 => 1,
n => iai_fibonacci(n - 1) + iai_fibonacci(n - 2),
}
}
fn iai_fibonacci_20() -> u64 {
iai_fibonacci(20)
}
fn iai_vector_sum() -> i32 {
(0..10000).sum()
}
// IAI benchmarks count CPU instructions and are deterministic
iai::main!(iai_fibonacci_20, iai_vector_sum);
Best Practices for Benchmarking
Avoiding Common Pitfalls
use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
fn benchmarking_pitfalls(c: &mut Criterion) {
let mut group = c.benchmark_group("pitfalls");
// PITFALL 1: Dead code elimination
// BAD: Compiler might optimize away the computation
group.bench_function("bad_dead_code", |b| {
b.iter(|| {
let _result = expensive_computation(42);
// Result is never used - the optimizer may delete the whole computation!
});
});
// GOOD: Use black_box to prevent optimization
group.bench_function("good_black_box", |b| {
b.iter(|| {
let result = expensive_computation(black_box(42));
black_box(result); // Prevent dead code elimination
});
});
// PITFALL 2: Constant folding
// BAD: Using constants that compiler can optimize
group.bench_function("bad_constant_folding", |b| {
b.iter(|| {
expensive_computation(42) // Compiler might precompute this
});
});
// GOOD: Use black_box for inputs
group.bench_function("good_prevent_folding", |b| {
b.iter(|| {
expensive_computation(black_box(42))
});
});
// PITFALL 3: Setup overhead in benchmark
// BAD: Including setup time in measurement
group.bench_function("bad_setup_overhead", |b| {
b.iter(|| {
let data = vec![1, 2, 3, 4, 5]; // Setup inside iter
process_data(&data)
});
});
// GOOD: Use iter_with_setup (or the newer iter_batched) to exclude setup
group.bench_function("good_separate_setup", |b| {
b.iter_with_setup(
|| vec![1, 2, 3, 4, 5], // Setup
|data| process_data(&data) // Actual benchmark
);
});
// PITFALL 4: Incorrect batching
let mut shared_state = Vec::new();
// BAD: State persists between iterations
group.bench_function("bad_shared_state", |b| {
b.iter(|| {
shared_state.push(expensive_computation(black_box(42)));
shared_state.len() // State grows, affecting later measurements
});
});
// GOOD: Reset state between iterations
group.bench_function("good_isolated_state", |b| {
b.iter_with_setup(
|| Vec::new(), // Fresh state for each iteration
|mut state| {
state.push(expensive_computation(black_box(42)));
state.len()
}
);
});
group.finish();
}
fn expensive_computation(n: i32) -> i32 {
// Simulate some work
(0..n).map(|i| i * i).sum()
}
fn process_data(data: &[i32]) -> i32 {
data.iter().sum()
}
// Best practices for benchmark organization
fn well_organized_benchmark(c: &mut Criterion) {
let mut group = c.benchmark_group("string_operations");
// Configure group settings
group.sample_size(1000);
group.measurement_time(std::time::Duration::from_secs(10));
group.warm_up_time(std::time::Duration::from_secs(2));
// Test with multiple input sizes to understand scaling
for size in [100, 1000, 10000].iter() {
let test_string = "a".repeat(*size);
group.bench_with_input(
BenchmarkId::new("uppercase", size),
size,
|b, _| {
b.iter(|| black_box(&test_string).to_uppercase())
},
);
group.bench_with_input(
BenchmarkId::new("reverse", size),
size,
|b, _| {
b.iter(|| black_box(&test_string).chars().rev().collect::<String>())
},
);
}
group.finish();
}
criterion_group!(
benches,
benchmarking_pitfalls,
well_organized_benchmark
);
criterion_main!(benches);
Benchmarking Guidelines
- Use black_box to prevent compiler optimizations
- Separate setup from measurement using iter_with_setup or iter_batched
- Test multiple input sizes to understand algorithmic complexity
- Run benchmarks on dedicated hardware for consistent results
- Use sufficient sample sizes for statistical significance (see the config sketch after this list)
- Compare against baselines to detect regressions
- Document benchmark assumptions and environment requirements
- Focus on representative workloads rather than synthetic tests
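Several of these guidelines can be set once in a shared Criterion configuration instead of being repeated per group. A minimal sketch (the numbers are illustrative starting points, not universal recommendations):
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use std::time::Duration;
// Shared configuration applied to every benchmark in the group
fn configured_criterion() -> Criterion {
Criterion::default()
.sample_size(200) // enough samples for stable statistics
.warm_up_time(Duration::from_secs(3)) // let caches and CPU frequency settle
.measurement_time(Duration::from_secs(10)) // total time spent measuring
.noise_threshold(0.02) // ignore changes smaller than 2%
}
fn example_benchmark(c: &mut Criterion) {
c.bench_function("example", |b| b.iter(|| (0..black_box(1000i64)).sum::<i64>()));
}
criterion_group! {
name = benches;
config = configured_criterion();
targets = example_benchmark
}
criterion_main!(benches);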
Benchmarking in Rust provides powerful tools for measuring and optimizing performance. Use Criterion.rs for statistical rigor, combine micro and macro benchmarks for comprehensive coverage, and integrate performance testing into your development workflow to catch regressions early.