Smart Pointers
Smart pointers are data structures that act like pointers but have additional metadata and capabilities. They provide automatic memory management, enable multiple ownership scenarios, and allow for safe concurrent programming. Understanding smart pointers is crucial for advanced Rust programming.
Box<T> - Heap Allocation
Box<T> is the simplest smart pointer: it allocates a value on the heap and owns a pointer to that value.
Basic Box Usage
/// Demonstrates putting an integer on the heap with `Box` and reading it back.
fn main() {
    // `Box::new` moves the value into a heap allocation owned by the box.
    let heap_number: Box<i32> = Box::new(42);
    println!("Boxed value: {}", heap_number);

    // `Box<T>` implements `Deref`, so `*` copies the `i32` back out.
    let unboxed: i32 = *heap_number;
    println!("Dereferenced value: {}", unboxed);

    // `heap_number` is dropped when `main` returns, which frees the allocation.
}
// Common use case: large data that should live on the heap
/// A one-million-byte buffer, big enough that callers want it behind a pointer.
struct LargeData {
    data: [u8; 1_000_000], // 1 MB array
}

/// Returns a zero-filled `LargeData` owned by a `Box`.
///
/// NOTE(review): in unoptimized builds the array may be materialized on the
/// stack before it is moved into the box — confirm callers have enough stack.
fn create_large_data() -> Box<LargeData> {
    let zeroed = LargeData {
        data: [0u8; 1_000_000],
    };
    Box::new(zeroed)
}
Recursive Data Structures
Box<T> is essential for recursive data structures: a box is just a pointer, so it has a known size even when the type it points to contains itself:
/// A singly linked list of `i32` values in the classic cons-list shape.
///
/// `Cons` keeps its tail behind a `Box`, which gives the recursive enum a
/// fixed size.
#[derive(Debug)]
enum List {
    /// One element followed by the rest of the list.
    Cons(i32, Box<List>),
    /// The empty list, which also marks the end of every list.
    Nil,
}

impl List {
    /// Creates an empty list.
    fn new() -> List {
        List::Nil
    }

    /// Consumes the list and returns a new one with `value` at the front.
    fn prepend(self, value: i32) -> List {
        List::Cons(value, Box::new(self))
    }

    /// Returns the number of elements in the list.
    ///
    /// Walks the nodes with a loop rather than recursing, so the call depth
    /// stays constant no matter how long the list is.
    fn len(&self) -> usize {
        let mut count = 0;
        let mut node = self;
        while let List::Cons(_, tail) = node {
            count += 1;
            node = &**tail;
        }
        count
    }

    /// Renders the list as `"a, b, c, Nil"`; the empty list renders as `"Nil"`.
    ///
    /// Appends to a single buffer instead of re-formatting the tail at every
    /// level, so the work is linear in the list length.
    fn stringify(&self) -> String {
        let mut text = String::new();
        let mut node = self;
        while let List::Cons(head, tail) = node {
            text.push_str(&head.to_string());
            text.push_str(", ");
            node = &**tail;
        }
        text.push_str("Nil");
        text
    }
}
/// Builds the list 3 -> 2 -> 1 and prints its rendering and its length.
fn main() {
    // Every `prepend` consumes the previous list, so fold the values onto an empty one.
    let numbers = (1..=3).fold(List::new(), |acc, value| acc.prepend(value));

    println!("List: {}", numbers.stringify());
    println!("Length: {}", numbers.len());
}
Binary Tree with Box
/// A node of an unbalanced binary search tree over `i32` keys.
///
/// Each subtree is optional and individually heap-allocated through `Box`.
#[derive(Debug)]
struct TreeNode {
    value: i32,
    left: Option<Box<TreeNode>>,
    right: Option<Box<TreeNode>>,
}

impl TreeNode {
    /// Creates a node holding `value` with no children.
    fn new(value: i32) -> Self {
        Self {
            value,
            left: None,
            right: None,
        }
    }

    /// Inserts `value` below this node.
    ///
    /// Smaller values go to the left subtree; equal or larger values go to the
    /// right, so duplicates are kept.
    fn insert(&mut self, value: i32) {
        let branch = if value < self.value {
            &mut self.left
        } else {
            &mut self.right
        };
        match branch.as_mut() {
            Some(child) => child.insert(value),
            None => *branch = Some(Box::new(TreeNode::new(value))),
        }
    }

    /// Returns `true` if `value` is stored in this node or any descendant.
    fn search(&self, value: i32) -> bool {
        // Walk down from this node, following the side the ordering points to.
        let mut cursor = Some(self);
        while let Some(node) = cursor {
            cursor = match value.cmp(&node.value) {
                std::cmp::Ordering::Equal => return true,
                std::cmp::Ordering::Less => node.left.as_deref(),
                std::cmp::Ordering::Greater => node.right.as_deref(),
            };
        }
        false
    }

    /// Returns every value in the tree in left-node-right order.
    fn inorder_traversal(&self) -> Vec<i32> {
        let mut visited = Vec::new();
        self.inorder_helper(&mut visited);
        visited
    }

    /// Appends this subtree's values to `result` in left-node-right order.
    fn inorder_helper(&self, result: &mut Vec<i32>) {
        if let Some(smaller) = self.left.as_deref() {
            smaller.inorder_helper(result);
        }
        result.push(self.value);
        if let Some(larger) = self.right.as_deref() {
            larger.inorder_helper(result);
        }
    }
}
/// Builds a small search tree and prints its structure, ordering and two lookups.
fn main() {
    let mut root = TreeNode::new(5);
    for key in vec![3, 7, 1, 9] {
        root.insert(key);
    }

    println!("Tree: {:?}", root);

    let ordered = root.inorder_traversal();
    println!("Inorder traversal: {:?}", ordered);

    let has_seven = root.search(7);
    println!("Contains 7: {}", has_seven);
    let has_four = root.search(4);
    println!("Contains 4: {}", has_four);
}
Rc<T> - Reference Counting
Rc<T> (Reference Counted) lets several owners share the same data. It tracks how many strong references exist and frees the value when that count reaches zero.
Basic Rc Usage
use std::rc::Rc;
/// Shows how the strong count of an `Rc` changes as owners are added and dropped.
fn main() {
    // First owner.
    let original = Rc::new(String::from("Hello, Rc!"));
    println!("Reference count: {}", Rc::strong_count(&original));

    // `Rc::clone` adds an owner; it does not copy the string.
    let second_owner = Rc::clone(&original);
    println!("Reference count after clone: {}", Rc::strong_count(&original));

    {
        // A third owner that only lives for this scope.
        let _scoped_owner = Rc::clone(&original);
        println!("Reference count in inner scope: {}", Rc::strong_count(&original));
    }
    println!("Reference count after inner scope: {}", Rc::strong_count(&original));

    // Every owner reads the same underlying value.
    println!("Data from data: {}", *original);
    println!("Data from data2: {}", *second_owner);

    // Both remaining owners are dropped here, which releases the string.
}
Shared Ownership in Data Structures
use std::rc::Rc;
/// A tree node whose children are held through `Rc`, so the same node can be
/// listed as a child of more than one parent.
#[derive(Debug)]
struct Node {
    value: i32,
    children: Vec<Rc<Node>>,
}

impl Node {
    /// Creates a childless node holding `value`, already wrapped in an `Rc`.
    fn new(value: i32) -> Rc<Self> {
        let leaf = Node {
            value,
            children: vec![],
        };
        Rc::new(leaf)
    }
}
/// Builds a small graph in which one leaf is shared by two parents and prints
/// the resulting reference counts.
fn main() {
    let first_leaf = Node::new(1);
    let second_leaf = Node::new(2);

    // The inner node shares ownership of both leaves.
    let inner_children = vec![Rc::clone(&first_leaf), Rc::clone(&second_leaf)];
    let inner = Rc::new(Node {
        value: 3,
        children: inner_children,
    });

    // The top node shares the first leaf again, plus the inner node.
    let top_children = vec![Rc::clone(&first_leaf), Rc::clone(&inner)];
    let top = Rc::new(Node {
        value: 0,
        children: top_children,
    });

    println!("Leaf1 reference count: {}", Rc::strong_count(&first_leaf));
    println!("Branch reference count: {}", Rc::strong_count(&inner));
    println!("Tree structure: {:#?}", top);
}
RefCell<T> - Interior Mutability
RefCell<T> provides interior mutability: the ability to mutate data through a shared (immutable) reference. The borrowing rules still apply, but they are checked at runtime instead of at compile time, and violating them causes a panic.
Basic RefCell Usage
use std::cell::RefCell;
/// Walks through shared and exclusive borrows of a `RefCell`.
fn main() {
    let cell = RefCell::new(42);

    // Shared borrows: any number may be alive at the same time.
    {
        let first_reader = cell.borrow();
        println!("Immutable borrow: {}", *first_reader);

        let second_reader = cell.borrow();
        println!("Another immutable borrow: {}", *second_reader);
    } // both shared borrows end here

    // Exclusive borrow: no other borrow may exist while it is alive.
    {
        let mut writer = cell.borrow_mut();
        *writer = 100;
        println!("Modified through mutable borrow: {}", *writer);
    } // exclusive borrow ends here

    let final_value = *cell.borrow();
    println!("Final value: {}", final_value);
}
Mock Object Pattern
use std::cell::RefCell;
/// Something that can deliver a text message.
trait Messenger {
    /// Delivers `message`; takes `&self`, so implementors cannot mutate plain fields.
    fn send(&self, message: &str);
}

/// A test double that records every message instead of delivering it.
struct MockMessenger {
    // `RefCell` lets `send(&self, ..)` append to the log.
    sent_messages: RefCell<Vec<String>>,
}

impl MockMessenger {
    /// Creates a mock with an empty message log.
    fn new() -> MockMessenger {
        let empty_log = RefCell::new(Vec::new());
        MockMessenger {
            sent_messages: empty_log,
        }
    }
}

impl Messenger for MockMessenger {
    /// Appends a copy of `message` to the log.
    fn send(&self, message: &str) {
        // `self` is only shared here; the mutation goes through the `RefCell`.
        let mut log = self.sent_messages.borrow_mut();
        log.push(message.to_owned());
    }
}
/// Tracks a value against a maximum and reports threshold crossings through a
/// borrowed `Messenger`.
struct LimitTracker<'a> {
    messenger: &'a dyn Messenger,
    value: usize,
    max: usize,
}

impl<'a> LimitTracker<'a> {
    /// Creates a tracker that starts at zero and reports to `messenger`.
    fn new(messenger: &'a dyn Messenger, max: usize) -> LimitTracker<'a> {
        LimitTracker {
            messenger,
            max,
            value: 0,
        }
    }

    /// Stores `value` and sends at most one message, chosen by the fraction of
    /// `max` now in use: at least 100%, at least 90%, or at least 75%.
    fn set_value(&mut self, value: usize) {
        self.value = value;

        let used_fraction = self.value as f64 / self.max as f64;

        // Pick the most severe notice that applies, if any.
        let notice = if used_fraction >= 1.0 {
            Some("Error: You are over your quota!")
        } else if used_fraction >= 0.9 {
            Some("Urgent warning: You've used up over 90% of your quota!")
        } else if used_fraction >= 0.75 {
            Some("Warning: You've used up over 75% of your quota")
        } else {
            None
        };

        if let Some(text) = notice {
            self.messenger.send(text);
        }
    }
}
/// Drives a `LimitTracker` past each threshold and prints what the mock recorded.
fn main() {
    let recorder = MockMessenger::new();
    let mut tracker = LimitTracker::new(&recorder, 100);

    // One value in each reporting band: 75%+, 90%+ and over quota.
    for level in vec![80, 95, 105] {
        tracker.set_value(level);
    }

    let recorded = recorder.sent_messages.borrow();
    println!("Messages sent: {:#?}", recorded);
}
Rc<RefCell<T>> - Multiple Owners with Mutability
Combining Rc<T> and RefCell<T> allows multiple owners of data that can still be mutated:
use std::cell::RefCell;
use std::rc::Rc;
/// A tree node that can be mutated after it has been shared.
///
/// Both links are strong `Rc` pointers: a parent owns its children and every
/// child owns its parent. Once `add_child` has linked two nodes they keep each
/// other alive, so neither reference count can fall to zero on its own.
#[derive(Debug)]
struct Node {
    value: i32,
    children: RefCell<Vec<Rc<Node>>>,
    parent: RefCell<Option<Rc<Node>>>,
}

impl Node {
    /// Creates a detached node: no children and no parent.
    fn new(value: i32) -> Rc<Self> {
        let detached = Node {
            value,
            children: RefCell::new(vec![]),
            parent: RefCell::new(None),
        };
        Rc::new(detached)
    }

    /// Links `child` under `parent` in both directions.
    fn add_child(parent: &Rc<Node>, child: Rc<Node>) {
        // Point the child back at its parent; the borrow ends with this scope.
        {
            let mut parent_slot = child.parent.borrow_mut();
            *parent_slot = Some(Rc::clone(parent));
        }

        // Hand ownership of the child to the parent's child list.
        let mut siblings = parent.children.borrow_mut();
        siblings.push(child);
    }

    /// Prints `node` and its descendants, one value per line, indenting each
    /// level by two more spaces than the level above it.
    fn print_tree(node: &Rc<Node>, indent: usize) {
        println!("{:indent$}{}", "", node.value, indent = indent);

        let kids = node.children.borrow();
        for kid in kids.iter() {
            Self::print_tree(kid, indent + 2);
        }
    }
}
fn main() {
let root = Node::new(1);
let child1 = Node::new(2);
let child2 = Node::new(3);
let grandchild = Node::new(4);
Node::add_child(&root, child1.clone());
Node::add_child(&root, child2.clone());
Node::add_child(&child1, grandchild.clone());
println!("Tree structure:");
Node::print_tree(&root, 0);
println!("Grandchild's parent: {}",
grandchild.parent.borrow().as_ref().unwrap().value);
}
Arc<T> - Atomic Reference Counting
Arc<T> (Atomically Reference Counted) is the thread-safe version of Rc<T>, enabling shared ownership across threads:
use std::sync::Arc;
use std::thread;
/// Shares one read-only vector with three threads and prints each thread's sum.
fn main() {
    let shared = Arc::new(vec![1, 2, 3, 4, 5]);

    // Start every worker first; each one owns its own `Arc` handle.
    let workers: Vec<_> = (0..3)
        .map(|i| {
            let local = Arc::clone(&shared);
            thread::spawn(move || {
                println!("Thread {}: {:?}", i, local);
                local.iter().sum::<i32>()
            })
        })
        .collect();

    // Then collect the results in spawn order.
    for worker in workers {
        let total = worker.join().unwrap();
        println!("Sum from thread: {}", total);
    }

    println!("Original data: {:?}", shared);
}
Mutex<T> - Mutual Exclusion
Mutex<T> provides thread-safe mutable access to data: only the thread that currently holds the lock can read or modify the value.
use std::sync::{Arc, Mutex};
use std::thread;
/// Increments one shared counter from ten threads and prints the total.
fn main() {
    let counter = Arc::new(Mutex::new(0));

    // Spawn all ten workers before waiting on any of them.
    let workers: Vec<_> = (0..10)
        .map(|_| {
            let shared = Arc::clone(&counter);
            thread::spawn(move || {
                // The guard unlocks the mutex when it goes out of scope.
                let mut guard = shared.lock().unwrap();
                *guard += 1;
            })
        })
        .collect();

    for worker in workers {
        worker.join().unwrap();
    }

    let total = *counter.lock().unwrap();
    println!("Final counter value: {}", total);
}
Producer-Consumer with Arc<Mutex<T>>
use std::sync::{Arc, Mutex};
use std::thread;
use std::time::Duration;
use std::collections::VecDeque;
/// Runs one producer and one consumer thread over a shared, mutex-protected queue.
///
/// The producer pushes the numbers 0 through 4, pausing before each push. The
/// consumer polls the queue and stops once it has seen the queue empty twice
/// in a row with a pause in between.
fn main() {
    let buffer = Arc::new(Mutex::new(VecDeque::new()));
    let buffer_producer = Arc::clone(&buffer);
    let buffer_consumer = Arc::clone(&buffer);

    // Producer thread
    let producer = thread::spawn(move || {
        for i in 0..5 {
            thread::sleep(Duration::from_millis(100));
            // The guard is dropped at the end of each iteration, so the lock is
            // never held across the sleep above.
            let mut queue = buffer_producer.lock().unwrap();
            queue.push_back(i);
            println!("Produced: {}", i);
        }
    });

    // Consumer thread
    let consumer = thread::spawn(move || {
        loop {
            thread::sleep(Duration::from_millis(150));

            // Take at most one item, then release the lock before deciding
            // whether to wait.
            let drained = {
                let mut queue = buffer_consumer.lock().unwrap();
                if let Some(item) = queue.pop_front() {
                    println!("Consumed: {}", item);
                } else {
                    println!("Buffer empty");
                }
                let now_empty = queue.is_empty();
                now_empty
            };

            // Simple termination condition: stop if the queue is still empty
            // after a grace period. The lock must not be held while waiting,
            // otherwise the producer is blocked and the second check can never
            // see a new item.
            if drained {
                thread::sleep(Duration::from_millis(200));
                if buffer_consumer.lock().unwrap().is_empty() {
                    break;
                }
            }
        }
    });

    producer.join().unwrap();
    consumer.join().unwrap();
    println!("Remaining items: {:?}", *buffer.lock().unwrap());
}
Weak<T> - Breaking Reference Cycles
Weak<T> provides a non-owning reference that doesn't affect the strong reference count, so it does not keep the value alive. This makes it the standard tool for breaking reference cycles:
use std::cell::RefCell;
use std::rc::{Rc, Weak};
/// A tree node with strong links down to its children and a weak link up to
/// its parent.
#[derive(Debug)]
struct Node {
    value: i32,
    children: RefCell<Vec<Rc<Node>>>,
    parent: RefCell<Weak<Node>>, // weak, so a child never keeps its parent alive
}

impl Node {
    /// Creates a detached node: no children and a parent link that points nowhere.
    fn new(value: i32) -> Rc<Self> {
        let detached = Node {
            value,
            children: RefCell::new(vec![]),
            parent: RefCell::new(Weak::new()),
        };
        Rc::new(detached)
    }

    /// Links `child` under `parent`: a weak pointer upwards, a strong pointer downwards.
    fn add_child(parent: &Rc<Node>, child: &Rc<Node>) {
        // Upward link: downgrade so the parent's strong count is unchanged.
        {
            let mut parent_slot = child.parent.borrow_mut();
            *parent_slot = Rc::downgrade(parent);
        }

        // Downward link: the parent takes a strong handle on the child.
        let mut siblings = parent.children.borrow_mut();
        siblings.push(Rc::clone(child));
    }

    /// Returns the parent if it is still alive, or `None` if it was never set
    /// or has already been dropped.
    fn get_parent(&self) -> Option<Rc<Node>> {
        let link = self.parent.borrow();
        let upgraded = link.upgrade();
        upgraded
    }
}

impl Drop for Node {
    /// Logs the node's value when it is destroyed.
    fn drop(&mut self) {
        println!("Dropping node with value: {}", self.value);
    }
}
/// Links two children under one parent and shows that the weak back-links do
/// not add to the parent's strong count.
fn main() {
    let parent = Node::new(1);
    let child1 = Node::new(2);
    let child2 = Node::new(3);

    for child in vec![&child1, &child2] {
        Node::add_child(&parent, child);
    }

    println!("Parent strong count: {}", Rc::strong_count(&parent));
    println!("Child1 strong count: {}", Rc::strong_count(&child1));

    // Follow the weak link upwards; it only succeeds while the parent is alive.
    match child1.get_parent() {
        Some(parent_ref) => println!("Child1's parent value: {}", parent_ref.value),
        None => {}
    }

    // There is no strong cycle, so every node is dropped when `main` returns.
}
Advanced Smart Pointer Patterns
Custom Smart Pointer
use std::ops::Deref;
/// A minimal smart pointer: a tuple struct that wraps one value and derefs to it.
struct MyBox<T>(T);

impl<T> MyBox<T> {
    /// Wraps `x` in a `MyBox`.
    fn new(x: T) -> MyBox<T> {
        Self(x)
    }
}

impl<T> Deref for MyBox<T> {
    type Target = T;

    /// Borrows the wrapped value, which is what makes `*my_box` and deref
    /// coercion work.
    fn deref(&self) -> &Self::Target {
        let MyBox(inner) = self;
        inner
    }
}

impl<T> Drop for MyBox<T> {
    /// Logs a message when the box is destroyed.
    fn drop(&mut self) {
        println!("Dropping MyBox");
    }
}
/// Prints a greeting for `name` to standard output.
fn hello(name: &str) {
println!("Hello, {}!", name);
}
/// Shows deref coercion and explicit dereferencing on `MyBox`.
fn main() {
    let boxed_name = MyBox::new(String::from("Rust"));

    // Deref coercion: &MyBox<String> -> &String -> &str
    let greeted: &str = &boxed_name;
    hello(greeted);

    // Explicit dereferencing
    println!("Value: {}", *boxed_name);
}
Shared State Machine
use std::sync::{Arc, Mutex};
use std::thread;
use std::time::Duration;
/// The phases a `StateMachine` can be in.
#[derive(Debug, Clone)]
enum State {
    Idle,
    Processing,
    Complete,
    Error(String),
}

/// A state value that can be read and updated from several threads.
struct StateMachine {
    state: Arc<Mutex<State>>,
}

impl StateMachine {
    /// Creates a machine in the `Idle` state.
    fn new() -> Self {
        let initial = Mutex::new(State::Idle);
        StateMachine {
            state: Arc::new(initial),
        }
    }

    /// Spawns a detached worker that moves the state to `Processing`, sleeps
    /// for one second, then moves it to `Complete`.
    fn start_processing(&self) {
        let shared = Arc::clone(&self.state);
        thread::spawn(move || {
            // Transition to processing; the message is printed while the lock is held.
            let mut guard = shared.lock().unwrap();
            *guard = State::Processing;
            println!("Started processing");
            drop(guard);

            // Simulate work
            thread::sleep(Duration::from_millis(1000));

            // Transition to complete; again printed while the lock is held.
            let mut guard = shared.lock().unwrap();
            *guard = State::Complete;
            println!("Processing complete");
            drop(guard);
        });
    }

    /// Returns a snapshot (clone) of the current state.
    fn get_state(&self) -> State {
        let guard = self.state.lock().unwrap();
        State::clone(&guard)
    }

    /// Blocks, polling every 100 ms, until the state is `Complete` or `Error`.
    fn wait_for_completion(&self) {
        while !matches!(self.get_state(), State::Complete | State::Error(_)) {
            thread::sleep(Duration::from_millis(100));
        }
    }
}
fn main() {
let state_machine = StateMachine::new();
println!("Initial state: {:?}", state_machine.get_state());
state_machine.start_processing();
// Monitor state changes
let monitor_state = Arc::clone(&state_machine.state);
let monitor_handle = thread::spawn(move || {
let mut last_state = State::Idle;
loop {
let current_state = monitor_state.lock().unwrap().clone();
if !matches!(current_state, State::Idle) {
if !std::mem::discriminant(¤t_state) == std::mem::discriminant(&last_state) {
println!("State changed to: {:?}", current_state);
last_state = current_state.clone();
}
if matches!(current_state, State::Complete | State::Error(_)) {
break;
}
}
thread::sleep(Duration::from_millis(50));
}
});
state_machine.wait_for_completion();
monitor_handle.join().unwrap();
println!("Final state: {:?}", state_machine.get_state());
}
Performance Considerations
When to Use Each Smart Pointer
// Box<T>: Single ownership, heap allocation
/// Returns a boxed vector holding one million `1`s.
fn use_box() -> Box<Vec<i32>> {
    // Good for large data that needs heap allocation
    let ones: Vec<i32> = vec![1; 1_000_000];
    Box::new(ones)
}
// Rc<T>: Multiple ownership, single-threaded
/// Returns two `Rc` handles that own the same `String`.
fn use_rc() -> (std::rc::Rc<String>, std::rc::Rc<String>) {
    use std::rc::Rc;

    let first = Rc::new(String::from("shared data"));
    let second = Rc::clone(&first);
    (first, second)
}
// Arc<T>: Multiple ownership, multi-threaded
/// Returns two `Arc` handles that own the same `String`.
fn use_arc() -> (std::sync::Arc<String>, std::sync::Arc<String>) {
    use std::sync::Arc;

    let first = Arc::new(String::from("shared data"));
    let second = Arc::clone(&first);
    (first, second)
}
// RefCell<T>: Interior mutability, single-threaded
/// Returns a `RefCell` initialised to 42.
fn use_refcell() -> std::cell::RefCell<i32> {
    use std::cell::RefCell;

    let cell = RefCell::new(42);
    cell
}
// Mutex<T>: Interior mutability, multi-threaded
/// Returns an unlocked `Mutex` initialised to 42.
fn use_mutex() -> std::sync::Mutex<i32> {
    use std::sync::Mutex;

    let guarded = Mutex::new(42);
    guarded
}
Best Practices
1. Choose the Right Smart Pointer
// Good: Use Box for heap allocation
/// A one-million-byte buffer that is meant to be handed around behind a `Box`.
struct LargeStruct {
    data: [u8; 1_000_000],
}

/// Returns a zero-filled `LargeStruct` owned by a `Box`.
///
/// NOTE(review): in unoptimized builds the array may be materialized on the
/// stack before it is moved into the box — confirm callers have enough stack.
fn create_large() -> Box<LargeStruct> {
    let zeroed = LargeStruct {
        data: [0u8; 1_000_000],
    };
    Box::new(zeroed)
}
// Good: Use Rc for shared ownership in single-threaded code
use std::rc::Rc;
/// Returns two `Rc` handles that own the same `String`.
fn share_data() -> (Rc<String>, Rc<String>) {
    let first = Rc::new(String::from("shared"));
    let second = Rc::clone(&first);
    (first, second)
}
// Good: Use Arc for shared ownership across threads
use std::sync::Arc;
/// Returns two `Arc` handles that own the same `String`.
fn share_across_threads() -> (Arc<String>, Arc<String>) {
    let first = Arc::new(String::from("shared"));
    let second = Arc::clone(&first);
    (first, second)
}
2. Avoid Reference Cycles
use std::rc::{Rc, Weak};
use std::cell::RefCell;
// Good: Use Weak references to break cycles
/// Holds its children through strong `Rc` pointers inside a `RefCell`.
struct Parent {
children: RefCell<Vec<Rc<Child>>>,
}
/// Refers back to its parent through a `Weak` pointer inside a `RefCell`.
struct Child {
parent: RefCell<Weak<Parent>>, // Weak reference prevents cycle
}
// Avoid: Strong references in both directions create cycles
/*
struct BadParent {
children: RefCell<Vec<Rc<BadChild>>>,
}
struct BadChild {
parent: RefCell<Rc<BadParent>>, // Strong reference creates cycle
}
*/
3. Use Clone Sparingly with Rc/Arc
use std::rc::Rc;
// Good: Clone only when needed
/// Prints the shared string without taking another `Rc` handle.
fn process_shared_data(data: &Rc<String>) {
    // Borrowing is enough for reading; don't clone unless you need to store it
    let text: &str = data.as_str();
    println!("Processing: {}", text);
}
// Avoid: Unnecessary cloning
/// Prints the shared string and returns a new `Rc` handle to it.
fn avoid_unnecessary_clone(data: &Rc<String>) -> Rc<String> {
    // The extra handle is unnecessary if we're just reading
    let duplicate = Rc::clone(data);
    println!("Processing: {}", duplicate);
    // Only clone if returning or storing
    duplicate
}
4. Handle Mutex Poisoning
use std::sync::{Arc, Mutex};
/// Reads the value behind the mutex without panicking on poisoning.
///
/// Returns `Ok(value)` when the lock is healthy. When the mutex is poisoned,
/// the guard is recovered from the poison error and the value it protects is
/// reported inside the `Err` message.
fn safe_mutex_access(data: &Arc<Mutex<i32>>) -> Result<i32, String> {
    data.lock()
        .map(|guard| *guard)
        .map_err(|poisoned| {
            // Recover from poisoned mutex if possible
            let guard = poisoned.into_inner();
            format!("Mutex was poisoned, recovered value: {}", *guard)
        })
}
5. Use try_lock When Appropriate
use std::sync::{Arc, Mutex};
use std::time::Duration;
/// Reads the value if the lock can be taken immediately.
///
/// Returns `None`, after printing a note, whenever `try_lock` fails.
fn non_blocking_access(data: &Arc<Mutex<i32>>) -> Option<i32> {
    if let Ok(guard) = data.try_lock() {
        return Some(*guard);
    }

    println!("Could not acquire lock, continuing...");
    None
}
/// Reads the value if the lock can be taken immediately, otherwise returns `None`.
///
/// Despite the name, no timeout is applied: this makes a single `try_lock` attempt.
fn timeout_access(data: &Arc<Mutex<i32>>) -> Option<i32> {
    // Note: try_lock_for is not available in std, but shown for concept
    // You would need to use external crates like parking_lot for this
    data.try_lock().ok().map(|guard| *guard)
}
Smart pointers are essential tools for memory management and concurrent programming in Rust. They enable patterns that would be difficult or impossible with simple references, while maintaining Rust's safety guarantees. Choose the appropriate smart pointer based on your ownership and thread-safety requirements.