Unsafe Rust | WebReference

Unsafe Rust allows you to bypass Rust's safety guarantees when necessary for systems programming, performance optimization, or interfacing with other languages. Understanding unsafe Rust is crucial for advanced systems programming while maintaining safety through careful reasoning.

Understanding Unsafe Rust

Unsafe Rust gives you access to five additional capabilities:

Dereference raw pointers
Call unsafe functions or methods
Access or modify mutable static variables
Implement unsafe traits
Access fields of unions

When to Use Unsafe

// Good reasons to use unsafe:
// 1. Implementing low-level data structures
// 2. Interfacing with C libraries
// 3. Performance-critical code with proven safety
// 4. Hardware or OS interaction

// Example: Implementing a custom growable vector on top of the global allocator
use std::alloc::{alloc, dealloc, Layout};

/// A minimal growable array backed by a manually managed heap allocation.
///
/// # Safety invariants
/// - `len <= capacity`.
/// - Slots `0..len` hold initialized values of `T`.
/// - If `T` is not zero-sized and `capacity > 0`, `ptr` was returned by the
///   global allocator for `Layout::array::<T>(capacity)`.
/// - Otherwise `ptr` is a dangling (non-null, aligned) pointer and no
///   allocation exists. Zero-sized types never allocate and report a
///   capacity of `usize::MAX`.
struct CustomVec<T> {
    ptr: *mut T,
    len: usize,
    capacity: usize,
}

impl<T> CustomVec<T> {
    /// `true` when `T` occupies no memory; such types must never reach the
    /// allocator because zero-size allocations are undefined behavior.
    const IS_ZST: bool = std::mem::size_of::<T>() == 0;

    /// Creates an empty vector without allocating.
    fn new() -> Self {
        CustomVec {
            ptr: std::ptr::NonNull::<T>::dangling().as_ptr(),
            len: 0,
            capacity: if Self::IS_ZST { usize::MAX } else { 0 },
        }
    }

    /// Appends `item`, growing the buffer when it is full.
    ///
    /// # Panics
    /// Panics with "capacity overflow" if the required capacity cannot be
    /// represented.
    fn push(&mut self, item: T) {
        if self.len == self.capacity {
            self.grow();
        }

        // SAFETY: after the check above `len < capacity`, so the slot at
        // `len` lies inside the allocation (or is a valid zero-sized write
        // through the dangling pointer) and holds no live value.
        unsafe {
            std::ptr::write(self.ptr.add(self.len), item);
        }
        self.len += 1;
    }

    /// Removes and returns the last element, or `None` when empty.
    fn pop(&mut self) -> Option<T> {
        if self.len == 0 {
            None
        } else {
            self.len -= 1;
            // SAFETY: the slot at the (already decremented) `len` was
            // initialized by `push`; shrinking `len` first means the value is
            // logically moved out and will not be read or dropped again.
            unsafe { Some(std::ptr::read(self.ptr.add(self.len))) }
        }
    }

    /// Doubles the capacity (or allocates the first slot).
    fn grow(&mut self) {
        // A zero-sized `T` already has capacity `usize::MAX`; reaching this
        // point means the element count itself overflowed.
        assert!(!Self::IS_ZST, "capacity overflow");

        let new_capacity = if self.capacity == 0 {
            1
        } else {
            self.capacity.checked_mul(2).expect("capacity overflow")
        };
        // `Layout::array` rejects sizes that overflow `isize::MAX`.
        let new_layout = Layout::array::<T>(new_capacity).expect("capacity overflow");

        let raw: *mut u8 = if self.capacity == 0 {
            // SAFETY: `T` is not zero-sized and `new_capacity >= 1`, so the
            // layout has a non-zero size as `alloc` requires.
            unsafe { alloc(new_layout) }
        } else {
            let old_layout = Layout::array::<T>(self.capacity)
                .expect("layout was valid when the buffer was allocated");
            // SAFETY: `ptr` was allocated by the global allocator with
            // `old_layout`, and the new size is non-zero and was validated by
            // `Layout::array` above.
            unsafe { std::alloc::realloc(self.ptr as *mut u8, old_layout, new_layout.size()) }
        };

        if raw.is_null() {
            // On failure the old block (if any) is still valid and owned by
            // `self`; report the error the way std collections do.
            std::alloc::handle_alloc_error(new_layout);
        }

        self.ptr = raw as *mut T;
        self.capacity = new_capacity;
    }
}

impl<T> Drop for CustomVec<T> {
    fn drop(&mut self) {
        // Drop all remaining elements first.
        while self.pop().is_some() {}

        // Zero-sized types and never-grown vectors own no allocation.
        if !Self::IS_ZST && self.capacity != 0 {
            let layout = Layout::array::<T>(self.capacity)
                .expect("layout was valid when the buffer was allocated");
            // SAFETY: `ptr` was allocated by the global allocator with
            // exactly this layout and is not used after this point.
            unsafe {
                dealloc(self.ptr as *mut u8, layout);
            }
        }
    }
}

/// Pushes three integers onto a `CustomVec` and pops them back off,
/// printing each one in last-in-first-out order.
fn main() {
    let mut stack = CustomVec::new();
    for value in 1..=3 {
        stack.push(value);
    }

    loop {
        match stack.pop() {
            Some(top) => println!("Popped: {}", top),
            None => break,
        }
    }
}

Raw Pointers

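Raw pointers (`*const T` and `*mut T`) are not tracked by the borrow checker. Creating one is safe, but dereferencing it requires `unsafe`, because the compiler cannot guarantee that the pointer is valid, aligned, or free of aliasing violations.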

Creating and Using Raw Pointers

/// Creates raw pointers to a local variable, then reads and writes through
/// them inside an `unsafe` block.
fn main() {
    let mut num = 5;

    // Create the raw pointers directly from the place `num`, without going
    // through intermediate references. Taking `&num` and then `&mut num`
    // would invalidate the first pointer under the aliasing rules checked by
    // Miri, making the later read through `r1` undefined behavior.
    let r1 = std::ptr::addr_of!(num); // Immutable raw pointer (*const i32)
    let r2 = std::ptr::addr_of_mut!(num); // Mutable raw pointer (*mut i32)

    // Create raw pointers from addresses.
    // Creating such a pointer is safe; dereferencing it would not be, since
    // nothing guarantees that this address holds a valid `i32`.
    let address = 0x012345usize;
    let _r3 = address as *const i32;

    println!("Raw pointer r1: {:p}", r1);
    println!("Raw pointer r2: {:p}", r2);

    // Dereferencing requires unsafe
    // SAFETY: `r1` and `r2` both point at `num`, which is alive, aligned and
    // initialized for this whole block, and no references to `num` exist
    // while the pointers are in use.
    unsafe {
        println!("r1 points to: {}", *r1);
        *r2 = 10;
        println!("r2 now points to: {}", *r2);
    }

    println!("num is now: {}", num);
}

Pointer Arithmetic

/// Walks a five-element array forwards and then backwards with a raw
/// pointer cursor, printing every element together with its index.
fn pointer_arithmetic_example() {
    let arr = [1, 2, 3, 4, 5];
    let count = arr.len();

    // Forward pass: read, then step the cursor to the next element.
    let mut cursor = arr.as_ptr();
    let mut index = 0;
    while index < count {
        // SAFETY: the cursor has been advanced `index` times from the start
        // and `index < count`, so it points at a live element of `arr`.
        let value = unsafe { *cursor };
        println!("Element {}: {}", index, value);
        // SAFETY: the result is at most one past the end of `arr`, which is
        // still a valid pointer to compute.
        cursor = unsafe { cursor.add(1) };
        index += 1;
    }

    // Backward pass: the cursor now sits one past the end, so step back
    // before each read.
    for index in (0..count).rev() {
        // SAFETY: the cursor is between 1 and `count` elements past the
        // start, so stepping back once lands on a live element of `arr`.
        cursor = unsafe { cursor.sub(1) };
        // SAFETY: see above — the cursor points at element `index`.
        let value = unsafe { *cursor };
        println!("Backwards {}: {}", index, value);
    }
}

/// Entry point for this snippet: runs the pointer-arithmetic demonstration.
fn main() {
    pointer_arithmetic_example();
}

Safe Wrapper for Unsafe Operations

/// A read-only view over a borrowed slice, stored as a raw pointer plus a
/// length.
///
/// The `PhantomData<&'a T>` ties the view to the lifetime of the slice it was
/// created from, so the pointer can never outlive the borrowed data.
struct SafeSlice<'a, T> {
    ptr: *const T,
    len: usize,
    _marker: std::marker::PhantomData<&'a T>,
}

impl<'a, T> SafeSlice<'a, T> {
    /// Wraps `slice`; the view borrows it for `'a`.
    fn new(slice: &'a [T]) -> Self {
        SafeSlice {
            ptr: slice.as_ptr(),
            len: slice.len(),
            _marker: std::marker::PhantomData,
        }
    }

    /// Returns the element at `index`, or `None` when out of bounds.
    fn get(&self, index: usize) -> Option<&'a T> {
        if index < self.len {
            // SAFETY: `index < len`, so the offset stays inside the slice
            // this view was created from, which is borrowed for `'a`.
            Some(unsafe { &*self.ptr.add(index) })
        } else {
            None
        }
    }

    /// Number of elements in the view.
    fn len(&self) -> usize {
        self.len
    }

    /// Returns an iterator over references to the elements.
    fn iter(&self) -> SafeSliceIter<'a, T> {
        SafeSliceIter {
            ptr: self.ptr,
            remaining: self.len,
            _marker: std::marker::PhantomData,
        }
    }
}

/// Iterator over a [`SafeSlice`].
///
/// Progress is tracked with a remaining-element count rather than an end
/// pointer: for zero-sized `T`, advancing a pointer does not change its
/// address, so a `ptr == end` comparison would report the iterator as
/// exhausted before yielding anything.
struct SafeSliceIter<'a, T> {
    ptr: *const T,
    remaining: usize,
    _marker: std::marker::PhantomData<&'a T>,
}

impl<'a, T> Iterator for SafeSliceIter<'a, T> {
    type Item = &'a T;

    fn next(&mut self) -> Option<Self::Item> {
        if self.remaining == 0 {
            return None;
        }

        // SAFETY: `remaining > 0`, so `ptr` points at an element of the
        // slice that is borrowed for `'a`.
        let current = unsafe { &*self.ptr };
        // SAFETY: the result is at most one past the end of the slice.
        self.ptr = unsafe { self.ptr.add(1) };
        self.remaining -= 1;
        Some(current)
    }

    fn size_hint(&self) -> (usize, Option<usize>) {
        (self.remaining, Some(self.remaining))
    }
}

/// Demonstrates bounds-checked access and iteration through `SafeSlice`.
fn main() {
    let data = [1, 2, 3, 4, 5];
    let view = SafeSlice::new(&data);

    // Look-ups go through the wrapper's bounds check: one hit, one miss.
    let in_bounds = view.get(2);
    let out_of_bounds = view.get(10);
    println!("Element 2: {:?}", in_bounds);
    println!("Element 10: {:?}", out_of_bounds);

    // Iteration is safe as well.
    view.iter()
        .enumerate()
        .for_each(|(i, value)| println!("Index {}: {}", i, value));
}

Unsafe Functions and Methods

Defining Unsafe Functions

/// Demonstration-only `unsafe` function.
///
/// # Safety
/// The body only prints a message, so there is no real precondition for the
/// caller to uphold; the `unsafe` qualifier is here purely to show that such
/// a function must be called from an unsafe context.
unsafe fn dangerous_function() {
    // This function can only be called from unsafe blocks (or from other unsafe functions)
    println!("This is a dangerous function!");
}

/// Splits a mutable slice into two non-overlapping mutable halves at `mid`.
///
/// This is a safe wrapper: the borrow checker cannot see that the two halves
/// are disjoint, so they are rebuilt from a raw pointer after the bounds have
/// been validated.
///
/// # Panics
/// Panics if `mid > slice.len()`.
fn safe_split_at_mut<T>(slice: &mut [T], mid: usize) -> (&mut [T], &mut [T]) {
    let total = slice.len();
    assert!(mid <= total, "Index {} out of bounds for slice of length {}", mid, total);

    let base = slice.as_mut_ptr();

    // SAFETY: `base` is valid for `total` elements and `mid <= total`, so
    // `[base, base + mid)` lies within the original slice.
    let head = unsafe { std::slice::from_raw_parts_mut(base, mid) };
    // SAFETY: `[base + mid, base + total)` also lies within the original
    // slice and does not overlap `head`, so the two `&mut` never alias.
    let tail = unsafe { std::slice::from_raw_parts_mut(base.add(mid), total - mid) };

    (head, tail)
}

/// Calls the unsafe demo function, then splits an array with the safe wrapper.
fn main() {
    // Unsafe functions may only be invoked inside an `unsafe` block.
    unsafe { dangerous_function() };

    let mut arr = [1, 2, 3, 4, 5, 6];
    let halves = safe_split_at_mut(&mut arr, 3);
    println!("Left: {:?}", halves.0);
    println!("Right: {:?}", halves.1);
}

Unsafe Methods in Implementations

/// A fixed-size, zero-initialized byte buffer offering checked and unchecked
/// element access.
struct Buffer {
    data: Vec<u8>,
}

impl Buffer {
    /// Creates a buffer of `size` zero bytes.
    fn new(size: usize) -> Self {
        Buffer {
            data: vec![0; size],
        }
    }

    /// Safe method: returns the byte at `index`, or `None` when out of bounds.
    fn get(&self, index: usize) -> Option<u8> {
        self.data.get(index).copied()
    }

    /// Unsafe method for performance-critical code: skips the bounds check.
    ///
    /// # Safety
    /// The caller must guarantee `index < ` the buffer's length; any other
    /// index is undefined behavior.
    unsafe fn get_unchecked(&self, index: usize) -> u8 {
        // SAFETY: the caller guarantees that `index` is within bounds.
        unsafe { *self.data.get_unchecked(index) }
    }

    /// Safe wrapper that enforces the precondition of `get_unchecked`.
    ///
    /// The check is a real `assert!`, not a `debug_assert!`: a safe function
    /// must never be able to trigger undefined behavior, and a debug-only
    /// check would vanish in release builds and let safe callers read out of
    /// bounds.
    ///
    /// # Panics
    /// Panics with "Index out of bounds" if `index` is not less than the
    /// buffer's length.
    fn get_fast(&self, index: usize) -> u8 {
        assert!(index < self.data.len(), "Index out of bounds");
        // SAFETY: the assertion above holds in every build profile, so
        // `index` is within bounds.
        unsafe { self.get_unchecked(index) }
    }
}

/// Reads the same index of a `Buffer` through each of its three accessors.
fn main() {
    let buffer = Buffer::new(10);
    let index = 5;

    // Checked access returns an Option.
    let checked = buffer.get(index);
    println!("Safe get: {:?}", checked);

    // Unchecked access: keeping the index in bounds is the caller's job.
    // Here index 5 is within the 10-byte buffer created above.
    let unchecked = unsafe { buffer.get_unchecked(index) };
    println!("Unsafe get: {}", unchecked);

    // Fast access through the safe wrapper.
    let fast = buffer.get_fast(index);
    println!("Fast get: {}", fast);
}

Unsafe Traits

Some traits are unsafe to implement because they have invariants that the compiler cannot verify.

Send and Sync Traits

use std::cell::UnsafeCell;

/// Example: a thread-safe cell for `Copy` types.
///
/// An `UnsafeCell` on its own provides no synchronization: a `set` on one
/// thread racing with a `get` or `set` on another is a data race, which is
/// undefined behavior even for `Copy` types. Every access to the value is
/// therefore guarded by a minimal spin lock.
///
/// # Safety invariant
/// `value` is only read or written while `locked` is held by the accessing
/// thread.
struct ThreadSafeCell<T> {
    /// `true` while some thread is inside the critical section.
    locked: std::sync::atomic::AtomicBool,
    value: UnsafeCell<T>,
}

impl<T> ThreadSafeCell<T> {
    /// Creates an unlocked cell holding `value`.
    fn new(value: T) -> Self {
        ThreadSafeCell {
            locked: std::sync::atomic::AtomicBool::new(false),
            value: UnsafeCell::new(value),
        }
    }

    /// Spins until this thread owns the lock.
    fn lock(&self) {
        use std::sync::atomic::Ordering;
        // Acquire on success pairs with the Release in `unlock`, making the
        // previous owner's write to `value` visible to this thread.
        while self
            .locked
            .compare_exchange_weak(false, true, Ordering::Acquire, Ordering::Relaxed)
            .is_err()
        {
            std::hint::spin_loop();
        }
    }

    /// Releases the lock taken by `lock`.
    fn unlock(&self) {
        self.locked
            .store(false, std::sync::atomic::Ordering::Release);
    }

    /// Returns a copy of the current value.
    fn get(&self) -> T
    where
        T: Copy,
    {
        self.lock();
        // SAFETY: the lock is held, so no other thread is accessing `value`.
        let current = unsafe { *self.value.get() };
        self.unlock();
        current
    }

    /// Replaces the current value with `value`.
    fn set(&self, value: T) {
        self.lock();
        // SAFETY: the lock is held, so this is the only access to `value`.
        let previous = unsafe { std::mem::replace(&mut *self.value.get(), value) };
        self.unlock();
        // Drop the old value outside the critical section so a panicking
        // destructor can never leave the lock held.
        drop(previous);
    }
}

// SAFETY: ThreadSafeCell<T> owns its T, so it can be sent between threads
// whenever T can be sent.
unsafe impl<T: Send> Send for ThreadSafeCell<T> {}

// SAFETY: every access to the inner value happens under the spin lock, so
// sharing `&ThreadSafeCell<T>` only ever hands the value to one thread at a
// time. That requires `T: Send` (the same bound `Mutex<T>` uses); `Copy`
// alone would not make unsynchronized reads and writes sound.
unsafe impl<T: Send> Sync for ThreadSafeCell<T> {}

/// Shares one `ThreadSafeCell` between five threads; each thread stores its
/// own index and prints whatever value it reads back.
fn main() {
    use std::sync::Arc;
    use std::thread;

    let cell = Arc::new(ThreadSafeCell::new(42));

    // Spawn every worker before joining any of them.
    let handles: Vec<_> = (0..5)
        .map(|i| {
            let shared = Arc::clone(&cell);
            thread::spawn(move || {
                shared.set(i);
                println!("Thread {} set value to {}", i, shared.get());
            })
        })
        .collect();

    handles
        .into_iter()
        .for_each(|handle| handle.join().unwrap());
}

Custom Unsafe Trait

/// Unsafe trait for types that can be safely zeroed.
///
/// Implementing it is `unsafe` because the array implementation below calls
/// `std::mem::zeroed` on the strength of the element type's implementation;
/// an incorrect impl would let safe code build an invalid value.
unsafe trait Zeroable {
    /// Produces the zero value of the type.
    fn zeroed() -> Self;
}

// SAFETY: Primitive integers can be safely zeroed
unsafe impl Zeroable for i32 {
    fn zeroed() -> Self {
        0_i32
    }
}

// SAFETY: Primitive integers can be safely zeroed
unsafe impl Zeroable for u32 {
    fn zeroed() -> Self {
        0_u32
    }
}

// SAFETY: Arrays of Zeroable types can be safely zeroed
unsafe impl<T: Zeroable, const N: usize> Zeroable for [T; N] {
    fn zeroed() -> Self {
        // SAFETY: relies on `T: Zeroable` — an array of zeroable elements is
        // itself zeroable.
        unsafe { std::mem::zeroed::<[T; N]>() }
    }
}

// Note: We should NOT implement Zeroable for types with
// validity invariants like bool, char, or references

/// Returns the zero value of any `Zeroable` type without the caller writing
/// `unsafe`.
fn create_zeroed<T: Zeroable>() -> T {
    <T as Zeroable>::zeroed()
}

/// Builds a zeroed integer and a zeroed array through `create_zeroed` and
/// prints both.
fn main() {
    let zero_int = create_zeroed::<i32>();
    let zero_array = create_zeroed::<[u32; 5]>();

    println!("Zeroed int: {}", zero_int);
    println!("Zeroed array: {:?}", zero_array);
}

Memory Manipulation

Working with Uninitialized Memory

use std::mem::{self, MaybeUninit};

/// Builds `[0, 1, 2, ..., 999]` by filling an array of uninitialized slots
/// and then reinterpreting it as an array of `i32`.
fn create_initialized_array() -> [i32; 1000] {
    const LEN: usize = 1000;

    // An array of `MaybeUninit` needs no initialization of its own.
    let mut slots = [MaybeUninit::<i32>::uninit(); LEN];

    // Fill every slot with its own index.
    for (slot, value) in slots.iter_mut().zip(0_i32..) {
        *slot = MaybeUninit::new(value);
    }

    // SAFETY: the loop above wrote all `LEN` slots, and `MaybeUninit<i32>`
    // has the same size and layout as `i32`.
    unsafe { mem::transmute::<[MaybeUninit<i32>; LEN], [i32; LEN]>(slots) }
}

/// Copies `slice` into a freshly allocated `Vec` with one bulk memory copy.
fn read_values_efficiently<T: Copy>(slice: &[T]) -> Vec<T> {
    let count = slice.len();
    let mut out: Vec<T> = Vec::with_capacity(count);

    let src = slice.as_ptr();
    let dst = out.as_mut_ptr();

    // SAFETY: `src` is valid for reading `count` elements and `dst` for
    // writing `count` elements (that capacity was just reserved). The new
    // allocation cannot overlap the borrowed slice, and `T: Copy` means a
    // bitwise copy creates valid, independent values. Only after the copy is
    // the length raised to cover the now-initialized elements.
    unsafe {
        std::ptr::copy_nonoverlapping(src, dst, count);
        out.set_len(count);
    }

    out
}

/// Prints the first ten elements of the generated array and a copied slice.
fn main() {
    let arr = create_initialized_array();
    let head = &arr[..10];
    println!("First 10 elements: {:?}", head);

    let copied = read_values_efficiently(&[1, 2, 3, 4, 5]);
    println!("Copied: {:?}", copied);
}

Memory Layout and Alignment

use std::alloc::{alloc, dealloc, Layout};
use std::mem;

/// Example record used to inspect size and alignment.
///
/// `#[repr(C)]` keeps the fields in declaration order using C layout rules.
/// NOTE(review): despite the name, this struct is not `#[repr(packed)]`, so
/// padding around `flag` and after `count` is expected; the exact size and
/// alignment depend on the target's `u64` alignment — confirm with the
/// values the demo prints.
#[repr(C)]
struct PackedData {
    flag: u8,
    value: u64,
    count: u32,
}

fn demonstrate_memory_layout() {
    println!("Size of PackedData: {}", mem::size_of::<PackedData>());
    println!("Alignment of PackedData: {}", mem::align_of::<PackedData>());
    
    // Manual memory allocation
    let layout = Layout::new::<PackedData>();
    let ptr = unsafe { alloc(layout) as *mut PackedData };
    
    if ptr.is_null() {
        panic!("Allocation failed");
    }
    
    unsafe {
        // Initialize the allocated memory
        (*ptr).flag = 1;
        (*ptr).value = 0x123456789ABCDEF0;
        (*ptr).count = 42;
        
        println!("Flag: {}", (*ptr).flag);
        println!("Value: 0x{:X}", (*ptr).value);
        println!("Count: {}", (*ptr).count);
        
        // Clean up
        dealloc(ptr as *mut u8, layout);
    }
}

/// Entry point for this snippet: runs the memory-layout demonstration.
fn main() {
    demonstrate_memory_layout();
}

Safety Guidelines and Best Practices

1. Document Safety Invariants

/// A ring buffer implementation
///
/// One slot is always kept vacant to tell "full" apart from "empty", so a
/// buffer created with `capacity` slots stores at most `capacity - 1`
/// elements.
///
/// # Safety Invariants
/// - `read_pos` and `write_pos` are always less than `capacity`
/// - The number of elements is `(write_pos + capacity - read_pos) % capacity`
/// - Valid elements are at indices `read_pos..write_pos` (wrapping); every
///   other slot, including the one at `write_pos`, is uninitialized
/// - For non-zero-sized `T`, `data` was allocated by the global allocator
///   with `Layout::array::<T>(capacity)`; for zero-sized `T` it is dangling
///   and nothing was allocated
struct RingBuffer<T> {
    data: *mut T,
    capacity: usize,
    read_pos: usize,
    write_pos: usize,
}

impl<T> RingBuffer<T> {
    /// `true` when `T` occupies no memory; such types must never reach the
    /// allocator because zero-size allocations are undefined behavior.
    const IS_ZST: bool = std::mem::size_of::<T>() == 0;

    /// Creates an empty buffer with `capacity` slots.
    ///
    /// # Panics
    /// Panics if `capacity` is 0 or the required allocation size overflows.
    fn new(capacity: usize) -> Self {
        assert!(capacity > 0, "Capacity must be greater than 0");

        let data = if Self::IS_ZST {
            std::ptr::NonNull::<T>::dangling().as_ptr()
        } else {
            let layout = Layout::array::<T>(capacity).expect("capacity overflow");
            // SAFETY: `capacity > 0` and `T` is not zero-sized, so `layout`
            // has a non-zero size as `alloc` requires.
            let raw = unsafe { alloc(layout) };
            if raw.is_null() {
                std::alloc::handle_alloc_error(layout);
            }
            raw as *mut T
        };

        RingBuffer {
            data,
            capacity,
            read_pos: 0,
            write_pos: 0,
        }
    }

    /// Push an element to the buffer
    ///
    /// Returns the oldest element if the buffer was full; it is evicted to
    /// make room for `item`. With `capacity == 1` there is no usable slot, so
    /// `item` itself is handed straight back.
    fn push(&mut self, item: T) -> Option<T> {
        let next_write = (self.write_pos + 1) % self.capacity;

        let evicted = if next_write == self.read_pos {
            if self.read_pos == self.write_pos {
                // Full and empty at once: only possible when capacity == 1,
                // where the single slot is the reserved gap.
                return Some(item);
            }
            // Buffer is full: the oldest element lives at `read_pos`, not at
            // `write_pos` (that slot is vacant). Popping moves it out
            // exactly once and frees a slot.
            self.pop()
        } else {
            None
        };

        // SAFETY: `write_pos < capacity`, and the slot at `write_pos` is
        // outside the valid range `read_pos..write_pos`, so it holds no live
        // value that would be overwritten without being dropped.
        unsafe {
            std::ptr::write(self.data.add(self.write_pos), item);
        }
        self.write_pos = next_write;
        evicted
    }

    /// Removes and returns the oldest element, or `None` when empty.
    fn pop(&mut self) -> Option<T> {
        if self.read_pos == self.write_pos {
            None // Buffer is empty
        } else {
            // SAFETY: the buffer is not empty, so the slot at `read_pos`
            // holds an initialized value; advancing `read_pos` right after
            // marks it as moved out.
            let item = unsafe { std::ptr::read(self.data.add(self.read_pos)) };
            self.read_pos = (self.read_pos + 1) % self.capacity;
            Some(item)
        }
    }
}

impl<T> Drop for RingBuffer<T> {
    fn drop(&mut self) {
        // Drop all remaining elements
        while self.pop().is_some() {}

        // Deallocate memory (zero-sized types never allocated any)
        if !Self::IS_ZST {
            let layout = Layout::array::<T>(self.capacity)
                .expect("layout was valid when the buffer was allocated");
            // SAFETY: `data` was allocated in `new` with exactly this layout
            // and is not used after this point.
            unsafe {
                dealloc(self.data as *mut u8, layout);
            }
        }
    }
}

2. Use Abstractions to Limit Unsafe Scope

/// Safe abstraction over unsafe operations: a growable array whose public
/// API can be used without writing any `unsafe` code.
///
/// # Safety invariants
/// - `len <= capacity`.
/// - Slots `0..len` hold initialized values of `T`.
/// - If `T` is not zero-sized and `capacity > 0`, `ptr` was returned by the
///   global allocator for `Layout::array::<T>(capacity)`.
/// - Otherwise `ptr` is dangling and no allocation exists. Zero-sized types
///   never allocate and report a capacity of `usize::MAX`.
pub struct GrowableArray<T> {
    ptr: *mut T,
    len: usize,
    capacity: usize,
}

impl<T> GrowableArray<T> {
    /// `true` when `T` occupies no memory; such types must never reach the
    /// allocator because zero-size allocations are undefined behavior.
    const IS_ZST: bool = std::mem::size_of::<T>() == 0;

    /// Creates an empty array without allocating.
    pub fn new() -> Self {
        GrowableArray {
            ptr: std::ptr::NonNull::<T>::dangling().as_ptr(),
            len: 0,
            capacity: if Self::IS_ZST { usize::MAX } else { 0 },
        }
    }

    /// Appends `item`, growing the buffer when it is full.
    ///
    /// # Panics
    /// Panics with "capacity overflow" if the required capacity cannot be
    /// represented.
    pub fn push(&mut self, item: T) {
        if self.len == self.capacity {
            self.grow();
        }

        // SAFETY: after the check above `len < capacity`, so the slot at
        // `len` lies inside the allocation (or is a valid zero-sized write)
        // and holds no live value.
        unsafe {
            std::ptr::write(self.ptr.add(self.len), item);
        }
        self.len += 1;
    }

    /// Returns a reference to the element at `index`, or `None` when out of
    /// bounds.
    pub fn get(&self, index: usize) -> Option<&T> {
        if index < self.len {
            // SAFETY: `index < len`, so the slot is initialized, and the
            // returned reference borrows `self`, keeping the buffer alive.
            unsafe { Some(&*self.ptr.add(index)) }
        } else {
            None
        }
    }

    /// Number of stored elements.
    pub fn len(&self) -> usize {
        self.len
    }

    /// Returns `true` when no elements are stored.
    pub fn is_empty(&self) -> bool {
        self.len == 0
    }

    // The allocator calls are confined to this private method; the public
    // methods above only touch slots whose validity they have just checked.
    fn grow(&mut self) {
        // A zero-sized `T` already has capacity `usize::MAX`; reaching this
        // point means the element count itself overflowed.
        assert!(!Self::IS_ZST, "capacity overflow");

        let new_capacity = if self.capacity == 0 {
            1
        } else {
            self.capacity.checked_mul(2).expect("capacity overflow")
        };
        // `Layout::array` rejects sizes that overflow `isize::MAX`.
        let new_layout = Layout::array::<T>(new_capacity).expect("capacity overflow");

        let raw: *mut u8 = if self.capacity == 0 {
            // SAFETY: `T` is not zero-sized and `new_capacity >= 1`, so the
            // layout has a non-zero size as `alloc` requires.
            unsafe { alloc(new_layout) }
        } else {
            let old_layout = Layout::array::<T>(self.capacity)
                .expect("layout was valid when the buffer was allocated");
            // SAFETY: `ptr` was allocated by the global allocator with
            // `old_layout`, and the new size is non-zero and was validated by
            // `Layout::array` above.
            unsafe { std::alloc::realloc(self.ptr as *mut u8, old_layout, new_layout.size()) }
        };

        if raw.is_null() {
            std::alloc::handle_alloc_error(new_layout);
        }

        self.ptr = raw as *mut T;
        self.capacity = new_capacity;
    }
}

impl<T> Default for GrowableArray<T> {
    fn default() -> Self {
        Self::new()
    }
}

impl<T> Drop for GrowableArray<T> {
    /// Drops every stored element and releases the buffer; without this the
    /// array would leak both its elements and its allocation.
    fn drop(&mut self) {
        // SAFETY: slots `0..len` are initialized and are never accessed
        // again after this call; for an empty array the dangling pointer is
        // still non-null and aligned, as a zero-length slice requires.
        unsafe {
            std::ptr::drop_in_place(std::ptr::slice_from_raw_parts_mut(self.ptr, self.len));
        }

        if !Self::IS_ZST && self.capacity != 0 {
            let layout = Layout::array::<T>(self.capacity)
                .expect("layout was valid when the buffer was allocated");
            // SAFETY: `ptr` was allocated by the global allocator with
            // exactly this layout and is not used after this point.
            unsafe {
                std::alloc::dealloc(self.ptr as *mut u8, layout);
            }
        }
    }
}

// Safe public interface - users never need to write unsafe code
/// Fills a `GrowableArray` with the numbers 0 through 9 and prints each
/// element next to its index.
fn main() {
    let mut arr = GrowableArray::new();
    (0..10).for_each(|i| arr.push(i));

    let indexed = (0..arr.len()).filter_map(|i| arr.get(i).map(|value| (i, value)));
    for (i, value) in indexed {
        println!("arr[{}] = {}", i, value);
    }
}

3. Test Unsafe Code Thoroughly

#[cfg(test)]
mod tests {
    use super::*;

    /// Popping from an empty vector yields `None`; pushed values come back
    /// in last-in-first-out order.
    #[test]
    fn test_custom_vec_basic_operations() {
        let mut stack = CustomVec::new();

        // Nothing to pop yet
        assert_eq!(stack.pop(), None);

        // Push 1, 2, 3 ...
        for value in 1..=3 {
            stack.push(value);
        }

        // ... and expect 3, 2, 1 back, followed by None
        for expected in (1..=3).rev() {
            assert_eq!(stack.pop(), Some(expected));
        }
        assert_eq!(stack.pop(), None);
    }

    /// Pushing well past the initial capacity must preserve every element.
    #[test]
    fn test_growth() {
        let mut stack = CustomVec::new();

        // Forces several reallocations
        (0..100).for_each(|n| stack.push(n));

        // Elements come back in reverse insertion order
        let mut expected = 100;
        while expected > 0 {
            expected -= 1;
            assert_eq!(stack.pop(), Some(expected));
        }
    }

    // Use tools like Miri for additional safety testing
    // cargo +nightly miri test
}

Common Unsafe Patterns

1. Transmutation (Use Sparingly)

/// Reinterprets four bytes as a `u32`, first with `transmute` and then with
/// the equivalent safe conversion.
fn transmute_example() {
    // Safe transmutation between compatible types
    let bytes: [u8; 4] = [0x12, 0x34, 0x56, 0x78];
    // SAFETY: `[u8; 4]` and `u32` have the same size and every bit pattern
    // is a valid `u32`. The result uses the target's native byte order.
    let as_u32: u32 = unsafe { std::mem::transmute(bytes) };
    println!("Bytes as u32: 0x{:08X}", as_u32);

    // Prefer safer alternatives when possible.
    // `from_ne_bytes` (native endian) is the exact safe equivalent of the
    // transmute above; `from_le_bytes` would only match on little-endian
    // targets.
    let as_u32_safe = u32::from_ne_bytes(bytes);
    println!("Safe conversion: 0x{:08X}", as_u32_safe);
}

2. Union Types

/// Four bytes of storage viewable as an `i32`, an `f32`, or raw bytes.
///
/// All fields share the same memory, so writing one field changes what the
/// others read. Reading any field requires `unsafe` because the compiler
/// does not track which field was written last.
#[repr(C)]
union Value {
    int_val: i32,
    float_val: f32,
    bytes: [u8; 4],
}

/// Stores an integer in a `Value`, prints it through every field, then
/// stores a float and prints the resulting bytes.
fn union_example() {
    let mut val = Value { int_val: 42 };

    // SAFETY: every field is a 4-byte plain-data type for which any bit
    // pattern is valid, so reading a field other than the one last written
    // is sound here.
    let (as_int, as_float, as_bytes) = unsafe { (val.int_val, val.float_val, val.bytes) };
    println!("As int: {}", as_int);
    println!("As float: {}", as_float);
    println!("As bytes: {:?}", as_bytes);

    // Writing a `Copy` field of a union needs no `unsafe`; only reads do.
    val.float_val = 3.14;
    // SAFETY: as above — any four bytes form a valid `[u8; 4]`.
    let float_bytes = unsafe { val.bytes };
    println!("After setting float: {:?}", float_bytes);
}

3. Inline Assembly

/// Copies `input` into `output` with a single inline-assembly `mov`
/// instruction and prints both values.
///
/// NOTE(review): the `mov {}, {}` template is architecture-specific —
/// confirm which targets this snippet is meant to build for.
fn inline_assembly_example() {
    let input: u64 = 42;
    // Declared without a value: the asm block below assigns it.
    let output: u64;
    
    // Inline assembly always requires `unsafe`, because the compiler cannot
    // check what the instructions do.
    unsafe {
        std::arch::asm!(
            // Destination operand first, source second.
            "mov {}, {}",
            // First placeholder: a register the compiler picks; its value
            // is stored into `output` afterwards.
            out(reg) output,
            // Second placeholder: a register preloaded with `input`.
            in(reg) input,
        );
    }
    
    println!("Input: {}, Output: {}", input, output);
}

Best Practices Summary

Minimize Unsafe Code: Use unsafe only when necessary
Document Safety: Always document safety invariants and requirements
Provide Safe Abstractions: Wrap unsafe code in safe interfaces
Test Thoroughly: Use tools like Miri, AddressSanitizer, and Valgrind
Review Carefully: Have unsafe code reviewed by experienced developers
Use Established Libraries: Prefer well-tested crates for common unsafe patterns
Validate Inputs: Check preconditions even in unsafe code
Handle Errors: Don't assume operations will succeed

Unsafe Rust is a powerful tool for systems programming, but it requires careful reasoning about memory safety, data races, and other invariants that safe Rust normally guarantees. Always strive to minimize the scope of unsafe code and provide safe abstractions for your users.