
Memory Layout & Management

Understanding memory layout is crucial for systems programming in Rust. It lets you write efficient code, interface cleanly with other languages, and tune performance by controlling exactly how data is stored in memory.

Memory Model Fundamentals

Stack vs Heap

fn demonstrate_stack_heap() {
    // Stack allocated (fast, automatic cleanup)
    let stack_array = [1, 2, 3, 4, 5]; // 20 bytes on stack
    let stack_value = 42i32;           // 4 bytes on stack
    
    // Heap allocated (slower to allocate; freed automatically when the owning smart pointer is dropped)
    let heap_vector = vec![1, 2, 3, 4, 5]; // (ptr, len, capacity) on stack, elements on heap
    let heap_boxed = Box::new(42i32);       // 4 bytes on heap, pointer on stack
    
    println!("Stack array: {:p}", &stack_array);
    println!("Stack value: {:p}", &stack_value);
    println!("Heap vector control: {:p}", &heap_vector);
    println!("Heap vector data: {:p}", heap_vector.as_ptr());
    println!("Heap boxed pointer: {:p}", &heap_boxed);
    println!("Heap boxed value: {:p}", heap_boxed.as_ref());
}

// Stack frame visualization
fn stack_frame_example() {
    let a = 10u32;    // 4 bytes at some stack offset
    let b = 20u64;    // 8 bytes at next aligned position
    let c = [1u8; 3]; // 3 bytes (may have padding after)
    
    // Print addresses to see memory layout
    println!("Address of a (u32): {:p}", &a);
    println!("Address of b (u64): {:p}", &b);
    println!("Address of c (array): {:p}", &c);
    
    // Function call creates new stack frame
    inner_function();
    
    fn inner_function() {
        let local = 42i16; // 2 bytes in new stack frame
        println!("Address of local in inner: {:p}", &local);
    }
}

fn main() {
    demonstrate_stack_heap();
    stack_frame_example();
}

Memory Alignment and Padding

use std::mem;

#[repr(C)]
struct UnoptimizedStruct {
    a: u8,   // 1 byte
    b: u64,  // 8 bytes, needs 8-byte alignment
    c: u16,  // 2 bytes
    d: u8,   // 1 byte
}

#[repr(C)]
struct OptimizedStruct {
    b: u64,  // 8 bytes, largest first
    c: u16,  // 2 bytes
    a: u8,   // 1 byte
    d: u8,   // 1 byte
}

fn demonstrate_alignment() {
    println!("Unoptimized struct:");
    println!("  Size: {} bytes", mem::size_of::<UnoptimizedStruct>());
    println!("  Alignment: {} bytes", mem::align_of::<UnoptimizedStruct>());
    
    println!("Optimized struct:");
    println!("  Size: {} bytes", mem::size_of::<OptimizedStruct>());
    println!("  Alignment: {} bytes", mem::align_of::<OptimizedStruct>());
    
    // Create instances and examine field positions
    let unopt = UnoptimizedStruct { a: 1, b: 2, c: 3, d: 4 };
    let opt = OptimizedStruct { a: 1, b: 2, c: 3, d: 4 };
    
    println!("\nUnoptimized field addresses:");
    println!("  a: {:p} (offset: {})", &unopt.a, offset_of_a_unopt());
    println!("  b: {:p} (offset: {})", &unopt.b, offset_of_b_unopt());
    println!("  c: {:p} (offset: {})", &unopt.c, offset_of_c_unopt());
    println!("  d: {:p} (offset: {})", &unopt.d, offset_of_d_unopt());
    
    println!("\nOptimized field addresses:");
    println!("  b: {:p} (offset: {})", &opt.b, offset_of_b_opt());
    println!("  c: {:p} (offset: {})", &opt.c, offset_of_c_opt());
    println!("  a: {:p} (offset: {})", &opt.a, offset_of_a_opt());
    println!("  d: {:p} (offset: {})", &opt.d, offset_of_d_opt());
}

// Helper functions to calculate field offsets
fn offset_of_a_unopt() -> usize { memoffset::offset_of!(UnoptimizedStruct, a) }
fn offset_of_b_unopt() -> usize { memoffset::offset_of!(UnoptimizedStruct, b) }
fn offset_of_c_unopt() -> usize { memoffset::offset_of!(UnoptimizedStruct, c) }
fn offset_of_d_unopt() -> usize { memoffset::offset_of!(UnoptimizedStruct, d) }

fn offset_of_a_opt() -> usize { memoffset::offset_of!(OptimizedStruct, a) }
fn offset_of_b_opt() -> usize { memoffset::offset_of!(OptimizedStruct, b) }
fn offset_of_c_opt() -> usize { memoffset::offset_of!(OptimizedStruct, c) }
fn offset_of_d_opt() -> usize { memoffset::offset_of!(OptimizedStruct, d) }

Representation Attributes

#[repr(C)] - C-Compatible Layout

// C-compatible struct layout
#[repr(C)]
struct CCompatible {
    x: u32,
    y: u64,
    z: u16,
}

// Equivalent C struct:
// struct CCompatible {
//     uint32_t x;
//     uint64_t y;
//     uint16_t z;
// };

#[repr(C)]
union CUnion {
    as_int: i32,
    as_float: f32,
    as_bytes: [u8; 4],
}

#[repr(C)]
enum CEnum {
    Variant1,
    Variant2,
    Variant3,
}

fn c_layout_example() {
    let c_struct = CCompatible { x: 1, y: 2, z: 3 };
    let c_union = CUnion { as_int: 42 };
    let c_enum = CEnum::Variant2;
    
    println!("C-compatible struct size: {}", mem::size_of::<CCompatible>());
    println!("C-compatible union size: {}", mem::size_of::<CUnion>());
    println!("C-compatible enum size: {}", mem::size_of::<CEnum>());
    
    unsafe {
        println!("Union as int: {}", c_union.as_int);
        println!("Union as float: {}", c_union.as_float);
        println!("Union as bytes: {:?}", c_union.as_bytes);
    }
}

#[repr(packed)] - Removing Padding

#[repr(packed)]
struct PackedStruct {
    a: u8,
    b: u64,
    c: u16,
    d: u8,
}

#[repr(packed(2))] // Cap field alignment at 2 bytes
struct PackedWithAlignment {
    a: u8,
    b: u64,
    c: u16,
    d: u8,
}

fn packed_layout_example() {
    println!("Regular struct size: {}", mem::size_of::<UnoptimizedStruct>());
    println!("Packed struct size: {}", mem::size_of::<PackedStruct>());
    println!("Packed(2) struct size: {}", mem::size_of::<PackedWithAlignment>());
    
    let packed = PackedStruct { a: 1, b: 2, c: 3, d: 4 };
    
    // Recent compilers reject references to misaligned packed fields (formerly UB),
    // and println! takes its arguments by reference, so copy the fields out first
    let (a, c, d) = (packed.a, packed.c, packed.d);
    println!("Packed field values: a={}, c={}, d={}", a, c, d);
    
    // The same applies to the u64 field: copy the value, then use the copy
    let b_value = packed.b;
    println!("Packed b value: {}", b_value);
}
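
When you genuinely need the address of a packed field, a raw pointer avoids creating an unaligned reference. A minimal sketch using std::ptr::addr_of! and read_unaligned with the PackedStruct above:

fn read_packed_field_via_raw_pointer() {
    let packed = PackedStruct { a: 1, b: 2, c: 3, d: 4 };
    
    // addr_of! yields a raw pointer without ever creating an intermediate
    // (possibly misaligned) reference; read_unaligned then copies the value
    // out regardless of its alignment
    let b_ptr = std::ptr::addr_of!(packed.b);
    let b_value = unsafe { b_ptr.read_unaligned() };
    println!("Packed b via raw pointer: {}", b_value);
}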

#[repr(transparent)] - Zero-Cost Wrappers

#[repr(transparent)]
struct Transparent(u64);

#[repr(transparent)]
struct NewType {
    inner: i32,
}

fn transparent_example() {
    // Transparent types have same layout as their inner type
    println!("u64 size: {}", mem::size_of::<u64>());
    println!("Transparent size: {}", mem::size_of::<Transparent>());
    println!("NewType size: {}", mem::size_of::<NewType>());
    
    let transparent = Transparent(42);
    let newtype = NewType { inner: 100 };
    
    // repr(transparent) guarantees the same layout and ABI as the inner type,
    // so these transmutes are sound (transmute itself is still an unsafe operation)
    let as_u64: u64 = unsafe { mem::transmute(transparent) };
    let as_i32: i32 = unsafe { mem::transmute(newtype) };
    
    println!("Transmuted values: {}, {}", as_u64, as_i32);
}
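
A common payoff is FFI: because the wrapper shares its ABI with the inner type, it can appear directly in an extern "C" signature. A small sketch reusing Transparent (the C function declared here is hypothetical, shown only for illustration):

// The foreign function sees a plain u64; Rust code keeps the newtype's type safety
extern "C" {
    fn process_value(value: Transparent) -> Transparent;
}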

#[repr(align(n))] - Custom Alignment

#[repr(align(16))]
struct Aligned16 {
    data: [u8; 16],
}

#[repr(align(64))] // Cache line alignment
struct CacheLineAligned {
    data: [u8; 64],
}

#[repr(C, align(8))]
struct CCompatibleAligned {
    a: u32,
    b: u32,
}

fn alignment_example() {
    println!("Aligned16 alignment: {}", mem::align_of::<Aligned16>());
    println!("CacheLineAligned alignment: {}", mem::align_of::<CacheLineAligned>());
    
    let aligned = Aligned16 { data: [0; 16] };
    let cache_aligned = CacheLineAligned { data: [0; 64] };
    
    println!("Aligned16 address: {:p}", &aligned);
    println!("CacheLineAligned address: {:p}", &cache_aligned);
    
    // Check alignment
    assert_eq!(aligned.data.as_ptr() as usize % 16, 0);
    assert_eq!(cache_aligned.data.as_ptr() as usize % 64, 0);
}

Enum Memory Layout

Discriminant and Optimization

// Simple enum with discriminant
#[repr(u8)]
enum SimpleEnum {
    A = 10,
    B = 20,
    C = 30,
}

// Enum with data
enum DataEnum {
    Nothing,
    Single(u32),
    Pair(u32, u32),
    Triple(u32, u32, u32),
}

// Option-like enum (null pointer optimization)
enum OptimizedOption<T> {
    Some(Box<T>),
    None,
}

fn enum_layout_example() {
    println!("SimpleEnum size: {}", mem::size_of::<SimpleEnum>());
    println!("DataEnum size: {}", mem::size_of::<DataEnum>());
    println!("Option<Box<u32>> size: {}", mem::size_of::<Option<Box<u32>>>());
    println!("OptimizedOption<u32> size: {}", mem::size_of::<OptimizedOption<u32>>());
    
    // Demonstrate discriminant values
    let simple_a = SimpleEnum::A;
    let simple_b = SimpleEnum::B;
    
    println!("SimpleEnum::A discriminant: {}", simple_a as u8);
    println!("SimpleEnum::B discriminant: {}", simple_b as u8);
    
    // Null pointer optimization in action
    let some_box: Option<Box<u32>> = Some(Box::new(42));
    let none_box: Option<Box<u32>> = None;
    
    println!("Some(Box<u32>) pointer: {:p}", some_box.as_ref().unwrap().as_ref());
    // None is represented as null pointer, no extra discriminant needed
}
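
The same niche optimization applies beyond pointers: the NonZero integer wrappers in std::num reserve zero as the None representation. A small check:

use std::num::NonZeroU32;

fn niche_optimization_sizes() {
    // Zero is forbidden for NonZeroU32, so Option can use it to encode None
    assert_eq!(mem::size_of::<Option<NonZeroU32>>(), mem::size_of::<u32>());
    // References are never null, so Option<&T> stays pointer-sized
    assert_eq!(mem::size_of::<Option<&u32>>(), mem::size_of::<&u32>());
    println!("Option<NonZeroU32>: {} bytes", mem::size_of::<Option<NonZeroU32>>());
}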

// Custom discriminant layout
#[repr(C)]
enum CustomLayout {
    Variant1 { x: u32, y: u32 },
    Variant2 { z: u64 },
    Variant3,
}

fn custom_enum_layout() {
    println!("CustomLayout size: {}", mem::size_of::<CustomLayout>());
    
    let v1 = CustomLayout::Variant1 { x: 1, y: 2 };
    let v2 = CustomLayout::Variant2 { z: 3 };
    let v3 = CustomLayout::Variant3;
    
    // Use mem::discriminant to get type-safe discriminant
    println!("V1 discriminant: {:?}", mem::discriminant(&v1));
    println!("V2 discriminant: {:?}", mem::discriminant(&v2));
    println!("V3 discriminant: {:?}", mem::discriminant(&v3));
}

Dynamic Memory Management

Custom Allocators

use std::alloc::{GlobalAlloc, Layout, System};
use std::sync::atomic::{AtomicUsize, Ordering};

// Tracking allocator wrapper
struct TrackingAllocator {
    inner: System,
    allocated: AtomicUsize,
    deallocated: AtomicUsize,
}

impl TrackingAllocator {
    const fn new() -> Self {
        TrackingAllocator {
            inner: System,
            allocated: AtomicUsize::new(0),
            deallocated: AtomicUsize::new(0),
        }
    }
    
    fn bytes_allocated(&self) -> usize {
        self.allocated.load(Ordering::Relaxed)
    }
    
    fn bytes_deallocated(&self) -> usize {
        self.deallocated.load(Ordering::Relaxed)
    }
    
    fn current_usage(&self) -> usize {
        self.bytes_allocated() - self.bytes_deallocated()
    }
}

unsafe impl GlobalAlloc for TrackingAllocator {
    unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
        let ptr = self.inner.alloc(layout);
        if !ptr.is_null() {
            self.allocated.fetch_add(layout.size(), Ordering::Relaxed);
        }
        ptr
    }
    
    unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
        self.inner.dealloc(ptr, layout);
        self.deallocated.fetch_add(layout.size(), Ordering::Relaxed);
    }
}

#[global_allocator]
static TRACKING_ALLOCATOR: TrackingAllocator = TrackingAllocator::new();

fn allocation_tracking_example() {
    println!("Initial usage: {} bytes", TRACKING_ALLOCATOR.current_usage());
    
    {
        let vec: Vec<u64> = (0..1000).collect();
        println!("After allocating Vec<u64>: {} bytes", TRACKING_ALLOCATOR.current_usage());
        
        let string = "Hello, ".repeat(100);
        println!("After allocating String: {} bytes", TRACKING_ALLOCATOR.current_usage());
    } // vec and string dropped here
    
    println!("After dropping: {} bytes", TRACKING_ALLOCATOR.current_usage());
    println!("Total allocated: {} bytes", TRACKING_ALLOCATOR.bytes_allocated());
    println!("Total deallocated: {} bytes", TRACKING_ALLOCATOR.bytes_deallocated());
}

Arena Allocation

use std::cell::Cell;
use std::marker::PhantomData;
use std::ptr::NonNull;

// Simple arena (bump) allocator
struct Arena {
    data: Vec<u8>,    // backing storage; never grown or reallocated
    base: *mut u8,    // base pointer into `data`, captured once at construction
    head: Cell<usize>,
}

impl Arena {
    fn new(capacity: usize) -> Self {
        let mut data = vec![0u8; capacity];
        let base = data.as_mut_ptr();
        Arena {
            data,
            base,
            head: Cell::new(0),
        }
    }
    
    fn allocate<T>(&self, value: T) -> Option<&mut T> {
        let layout = Layout::new::<T>();
        self.allocate_layout(layout).map(|ptr| unsafe {
            let typed_ptr = ptr.as_ptr() as *mut T;
            std::ptr::write(typed_ptr, value);
            &mut *typed_ptr
        })
    }
    
    fn allocate_layout(&self, layout: Layout) -> Option<NonNull<u8>> {
        let current = self.head.get();
        let aligned_start = align_up(current, layout.align());
        let end = aligned_start + layout.size();
        
        if end <= self.data.len() {
            self.head.set(end);
            unsafe {
                Some(NonNull::new_unchecked(
                    self.data.as_ptr().add(aligned_start) as *mut u8
                ))
            }
        } else {
            None
        }
    }
    
    fn reset(&self) {
        self.head.set(0);
    }
    
    fn bytes_used(&self) -> usize {
        self.head.get()
    }
}

// Round `addr` up to the next multiple of `align` (`align` must be a power of two)
fn align_up(addr: usize, align: usize) -> usize {
    (addr + align - 1) & !(align - 1)
}

// Type-safe arena wrapper
struct TypedArena<T> {
    arena: Arena,
    _phantom: PhantomData<T>,
}

impl<T> TypedArena<T> {
    fn new(capacity: usize) -> Self {
        let bytes_needed = capacity * mem::size_of::<T>();
        TypedArena {
            arena: Arena::new(bytes_needed),
            _phantom: PhantomData,
        }
    }
    
    fn alloc(&self, value: T) -> Option<&mut T> {
        self.arena.allocate(value)
    }
}

fn arena_example() {
    let arena = Arena::new(1024);
    
    // Allocate values of various types (their destructors will never run)
    let int_ref = arena.allocate(42i32).unwrap();
    let string_ref = arena.allocate("Hello".to_string()).unwrap();
    let array_ref = arena.allocate([1u8; 100]).unwrap();
    
    println!("Allocated integer: {}", *int_ref);
    println!("Allocated string: {}", string_ref);
    println!("Array first element: {}", array_ref[0]);
    println!("Arena bytes used: {}", arena.bytes_used());
    
    // Type-safe arena
    let typed_arena = TypedArena::<u64>::new(100);
    let value1 = typed_arena.alloc(12345).unwrap();
    let value2 = typed_arena.alloc(67890).unwrap();
    
    println!("Typed arena values: {}, {}", value1, value2);
}

Memory-Mapped Files and Shared Memory

Memory-Mapped I/O

use std::fs::OpenOptions;
use std::io::Write;
use memmap2::MmapOptions; // external crate: add `memmap2` to Cargo.toml

fn create_test_file() -> std::io::Result<()> {
    let mut file = std::fs::File::create("test_data.bin")?;
    // Write some test data
    for i in 0..1000u32 {
        file.write_all(&i.to_le_bytes())?;
    }
    Ok(())
}

fn memory_mapped_example() -> Result<(), Box<dyn std::error::Error>> {
    // Create test file
    create_test_file()?;
    
    // Read-only memory mapping
    let file = std::fs::File::open("test_data.bin")?;
    let mmap = unsafe { MmapOptions::new().map(&file)? };
    
    // Read data directly from memory
    let data_slice = unsafe {
        std::slice::from_raw_parts(
            mmap.as_ptr() as *const u32,
            mmap.len() / 4,
        )
    };
    
    println!("First 10 values: {:?}", &data_slice[..10]);
    println!("Memory-mapped file size: {} bytes", mmap.len());
    
    // Writable memory mapping
    let file = OpenOptions::new()
        .read(true)
        .write(true)
        .open("test_data.bin")?;
    
    let mut mmap_mut = unsafe { MmapOptions::new().map_mut(&file)? };
    
    // Modify data directly in memory
    let data_slice_mut = unsafe {
        std::slice::from_raw_parts_mut(
            mmap_mut.as_mut_ptr() as *mut u32,
            mmap_mut.len() / 4,
        )
    };
    
    // Double all values
    for value in data_slice_mut.iter_mut() {
        *value *= 2;
    }
    
    // Flush changes to disk
    mmap_mut.flush()?;
    
    println!("Modified first 10 values: {:?}", &data_slice_mut[..10]);
    
    // Clean up
    std::fs::remove_file("test_data.bin").ok();
    
    Ok(())
}

Shared Memory (Unix)

use std::ffi::CString;
use std::os::raw::{c_char, c_int, c_void};

// Simple shared memory wrapper (Unix-specific)
#[cfg(unix)]
mod shared_memory {
    use super::*;
    
    extern "C" {
        fn shm_open(name: *const i8, oflag: c_int, mode: u32) -> c_int;
        fn shm_unlink(name: *const i8) -> c_int;
        fn ftruncate(fd: c_int, length: i64) -> c_int;
        fn mmap(
            addr: *mut c_void,
            len: usize,
            prot: c_int,
            flags: c_int,
            fd: c_int,
            offset: i64,
        ) -> *mut c_void;
        fn munmap(addr: *mut c_void, len: usize) -> c_int;
    }
    
    // Values below match Linux; the constants differ on other Unix systems
    const O_CREAT: c_int = 0o100;
    const O_RDWR: c_int = 0o2;
    const PROT_READ: c_int = 1;
    const PROT_WRITE: c_int = 2;
    const MAP_SHARED: c_int = 1;
    const MAP_FAILED: *mut c_void = !0 as *mut c_void;
    
    pub struct SharedMemory {
        name: CString,
        ptr: *mut u8,
        size: usize,
        fd: c_int,
    }
    
    impl SharedMemory {
        pub fn create(name: &str, size: usize) -> Result<Self, &'static str> {
            let c_name = CString::new(name).map_err(|_| "Invalid name")?;
            
            unsafe {
                let fd = shm_open(
                    c_name.as_ptr(),
                    O_CREAT | O_RDWR,
                    0o666,
                );
                
                if fd == -1 {
                    return Err("Failed to create shared memory");
                }
                
                if ftruncate(fd, size as i64) == -1 {
                    return Err("Failed to resize shared memory");
                }
                
                let ptr = mmap(
                    std::ptr::null_mut(),
                    size,
                    PROT_READ | PROT_WRITE,
                    MAP_SHARED,
                    fd,
                    0,
                );
                
                if ptr == MAP_FAILED {
                    return Err("Failed to map shared memory");
                }
                
                Ok(SharedMemory {
                    name: c_name,
                    ptr: ptr as *mut u8,
                    size,
                    fd,
                })
            }
        }
        
        pub fn as_slice(&self) -> &[u8] {
            unsafe { std::slice::from_raw_parts(self.ptr, self.size) }
        }
        
        pub fn as_mut_slice(&mut self) -> &mut [u8] {
            unsafe { std::slice::from_raw_parts_mut(self.ptr, self.size) }
        }
        
        pub fn write_data<T>(&mut self, data: &T) -> Result<(), &'static str> {
            if mem::size_of::<T>() > self.size {
                return Err("Data too large for shared memory");
            }
            
            unsafe {
                std::ptr::copy_nonoverlapping(
                    data as *const T as *const u8,
                    self.ptr,
                    mem::size_of::<T>(),
                );
            }
            
            Ok(())
        }
        
        pub fn read_data<T>(&self) -> Result<T, &'static str> {
            if mem::size_of::<T>() > self.size {
                return Err("Data too large for shared memory");
            }
            
            unsafe {
                let mut data = mem::MaybeUninit::<T>::uninit();
                std::ptr::copy_nonoverlapping(
                    self.ptr,
                    data.as_mut_ptr() as *mut u8,
                    mem::size_of::<T>(),
                );
                Ok(data.assume_init())
            }
        }
    }
    
    impl Drop for SharedMemory {
        fn drop(&mut self) {
            unsafe {
                munmap(self.ptr as *mut c_void, self.size);
                close(self.fd);
                shm_unlink(self.name.as_ptr());
            }
        }
    }
}

#[cfg(unix)]
fn shared_memory_example() {
    use shared_memory::SharedMemory;
    
    #[derive(Debug, Clone, Copy)]
    #[repr(C)]
    struct SharedData {
        counter: u64,
        values: [f64; 10],
    }
    
    let mut shm = SharedMemory::create("/test_shm", 1024)
        .expect("Failed to create shared memory");
    
    let data = SharedData {
        counter: 42,
        values: [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0],
    };
    
    shm.write_data(&data).expect("Failed to write data");
    
    let read_data: SharedData = shm.read_data().expect("Failed to read data");
    println!("Read from shared memory: {:?}", read_data);
}

#[cfg(not(unix))]
fn shared_memory_example() {
    println!("Shared memory example only available on Unix systems");
}

Cache-Friendly Data Structures

Data Structure Layout Optimization

// Array of Structures (AoS) - poor cache locality for partial access
#[derive(Debug)]
struct ParticleAoS {
    x: f32,
    y: f32,
    z: f32,
    vx: f32,
    vy: f32,
    vz: f32,
    mass: f32,
    charge: f32,
}

// Structure of Arrays (SoA) - better cache locality
#[derive(Debug)]
struct ParticlesSoA {
    x: Vec<f32>,
    y: Vec<f32>,
    z: Vec<f32>,
    vx: Vec<f32>,
    vy: Vec<f32>,
    vz: Vec<f32>,
    mass: Vec<f32>,
    charge: Vec<f32>,
    count: usize,
}

impl ParticlesSoA {
    fn new(capacity: usize) -> Self {
        ParticlesSoA {
            x: Vec::with_capacity(capacity),
            y: Vec::with_capacity(capacity),
            z: Vec::with_capacity(capacity),
            vx: Vec::with_capacity(capacity),
            vy: Vec::with_capacity(capacity),
            vz: Vec::with_capacity(capacity),
            mass: Vec::with_capacity(capacity),
            charge: Vec::with_capacity(capacity),
            count: 0,
        }
    }
    
    fn add_particle(&mut self, x: f32, y: f32, z: f32, vx: f32, vy: f32, vz: f32, mass: f32, charge: f32) {
        self.x.push(x);
        self.y.push(y);
        self.z.push(z);
        self.vx.push(vx);
        self.vy.push(vy);
        self.vz.push(vz);
        self.mass.push(mass);
        self.charge.push(charge);
        self.count += 1;
    }
    
    // Cache-friendly: only access position data
    fn update_positions(&mut self, dt: f32) {
        for i in 0..self.count {
            self.x[i] += self.vx[i] * dt;
            self.y[i] += self.vy[i] * dt;
            self.z[i] += self.vz[i] * dt;
        }
    }
}

// Hot/Cold data separation
struct HotColdData {
    // Hot data: frequently accessed
    position: [f32; 3],
    velocity: [f32; 3],
    
    // Cold data: rarely accessed, kept separate
    metadata: Box<ParticleMetadata>,
}

struct ParticleMetadata {
    id: u64,
    name: String,
    creation_time: std::time::SystemTime,
    properties: std::collections::HashMap<String, f64>,
}

fn cache_optimization_example() {
    println!("Memory layout sizes:");
    println!("ParticleAoS: {} bytes", mem::size_of::<ParticleAoS>());
    println!("HotColdData: {} bytes", mem::size_of::<HotColdData>());
    
    // SoA demonstration
    let mut particles = ParticlesSoA::new(1000);
    
    // Add some particles
    for i in 0..1000 {
        particles.add_particle(
            i as f32, i as f32, i as f32,  // position
            1.0, 1.0, 1.0,                 // velocity
            1.0, 1.0,                      // mass, charge
        );
    }
    
    // Update positions (cache-friendly operation)
    particles.update_positions(0.1);
    
    println!("Updated {} particles", particles.count);
}

Memory Pool with Cache Alignment

use std::alloc::{alloc_zeroed, dealloc, Layout};

// Cache-line aligned memory pool
struct AlignedPool<T> {
    memory: *mut T,
    capacity: usize,
    used: usize,
    alignment: usize,
}

impl<T> AlignedPool<T> {
    fn new(capacity: usize, alignment: usize) -> Self {
        // Never request less alignment than T itself needs
        let alignment = alignment.max(mem::align_of::<T>());
        let layout = Layout::from_size_align(
            capacity * mem::size_of::<T>(),
            alignment,
        ).expect("Invalid layout");
        
        let memory = unsafe { alloc_zeroed(layout) as *mut T };
        if memory.is_null() {
            panic!("Allocation failed");
        }
        
        AlignedPool {
            memory,
            capacity,
            used: 0,
            alignment,
        }
    }
    
    fn allocate(&mut self) -> Option<&mut T> {
        if self.used < self.capacity {
            unsafe {
                let ptr = self.memory.add(self.used);
                self.used += 1;
                Some(&mut *ptr)
            }
        } else {
            None
        }
    }
    
    fn reset(&mut self) {
        self.used = 0;
    }
    
    fn is_cache_aligned(&self) -> bool {
        (self.memory as usize) % self.alignment == 0
    }
}

impl<T> Drop for AlignedPool<T> {
    fn drop(&mut self) {
        let layout = Layout::from_size_align(
            self.capacity * mem::size_of::<T>(),
            self.alignment,
        ).unwrap();
        
        unsafe {
            dealloc(self.memory as *mut u8, layout);
        }
    }
}

fn aligned_pool_example() {
    // Create pool aligned to cache line (64 bytes)
    let mut pool = AlignedPool::<u64>::new(100, 64);
    
    println!("Pool is cache aligned: {}", pool.is_cache_aligned());
    println!("Pool memory address: {:p}", pool.memory);
    
    // Allocate some values
    for i in 0..10 {
        if let Some(slot) = pool.allocate() {
            *slot = i * i;
        }
    }
    
    println!("Allocated {} items", pool.used);
}

Low-Level Memory Operations

Raw Memory Manipulation

use std::ptr;

fn raw_memory_operations() {
    // Allocate raw memory
    let layout = Layout::from_size_align(1024, 8).unwrap();
    let ptr = unsafe { alloc_zeroed(layout) };
    
    if ptr.is_null() {
        panic!("Allocation failed");
    }
    
    // Write pattern to memory
    unsafe {
        for i in 0..256 {
            ptr::write(ptr.add(i * 4) as *mut u32, i as u32);
        }
    }
    
    // Read back and verify
    unsafe {
        for i in 0..10 {
            let value = ptr::read(ptr.add(i * 4) as *const u32);
            println!("Memory[{}] = {}", i, value);
        }
    }
    
    // Copy memory regions
    unsafe {
        // Copy first 64 bytes to offset 512
        ptr::copy_nonoverlapping(ptr, ptr.add(512), 64);
        
        // Overlapping copy (safe with copy)
        ptr::copy(ptr.add(100), ptr.add(150), 100);
    }
    
    // Fill memory with pattern
    unsafe {
        ptr::write_bytes(ptr.add(800), 0xAB, 100);
    }
    
    // Compare memory regions
    unsafe {
        let original = std::slice::from_raw_parts(ptr, 64);
        let copy = std::slice::from_raw_parts(ptr.add(512), 64);
        println!("Memory regions match: {}", original == copy);
    }
    
    // Clean up
    unsafe {
        dealloc(ptr, layout);
    }
}

// Atomic memory operations
use std::sync::atomic::{AtomicUsize, Ordering};

fn atomic_memory_example() {
    let atomic_counter = AtomicUsize::new(0);
    
    // Different ordering guarantees
    let relaxed = atomic_counter.load(Ordering::Relaxed);
    let acquire = atomic_counter.load(Ordering::Acquire);
    let seq_cst = atomic_counter.load(Ordering::SeqCst);
    
    println!("Atomic loads: relaxed={}, acquire={}, seq_cst={}", relaxed, acquire, seq_cst);
    
    // Atomic operations
    let old_value = atomic_counter.fetch_add(10, Ordering::SeqCst);
    let new_value = atomic_counter.load(Ordering::SeqCst);
    
    println!("Atomic add: old={}, new={}", old_value, new_value);
    
    // Compare and swap
    let expected = new_value;
    match atomic_counter.compare_exchange_weak(expected, 100, Ordering::SeqCst, Ordering::Relaxed) {
        Ok(previous) => println!("CAS succeeded: previous={}", previous),
        Err(current) => println!("CAS failed: current={}", current),
    }
}

Memory Protection and Virtual Memory

// Memory protection example (Unix-specific)
#[cfg(unix)]
mod memory_protection {
    use std::alloc::{alloc_zeroed, dealloc, Layout};
    use std::ptr;
    
    extern "C" {
        fn mprotect(addr: *mut std::ffi::c_void, len: usize, prot: i32) -> i32;
        fn mlock(addr: *const std::ffi::c_void, len: usize) -> i32;
        fn munlock(addr: *const std::ffi::c_void, len: usize) -> i32;
    }
    
    const PROT_READ: i32 = 1;
    const PROT_WRITE: i32 = 2;
    const PROT_EXEC: i32 = 4;
    const PROT_NONE: i32 = 0;
    
    pub fn demonstrate_memory_protection() {
        let layout = Layout::from_size_align(4096, 4096).unwrap(); // Page-aligned
        let ptr = unsafe { alloc_zeroed(layout) };
        
        if ptr.is_null() {
            return;
        }
        
        // Write some data
        unsafe {
            ptr::write(ptr as *mut u64, 0x1234567890ABCDEF);
            println!("Written data: 0x{:X}", ptr::read(ptr as *const u64));
        }
        
        // Make memory read-only
        unsafe {
            if mprotect(ptr as *mut std::ffi::c_void, layout.size(), PROT_READ) == 0 {
                println!("Memory is now read-only");
                
                // Reading still works
                let value = ptr::read(ptr as *const u64);
                println!("Read value: 0x{:X}", value);
                
                // Writing would cause segfault (don't do this!)
                // ptr::write(ptr as *mut u64, 0xDEADBEEF);
            }
        }
        
        // Lock memory (prevent swapping)
        unsafe {
            if mlock(ptr as *const std::ffi::c_void, layout.size()) == 0 {
                println!("Memory locked in RAM");
                
                // Unlock when done
                munlock(ptr as *const std::ffi::c_void, layout.size());
            }
        }
        
        // Make writable again before deallocation
        unsafe {
            mprotect(ptr as *mut std::ffi::c_void, layout.size(), PROT_READ | PROT_WRITE);
            dealloc(ptr, layout);
        }
    }
}

#[cfg(unix)]
fn memory_protection_example() {
    memory_protection::demonstrate_memory_protection();
}

#[cfg(not(unix))]
fn memory_protection_example() {
    println!("Memory protection example only available on Unix systems");
}

Performance Considerations

Memory Access Patterns

// Demonstrate different access patterns
fn access_pattern_benchmark() {
    const SIZE: usize = 1024 * 1024; // 1M elements
    let data = vec![1u32; SIZE];
    
    let start = std::time::Instant::now();
    
    // Sequential access (cache-friendly)
    let mut sum = 0u64;
    for i in 0..SIZE {
        sum += data[i] as u64;
    }
    
    let sequential_time = start.elapsed();
    println!("Sequential sum: {}, time: {:?}", sum, sequential_time);
    
    // Random access (cache-unfriendly)
    let start = std::time::Instant::now();
    let mut sum = 0u64;
    for i in 0..SIZE {
        let index = (i * 997) % SIZE; // Pseudo-random pattern
        sum += data[index] as u64;
    }
    
    let random_time = start.elapsed();
    println!("Random sum: {}, time: {:?}", sum, random_time);
    println!("Random access is {:.2}x slower", 
             random_time.as_secs_f64() / sequential_time.as_secs_f64());
}

// Cache-friendly matrix operations
fn cache_friendly_matrix() {
    const N: usize = 512;
    let mut matrix = vec![vec![1.0f64; N]; N];
    
    let start = std::time::Instant::now();
    
    // Row-major traversal (cache-friendly)
    for i in 0..N {
        for j in 0..N {
            matrix[i][j] *= 2.0;
        }
    }
    
    let row_major_time = start.elapsed();
    
    let start = std::time::Instant::now();
    
    // Column-major traversal (cache-unfriendly)
    for j in 0..N {
        for i in 0..N {
            matrix[i][j] *= 2.0;
        }
    }
    
    let col_major_time = start.elapsed();
    
    println!("Row-major time: {:?}", row_major_time);
    println!("Column-major time: {:?}", col_major_time);
    println!("Column-major is {:.2}x slower", 
             col_major_time.as_secs_f64() / row_major_time.as_secs_f64());
}

Best Practices Summary

  1. Understand Memory Layout: Know how your data is arranged in memory
  2. Minimize Padding: For #[repr(C)] types, order struct fields by size (largest first); the default Rust representation may reorder fields for you (see the sketch after this list)
  3. Use Appropriate repr: Choose the right representation for your use case
  4. Cache-Friendly Access: Access memory sequentially when possible
  5. Separate Hot/Cold Data: Keep frequently used data together
  6. Consider SoA vs AoS: Structure of Arrays for better cache locality
  7. Align Critical Data: Use alignment annotations for performance-critical structures
  8. Memory Pool for Frequent Allocations: Reduce allocation overhead
  9. Profile Memory Usage: Measure actual memory consumption and access patterns
  10. Use Safe Abstractions: Wrap unsafe memory operations in safe interfaces
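
On point 2: the default Rust representation is free to reorder fields on its own, so manual largest-first ordering mainly matters for #[repr(C)] types such as UnoptimizedStruct above. A minimal sketch; the size printed for the default representation is what current compilers typically produce, not a language guarantee:

// Same fields as UnoptimizedStruct, but with the default (Rust) representation,
// which may reorder fields to reduce padding
struct DefaultRepr {
    a: u8,
    b: u64,
    c: u16,
    d: u8,
}

fn field_order_comparison() {
    // Typically prints 16 bytes here versus 24 bytes for the #[repr(C)] UnoptimizedStruct
    println!("DefaultRepr size: {} bytes", mem::size_of::<DefaultRepr>());
    println!("UnoptimizedStruct size: {} bytes", mem::size_of::<UnoptimizedStruct>());
}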

Memory layout optimization is crucial for performance in systems programming. Understanding how data is stored and accessed allows you to make informed decisions about data structure design and algorithm implementation. Always measure the impact of optimizations to ensure they provide real benefits in your specific use case.