//! Benchmarks that use Callgrind (via `iai_callgrind`) to report instruction count metrics. This //! is stable enough to be tested in CI. use std::hint::black_box; use std::{ops, slice}; use compiler_builtins::mem::{memcmp, memcpy, memmove, memset}; use iai_callgrind::{library_benchmark, library_benchmark_group, main}; const PAGE_SIZE: usize = 0x1000; // 4 kiB const MAX_ALIGN: usize = 512; // assume we may use avx512 operations one day const MEG1: usize = 1 << 20; // 1 MiB #[derive(Clone)] #[repr(C, align(0x1000))] struct Page([u8; PAGE_SIZE]); /// A buffer that is page-aligned by default, with an optional offset to create a /// misalignment. struct AlignedSlice { buf: Box<[Page]>, len: usize, offset: usize, } impl AlignedSlice { /// Allocate a slice aligned to ALIGN with at least `len` items, with `offset` from /// page alignment. fn new_zeroed(len: usize, offset: usize) -> Self { assert!(offset < PAGE_SIZE); let total_len = len + offset; let items = (total_len / PAGE_SIZE) + if total_len % PAGE_SIZE > 0 { 1 } else { 0 }; let buf = vec![Page([0u8; PAGE_SIZE]); items].into_boxed_slice(); AlignedSlice { buf, len, offset } } } impl ops::Deref for AlignedSlice { type Target = [u8]; fn deref(&self) -> &Self::Target { unsafe { slice::from_raw_parts(self.buf.as_ptr().cast::().add(self.offset), self.len) } } } impl ops::DerefMut for AlignedSlice { fn deref_mut(&mut self) -> &mut Self::Target { unsafe { slice::from_raw_parts_mut( self.buf.as_mut_ptr().cast::().add(self.offset), self.len, ) } } } mod mcpy { use super::*; struct Cfg { len: usize, s_off: usize, d_off: usize, } fn setup(cfg: Cfg) -> (usize, AlignedSlice, AlignedSlice) { let Cfg { len, s_off, d_off } = cfg; println!("bytes: {len} bytes, src offset: {s_off}, dst offset: {d_off}"); let mut src = AlignedSlice::new_zeroed(len, s_off); let dst = AlignedSlice::new_zeroed(len, d_off); src.fill(1); (len, src, dst) } #[library_benchmark] #[benches::aligned( // Both aligned args = [ Cfg { len: 16, s_off: 0, d_off: 0 }, Cfg { len: 32, s_off: 0, d_off: 0 }, Cfg { len: 64, s_off: 0, d_off: 0 }, Cfg { len: 512, s_off: 0, d_off: 0 }, Cfg { len: 4096, s_off: 0, d_off: 0 }, Cfg { len: MEG1, s_off: 0, d_off: 0 }, ], setup = setup, )] #[benches::offset( // Both at the same offset args = [ Cfg { len: 16, s_off: 65, d_off: 65 }, Cfg { len: 32, s_off: 65, d_off: 65 }, Cfg { len: 64, s_off: 65, d_off: 65 }, Cfg { len: 512, s_off: 65, d_off: 65 }, Cfg { len: 4096, s_off: 65, d_off: 65 }, Cfg { len: MEG1, s_off: 65, d_off: 65 }, ], setup = setup, )] #[benches::misaligned( // `src` and `dst` both misaligned by different amounts args = [ Cfg { len: 16, s_off: 65, d_off: 66 }, Cfg { len: 32, s_off: 65, d_off: 66 }, Cfg { len: 64, s_off: 65, d_off: 66 }, Cfg { len: 512, s_off: 65, d_off: 66 }, Cfg { len: 4096, s_off: 65, d_off: 66 }, Cfg { len: MEG1, s_off: 65, d_off: 66 }, ], setup = setup, )] fn bench((len, mut dst, src): (usize, AlignedSlice, AlignedSlice)) { unsafe { black_box(memcpy( black_box(dst.as_mut_ptr()), black_box(src.as_ptr()), black_box(len), )); } } library_benchmark_group!(name = memcpy; benchmarks = bench); } mod mset { use super::*; struct Cfg { len: usize, offset: usize, } fn setup(Cfg { len, offset }: Cfg) -> (usize, AlignedSlice) { println!("bytes: {len}, offset: {offset}"); (len, AlignedSlice::new_zeroed(len, offset)) } #[library_benchmark] #[benches::aligned( args = [ Cfg { len: 16, offset: 0 }, Cfg { len: 32, offset: 0 }, Cfg { len: 64, offset: 0 }, Cfg { len: 512, offset: 0 }, Cfg { len: 4096, offset: 0 }, Cfg { len: MEG1, offset: 0 }, ], setup = setup, )] #[benches::offset( args = [ Cfg { len: 16, offset: 65 }, Cfg { len: 32, offset: 65 }, Cfg { len: 64, offset: 65 }, Cfg { len: 512, offset: 65 }, Cfg { len: 4096, offset: 65 }, Cfg { len: MEG1, offset: 65 }, ], setup = setup, )] fn bench((len, mut dst): (usize, AlignedSlice)) { unsafe { black_box(memset( black_box(dst.as_mut_ptr()), black_box(27), black_box(len), )); } } library_benchmark_group!(name = memset; benchmarks = bench); } mod mcmp { use super::*; struct Cfg { len: usize, s_off: usize, d_off: usize, } fn setup(cfg: Cfg) -> (usize, AlignedSlice, AlignedSlice) { let Cfg { len, s_off, d_off } = cfg; println!("bytes: {len}, src offset: {s_off}, dst offset: {d_off}"); let b1 = AlignedSlice::new_zeroed(len, s_off); let mut b2 = AlignedSlice::new_zeroed(len, d_off); b2[len - 1] = 1; (len, b1, b2) } #[library_benchmark] #[benches::aligned( // Both aligned args = [ Cfg { len: 16, s_off: 0, d_off: 0 }, Cfg { len: 32, s_off: 0, d_off: 0 }, Cfg { len: 64, s_off: 0, d_off: 0 }, Cfg { len: 512, s_off: 0, d_off: 0 }, Cfg { len: 4096, s_off: 0, d_off: 0 }, Cfg { len: MEG1, s_off: 0, d_off: 0 }, ], setup = setup )] #[benches::offset( // Both at the same offset args = [ Cfg { len: 16, s_off: 65, d_off: 65 }, Cfg { len: 32, s_off: 65, d_off: 65 }, Cfg { len: 64, s_off: 65, d_off: 65 }, Cfg { len: 512, s_off: 65, d_off: 65 }, Cfg { len: 4096, s_off: 65, d_off: 65 }, Cfg { len: MEG1, s_off: 65, d_off: 65 }, ], setup = setup )] #[benches::misaligned( // `src` and `dst` both misaligned by different amounts args = [ Cfg { len: 16, s_off: 65, d_off: 66 }, Cfg { len: 32, s_off: 65, d_off: 66 }, Cfg { len: 64, s_off: 65, d_off: 66 }, Cfg { len: 512, s_off: 65, d_off: 66 }, Cfg { len: 4096, s_off: 65, d_off: 66 }, Cfg { len: MEG1, s_off: 65, d_off: 66 }, ], setup = setup )] fn bench((len, mut dst, src): (usize, AlignedSlice, AlignedSlice)) { unsafe { black_box(memcmp( black_box(dst.as_mut_ptr()), black_box(src.as_ptr()), black_box(len), )); } } library_benchmark_group!(name = memcmp; benchmarks = bench); } mod mmove { use Spread::{Aligned, Large, Medium, Small}; use super::*; struct Cfg { len: usize, spread: Spread, off: usize, } enum Spread { /// `src` and `dst` are close and have the same alignment (or offset). Aligned, /// `src` and `dst` are close. Small, /// `src` and `dst` are halfway offset in the buffer. Medium, /// `src` and `dst` only overlap by a single byte. Large, } // Note that small and large are fn calculate_spread(len: usize, spread: Spread) -> usize { match spread { // Note that this test doesn't make sense for lengths less than len=128 Aligned => { assert!(len > MAX_ALIGN, "aligned memset would have no overlap"); MAX_ALIGN } Small => 1, Medium => (len / 2) + 1, // add 1 so all are misaligned Large => len - 1, } } fn setup_forward(cfg: Cfg) -> (usize, usize, AlignedSlice) { let Cfg { len, spread, off } = cfg; let spread = calculate_spread(len, spread); println!("bytes: {len}, spread: {spread}, offset: {off}, forward"); assert!(spread < len, "memmove tests should have some overlap"); let mut buf = AlignedSlice::new_zeroed(len + spread, off); let mut fill: usize = 0; buf[..len].fill_with(|| { fill += 1; fill as u8 }); (len, spread, buf) } fn setup_backward(cfg: Cfg) -> (usize, usize, AlignedSlice) { let Cfg { len, spread, off } = cfg; let spread = calculate_spread(len, spread); println!("bytes: {len}, spread: {spread}, offset: {off}, backward"); assert!(spread < len, "memmove tests should have some overlap"); let mut buf = AlignedSlice::new_zeroed(len + spread, off); let mut fill: usize = 0; buf[spread..].fill_with(|| { fill += 1; fill as u8 }); (len, spread, buf) } #[library_benchmark] #[benches::aligned( args = [ // Don't test small spreads since there is no overlap Cfg { len: 4096, spread: Aligned, off: 0 }, Cfg { len: MEG1, spread: Aligned, off: 0 }, ], setup = setup_forward )] #[benches::small_spread( args = [ Cfg { len: 16, spread: Small, off: 0 }, Cfg { len: 32, spread: Small, off: 0 }, Cfg { len: 64, spread: Small, off: 0 }, Cfg { len: 512, spread: Small, off: 0 }, Cfg { len: 4096, spread: Small, off: 0 }, Cfg { len: MEG1, spread: Small, off: 0 }, ], setup = setup_forward )] #[benches::medium_spread( args = [ Cfg { len: 16, spread: Medium, off: 0 }, Cfg { len: 32, spread: Medium, off: 0 }, Cfg { len: 64, spread: Medium, off: 0 }, Cfg { len: 512, spread: Medium, off: 0 }, Cfg { len: 4096, spread: Medium, off: 0 }, Cfg { len: MEG1, spread: Medium, off: 0 }, ], setup = setup_forward )] #[benches::large_spread( args = [ Cfg { len: 16, spread: Large, off: 0 }, Cfg { len: 32, spread: Large, off: 0 }, Cfg { len: 64, spread: Large, off: 0 }, Cfg { len: 512, spread: Large, off: 0 }, Cfg { len: 4096, spread: Large, off: 0 }, Cfg { len: MEG1, spread: Large, off: 0 }, ], setup = setup_forward )] #[benches::aligned_off( args = [ Cfg { len: 4096, spread: Aligned, off: 65 }, Cfg { len: MEG1, spread: Aligned, off: 65 }, ], setup = setup_forward )] #[benches::small_spread_off( args = [ Cfg { len: 16, spread: Small, off: 65 }, Cfg { len: 32, spread: Small, off: 65 }, Cfg { len: 64, spread: Small, off: 65 }, Cfg { len: 512, spread: Small, off: 65 }, Cfg { len: 4096, spread: Small, off: 65 }, Cfg { len: MEG1, spread: Small, off: 65 }, ], setup = setup_forward )] #[benches::medium_spread_off( args = [ Cfg { len: 16, spread: Medium, off: 65 }, Cfg { len: 32, spread: Medium, off: 65 }, Cfg { len: 64, spread: Medium, off: 65 }, Cfg { len: 512, spread: Medium, off: 65 }, Cfg { len: 4096, spread: Medium, off: 65 }, Cfg { len: MEG1, spread: Medium, off: 65 }, ], setup = setup_forward )] #[benches::large_spread_off( args = [ Cfg { len: 16, spread: Large, off: 65 }, Cfg { len: 32, spread: Large, off: 65 }, Cfg { len: 64, spread: Large, off: 65 }, Cfg { len: 512, spread: Large, off: 65 }, Cfg { len: 4096, spread: Large, off: 65 }, Cfg { len: MEG1, spread: Large, off: 65 }, ], setup = setup_forward )] fn forward((len, spread, mut buf): (usize, usize, AlignedSlice)) { // Test moving from the start of the buffer toward the end unsafe { black_box(memmove( black_box(buf[spread..].as_mut_ptr()), black_box(buf.as_ptr()), black_box(len), )); } } #[library_benchmark] #[benches::aligned( args = [ // Don't test small spreads since there is no overlap Cfg { len: 4096, spread: Aligned, off: 0 }, Cfg { len: MEG1, spread: Aligned, off: 0 }, ], setup = setup_backward )] #[benches::small_spread( args = [ Cfg { len: 16, spread: Small, off: 0 }, Cfg { len: 32, spread: Small, off: 0 }, Cfg { len: 64, spread: Small, off: 0 }, Cfg { len: 512, spread: Small, off: 0 }, Cfg { len: 4096, spread: Small, off: 0 }, Cfg { len: MEG1, spread: Small, off: 0 }, ], setup = setup_backward )] #[benches::medium_spread( args = [ Cfg { len: 16, spread: Medium, off: 0 }, Cfg { len: 32, spread: Medium, off: 0 }, Cfg { len: 64, spread: Medium, off: 0 }, Cfg { len: 512, spread: Medium, off: 0 }, Cfg { len: 4096, spread: Medium, off: 0 }, Cfg { len: MEG1, spread: Medium, off: 0 }, ], setup = setup_backward )] #[benches::large_spread( args = [ Cfg { len: 16, spread: Large, off: 0 }, Cfg { len: 32, spread: Large, off: 0 }, Cfg { len: 64, spread: Large, off: 0 }, Cfg { len: 512, spread: Large, off: 0 }, Cfg { len: 4096, spread: Large, off: 0 }, Cfg { len: MEG1, spread: Large, off: 0 }, ], setup = setup_backward )] #[benches::aligned_off( args = [ // Don't test small spreads since there is no overlap Cfg { len: 4096, spread: Aligned, off: 65 }, Cfg { len: MEG1, spread: Aligned, off: 65 }, ], setup = setup_backward )] #[benches::small_spread_off( args = [ Cfg { len: 16, spread: Small, off: 65 }, Cfg { len: 32, spread: Small, off: 65 }, Cfg { len: 64, spread: Small, off: 65 }, Cfg { len: 512, spread: Small, off: 65 }, Cfg { len: 4096, spread: Small, off: 65 }, Cfg { len: MEG1, spread: Small, off: 65 }, ], setup = setup_backward )] #[benches::medium_spread_off( args = [ Cfg { len: 16, spread: Medium, off: 65 }, Cfg { len: 32, spread: Medium, off: 65 }, Cfg { len: 64, spread: Medium, off: 65 }, Cfg { len: 512, spread: Medium, off: 65 }, Cfg { len: 4096, spread: Medium, off: 65 }, Cfg { len: MEG1, spread: Medium, off: 65 }, ], setup = setup_backward )] #[benches::large_spread_off( args = [ Cfg { len: 16, spread: Large, off: 65 }, Cfg { len: 32, spread: Large, off: 65 }, Cfg { len: 64, spread: Large, off: 65 }, Cfg { len: 512, spread: Large, off: 65 }, Cfg { len: 4096, spread: Large, off: 65 }, Cfg { len: MEG1, spread: Large, off: 65 }, ], setup = setup_backward )] fn backward((len, spread, mut buf): (usize, usize, AlignedSlice)) { // Test moving from the end of the buffer toward the start unsafe { black_box(memmove( black_box(buf.as_mut_ptr()), black_box(buf[spread..].as_ptr()), black_box(len), )); } } library_benchmark_group!(name = memmove; benchmarks = forward, backward); } use mcmp::memcmp; use mcpy::memcpy; use mmove::memmove; use mset::memset; main!(library_benchmark_groups = memcpy, memset, memcmp, memmove);