diff --git a/CHANGELOG.md b/CHANGELOG.md index 76231ab7..d2cd3a65 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,11 +10,18 @@ and this project adheres to [Semantic Versioning](http://semver.org/). ### Added - Add `Clone` and `PartialEq` implementations to `HistoryBuffer`. +- Added an object pool API. see the `pool::object` module level doc for details ### Changed - [breaking-change] `IndexMap` and `IndexSet` now require that keys implement the `core::hash::Hash` trait instead of the `hash32::Hash` (v0.2.0) trait +- move `pool::singleton::Box` to the `pool::box` module +- renamed `pool::singleton::Pool` to `BoxPool` and moved it into the `pool::box` module +- move `pool::singleton::arc::Arc` to the `pool::arc` module +- renamed `pool::singleton::arc::Pool` to `ArcPool` and moved it into the `pool::arc` module +- [breaking-change] changed the target support of memory pool API to only support 32-bit x86 and a + subset of ARM targets. See the module level documentation of the `pool` module for details - [breaking-change] this crate now depends on `atomic-polyfill` v1.0.1, meaning that targets that require a polyfill need a `critical-section` **v1.x.x** implementation. @@ -26,6 +33,11 @@ and this project adheres to [Semantic Versioning](http://semver.org/). - [breaking-change] this crate no longer has a Minimum Supported Rust Version (MSRV) guarantee and should be used with the latest stable version of the Rust toolchain. +- [breaking-change] removed the `Init` and `Uninint` type states from `pool::singleton::Box` +- [breaking-change] removed the following `pool::singleton::Box` methods: `freeze`, `forget` and `init` +- [breaking-change] removed the `pool::singleton::arc::ArcInner` type +- [breaking-change] removed support for attributes from `pool!` and `arc_pool!` + ## [v0.7.16] - 2022-08-09 ### Added diff --git a/Cargo.toml b/Cargo.toml index 2408ff8b..1292800e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,8 +18,6 @@ version = "0.8.0" default = ["cas"] cas = ["atomic-polyfill"] ufmt-impl = ["ufmt-write"] -# read the docs before enabling: makes `Pool` Sync on x86_64 -x86-sync-pool = [] # only for tests __trybuild = [] # Enable larger MPMC sizes. diff --git a/build.rs b/build.rs index c2127137..d787a266 100644 --- a/build.rs +++ b/build.rs @@ -1,6 +1,12 @@ #![deny(warnings)] -use std::{env, error::Error}; +use std::{ + env, + error::Error, + fs, + path::Path, + process::{Command, ExitStatus, Stdio}, +}; use rustc_version::Channel; @@ -89,5 +95,63 @@ fn main() -> Result<(), Box> { println!("cargo:rustc-cfg=unstable_channel"); } + match compile_probe(ARM_LLSC_PROBE) { + Some(status) if status.success() => println!("cargo:rustc-cfg=arm_llsc"), + _ => {} + } + Ok(()) } + +const ARM_LLSC_PROBE: &str = r#" +#![no_std] + +// `no_mangle` forces codegen, which makes llvm check the contents of the `asm!` macro +#[no_mangle] +unsafe fn asm() { + core::arch::asm!("clrex"); +} +"#; + +// this function was taken from anyhow v1.0.63 build script +// https://crates.io/crates/anyhow/1.0.63 (last visited 2022-09-02) +// the code is licensed under 'MIT or APACHE-2.0' +fn compile_probe(source: &str) -> Option { + let rustc = env::var_os("RUSTC")?; + let out_dir = env::var_os("OUT_DIR")?; + let probefile = Path::new(&out_dir).join("probe.rs"); + fs::write(&probefile, source).ok()?; + + // Make sure to pick up Cargo rustc configuration. 
+ let mut cmd = if let Some(wrapper) = env::var_os("RUSTC_WRAPPER") { + let mut cmd = Command::new(wrapper); + // The wrapper's first argument is supposed to be the path to rustc. + cmd.arg(rustc); + cmd + } else { + Command::new(rustc) + }; + + cmd.stderr(Stdio::null()) + .arg("--edition=2018") + .arg("--crate-name=probe") + .arg("--crate-type=lib") + .arg("--out-dir") + .arg(out_dir) + .arg(probefile); + + if let Some(target) = env::var_os("TARGET") { + cmd.arg("--target").arg(target); + } + + // If Cargo wants to set RUSTFLAGS, use that. + if let Ok(rustflags) = env::var("CARGO_ENCODED_RUSTFLAGS") { + if !rustflags.is_empty() { + for arg in rustflags.split('\x1f') { + cmd.arg(arg); + } + } + } + + cmd.status().ok() +} diff --git a/src/lib.rs b/src/lib.rs index 0976fabf..36a41ab9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -43,12 +43,15 @@ //! //! List of currently implemented data structures: //! -//! - [`Arc`](pool/singleton/arc/struct.Arc.html) -- Thread-safe reference-counting pointer backed by a memory pool +//! - [`Arc`](pool/arc/index.html) -- like `std::sync::Arc` but backed by a lock-free memory pool +//! rather than `#[global_allocator]` +//! - [`Box`](pool/boxed/index.html) -- like `std::boxed::Box` but backed by a lock-free memory pool +//! rather than `#[global_allocator]` //! - [`BinaryHeap`](binary_heap/struct.BinaryHeap.html) -- priority queue //! - [`IndexMap`](struct.IndexMap.html) -- hash table //! - [`IndexSet`](struct.IndexSet.html) -- hash set //! - [`LinearMap`](struct.LinearMap.html) -//! - [`Pool`](pool/struct.Pool.html) -- lock-free memory pool +//! - [`Object`](pool/object/index.html) -- objects managed by an object pool //! - [`String`](struct.String.html) //! - [`Vec`](struct.Vec.html) //! - [`mpmc::Q*`](mpmc/index.html) -- multiple producer multiple consumer lock-free queue @@ -83,8 +86,6 @@ pub use histbuf::{HistoryBuffer, OldestOrdered}; pub use indexmap::{Bucket, Entry, FnvIndexMap, IndexMap, OccupiedEntry, Pos, VacantEntry}; pub use indexset::{FnvIndexSet, IndexSet}; pub use linear_map::LinearMap; -#[cfg(all(has_cas, feature = "cas"))] -pub use pool::singleton::arc::Arc; pub use string::String; pub use vec::Vec; @@ -110,7 +111,7 @@ pub mod binary_heap; mod defmt; #[cfg(all(has_cas, feature = "cas"))] pub mod mpmc; -#[cfg(all(has_cas, feature = "cas"))] +#[cfg(any(arm_llsc, target_arch = "x86"))] pub mod pool; pub mod sorted_linked_list; #[cfg(has_atomics)] diff --git a/src/pool.rs b/src/pool.rs new file mode 100644 index 00000000..c5a61829 --- /dev/null +++ b/src/pool.rs @@ -0,0 +1,59 @@ +//! Memory and object pools +//! +//! # Target support +//! +//! This module / API is only available on these compilation targets: +//! +//! - ARM architectures which instruction set include the LDREX, CLREX and STREX instructions, e.g. +//! `thumbv7m-none-eabi` but not `thumbv6m-none-eabi` +//! - 32-bit x86, e.g. `i686-unknown-linux-gnu` +//! +//! # Benchmarks +//! +//! - compilation settings +//! - `codegen-units = 1` +//! - `lto = 'fat'` +//! - `opt-level = 'z'` +//! - compilation target: `thumbv7em-none-eabihf` +//! - CPU: ARM Cortex-M4F +//! +//! - test program: +//! +//! ``` no_run +//! use heapless::box_pool; +//! +//! box_pool!(P: ()); // or `arc_pool!` or `object_pool!` +//! +//! bkpt(); +//! let res = P.alloc(()); +//! bkpt(); +//! +//! if let Ok(boxed) = res { +//! bkpt(); +//! drop(boxed); +//! bkpt(); +//! } +//! # fn bkpt() {} +//! ``` +//! +//! - measurement method: the cycle counter (CYCCNT) register was sampled each time a breakpoint +//! 
(`bkpt`) was hit. the difference between the "after" and the "before" value of CYCCNT yields the +//! execution time in clock cycles. +//! +//! | API | clock cycles | +//! |------------------------------|--------------| +//! | `BoxPool::alloc` | 23 | +//! | `pool::boxed::Box::drop` | 23 | +//! | `ArcPool::alloc` | 28 | +//! | `pool::arc::Arc::drop` | 59 | +//! | `ObjectPool::request` | 23 | +//! | `pool::object::Object::drop` | 23 | +//! +//! Note that the execution time won't include `T`'s initialization nor `T`'s destructor which will +//! be present in the general case for `Box` and `Arc`. + +mod treiber; + +pub mod arc; +pub mod boxed; +pub mod object; diff --git a/src/pool/arc.rs b/src/pool/arc.rs new file mode 100644 index 00000000..74b815a4 --- /dev/null +++ b/src/pool/arc.rs @@ -0,0 +1,503 @@ +//! `std::sync::Arc`-like API on top of a lock-free memory pool +//! +//! # Example usage +//! +//! ``` +//! use heapless::{arc_pool, pool::arc::{Arc, ArcBlock}}; +//! +//! arc_pool!(P: u128); +//! +//! // cannot allocate without first giving memory blocks to the pool +//! assert!(P.alloc(42).is_err()); +//! +//! // (some `no_std` runtimes have safe APIs to create `&'static mut` references) +//! let block: &'static mut ArcBlock = unsafe { +//! static mut B: ArcBlock = ArcBlock::new(); +//! &mut B +//! }; +//! +//! P.manage(block); +//! +//! let arc = P.alloc(1).unwrap(); +//! +//! // number of smart pointers is limited to the number of blocks managed by the pool +//! let res = P.alloc(2); +//! assert!(res.is_err()); +//! +//! // but cloning does not consume an `ArcBlock` +//! let arc2 = arc.clone(); +//! +//! assert_eq!(1, *arc2); +//! +//! // `arc`'s destructor returns the memory block to the pool +//! drop(arc2); // decrease reference counter +//! drop(arc); // release memory +//! +//! // it's now possible to allocate a new `Arc` smart pointer +//! let res = P.alloc(3); +//! +//! assert!(res.is_ok()); +//! ``` + +// reference counting logic is based on version 1.63.0 of the Rust standard library (`alloc` crate) +// which is licensed under 'MIT or APACHE-2.0' +// https://github.com/rust-lang/rust/blob/1.63.0/library/alloc/src/sync.rs#L235 (last visited +// 2022-09-05) + +use core::{ + fmt, + hash::{Hash, Hasher}, + mem::{ManuallyDrop, MaybeUninit}, + ops, ptr, + sync::atomic::{self, AtomicUsize, Ordering}, +}; + +use super::treiber::{NonNullPtr, Stack, UnionNode}; + +/// Creates a new `ArcPool` singleton with the given `$name` that manages the specified `$data_type` +/// +/// For more extensive documentation see the [module level documentation](pool/arc/index.html) +#[macro_export] +macro_rules! 
arc_pool { + ($name:ident: $data_type:ty) => { + pub struct $name; + + impl $crate::pool::arc::ArcPool for $name { + type Data = $data_type; + + fn singleton() -> &'static $crate::pool::arc::ArcPoolImpl<$data_type> { + static $name: $crate::pool::arc::ArcPoolImpl<$data_type> = + $crate::pool::arc::ArcPoolImpl::new(); + + &$name + } + } + + impl $name { + /// Inherent method version of `ArcPool::alloc` + #[allow(dead_code)] + pub fn alloc( + &self, + value: $data_type, + ) -> Result<$crate::pool::arc::Arc<$name>, $data_type> { + <$name as $crate::pool::arc::ArcPool>::alloc(value) + } + + /// Inherent method version of `ArcPool::manage` + #[allow(dead_code)] + pub fn manage(&self, block: &'static mut $crate::pool::arc::ArcBlock<$data_type>) { + <$name as $crate::pool::arc::ArcPool>::manage(block) + } + } + }; +} + +/// A singleton that manages `pool::arc::Arc` smart pointers +pub trait ArcPool: Sized { + /// The data type managed by the memory pool + type Data: 'static; + + /// `arc_pool!` implementation detail + #[doc(hidden)] + fn singleton() -> &'static ArcPoolImpl; + + /// Allocate a new `Arc` smart pointer initialized to the given `value` + /// + /// `manage` should be called at least once before calling `alloc` + /// + /// # Errors + /// + /// The `Err`or variant is returned when the memory pool has run out of memory blocks + fn alloc(value: Self::Data) -> Result, Self::Data> { + Ok(Arc { + node_ptr: Self::singleton().alloc(value)?, + }) + } + + /// Add a statically allocated memory block to the memory pool + fn manage(block: &'static mut ArcBlock) { + Self::singleton().manage(block) + } +} + +/// `arc_pool!` implementation detail +// newtype to avoid having to make field types public +#[doc(hidden)] +pub struct ArcPoolImpl { + stack: Stack>>>, +} + +impl ArcPoolImpl { + /// `arc_pool!` implementation detail + #[doc(hidden)] + pub const fn new() -> Self { + Self { + stack: Stack::new(), + } + } + + fn alloc(&self, value: T) -> Result>>>, T> { + if let Some(node_ptr) = self.stack.try_pop() { + let inner = ArcInner { + data: value, + strong: AtomicUsize::new(1), + }; + unsafe { node_ptr.as_ptr().cast::>().write(inner) } + + Ok(node_ptr) + } else { + Err(value) + } + } + + fn manage(&self, block: &'static mut ArcBlock) { + let node: &'static mut _ = &mut block.node; + + unsafe { self.stack.push(NonNullPtr::from_static_mut_ref(node)) } + } +} + +unsafe impl Sync for ArcPoolImpl {} + +/// Like `std::sync::Arc` but managed by memory pool `P` +pub struct Arc
<P>
+where
+    P: ArcPool,
+{
+    node_ptr: NonNullPtr<UnionNode<MaybeUninit<ArcInner<P::Data>>>>,
+}
+
+impl<P> Arc<P>
+where
+    P: ArcPool,
+{
+    fn inner(&self) -> &ArcInner<P::Data> {
+        unsafe { &*self.node_ptr.as_ptr().cast::<ArcInner<P::Data>>() }
+    }
+
+    fn from_inner(node_ptr: NonNullPtr<UnionNode<MaybeUninit<ArcInner<P::Data>>>>) -> Self {
+        Self { node_ptr }
+    }
+
+    unsafe fn get_mut_unchecked(this: &mut Self) -> &mut P::Data {
+        &mut *ptr::addr_of_mut!((*this.node_ptr.as_ptr().cast::<ArcInner<P::Data>>()).data)
+    }
+
+    #[inline(never)]
+    unsafe fn drop_slow(&mut self) {
+        // run `P::Data`'s destructor
+        ptr::drop_in_place(Self::get_mut_unchecked(self));
+
+        // return memory to pool
+        P::singleton().stack.push(self.node_ptr);
+    }
+}
+
+impl<P> AsRef<P::Data> for Arc<P>
+where
+    P: ArcPool,
+{
+    fn as_ref(&self) -> &P::Data {
+        &**self
+    }
+}
+
+const MAX_REFCOUNT: usize = (isize::MAX) as usize;
+
+impl<P> Clone for Arc<P>
+where
+    P: ArcPool,
+{
+    fn clone(&self) -> Self {
+        let old_size = self.inner().strong.fetch_add(1, Ordering::Relaxed);
+
+        if old_size > MAX_REFCOUNT {
+            // XXX original code calls `intrinsics::abort` which is unstable API
+            panic!();
+        }
+
+        Self::from_inner(self.node_ptr)
+    }
+}
+
+impl<A> fmt::Debug for Arc<A>
+where
+    A: ArcPool,
+    A::Data: fmt::Debug,
+{
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        A::Data::fmt(self, f)
+    }
+}
+
+impl<P> ops::Deref for Arc<P>
+where + P: ArcPool, +{ + type Target = P::Data; + + fn deref(&self) -> &Self::Target { + unsafe { &*ptr::addr_of!((*self.node_ptr.as_ptr().cast::>()).data) } + } +} + +impl fmt::Display for Arc +where + A: ArcPool, + A::Data: fmt::Display, +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + A::Data::fmt(self, f) + } +} + +impl Drop for Arc +where + A: ArcPool, +{ + fn drop(&mut self) { + if self.inner().strong.fetch_sub(1, Ordering::Release) != 1 { + return; + } + + atomic::fence(Ordering::Acquire); + + unsafe { self.drop_slow() } + } +} + +impl Eq for Arc +where + A: ArcPool, + A::Data: Eq, +{ +} + +impl Hash for Arc +where + A: ArcPool, + A::Data: Hash, +{ + fn hash(&self, state: &mut H) + where + H: Hasher, + { + (**self).hash(state) + } +} + +impl Ord for Arc +where + A: ArcPool, + A::Data: Ord, +{ + fn cmp(&self, other: &Self) -> core::cmp::Ordering { + A::Data::cmp(self, other) + } +} + +impl PartialEq> for Arc +where + A: ArcPool, + B: ArcPool, + A::Data: PartialEq, +{ + fn eq(&self, other: &Arc) -> bool { + A::Data::eq(self, &**other) + } +} + +impl PartialOrd> for Arc +where + A: ArcPool, + B: ArcPool, + A::Data: PartialOrd, +{ + fn partial_cmp(&self, other: &Arc) -> Option { + A::Data::partial_cmp(self, &**other) + } +} + +unsafe impl Send for Arc +where + A: ArcPool, + A::Data: Sync + Send, +{ +} + +unsafe impl Sync for Arc +where + A: ArcPool, + A::Data: Sync + Send, +{ +} + +impl Unpin for Arc where A: ArcPool {} + +struct ArcInner { + data: T, + strong: AtomicUsize, +} + +/// A chunk of memory that an `ArcPool` can manage +pub struct ArcBlock { + node: UnionNode>>, +} + +impl ArcBlock { + /// Creates a new memory block + pub const fn new() -> Self { + Self { + node: UnionNode { + data: ManuallyDrop::new(MaybeUninit::uninit()), + }, + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn cannot_alloc_if_empty() { + arc_pool!(P: i32); + + assert_eq!(Err(42), P.alloc(42),); + } + + #[test] + fn can_alloc_if_manages_one_block() { + arc_pool!(P: i32); + + let block = unsafe { + static mut B: ArcBlock = ArcBlock::new(); + &mut B + }; + P.manage(block); + + assert_eq!(42, *P.alloc(42).unwrap()); + } + + #[test] + fn alloc_drop_alloc() { + arc_pool!(P: i32); + + let block = unsafe { + static mut B: ArcBlock = ArcBlock::new(); + &mut B + }; + P.manage(block); + + let arc = P.alloc(1).unwrap(); + + drop(arc); + + assert_eq!(2, *P.alloc(2).unwrap()); + } + + #[test] + fn strong_count_starts_at_one() { + arc_pool!(P: i32); + + let block = unsafe { + static mut B: ArcBlock = ArcBlock::new(); + &mut B + }; + P.manage(block); + + let arc = P.alloc(1).ok().unwrap(); + + assert_eq!(1, arc.inner().strong.load(Ordering::Relaxed)); + } + + #[test] + fn clone_increases_strong_count() { + arc_pool!(P: i32); + + let block = unsafe { + static mut B: ArcBlock = ArcBlock::new(); + &mut B + }; + P.manage(block); + + let arc = P.alloc(1).ok().unwrap(); + + let before = arc.inner().strong.load(Ordering::Relaxed); + + let arc2 = arc.clone(); + + let expected = before + 1; + assert_eq!(expected, arc.inner().strong.load(Ordering::Relaxed)); + assert_eq!(expected, arc2.inner().strong.load(Ordering::Relaxed)); + } + + #[test] + fn drop_decreases_strong_count() { + arc_pool!(P: i32); + + let block = unsafe { + static mut B: ArcBlock = ArcBlock::new(); + &mut B + }; + P.manage(block); + + let arc = P.alloc(1).ok().unwrap(); + let arc2 = arc.clone(); + + let before = arc.inner().strong.load(Ordering::Relaxed); + + drop(arc); + + let expected = before - 1; + assert_eq!(expected, 
arc2.inner().strong.load(Ordering::Relaxed)); + } + + #[test] + fn runs_destructor_exactly_once_when_strong_count_reaches_zero() { + static COUNT: AtomicUsize = AtomicUsize::new(0); + + pub struct S; + + impl Drop for S { + fn drop(&mut self) { + COUNT.fetch_add(1, Ordering::Relaxed); + } + } + + arc_pool!(P: S); + + let block = unsafe { + static mut B: ArcBlock = ArcBlock::new(); + &mut B + }; + P.manage(block); + + let arc = P.alloc(S).ok().unwrap(); + + assert_eq!(0, COUNT.load(Ordering::Relaxed)); + + drop(arc); + + assert_eq!(1, COUNT.load(Ordering::Relaxed)); + } + + #[test] + fn zst_is_well_aligned() { + #[repr(align(4096))] + pub struct Zst4096; + + arc_pool!(P: Zst4096); + + let block = unsafe { + static mut B: ArcBlock = ArcBlock::new(); + &mut B + }; + P.manage(block); + + let arc = P.alloc(Zst4096).ok().unwrap(); + + let raw = &*arc as *const Zst4096; + assert_eq!(0, raw as usize % 4096); + } +} diff --git a/src/pool/boxed.rs b/src/pool/boxed.rs new file mode 100644 index 00000000..9a1aa4d4 --- /dev/null +++ b/src/pool/boxed.rs @@ -0,0 +1,533 @@ +//! `std::boxed::Box`-like API on top of a lock-free memory pool +//! +//! # Example usage +//! +//! ``` +//! use heapless::{box_pool, pool::boxed::{Box, BoxBlock}}; +//! +//! box_pool!(P: u128); +//! +//! // cannot allocate without first giving memory blocks to the pool +//! assert!(P.alloc(42).is_err()); +//! +//! // (some `no_std` runtimes have safe APIs to create `&'static mut` references) +//! let block: &'static mut BoxBlock = unsafe { +//! static mut B: BoxBlock = BoxBlock::new(); +//! &mut B +//! }; +//! +//! // give block of memory to the pool +//! P.manage(block); +//! +//! // it's now possible to allocate +//! let mut boxed = P.alloc(1).unwrap(); +//! +//! // mutation is possible +//! *boxed += 1; +//! assert_eq!(2, *boxed); +//! +//! // number of boxes is limited to the number of blocks managed by the pool +//! let res = P.alloc(3); +//! assert!(res.is_err()); +//! +//! // give another memory block to the pool +//! P.manage(unsafe { +//! static mut B: BoxBlock = BoxBlock::new(); +//! &mut B +//! }); +//! +//! // cloning also consumes a memory block from the pool +//! let mut separate_box = boxed.clone(); +//! *separate_box += 1; +//! assert_eq!(3, *separate_box); +//! +//! // after the clone it's not possible to allocate again +//! let res = P.alloc(4); +//! assert!(res.is_err()); +//! +//! // `boxed`'s destructor returns the memory block to the pool +//! drop(boxed); +//! +//! // it's possible to allocate again +//! let res = P.alloc(5); +//! +//! assert!(res.is_ok()); +//! ``` + +use core::{ + fmt, + hash::{Hash, Hasher}, + mem::{ManuallyDrop, MaybeUninit}, + ops, ptr, +}; + +use super::treiber::{NonNullPtr, Stack, UnionNode}; + +/// Creates a new `BoxPool` singleton with the given `$name` that manages the specified `$data_type` +/// +/// For more extensive documentation see the [module level documentation](pool/boxed/index.html) +#[macro_export] +macro_rules! 
box_pool { + ($name:ident: $data_type:ty) => { + pub struct $name; + + impl $crate::pool::boxed::BoxPool for $name { + type Data = $data_type; + + fn singleton() -> &'static $crate::pool::boxed::BoxPoolImpl<$data_type> { + static $name: $crate::pool::boxed::BoxPoolImpl<$data_type> = + $crate::pool::boxed::BoxPoolImpl::new(); + + &$name + } + } + + impl $name { + /// Inherent method version of `BoxPool::alloc` + #[allow(dead_code)] + pub fn alloc( + &self, + value: $data_type, + ) -> Result<$crate::pool::boxed::Box<$name>, $data_type> { + <$name as $crate::pool::boxed::BoxPool>::alloc(value) + } + + /// Inherent method version of `BoxPool::manage` + #[allow(dead_code)] + pub fn manage(&self, block: &'static mut $crate::pool::boxed::BoxBlock<$data_type>) { + <$name as $crate::pool::boxed::BoxPool>::manage(block) + } + } + }; +} + +/// A singleton that manages `pool::boxed::Box`-es +/// +/// # Usage +/// +/// Do not implement this trait yourself; instead use the `box_pool!` macro to create a type that +/// implements this trait. +/// +/// # Semver guarantees +/// +/// *Implementing* this trait is exempt from semver guarantees. +/// i.e. a new patch release is allowed to break downstream `BoxPool` implementations. +/// +/// *Using* the trait, e.g. in generic code, does fall under semver guarantees. +pub trait BoxPool: Sized { + /// The data type managed by the memory pool + type Data: 'static; + + /// `box_pool!` implementation detail + #[doc(hidden)] + fn singleton() -> &'static BoxPoolImpl; + + /// Allocate a new `Box` initialized to the given `value` + /// + /// `manage` should be called at least once before calling `alloc` + /// + /// # Errors + /// + /// The `Err`or variant is returned when the memory pool has run out of memory blocks + fn alloc(value: Self::Data) -> Result, Self::Data> { + Ok(Box { + node_ptr: Self::singleton().alloc(value)?, + }) + } + + /// Add a statically allocated memory block to the memory pool + fn manage(block: &'static mut BoxBlock) { + Self::singleton().manage(block) + } +} + +/// Like `std::boxed::Box` but managed by memory pool `P` rather than `#[global_allocator]` +pub struct Box
<P>
+where
+    P: BoxPool,
+{
+    node_ptr: NonNullPtr<UnionNode<MaybeUninit<P::Data>>>,
+}
+
+impl<A> Clone for Box<A>
+where
+    A: BoxPool,
+    A::Data: Clone,
+{
+    fn clone(&self) -> Self {
+        A::alloc((**self).clone()).ok().expect("OOM")
+    }
+}
+
+impl<A> fmt::Debug for Box<A>
+where
+    A: BoxPool,
+    A::Data: fmt::Debug,
+{
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        A::Data::fmt(self, f)
+    }
+}
+
+impl<P> ops::Deref for Box<P>
+where
+    P: BoxPool,
+{
+    type Target = P::Data;
+
+    fn deref(&self) -> &Self::Target {
+        unsafe { &*self.node_ptr.as_ptr().cast::<P::Data>() }
+    }
+}
+
+impl<P> ops::DerefMut for Box<P>
+where
+    P: BoxPool,
+{
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        unsafe { &mut *self.node_ptr.as_ptr().cast::<P::Data>() }
+    }
+}
+
+impl<A> fmt::Display for Box<A>
+where
+    A: BoxPool,
+    A::Data: fmt::Display,
+{
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        A::Data::fmt(self, f)
+    }
+}
+
+impl<P> Drop for Box<P>
+where + P: BoxPool, +{ + fn drop(&mut self) { + let node = self.node_ptr; + + unsafe { ptr::drop_in_place(node.as_ptr().cast::()) } + + unsafe { P::singleton().stack.push(node) } + } +} + +impl Eq for Box +where + A: BoxPool, + A::Data: Eq, +{ +} + +impl Hash for Box +where + A: BoxPool, + A::Data: Hash, +{ + fn hash(&self, state: &mut H) + where + H: Hasher, + { + (**self).hash(state) + } +} + +impl Ord for Box +where + A: BoxPool, + A::Data: Ord, +{ + fn cmp(&self, other: &Self) -> core::cmp::Ordering { + A::Data::cmp(self, other) + } +} + +impl PartialEq> for Box +where + A: BoxPool, + B: BoxPool, + A::Data: PartialEq, +{ + fn eq(&self, other: &Box) -> bool { + A::Data::eq(self, other) + } +} + +impl PartialOrd> for Box +where + A: BoxPool, + B: BoxPool, + A::Data: PartialOrd, +{ + fn partial_cmp(&self, other: &Box) -> Option { + A::Data::partial_cmp(self, other) + } +} + +unsafe impl
<P> Send for Box<P>
+where
+    P: BoxPool,
+    P::Data: Send,
+{
+}
+
+unsafe impl<P> Sync for Box<P>
+where + P: BoxPool, + P::Data: Sync, +{ +} + +/// `box_pool!` implementation detail +// newtype to avoid having to make field types public +#[doc(hidden)] +pub struct BoxPoolImpl { + stack: Stack>>, +} + +impl BoxPoolImpl { + pub const fn new() -> Self { + Self { + stack: Stack::new(), + } + } + + fn alloc(&self, value: T) -> Result>>, T> { + if let Some(node_ptr) = self.stack.try_pop() { + unsafe { node_ptr.as_ptr().cast::().write(value) } + + Ok(node_ptr) + } else { + Err(value) + } + } + + fn manage(&self, block: &'static mut BoxBlock) { + let node: &'static mut _ = &mut block.node; + + unsafe { self.stack.push(NonNullPtr::from_static_mut_ref(node)) } + } +} + +unsafe impl Sync for BoxPoolImpl {} + +/// A chunk of memory that a `BoxPool` singleton can manage +pub struct BoxBlock { + node: UnionNode>, +} + +impl BoxBlock { + /// Creates a new memory block + pub const fn new() -> Self { + Self { + node: UnionNode { + data: ManuallyDrop::new(MaybeUninit::uninit()), + }, + } + } +} + +#[cfg(test)] +mod tests { + use core::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; + use std::thread; + + use super::*; + + #[test] + fn cannot_alloc_if_empty() { + box_pool!(P: i32); + + assert_eq!(Err(42), P.alloc(42)); + } + + #[test] + fn can_alloc_if_pool_manages_one_block() { + box_pool!(P: i32); + + let block = unsafe { + static mut B: BoxBlock = BoxBlock::new(); + &mut B + }; + P.manage(block); + + assert_eq!(42, *P.alloc(42).unwrap()); + } + + #[test] + fn alloc_drop_alloc() { + box_pool!(P: i32); + + let block = unsafe { + static mut B: BoxBlock = BoxBlock::new(); + &mut B + }; + P.manage(block); + + let boxed = P.alloc(1).unwrap(); + + drop(boxed); + + assert_eq!(2, *P.alloc(2).unwrap()); + } + + #[test] + fn runs_destructor_exactly_once_on_drop() { + static COUNT: AtomicUsize = AtomicUsize::new(0); + + pub struct S; + + impl Drop for S { + fn drop(&mut self) { + COUNT.fetch_add(1, Ordering::Relaxed); + } + } + + box_pool!(P: S); + + let block = unsafe { + static mut B: BoxBlock = BoxBlock::new(); + &mut B + }; + P.manage(block); + + let boxed = P.alloc(S).ok().unwrap(); + + assert_eq!(0, COUNT.load(Ordering::Relaxed)); + + drop(boxed); + + assert_eq!(1, COUNT.load(Ordering::Relaxed)); + } + + #[test] + fn zst_is_well_aligned() { + #[repr(align(4096))] + pub struct Zst4096; + + box_pool!(P: Zst4096); + + let block = unsafe { + static mut B: BoxBlock = BoxBlock::new(); + &mut B + }; + P.manage(block); + + let boxed = P.alloc(Zst4096).ok().unwrap(); + + let raw = &*boxed as *const Zst4096; + assert_eq!(0, raw as usize % 4096); + } + + #[allow(clippy::redundant_clone)] + #[test] + fn can_clone_if_pool_is_not_exhausted() { + static STRUCT_CLONE_WAS_CALLED: AtomicBool = AtomicBool::new(false); + + pub struct S; + + impl Clone for S { + fn clone(&self) -> Self { + STRUCT_CLONE_WAS_CALLED.store(true, Ordering::Relaxed); + Self + } + } + + box_pool!(P: S); + + P.manage(unsafe { + static mut B: BoxBlock = BoxBlock::new(); + &mut B + }); + P.manage(unsafe { + static mut B: BoxBlock = BoxBlock::new(); + &mut B + }); + + let first = P.alloc(S).ok().unwrap(); + let _second = first.clone(); + + assert!(STRUCT_CLONE_WAS_CALLED.load(Ordering::Relaxed)); + + let is_oom = P.alloc(S).is_err(); + assert!(is_oom); + } + + #[allow(clippy::redundant_clone)] + #[test] + fn clone_panics_if_pool_exhausted() { + static STRUCT_CLONE_WAS_CALLED: AtomicBool = AtomicBool::new(false); + + pub struct S; + + impl Clone for S { + fn clone(&self) -> Self { + STRUCT_CLONE_WAS_CALLED.store(true, Ordering::Relaxed); + Self + } + } 
+ + box_pool!(P: S); + + P.manage(unsafe { + static mut B: BoxBlock = BoxBlock::new(); + &mut B + }); + + let first = P.alloc(S).ok().unwrap(); + + let thread = thread::spawn(move || { + let _second = first.clone(); + }); + + let thread_panicked = thread.join().is_err(); + assert!(thread_panicked); + + // we diverge from `alloc::Box` in that we call `T::clone` first and then request + // memory from the allocator whereas `alloc::Box` does it the other way around + // assert!(!STRUCT_CLONE_WAS_CALLED.load(Ordering::Relaxed)); + } + + #[allow(clippy::redundant_clone)] + #[test] + fn panicking_clone_does_not_leak_memory() { + static STRUCT_CLONE_WAS_CALLED: AtomicBool = AtomicBool::new(false); + + pub struct S; + + impl Clone for S { + fn clone(&self) -> Self { + STRUCT_CLONE_WAS_CALLED.store(true, Ordering::Relaxed); + panic!() + } + } + + box_pool!(P: S); + + P.manage(unsafe { + static mut B: BoxBlock = BoxBlock::new(); + &mut B + }); + P.manage(unsafe { + static mut B: BoxBlock = BoxBlock::new(); + &mut B + }); + + let boxed = P.alloc(S).ok().unwrap(); + + let thread = thread::spawn(move || { + let _boxed = boxed.clone(); + }); + + let thread_panicked = thread.join().is_err(); + assert!(thread_panicked); + + assert!(STRUCT_CLONE_WAS_CALLED.load(Ordering::Relaxed)); + + let once = P.alloc(S); + let twice = P.alloc(S); + + assert!(once.is_ok()); + assert!(twice.is_ok()); + } +} diff --git a/src/pool/cas.rs b/src/pool/cas.rs deleted file mode 100644 index 09a373fc..00000000 --- a/src/pool/cas.rs +++ /dev/null @@ -1,248 +0,0 @@ -//! Stack based on CAS atomics -//! -//! To reduce the chance of hitting the ABA problem we use a 32-bit offset + a 32-bit version tag -//! instead of a 64-bit pointer. The version tag will be bumped on each successful `pop` operation. 
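As a rough illustration of the tag-plus-offset encoding that the deleted `cas.rs` stack describes above, here is a hedged sketch; `pack`, `tag` and `offset` are illustrative helpers, not crate APIs.

```rust
// Sketch of packing a 32-bit version tag and a 32-bit offset into one 64-bit word.
fn pack(tag: u32, offset: i32) -> u64 {
    ((tag as u64) << 32) | (offset as u32 as u64)
}

fn tag(word: u64) -> u32 {
    (word >> 32) as u32
}

fn offset(word: u64) -> i32 {
    word as u32 as i32
}

fn main() {
    let word = pack(1, -16);
    assert_eq!(1, tag(word));
    assert_eq!(-16, offset(word));

    // the version tag is bumped on every successful `pop` to make ABA less likely
    let bumped = pack(tag(word).wrapping_add(1), offset(word));
    assert_eq!(2, tag(bumped));
}
```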
- -use core::{ - cell::UnsafeCell, - marker::PhantomData, - num::{NonZeroU32, NonZeroU64}, - ptr::NonNull, - sync::atomic::{AtomicU64, Ordering}, -}; - -/// Unfortunate implementation detail required to use the -/// [`Pool.grow_exact`](struct.Pool.html#method.grow_exact) method -pub struct Node { - next: Atomic>, - pub(crate) data: UnsafeCell, -} - -impl Node { - fn next(&self) -> &Atomic> { - &self.next - } -} - -pub struct Stack { - head: Atomic>, -} - -impl Stack { - pub const fn new() -> Self { - Self { - head: Atomic::null(), - } - } - - pub fn push(&self, new_head: Ptr>) { - let mut head = self.head.load(Ordering::Relaxed); - - loop { - unsafe { - new_head - .as_raw() - .as_ref() - .next() - .store(head, Ordering::Relaxed); - } - - if let Err(p) = self.head.compare_and_exchange_weak( - head, - Some(new_head), - Ordering::Release, - Ordering::Relaxed, - ) { - head = p; - } else { - return; - } - } - } - - pub fn try_pop(&self) -> Option>> { - loop { - if let Some(mut head) = self.head.load(Ordering::Acquire) { - let next = unsafe { head.as_raw().as_ref().next().load(Ordering::Relaxed) }; - - if self - .head - .compare_and_exchange_weak( - Some(head), - next, - Ordering::Release, - Ordering::Relaxed, - ) - .is_ok() - { - head.incr_tag(); - return Some(head); - } - } else { - // stack observed empty - return None; - } - } - } -} - -#[cfg(target_arch = "x86_64")] -fn anchor(init: Option<*mut T>) -> *mut T { - use core::sync::atomic::AtomicU8; - - use spin::Once; - - static LAZY_ANCHOR: Once = Once::new(); - - let likely_unaligned_address = if let Some(init) = init { - *LAZY_ANCHOR.call_once(|| init as usize) - } else { - LAZY_ANCHOR.get().copied().unwrap_or_else(|| { - // we may hit this branch with Pool of ZSTs where `grow` does not need to be called - static BSS_ANCHOR: AtomicU8 = AtomicU8::new(0); - &BSS_ANCHOR as *const _ as usize - }) - }; - - let alignment_mask = !(core::mem::align_of::() - 1); - let well_aligned_address = likely_unaligned_address & alignment_mask; - well_aligned_address as *mut T -} - -/// On x86_64, anchored pointer. 
This is a (signed) 32-bit offset from `anchor` plus a 32-bit tag -/// On x86, this is a pointer plus a 32-bit tag -pub struct Ptr { - inner: NonZeroU64, - _marker: PhantomData<*mut T>, -} - -impl Clone for Ptr { - fn clone(&self) -> Self { - *self - } -} - -impl Copy for Ptr {} - -fn initial_tag_value() -> NonZeroU32 { - NonZeroU32::new(1).unwrap() -} - -impl Ptr { - #[cfg(target_arch = "x86_64")] - pub fn new(p: *mut T) -> Option { - use core::convert::TryFrom; - - i32::try_from((p as isize).wrapping_sub(anchor::(Some(p)) as isize)) - .ok() - .map(|offset| unsafe { Ptr::from_parts(initial_tag_value(), offset) }) - } - - #[cfg(target_arch = "x86")] - pub fn new(p: *mut T) -> Option { - Some(unsafe { Ptr::from_parts(initial_tag_value(), p as i32) }) - } - - unsafe fn from_parts(tag: NonZeroU32, offset: i32) -> Self { - Self { - inner: NonZeroU64::new_unchecked((tag.get() as u64) << 32 | (offset as u32 as u64)), - _marker: PhantomData, - } - } - - fn from_u64(p: u64) -> Option { - NonZeroU64::new(p).map(|inner| Self { - inner, - _marker: PhantomData, - }) - } - - fn into_u64(&self) -> u64 { - self.inner.get() - } - - fn tag(&self) -> NonZeroU32 { - let tag = (self.inner.get() >> 32) as u32; - debug_assert_ne!(0, tag, "broken non-zero invariant"); - unsafe { NonZeroU32::new_unchecked(tag) } - } - - fn incr_tag(&mut self) { - let maybe_zero_tag = self.tag().get().wrapping_add(1); - let tag = NonZeroU32::new(maybe_zero_tag).unwrap_or(initial_tag_value()); - let offset = self.offset(); - - *self = unsafe { Ptr::from_parts(tag, offset) }; - } - - fn offset(&self) -> i32 { - self.inner.get() as i32 - } - - #[cfg(target_arch = "x86_64")] - fn as_raw(&self) -> NonNull { - unsafe { - NonNull::new_unchecked( - (anchor::(None) as isize).wrapping_add(self.offset() as isize) as *mut T, - ) - } - } - - #[cfg(target_arch = "x86")] - fn as_raw(&self) -> NonNull { - unsafe { NonNull::new_unchecked(self.offset() as *mut T) } - } - - pub fn dangling() -> Self { - // `anchor()` returns a well-aligned pointer so an offset of 0 will also produce a well-aligned pointer - unsafe { Self::from_parts(initial_tag_value(), 0) } - } - - pub unsafe fn as_ref(&self) -> &T { - &*self.as_raw().as_ptr() - } -} - -struct Atomic { - inner: AtomicU64, - _marker: PhantomData<*mut T>, -} - -impl Atomic { - const fn null() -> Self { - Self { - inner: AtomicU64::new(0), - _marker: PhantomData, - } - } - - fn compare_and_exchange_weak( - &self, - current: Option>, - new: Option>, - succ: Ordering, - fail: Ordering, - ) -> Result<(), Option>> { - self.inner - .compare_exchange_weak( - current.map(|p| p.into_u64()).unwrap_or(0), - new.map(|p| p.into_u64()).unwrap_or(0), - succ, - fail, - ) - .map(drop) - .map_err(Ptr::from_u64) - } - - fn load(&self, ord: Ordering) -> Option> { - NonZeroU64::new(self.inner.load(ord)).map(|inner| Ptr { - inner, - _marker: PhantomData, - }) - } - - fn store(&self, val: Option>, ord: Ordering) { - self.inner - .store(val.map(|p| p.into_u64()).unwrap_or(0), ord) - } -} diff --git a/src/pool/llsc.rs b/src/pool/llsc.rs deleted file mode 100644 index 33f65557..00000000 --- a/src/pool/llsc.rs +++ /dev/null @@ -1,80 +0,0 @@ -//! 
Stack based on LL/SC atomics - -pub use core::ptr::NonNull as Ptr; -use core::{cell::UnsafeCell, ptr}; - -#[cfg(cas_atomic_polyfill)] -use atomic_polyfill::{AtomicPtr, Ordering}; - -#[cfg(not(cas_atomic_polyfill))] -use core::sync::atomic::{AtomicPtr, Ordering}; - -/// Unfortunate implementation detail required to use the -/// [`Pool.grow_exact`](struct.Pool.html#method.grow_exact) method -pub struct Node { - next: AtomicPtr>, - pub(crate) data: UnsafeCell, -} - -impl Node { - fn next(&self) -> &AtomicPtr> { - &self.next - } -} - -pub struct Stack { - head: AtomicPtr>, -} - -impl Stack { - pub const fn new() -> Self { - Self { - head: AtomicPtr::new(ptr::null_mut()), - } - } - - pub fn push(&self, new_head: Ptr>) { - // NOTE `Ordering`s come from crossbeam's (v0.6.0) `TreiberStack` - - let mut head = self.head.load(Ordering::Relaxed); - loop { - unsafe { new_head.as_ref().next().store(head, Ordering::Relaxed) } - - match self.head.compare_exchange_weak( - head, - new_head.as_ptr(), - Ordering::Release, // success - Ordering::Relaxed, // failure - ) { - Ok(_) => return, - // interrupt occurred or other core made a successful STREX op on the head - Err(p) => head = p, - } - } - } - - pub fn try_pop(&self) -> Option>> { - // NOTE `Ordering`s come from crossbeam's (v0.6.0) `TreiberStack` - - loop { - let head = self.head.load(Ordering::Acquire); - if let Some(nn_head) = Ptr::new(head) { - let next = unsafe { nn_head.as_ref().next().load(Ordering::Relaxed) }; - - match self.head.compare_exchange_weak( - head, - next, - Ordering::Release, // success - Ordering::Relaxed, // failure - ) { - Ok(_) => break Some(nn_head), - // interrupt occurred or other core made a successful STREX op on the head - Err(_) => continue, - } - } else { - // stack is observed as empty - break None; - } - } - } -} diff --git a/src/pool/mod.rs b/src/pool/mod.rs deleted file mode 100644 index 31701e46..00000000 --- a/src/pool/mod.rs +++ /dev/null @@ -1,693 +0,0 @@ -//! A heap-less, interrupt-safe, lock-free memory pool (\*) -//! -//! NOTE: This module is not available on targets that do *not* support CAS operations and are not -//! emulated by the [`atomic_polyfill`](https://crates.io/crates/atomic-polyfill) crate (e.g., -//! MSP430). -//! -//! (\*) Currently, the implementation is only lock-free *and* `Sync` on ARMv6, ARMv7-{A,R,M} & ARMv8-M -//! devices -//! -//! # Examples -//! -//! The most common way of using this pool is as a global singleton; the singleton mode gives you -//! automatic deallocation of memory blocks on `drop`. -//! -//! ``` ignore -//! #![no_main] -//! #![no_std] -//! -//! use cortex_m_rt::{entry, exception}; -//! use heapless::{ -//! pool, -//! pool::singleton::{Box, Pool}, -//! }; -//! -//! // instantiate a memory pool of `[u8; 128]` blocks as a global singleton -//! pool!( -//! // attributes can be used here -//! // #[link_section = ".ccram.A"] -//! A: [u8; 128] -//! ); -//! -//! #[entry] -//! fn main() -> ! { -//! static mut MEMORY: [u8; 1024] = [0; 1024]; -//! -//! // increase the capacity of the pool by ~8 blocks -//! A::grow(MEMORY); -//! -//! // claim a block of memory -//! // note that the type is `Box`, and not `Box<[u8; 128]>` -//! // `A` is the "name" of the pool -//! let x: Box = A::alloc().unwrap(); -//! loop { -//! // .. do stuff with `x` .. -//! } -//! } -//! -//! #[exception] -//! fn SysTick() { -//! // claim a block of memory -//! let y = A::alloc().unwrap(); -//! -//! // .. do stuff with `y` .. -//! -//! // return the memory block to the pool -//! drop(y); -//! } -//! ``` -//! 
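The example above uses the old `pool!` / `grow` / `init` workflow that this diff removes. Below is a hedged sketch of the equivalent flow with the new `box_pool!` API added in `src/pool/boxed.rs`; the pool name `P` and the block type are arbitrary choices for illustration.

```rust
// New API introduced by this diff: blocks are handed to the pool one at a time
// and `alloc` takes the initial value directly (no `init` step, no type states).
use heapless::{box_pool, pool::boxed::{Box, BoxBlock}};

box_pool!(P: [u8; 128]);

fn example() {
    // give one statically allocated block to the pool
    let block: &'static mut BoxBlock<[u8; 128]> = unsafe {
        static mut B: BoxBlock<[u8; 128]> = BoxBlock::new();
        &mut B
    };
    P.manage(block);

    // claim and initialize a block in one step
    let x: Box<P> = P.alloc([0; 128]).unwrap();

    // dropping the box returns the block to the pool
    drop(x);
}
```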
-//! # Portability -//! -//! This pool internally uses a Treiber stack which is known to be susceptible to the ABA problem. -//! The only counter measure against the ABA problem that this implementation currently takes is -//! relying on LL/SC (Link-local / Store-conditional) instructions being used to implement CAS loops -//! on the target architecture (see section on ['Soundness'](#soundness) for more information). For -//! this reason, `Pool` only implements `Sync` when compiling for some ARM cores. -//! -//! This module requires CAS atomic instructions which are not available on all architectures (e.g. -//! ARMv6-M (`thumbv6m-none-eabi`) and MSP430 (`msp430-none-elf`)). These atomics can be emulated -//! however with [`atomic_polyfill`](https://crates.io/crates/atomic-polyfill), which is enabled -//! with the `cas` feature and is enabled by default for `thumbv6m-none-eabi` and `riscv32` targets. -//! MSP430 is currently not supported by -//! [`atomic_polyfill`](https://crates.io/crates/atomic-polyfill). -//! -//! # Soundness -//! -//! This pool uses a Treiber stack to keep a list of free memory blocks (nodes). Each of these -//! nodes has a pointer to the next node. To claim a memory block we simply pop a node from the -//! top of the stack and use it as a memory block. The pop operation consists of swapping the -//! current head (top) node with the node below it. The Rust code for the `pop` operation is shown -//! below: -//! -//! ``` ignore -//! fn pop(&self) -> Option>> { -//! let fetch_order = ..; -//! let set_order = ..; -//! -//! // `self.head` has type `AtomicPtr>` -//! // where `struct Node { next: AtomicPtr>, data: UnsafeCell }` -//! let mut head = self.head.load(fetch_order); -//! loop { -//! if let Some(nn_head) = NonNull::new(head) { -//! let next = unsafe { (*head).next.load(Ordering::Relaxed) }; -//! -//! // <~ preempted -//! -//! match self -//! .head -//! .compare_exchange_weak(head, next, set_order, fetch_order) -//! { -//! Ok(_) => break Some(nn_head), -//! // head was changed by some interrupt handler / thread -//! Err(new_head) => head = new_head, -//! } -//! } else { -//! // stack is observed as empty -//! break None; -//! } -//! } -//! } -//! ``` -//! -//! In general, the `pop` operation is susceptible to the ABA problem. If this operation gets -//! preempted by some interrupt handler somewhere between the `head.load` and the -//! `compare_and_exchange_weak`, and that handler modifies the stack in such a way that the head -//! (top) of the stack remains unchanged then resuming the `pop` operation will corrupt the stack. -//! -//! An example: imagine we are doing on `pop` on stack that contains these nodes: `A -> B -> C`, -//! `A` is the head (top), `B` is next to `A` and `C` is next to `B`. The `pop` operation will do a -//! `CAS(&self.head, A, B)` operation to atomically change the head to `B` iff it currently is `A`. -//! Now, let's say a handler preempts the `pop` operation before the `CAS` operation starts and it -//! `pop`s the stack twice and then `push`es back the `A` node; now the state of the stack is `A -> -//! C`. When the original `pop` operation is resumed it will succeed in doing the `CAS` operation -//! setting `B` as the head of the stack. However, `B` was used by the handler as a memory block and -//! no longer is a valid free node. As a result the stack, and thus the allocator, is in a invalid -//! state. -//! -//! However, not all is lost because ARM devices use LL/SC (Link-local / Store-conditional) -//! operations to implement CAS loops. 
Let's look at the actual disassembly of `pop` for the ARM -//! Cortex-M. -//! -//! ``` text -//! 08000130 <>::pop>: -//! 8000130: 6802 ldr r2, [r0, #0] -//! 8000132: e00c b.n 800014e <>::pop+0x1e> -//! 8000134: 4611 mov r1, r2 -//! 8000136: f8d2 c000 ldr.w ip, [r2] -//! 800013a: e850 2f00 ldrex r2, [r0] -//! 800013e: 428a cmp r2, r1 -//! 8000140: d103 bne.n 800014a <>::pop+0x1a> -//! 8000142: e840 c300 strex r3, ip, [r0] -//! 8000146: b913 cbnz r3, 800014e <>::pop+0x1e> -//! 8000148: e004 b.n 8000154 <>::pop+0x24> -//! 800014a: f3bf 8f2f clrex -//! 800014e: 2a00 cmp r2, #0 -//! 8000150: d1f0 bne.n 8000134 <>::pop+0x4> -//! 8000152: 2100 movs r1, #0 -//! 8000154: 4608 mov r0, r1 -//! 8000156: 4770 bx lr -//! ``` -//! -//! LDREX ("load exclusive") is the LL instruction, and STREX ("store exclusive") is the SC -//! instruction (see [1](#references)). On the Cortex-M, STREX will always fail if the processor -//! takes an exception between it and its corresponding LDREX operation (see [2](#references)). If -//! STREX fails then the CAS loop is retried (see instruction @ `0x8000146`). On single core -//! systems, preemption is required to run into the ABA problem and on Cortex-M devices preemption -//! always involves taking an exception. Thus the underlying LL/SC operations prevent the ABA -//! problem on Cortex-M. -//! -//! In the case of multi-core systems if any other core successfully does a STREX op on the head -//! while the current core is somewhere between LDREX and STREX then the current core will fail its -//! STREX operation. -//! -//! # x86_64 support / limitations -//! -//! *NOTE* `Pool` is only `Sync` on `x86_64` and `x86` (`i686`) if the Cargo feature "x86-sync-pool" -//! is enabled -//! -//! x86_64 support is a gamble. Yes, a gamble. Do you feel lucky enough to use `Pool` on x86_64? -//! -//! As it's not possible to implement *ideal* LL/SC semantics (\*) on x86_64 the architecture is -//! susceptible to the ABA problem described above. To *reduce the chances* of ABA occurring in -//! practice we use version tags (keyword: IBM ABA-prevention tags). Again, this approach does -//! *not* fix / prevent / avoid the ABA problem; it only reduces the chance of it occurring in -//! practice but the chances of it occurring are not reduced to zero. -//! -//! How we have implemented version tags: instead of using an `AtomicPtr` to link the stack `Node`s -//! we use an `AtomicUsize` where the 64-bit `usize` is always comprised of a monotonically -//! increasing 32-bit tag (higher bits) and a 32-bit signed address offset. The address of a node is -//! computed by adding the 32-bit offset to an "anchor" address (the address of a static variable -//! that lives somewhere in the `.bss` linker section). The tag is increased every time a node is -//! popped (removed) from the stack. -//! -//! To see how version tags can prevent ABA consider the example from the previous section. Let's -//! start with a stack in this state: `(~A, 0) -> (~B, 1) -> (~C, 2)`, where `~A` represents the -//! address of node A as a 32-bit offset from the "anchor" and the second tuple element (e.g. `0`) -//! indicates the version of the node. For simplicity, assume a single core system: thread T1 is -//! performing `pop` and before `CAS(&self.head, (~A, 0), (~B, 1))` is executed a context switch -//! occurs and the core resumes T2. T2 pops the stack twice and pushes A back into the stack; -//! because the `pop` operation increases the version the stack ends in the following state: `(~A, -//! 1) -> (~C, 2)`. 
Now if T1 is resumed the CAS operation will fail because `self.head` is `(~A, -//! 1)` and not `(~A, 0)`. -//! -//! When can version tags fail to prevent ABA? Using the previous example: if T2 performs a `push` -//! followed by a `pop` `(1 << 32) - 1` times before doing its original `pop` - `pop` - `push` -//! operation then ABA will occur because the version tag of node `A` will wraparound to its -//! original value of `0` and the CAS operation in T1 will succeed and corrupt the stack. -//! -//! It does seem unlikely that (1) a thread will perform the above operation and (2) that the above -//! operation will complete within one time slice, assuming time sliced threads. If you have thread -//! priorities then the above operation could occur during the lifetime of many high priorities -//! threads if T1 is running at low priority. -//! -//! Other implementations of version tags use more than 32 bits in their tags (e.g. "Scalable -//! Lock-Free Dynamic Memory Allocation" uses 42-bit tags in its super blocks). In theory, one could -//! use double-word CAS on x86_64 to pack a 64-bit tag and a 64-bit pointer in a double-word but -//! this CAS operation is not exposed in the standard library (and I think it's not available on -//! older x86_64 processors?) -//! -//! (\*) Apparently one can emulate proper LL/SC semantics on x86_64 using hazard pointers (?) -- -//! the technique appears to be documented in "ABA Prevention Using Single-Word Instructions", which -//! is not public AFAICT -- but hazard pointers require Thread Local Storage (TLS), which is a -//! non-starter for a `no_std` library like `heapless`. -//! -//! ## x86_64 Limitations -//! -//! *NOTE* this limitation does not apply to `x86` (32-bit address space). If you run into this -//! issue, on an x86_64 processor try running your code compiled for `x86`, e.g. `cargo run --target -//! i686-unknown-linux-musl` -//! -//! Because stack nodes must be located within +- 2 GB of the hidden `ANCHOR` variable, which -//! lives in the `.bss` section, `Pool` may not be able to manage static references created using -//! `Box::leak` -- these heap allocated chunks of memory may live in a very different address space. -//! When the `Pool` is unable to manage a node because of its address it will simply discard it: -//! `Pool::grow*` methods return the number of new memory blocks added to the pool; if these methods -//! return `0` it means the `Pool` is unable to manage the memory given to them. -//! -//! # References -//! -//! 1. [Cortex-M3 Devices Generic User Guide (DUI 0552A)][0], Section 2.2.7 "Synchronization -//! primitives" -//! -//! [0]: http://infocenter.arm.com/help/topic/com.arm.doc.dui0552a/DUI0552A_cortex_m3_dgug.pdf -//! -//! 2. [ARMv7-M Architecture Reference Manual (DDI 0403E.b)][1], Section A3.4 "Synchronization and -//! semaphores" -//! -//! [1]: https://static.docs.arm.com/ddi0403/eb/DDI0403E_B_armv7m_arm.pdf -//! -//! 3. "Scalable Lock-Free Dynamic Memory Allocation" Michael, Maged M. -//! -//! 4. "Hazard pointers: Safe memory reclamation for lock-free objects." Michael, Maged M. 
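The wraparound argument above can be made concrete with a small sketch; the `bump` helper mirrors the deleted `incr_tag`, which skips zero to preserve the non-zero invariant, and is illustrative only.

```rust
// Illustrative only: a 32-bit tag that skips 0, as in the deleted `incr_tag`.
fn bump(tag: u32) -> u32 {
    match tag.wrapping_add(1) {
        0 => 1, // wraparound: fall back to the initial tag value
        t => t,
    }
}

fn main() {
    let initial = 1u32;

    // normal case: every successful `pop` gets a fresh tag
    assert_eq!(2, bump(initial));

    // failure mode described above: after roughly 2^32 bumps the tag repeats,
    // so a stale CAS comparing against the old (tag, offset) pair can succeed
    assert_eq!(initial, bump(u32::MAX));
}
```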
- -use core::{any::TypeId, mem}; -use core::{ - cmp, fmt, - hash::{Hash, Hasher}, - marker::PhantomData, - mem::MaybeUninit, - ops::{Deref, DerefMut}, - ptr::{self, NonNull}, -}; - -pub use stack::Node; -use stack::{Ptr, Stack}; - -pub mod singleton; -#[cfg_attr(any(target_arch = "x86_64", target_arch = "x86"), path = "cas.rs")] -#[cfg_attr( - not(any(target_arch = "x86_64", target_arch = "x86")), - path = "llsc.rs" -)] -mod stack; - -/// A lock-free memory pool -pub struct Pool { - stack: Stack, - - // Current implementation is unsound on architectures that don't have LL/SC semantics so this - // struct is not `Sync` on those platforms - _not_send_or_sync: PhantomData<*const ()>, -} - -// NOTE(any(test)) makes testing easier (no need to enable Cargo features for testing) -#[cfg(any( - armv6m, - armv7a, - armv7r, - armv7m, - armv8m_main, - all( - any(target_arch = "x86_64", target_arch = "x86"), - feature = "x86-sync-pool" - ), - test -))] -unsafe impl Sync for Pool {} - -unsafe impl Send for Pool {} - -impl Pool { - /// Creates a new empty pool - pub const fn new() -> Self { - Pool { - stack: Stack::new(), - - _not_send_or_sync: PhantomData, - } - } - - /// Claims a memory block from the pool - /// - /// Returns `None` when the pool is observed as exhausted - /// - /// *NOTE:* This method does *not* have bounded execution time because it contains a CAS loop - pub fn alloc(&self) -> Option> { - if mem::size_of::() == 0 { - // NOTE because we return a dangling pointer to a NODE, which has non-zero size - // even when T is a ZST, in this case we need to make sure we - // - don't do pointer arithmetic on this pointer - // - dereference that offset-ed pointer as a ZST - // because miri doesn't like that - return Some(Box { - node: Ptr::dangling(), - _state: PhantomData, - }); - } - - if let Some(node) = self.stack.try_pop() { - Some(Box { - node, - _state: PhantomData, - }) - } else { - None - } - } - - /// Returns a memory block to the pool - /// - /// *NOTE*: `T`'s destructor (if any) will run on `value` iff `S = Init` - /// - /// *NOTE:* This method does *not* have bounded execution time because it contains a CAS loop - pub fn free(&self, value: Box) - where - S: 'static, - { - if TypeId::of::() == TypeId::of::() { - let p = if mem::size_of::() == 0 { - // any pointer will do to invoke the destructor of a ZST - NonNull::dangling().as_ptr() - } else { - unsafe { value.node.as_ref().data.get() } - }; - unsafe { - ptr::drop_in_place(p); - } - } - - // no operation - if mem::size_of::() == 0 { - return; - } - - self.stack.push(value.node) - } - - /// Increases the capacity of the pool - /// - /// This method might *not* fully utilize the given memory block due to alignment requirements. - /// - /// This method returns the number of *new* blocks that can be allocated. 
- pub fn grow(&self, memory: &'static mut [u8]) -> usize { - if mem::size_of::() == 0 { - // ZST use no memory so a pool of ZST always has maximum capacity - return usize::max_value(); - } - - let sz = mem::size_of::>(); - let mut p = memory.as_mut_ptr(); - let mut len = memory.len(); - - let align = mem::align_of::>(); - let rem = (p as usize) % align; - if rem != 0 { - let offset = align - rem; - - if offset >= len { - // slice is too small - return 0; - } - - p = unsafe { p.add(offset) }; - len -= offset; - } - - let mut n = 0; - while len >= sz { - match () { - #[cfg(any(target_arch = "x86_64", target_arch = "x86"))] - () => { - if let Some(p) = Ptr::new(p as *mut _) { - self.stack.push(p); - n += 1; - } - } - - #[cfg(not(any(target_arch = "x86_64", target_arch = "x86")))] - () => { - self.stack.push(unsafe { Ptr::new_unchecked(p as *mut _) }); - n += 1; - } - } - - p = unsafe { p.add(sz) }; - len -= sz; - } - - n - } - - /// Increases the capacity of the pool - /// - /// Unlike [`Pool.grow`](struct.Pool.html#method.grow) this method fully utilizes the given - /// memory block - pub fn grow_exact(&self, memory: &'static mut MaybeUninit) -> usize - where - A: AsMut<[Node]>, - { - if mem::size_of::() == 0 { - return usize::max_value(); - } - - let nodes = unsafe { (*memory.as_mut_ptr()).as_mut() }; - let cap = nodes.len(); - for p in nodes { - match () { - #[cfg(any(target_arch = "x86_64", target_arch = "x86"))] - () => { - if let Some(p) = Ptr::new(p) { - self.stack.push(p); - } - } - - #[cfg(not(any(target_arch = "x86_64", target_arch = "x86")))] - () => self.stack.push(core::ptr::NonNull::from(p)), - } - } - cap - } -} - -/// A memory block -pub struct Box { - _state: PhantomData, - node: Ptr>, -} - -impl Box { - /// Initializes this memory block - pub fn init(self, val: T) -> Box { - if mem::size_of::() == 0 { - // no memory operation needed for ZST - // BUT we want to avoid calling `val`s destructor - mem::forget(val) - } else { - unsafe { - ptr::write(self.node.as_ref().data.get(), val); - } - } - - Box { - node: self.node, - _state: PhantomData, - } - } -} - -/// Uninitialized type state -pub enum Uninit {} - -/// Initialized type state -pub enum Init {} - -unsafe impl Send for Box where T: Send {} - -unsafe impl Sync for Box where T: Sync {} - -unsafe impl stable_deref_trait::StableDeref for Box {} - -impl AsRef<[T]> for Box -where - A: AsRef<[T]>, -{ - fn as_ref(&self) -> &[T] { - self.deref().as_ref() - } -} - -impl AsMut<[T]> for Box -where - A: AsMut<[T]>, -{ - fn as_mut(&mut self) -> &mut [T] { - self.deref_mut().as_mut() - } -} - -impl Deref for Box { - type Target = T; - - fn deref(&self) -> &T { - if mem::size_of::() == 0 { - // any pointer will do for ZST - unsafe { &*NonNull::dangling().as_ptr() } - } else { - unsafe { &*self.node.as_ref().data.get() } - } - } -} - -impl DerefMut for Box { - fn deref_mut(&mut self) -> &mut T { - if mem::size_of::() == 0 { - // any pointer will do for ZST - unsafe { &mut *NonNull::dangling().as_ptr() } - } else { - unsafe { &mut *self.node.as_ref().data.get() } - } - } -} - -impl fmt::Debug for Box -where - T: fmt::Debug, -{ - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - ::fmt(self, f) - } -} - -impl fmt::Display for Box -where - T: fmt::Display, -{ - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - ::fmt(self, f) - } -} - -impl PartialEq for Box -where - T: PartialEq, -{ - fn eq(&self, rhs: &Box) -> bool { - ::eq(self, rhs) - } -} - -impl Eq for Box where T: Eq {} - -impl PartialOrd for Box -where - T: 
PartialOrd, -{ - fn partial_cmp(&self, rhs: &Box) -> Option { - ::partial_cmp(self, rhs) - } -} - -impl Ord for Box -where - T: Ord, -{ - fn cmp(&self, rhs: &Box) -> cmp::Ordering { - ::cmp(self, rhs) - } -} - -impl Hash for Box -where - T: Hash, -{ - fn hash(&self, state: &mut H) - where - H: Hasher, - { - ::hash(self, state) - } -} - -#[cfg(test)] -mod tests { - use core::{ - mem::{self, MaybeUninit}, - sync::atomic::{AtomicUsize, Ordering}, - }; - - use super::{Node, Pool}; - - #[test] - fn grow() { - static mut MEMORY: [u8; 1024] = [0; 1024]; - - static POOL: Pool<[u8; 128]> = Pool::new(); - - unsafe { - POOL.grow(&mut MEMORY); - } - - for _ in 0..7 { - assert!(POOL.alloc().is_some()); - } - } - - #[test] - fn grow_exact() { - const SZ: usize = 8; - static mut MEMORY: MaybeUninit<[Node<[u8; 128]>; SZ]> = MaybeUninit::uninit(); - - static POOL: Pool<[u8; 128]> = Pool::new(); - - unsafe { - POOL.grow_exact(&mut MEMORY); - } - - for _ in 0..SZ { - assert!(POOL.alloc().is_some()); - } - assert!(POOL.alloc().is_none()); - } - - #[test] - fn sanity() { - const SZ: usize = 2 * mem::size_of::>() - 1; - static mut MEMORY: [u8; SZ] = [0; SZ]; - - static POOL: Pool = Pool::new(); - - // empty pool - assert!(POOL.alloc().is_none()); - - POOL.grow(unsafe { &mut MEMORY }); - - let x = POOL.alloc().unwrap().init(0); - assert_eq!(*x, 0); - - // pool exhausted - assert!(POOL.alloc().is_none()); - - POOL.free(x); - - // should be possible to allocate again - assert_eq!(*POOL.alloc().unwrap().init(1), 1); - } - - #[test] - fn destructors() { - static COUNT: AtomicUsize = AtomicUsize::new(0); - - struct X; - - impl X { - fn new() -> X { - COUNT.fetch_add(1, Ordering::Relaxed); - X - } - } - - impl Drop for X { - fn drop(&mut self) { - COUNT.fetch_sub(1, Ordering::Relaxed); - } - } - - static mut MEMORY: [u8; 31] = [0; 31]; - - static POOL: Pool = Pool::new(); - - POOL.grow(unsafe { &mut MEMORY }); - - let x = POOL.alloc().unwrap().init(X::new()); - let y = POOL.alloc().unwrap().init(X::new()); - let z = POOL.alloc().unwrap().init(X::new()); - - assert_eq!(COUNT.load(Ordering::Relaxed), 3); - - // this leaks memory - drop(x); - - assert_eq!(COUNT.load(Ordering::Relaxed), 3); - - // this leaks memory - mem::forget(y); - - assert_eq!(COUNT.load(Ordering::Relaxed), 3); - - // this runs `X` destructor - POOL.free(z); - - assert_eq!(COUNT.load(Ordering::Relaxed), 2); - } -} diff --git a/src/pool/object.rs b/src/pool/object.rs new file mode 100644 index 00000000..c1fd518e --- /dev/null +++ b/src/pool/object.rs @@ -0,0 +1,393 @@ +//! Object pool API +//! +//! # Example usage +//! +//! ``` +//! use heapless::{object_pool, pool::object::{Object, ObjectBlock}}; +//! +//! object_pool!(P: [u8; 128]); +//! +//! // cannot request objects without first giving object blocks to the pool +//! assert!(P.request().is_none()); +//! +//! // (some `no_std` runtimes have safe APIs to create `&'static mut` references) +//! let block: &'static mut ObjectBlock<[u8; 128]> = unsafe { +//! // unlike the memory pool APIs, an initial value must be specified here +//! static mut B: ObjectBlock<[u8; 128]>= ObjectBlock::new([0; 128]); +//! &mut B +//! }; +//! +//! // give object block to the pool +//! P.manage(block); +//! +//! // it's now possible to request objects +//! // unlike the memory pool APIs, no initial value is required here +//! let mut object = P.request().unwrap(); +//! +//! // mutation is possible +//! object.iter_mut().for_each(|byte| *byte = byte.wrapping_add(1)); +//! +//! 
// the number of live objects is limited to the number of blocks managed by the pool +//! let res = P.request(); +//! assert!(res.is_none()); +//! +//! // `object`'s destructor returns the object to the pool +//! drop(object); +//! +//! // it's possible to request an `Object` again +//! let res = P.request(); +//! +//! assert!(res.is_some()); +//! ``` + +use core::{ + cmp::Ordering, + fmt, + hash::{Hash, Hasher}, + mem::ManuallyDrop, + ops, ptr, +}; + +use super::treiber::{AtomicPtr, NonNullPtr, Stack, StructNode}; + +/// Creates a new `ObjectPool` singleton with the given `$name` that manages the specified +/// `$data_type` +/// +/// For more extensive documentation see the [module level documentation](pool/object/index.html) +#[macro_export] +macro_rules! object_pool { + ($name:ident: $data_type:ty) => { + pub struct $name; + + impl $crate::pool::object::ObjectPool for $name { + type Data = $data_type; + + fn singleton() -> &'static $crate::pool::object::ObjectPoolImpl<$data_type> { + static $name: $crate::pool::object::ObjectPoolImpl<$data_type> = + $crate::pool::object::ObjectPoolImpl::new(); + + &$name + } + } + + impl $name { + /// Inherent method version of `ObjectPool::request` + #[allow(dead_code)] + pub fn request(&self) -> Option<$crate::pool::object::Object<$name>> { + <$name as $crate::pool::object::ObjectPool>::request() + } + + /// Inherent method version of `ObjectPool::manage` + #[allow(dead_code)] + pub fn manage( + &self, + block: &'static mut $crate::pool::object::ObjectBlock<$data_type>, + ) { + <$name as $crate::pool::object::ObjectPool>::manage(block) + } + } + }; +} + +/// A singleton that manages `pool::object::Object`s +pub trait ObjectPool: Sized { + /// The data type of the objects managed by the object pool + type Data: 'static; + + /// `object_pool!` implementation detail + #[doc(hidden)] + fn singleton() -> &'static ObjectPoolImpl; + + /// Request a new object from the pool + fn request() -> Option> { + Self::singleton() + .request() + .map(|node_ptr| Object { node_ptr }) + } + + /// Adds a statically allocate object to the pool + fn manage(block: &'static mut ObjectBlock) { + Self::singleton().manage(block) + } +} + +/// `object_pool!` implementation detail +#[doc(hidden)] +pub struct ObjectPoolImpl { + stack: Stack>, +} + +impl ObjectPoolImpl { + /// `object_pool!` implementation detail + #[doc(hidden)] + pub const fn new() -> Self { + Self { + stack: Stack::new(), + } + } + + fn request(&self) -> Option>> { + self.stack.try_pop() + } + + fn manage(&self, block: &'static mut ObjectBlock) { + let node: &'static mut _ = &mut block.node; + + unsafe { self.stack.push(NonNullPtr::from_static_mut_ref(node)) } + } +} + +// `T needs` to be Send because returning an object from a thread and then +// requesting it from another is effectively a cross-thread 'send' operation +unsafe impl Sync for ObjectPoolImpl where T: Send {} + +/// An object managed by object pool `P` +pub struct Object
<P>
+where + P: ObjectPool, +{ + node_ptr: NonNullPtr>, +} + +impl AsMut<[T]> for Object +where + A: ObjectPool, +{ + fn as_mut(&mut self) -> &mut [T] { + &mut **self + } +} + +impl AsRef<[T]> for Object +where + A: ObjectPool, +{ + fn as_ref(&self) -> &[T] { + &**self + } +} + +impl fmt::Debug for Object +where + A: ObjectPool, + A::Data: fmt::Debug, +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + A::Data::fmt(self, f) + } +} + +impl ops::Deref for Object +where + A: ObjectPool, +{ + type Target = A::Data; + + fn deref(&self) -> &Self::Target { + unsafe { &*ptr::addr_of!((*self.node_ptr.as_ptr()).data) } + } +} + +impl ops::DerefMut for Object +where + A: ObjectPool, +{ + fn deref_mut(&mut self) -> &mut Self::Target { + unsafe { &mut *ptr::addr_of_mut!((*self.node_ptr.as_ptr()).data) } + } +} + +impl fmt::Display for Object +where + A: ObjectPool, + A::Data: fmt::Display, +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + A::Data::fmt(self, f) + } +} + +impl
<P> Drop for Object<P>
+where + P: ObjectPool, +{ + fn drop(&mut self) { + unsafe { P::singleton().stack.push(self.node_ptr) } + } +} + +impl Eq for Object +where + A: ObjectPool, + A::Data: Eq, +{ +} + +impl Hash for Object +where + A: ObjectPool, + A::Data: Hash, +{ + fn hash(&self, state: &mut H) + where + H: Hasher, + { + (**self).hash(state) + } +} + +impl Ord for Object +where + A: ObjectPool, + A::Data: Ord, +{ + fn cmp(&self, other: &Self) -> Ordering { + A::Data::cmp(self, other) + } +} + +impl PartialEq> for Object +where + A: ObjectPool, + B: ObjectPool, + A::Data: PartialEq, +{ + fn eq(&self, other: &Object) -> bool { + A::Data::eq(self, other) + } +} + +impl PartialOrd> for Object +where + A: ObjectPool, + B: ObjectPool, + A::Data: PartialOrd, +{ + fn partial_cmp(&self, other: &Object) -> Option { + A::Data::partial_cmp(self, other) + } +} + +unsafe impl
<P> Send for Object<P>
+where + P: ObjectPool, + P::Data: Send, +{ +} + +unsafe impl
<P> Sync for Object<P>
+where + P: ObjectPool, + P::Data: Sync, +{ +} + +/// An object "block" of data type `T` that has not yet been associated to an `ObjectPool` +pub struct ObjectBlock { + node: StructNode, +} + +impl ObjectBlock { + /// Creates a new object block with the given `initial_value` + pub const fn new(initial_value: T) -> Self { + Self { + node: StructNode { + next: ManuallyDrop::new(AtomicPtr::null()), + data: ManuallyDrop::new(initial_value), + }, + } + } +} + +#[cfg(test)] +mod tests { + use core::sync::atomic::{self, AtomicUsize}; + + use super::*; + + #[test] + fn cannot_request_if_empty() { + object_pool!(P: i32); + + assert_eq!(None, P.request()); + } + + #[test] + fn can_request_if_manages_one_block() { + object_pool!(P: i32); + + let block = unsafe { + static mut B: ObjectBlock = ObjectBlock::new(1); + &mut B + }; + P.manage(block); + + assert_eq!(1, *P.request().unwrap()); + } + + #[test] + fn request_drop_request() { + object_pool!(P: i32); + + let block = unsafe { + static mut B: ObjectBlock = ObjectBlock::new(1); + &mut B + }; + P.manage(block); + + let mut object = P.request().unwrap(); + + *object = 2; + drop(object); + + assert_eq!(2, *P.request().unwrap()); + } + + #[test] + fn destructor_does_not_run_on_drop() { + static COUNT: AtomicUsize = AtomicUsize::new(0); + + pub struct S; + + impl Drop for S { + fn drop(&mut self) { + COUNT.fetch_add(1, atomic::Ordering::Relaxed); + } + } + + object_pool!(P: S); + + let block = unsafe { + static mut B: ObjectBlock = ObjectBlock::new(S); + &mut B + }; + P.manage(block); + + let object = P.request().unwrap(); + + assert_eq!(0, COUNT.load(atomic::Ordering::Relaxed)); + + drop(object); + + assert_eq!(0, COUNT.load(atomic::Ordering::Relaxed)); + } + + #[test] + fn zst_is_well_aligned() { + #[repr(align(4096))] + pub struct Zst4096; + + object_pool!(P: Zst4096); + + let block = unsafe { + static mut B: ObjectBlock = ObjectBlock::new(Zst4096); + &mut B + }; + P.manage(block); + + let object = P.request().unwrap(); + + let raw = &*object as *const Zst4096; + assert_eq!(0, raw as usize % 4096); + } +} diff --git a/src/pool/singleton.rs b/src/pool/singleton.rs deleted file mode 100644 index 0fbb73e4..00000000 --- a/src/pool/singleton.rs +++ /dev/null @@ -1,437 +0,0 @@ -//! `Pool` as a global singleton - -use core::{ - any::TypeId, - cmp, fmt, - hash::{Hash, Hasher}, - marker::PhantomData, - mem::{self, MaybeUninit}, - ops::{Deref, DerefMut}, - ptr::{self, NonNull}, -}; - -use super::{Init, Node, Uninit}; - -pub mod arc; - -/// Instantiates a pool as a global singleton -// NOTE(any(test)) makes testing easier (no need to enable Cargo features for testing) -#[cfg(any( - armv6m, - armv7a, - armv7r, - armv7m, - armv8m_main, - all( - any(target_arch = "x86_64", target_arch = "x86"), - feature = "x86-sync-pool" - ), - test -))] -#[macro_export] -macro_rules! pool { - ($(#[$($attr:tt)*])* $ident:ident: $ty:ty) => { - pub struct $ident; - - impl $crate::pool::singleton::Pool for $ident { - type Data = $ty; - - fn ptr() -> &'static $crate::pool::Pool<$ty> { - $(#[$($attr)*])* - static $ident: $crate::pool::Pool<$ty> = $crate::pool::Pool::new(); - - &$ident - } - } - }; -} - -/// A global singleton memory pool -pub trait Pool { - /// The type of data that can be allocated on this pool - type Data: 'static; - - #[doc(hidden)] - fn ptr() -> &'static super::Pool; - - /// Claims a memory block from the pool - /// - /// Returns `None` when the pool is observed as exhausted - /// - /// *NOTE:* This method does *not* have bounded execution time; i.e. 
it contains a CAS loop - fn alloc() -> Option> - where - Self: Sized, - { - Self::ptr().alloc().map(|inner| Box { - _pool: PhantomData, - inner, - }) - } - - /// Increases the capacity of the pool - /// - /// This method might *not* fully utilize the given memory block due to alignment requirements - /// - /// This method returns the number of *new* blocks that can be allocated. - fn grow(memory: &'static mut [u8]) -> usize { - Self::ptr().grow(memory) - } - - /// Increases the capacity of the pool - /// - /// Unlike [`Pool.grow`](trait.Pool.html#method.grow_exact) this method fully utilizes the given - /// memory block - fn grow_exact(memory: &'static mut MaybeUninit) -> usize - where - A: AsMut<[Node]>, - { - Self::ptr().grow_exact(memory) - } -} - -/// A memory block that belongs to the global memory pool, `POOL` -pub struct Box -where - POOL: Pool, - STATE: 'static, -{ - _pool: PhantomData, - inner: super::Box, -} - -impl
<P>
Box -where - P: Pool, -{ - /// Initializes this memory block - pub fn init(self, val: P::Data) -> Box { - let node = self.inner.node; - - mem::forget(self); - - if mem::size_of::() == 0 { - // no memory operation needed for ZST - // BUT we want to avoid calling `val`s destructor - mem::forget(val) - } else { - unsafe { - ptr::write(node.as_ref().data.get(), val); - } - } - - Box { - inner: super::Box { - node, - _state: PhantomData, - }, - _pool: PhantomData, - } - } -} - -impl
<P>
Box -where - P: Pool, - P::Data: AsRef<[u8]>, -{ - #[deprecated( - since = "0.7.3", - note = "This can access uninitialized memory, use `init(..)` instead (https://github.com/japaric/heapless/issues/212)" - )] - /// (DO NOT USE, SEE DEPRECATION) Freezes the contents of this memory block - /// - /// See [rust-lang/rust#58363](https://github.com/rust-lang/rust/pull/58363) for details. - pub fn freeze(self) -> Box { - let node = self.inner.node; - - mem::forget(self); - - // it seems we can get away with not calling `ptr::freeze` here and not run into UB - // because we are dealing with static memory and using fences - // let p: *const u8 = (*node.as_ref().data.get()).as_slice().as_ptr(); - // ptr::freeze(p as *mut u8); - - Box { - inner: super::Box { - node, - _state: PhantomData, - }, - _pool: PhantomData, - } - } -} - -impl
<P>
Box -where - P: Pool, -{ - /// Forgets the contents of this memory block without running its destructor. - /// - /// Note that this this does not return the memory block to the pool. The - /// block can be reused, or returned to the pool by dropping it. - pub fn forget(self) -> Box { - let node = self.inner.node; - - mem::forget(self); - if mem::size_of::() == 0 { - // no need to do a pointer dereference in this case - } else { - mem::forget(unsafe { ptr::read(node.as_ref().data.get()) }); - } - - Box { - inner: super::Box { - node, - _state: PhantomData, - }, - _pool: PhantomData, - } - } -} - -impl
<P> Deref for Box<P>
-where - P: Pool, -{ - type Target = P::Data; - - fn deref(&self) -> &P::Data { - self.inner.deref() - } -} - -impl
<P> DerefMut for Box<P>
-where - P: Pool, -{ - fn deref_mut(&mut self) -> &mut P::Data { - self.inner.deref_mut() - } -} - -unsafe impl stable_deref_trait::StableDeref for Box
<P>
{} - -impl
<P> fmt::Debug for Box<P>
-where - P: Pool, - P::Data: fmt::Debug, -{ - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - ::fmt(self, f) - } -} - -impl
<P> fmt::Display for Box<P>
-where - P: Pool, - P::Data: fmt::Display, -{ - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - ::fmt(self, f) - } -} - -impl Drop for Box -where - P: Pool, - S: 'static, -{ - fn drop(&mut self) { - if TypeId::of::() == TypeId::of::() { - let p = if mem::size_of::() == 0 { - // any pointer will do to invoke the destructor of a ZST - NonNull::dangling().as_ptr() - } else { - unsafe { self.inner.node.as_ref().data.get() } - }; - unsafe { - ptr::drop_in_place(p); - } - } - - if mem::size_of::() != 0 { - P::ptr().stack.push(self.inner.node) - } - } -} - -unsafe impl Send for Box -where - P: Pool, - P::Data: Send, -{ -} - -unsafe impl Sync for Box -where - P: Pool, - P::Data: Sync, -{ -} - -impl AsRef<[T]> for Box
<P>
-where - P: Pool, - P::Data: AsRef<[T]>, -{ - fn as_ref(&self) -> &[T] { - self.deref().as_ref() - } -} - -impl AsMut<[T]> for Box
<P>
-where - P: Pool, - P::Data: AsMut<[T]>, -{ - fn as_mut(&mut self) -> &mut [T] { - self.deref_mut().as_mut() - } -} - -impl
<P> PartialEq for Box<P>
-where - P: Pool, - P::Data: PartialEq, -{ - fn eq(&self, rhs: &Box
<P>
) -> bool { - ::eq(self, rhs) - } -} - -impl
<P> Eq for Box<P>
-where - P: Pool, - P::Data: Eq, -{ -} - -impl
<P> PartialOrd for Box<P>
-where - P: Pool, - P::Data: PartialOrd, -{ - fn partial_cmp(&self, rhs: &Box
<P>
) -> Option { - ::partial_cmp(self, rhs) - } -} - -impl
<P> Ord for Box<P>
-where - P: Pool, - P::Data: Ord, -{ - fn cmp(&self, rhs: &Box
<P>
) -> cmp::Ordering { - ::cmp(self, rhs) - } -} - -impl
<P> Hash for Box<P>
-where - P: Pool, - P::Data: Hash, -{ - fn hash(&self, state: &mut H) - where - H: Hasher, - { - ::hash(self, state) - } -} - -#[cfg(test)] -mod tests { - use core::{ - mem, - sync::atomic::{AtomicUsize, Ordering}, - }; - - use super::{super::Node, Pool}; - - #[test] - fn sanity() { - const SZ: usize = 2 * mem::size_of::>() - 1; - static mut MEMORY: [u8; SZ] = [0; SZ]; - - pool!(A: u8); - - // empty pool - assert!(A::alloc().is_none()); - - A::grow(unsafe { &mut MEMORY }); - - let x = A::alloc().unwrap().init(0); - assert_eq!(*x, 0); - - // pool exhausted - assert!(A::alloc().is_none()); - - drop(x); - - // should be possible to allocate again - assert_eq!(*A::alloc().unwrap().init(1), 1); - } - - #[test] - fn boxed_zst_is_well_aligned() { - #[repr(align(2))] - pub struct Zst2; - - pool!(A: Zst2); - - let x = A::alloc().unwrap().init(Zst2); - assert_eq!(0, &*x as *const Zst2 as usize % 2); - - #[repr(align(4096))] - pub struct Zst4096; - - pool!(B: Zst4096); - - let x = B::alloc().unwrap().init(Zst4096); - assert_eq!(0, &*x as *const Zst4096 as usize % 4096); - } - - #[test] - fn destructors() { - static COUNT: AtomicUsize = AtomicUsize::new(0); - - pub struct X; - - impl X { - fn new() -> X { - COUNT.fetch_add(1, Ordering::Relaxed); - X - } - } - - impl Drop for X { - fn drop(&mut self) { - COUNT.fetch_sub(1, Ordering::Relaxed); - } - } - - pool!(A: X); - - let x = A::alloc().unwrap().init(X::new()); - let y = A::alloc().unwrap().init(X::new()); - let z = A::alloc().unwrap().init(X::new()); - - assert_eq!(COUNT.load(Ordering::Relaxed), 3); - - // this runs `X`'s destructor - drop(x); - - assert_eq!(COUNT.load(Ordering::Relaxed), 2); - - // this leaks memory - mem::forget(y); - - assert_eq!(COUNT.load(Ordering::Relaxed), 2); - - // this forgets `X` without leaking memory - z.forget(); - - assert_eq!(COUNT.load(Ordering::Relaxed), 2); - } -} diff --git a/src/pool/singleton/arc.rs b/src/pool/singleton/arc.rs deleted file mode 100644 index a83519d8..00000000 --- a/src/pool/singleton/arc.rs +++ /dev/null @@ -1,392 +0,0 @@ -//! Like [`std::sync::Arc`](https://doc.rust-lang.org/std/sync/struct.Arc.html) but backed by a -//! memory [`Pool`](trait.Pool.html) rather than `#[global_allocator]` -//! -//! Note that the same limitations that apply to ["Box" pool] also apply to the "Arc" pool. -//! -//! ["Box" pool]: ../../index.html -//! -//! # Examples -//! -//! ``` ignore -//! use heapless::{arc_pool, Arc}; -//! -//! pub struct BigStruct { // <- does NOT implement Clone -//! data: [u8; 128], -//! // .. -//! } -//! -//! // declare a memory pool -//! arc_pool!(P: BigStruct); -//! -//! -//! #[cortex_m_rt::entry] -//! fn main() -> ! { -//! static mut MEMORY: [u8; 1024] = [0; 1024]; -//! -//! // give some static memory to the pool -//! P::grow(MEMORY); -//! -//! let x: Arc
<P>
= P::alloc(BigStruct::new()).ok().expect("OOM"); -//! // ^ NOTE: this is the Pool type, not the data type -//! -//! // cloning is cheap; it increases the refcount -//! let y = x.clone(); -//! -//! // same data address -//! assert_eq!(&*x as *const _, &*y as *const _); -//! -//! // auto-deref -//! let data: &[u8] = &x.data; -//! -//! // decrease refcount -//! drop(x); -//! -//! // refcount decreased to 0; memory is returned to the pool -//! drop(y); -//! -//! // .. -//! } -//! ``` -//! -//! The `grow_exact` API is also available on the "Arc pool". It requires using -//! `Node>` as the array element type. Example below: -//! -//! ``` ignore -//! use heapless::pool::{singleton::arc::ArcInner, Node}; -//! -//! pub struct BigStruct { /* .. */ } -//! -//! arc_pool!(P: BigStruct); -//! -//! #[cortex_m_rt::entry] -//! fn main() -> ! { -//! static mut MEMORY: MaybeUninit<[Node>; 2]> = MaybeUninit::uninit(); -//! -//! P::grow_exact(MEMORY); -//! -//! // 2 allocations are guaranteed to work -//! let x = P::alloc(BigStruct::new()).ok().expect("OOM"); -//! let y = P::alloc(BigStruct::new()).ok().expect("OOM"); -//! -//! // .. -//! } -//! ``` - -use core::{ - cmp, fmt, - hash::{Hash, Hasher}, - marker::PhantomData, - ops::Deref, - ptr, - sync::atomic, -}; - -#[cfg(cas_atomic_polyfill)] -use atomic_polyfill::{AtomicUsize, Ordering}; - -#[cfg(not(cas_atomic_polyfill))] -use core::sync::atomic::{AtomicUsize, Ordering}; - -use crate::pool::{self, stack::Ptr, Node}; - -/// Instantiates a pool of Arc pointers as a global singleton -// NOTE(any(test)) makes testing easier (no need to enable Cargo features for testing) -#[cfg(any( - armv6m, - armv7a, - armv7r, - armv7m, - armv8m_main, - all( - any(target_arch = "x86_64", target_arch = "x86"), - feature = "x86-sync-pool" - ), - test -))] -#[macro_export] -macro_rules! arc_pool { - ($(#[$($attr:tt)*])* $ident:ident: $ty:ty) => { - pub struct $ident; - - impl $crate::pool::singleton::arc::Pool for $ident { - type Data = $ty; - - fn ptr() -> &'static $crate::pool::Pool<$crate::pool::singleton::arc::ArcInner<$ty>> { - $(#[$($attr)*])* - static POOL: $crate::pool::Pool<$crate::pool::singleton::arc::ArcInner<$ty>> = - $crate::pool::Pool::new(); - - &POOL - } - } - - impl $ident { - /// Allocates a new `Arc` and writes `data` to it - /// - /// Returns an `Err`or if the backing memory pool is empty - pub fn alloc(data: $ty) -> Result<$crate::Arc, $ty> - where - Self: Sized, - { - $crate::Arc::new(data) - } - - /// Increases the capacity of the pool - /// - /// This method might *not* fully utilize the given memory block due to alignment requirements - /// - /// This method returns the number of *new* blocks that can be allocated. 
- pub fn grow(memory: &'static mut [u8]) -> usize { - ::ptr().grow(memory) - } - - /// Increases the capacity of the pool - /// - /// Unlike `grow`, this method fully utilizes the given memory block - pub fn grow_exact(memory: &'static mut MaybeUninit) -> usize - where - A: AsMut<[$crate::pool::Node<$crate::pool::singleton::arc::ArcInner<$ty>>]>, - { - ::ptr().grow_exact(memory) - } - } - }; -} - -/// Pool of Arc pointers -pub trait Pool { - /// The data behind the Arc pointer - type Data: 'static; - - #[doc(hidden)] - fn ptr() -> &'static pool::Pool>; -} - -// mostly a verbatim copy of liballoc(/src/sync.rs) as of v1.54.0 minus the `Weak` API -// anything that diverges has been marked with `XXX` - -/// `std::sync::Arc` but backed by a memory [`Pool`] rather than `#[global_allocator]` -/// -/// [`Pool`]: trait.Pool.html -/// -/// An example and more details can be found in the [module level documentation](index.html). -// XXX `Pool::Data` is not `?Sized` -- `Unsize` coercions cannot be implemented on stable -pub struct Arc
<P>
-where - P: Pool, -{ - phantom: PhantomData>, - ptr: Ptr>>, - pool: PhantomData
<P>
, -} - -impl
<P> Arc<P>
-where - P: Pool, -{ - /// Constructs a new `Arc` - /// - /// Returns an `Err`or if the backing memory pool is empty - // XXX original API is "infallible" - pub fn new(data: P::Data) -> Result { - if let Some(node) = P::ptr().stack.try_pop() { - unsafe { - ptr::write( - node.as_ref().data.get(), - ArcInner { - strong: AtomicUsize::new(1), - data, - }, - ) - } - - Ok(Self { - phantom: PhantomData, - pool: PhantomData, - ptr: node, - }) - } else { - Err(data) - } - } - - fn inner(&self) -> &ArcInner { - unsafe { &*self.ptr.as_ref().data.get() } - } - - fn from_inner(ptr: Ptr>>) -> Self { - Self { - phantom: PhantomData, - pool: PhantomData, - ptr, - } - } - - unsafe fn get_mut_unchecked(this: &mut Self) -> &mut P::Data { - &mut (*this.ptr.as_ref().data.get()).data - // &mut (*this.ptr.as_ptr()).data - } - - #[inline(never)] - unsafe fn drop_slow(&mut self) { - // run `P::Data`'s destructor - ptr::drop_in_place(Self::get_mut_unchecked(self)); - - // XXX memory pool instead of `#[global_allocator]` - // return memory to pool - P::ptr().stack.push(self.ptr); - } -} - -const MAX_REFCOUNT: usize = (isize::MAX) as usize; - -impl
<P> AsRef<P::Data> for Arc<P>
-where - P: Pool, -{ - fn as_ref(&self) -> &P::Data { - &**self - } -} - -// XXX no `Borrow` implementation due to 'conflicting implementations of trait' error - -impl
<P> Clone for Arc<P>
-where - P: Pool, -{ - fn clone(&self) -> Self { - let old_size = self.inner().strong.fetch_add(1, Ordering::Relaxed); - - if old_size > MAX_REFCOUNT { - // XXX original code calls `intrinsics::abort` which is unstable API - panic!(); - } - - Self::from_inner(self.ptr) - } -} - -impl
<P> fmt::Debug for Arc<P>
-where - P: Pool, - P::Data: fmt::Debug, -{ - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt::Debug::fmt(&**self, f) - } -} - -impl
<P> Deref for Arc<P>
-where - P: Pool, -{ - type Target = P::Data; - - fn deref(&self) -> &P::Data { - &self.inner().data - } -} - -impl
<P> fmt::Display for Arc<P>
-where - P: Pool, - P::Data: fmt::Display, -{ - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt::Display::fmt(&**self, f) - } -} - -// XXX original uses `#[may_dangle]` which is an unstable language feature -impl
<P> Drop for Arc<P>
-where - P: Pool, -{ - fn drop(&mut self) { - if self.inner().strong.fetch_sub(1, Ordering::Release) != 1 { - return; - } - - atomic::fence(Ordering::Acquire); - - unsafe { - self.drop_slow(); - } - } -} - -impl
<P> Eq for Arc<P>
-where - P: Pool, - P::Data: Eq, -{ -} - -impl
<P> Hash for Arc<P>
-where - P: Pool, - P::Data: Hash, -{ - fn hash(&self, state: &mut H) - where - H: Hasher, - { - (**self).hash(state) - } -} - -impl
<P> Ord for Arc<P>
-where - P: Pool, - P::Data: Ord, -{ - fn cmp(&self, other: &Self) -> cmp::Ordering { - (**self).cmp(&**other) - } -} - -impl
<P> PartialEq for Arc<P>
-where - P: Pool, - P::Data: PartialEq, -{ - fn eq(&self, other: &Self) -> bool { - // XXX missing pointer equality specialization, which uses an unstable language feature - (**self).eq(&**other) - } -} - -impl
<P> PartialOrd for Arc<P>
-where - P: Pool, - P::Data: PartialOrd, -{ - fn partial_cmp(&self, other: &Self) -> Option { - (**self).partial_cmp(&**other) - } -} - -unsafe impl
<P> Send for Arc<P>
-where - P: Pool, - P::Data: Sync + Send, -{ -} - -unsafe impl
<P> Sync for Arc<P>
-where - P: Pool, - P::Data: Sync + Send, -{ -} - -impl
<P> Unpin for Arc<P>
where P: Pool {} - -/// Unfortunate implementation detail required to use the `grow_exact` API -pub struct ArcInner { - data: T, - strong: AtomicUsize, - // XXX `Weak` API not implemented - // weak: AtomicUsize, -} diff --git a/src/pool/treiber.rs b/src/pool/treiber.rs new file mode 100644 index 00000000..9182edad --- /dev/null +++ b/src/pool/treiber.rs @@ -0,0 +1,91 @@ +use core::mem::ManuallyDrop; + +#[cfg_attr(target_arch = "x86", path = "treiber/cas.rs")] +#[cfg_attr(arm_llsc, path = "treiber/llsc.rs")] +mod impl_; + +pub use impl_::{AtomicPtr, NonNullPtr}; + +pub struct Stack +where + N: Node, +{ + top: AtomicPtr, +} + +impl Stack +where + N: Node, +{ + pub const fn new() -> Self { + Self { + top: AtomicPtr::null(), + } + } + + /// # Safety + /// - `node` must be a valid pointer + /// - aliasing rules must be enforced by the caller. e.g, the same `node` may not be pushed more than once + pub unsafe fn push(&self, node: NonNullPtr) { + impl_::push(self, node) + } + + pub fn try_pop(&self) -> Option> { + impl_::try_pop(self) + } +} + +pub trait Node: Sized { + type Data; + + fn next(&self) -> &AtomicPtr; + fn next_mut(&mut self) -> &mut AtomicPtr; +} + +pub union UnionNode { + next: ManuallyDrop>>, + pub data: ManuallyDrop, +} + +impl Node for UnionNode { + type Data = T; + + fn next(&self) -> &AtomicPtr { + unsafe { &self.next } + } + + fn next_mut(&mut self) -> &mut AtomicPtr { + unsafe { &mut self.next } + } +} + +pub struct StructNode { + pub next: ManuallyDrop>>, + pub data: ManuallyDrop, +} + +impl Node for StructNode { + type Data = T; + + fn next(&self) -> &AtomicPtr { + &self.next + } + + fn next_mut(&mut self) -> &mut AtomicPtr { + &mut self.next + } +} + +#[cfg(test)] +mod tests { + use core::mem; + + use super::*; + + #[test] + fn node_is_never_zero_sized() { + struct Zst; + + assert_ne!(mem::size_of::>(), 0); + } +} diff --git a/src/pool/treiber/cas.rs b/src/pool/treiber/cas.rs new file mode 100644 index 00000000..62efdf8b --- /dev/null +++ b/src/pool/treiber/cas.rs @@ -0,0 +1,196 @@ +use core::{ + marker::PhantomData, + num::{NonZeroU32, NonZeroU64}, + ptr::NonNull, + sync::atomic::{AtomicU64, Ordering}, +}; + +use super::{Node, Stack}; + +pub struct AtomicPtr +where + N: Node, +{ + inner: AtomicU64, + _marker: PhantomData<*mut N>, +} + +impl AtomicPtr +where + N: Node, +{ + pub const fn null() -> Self { + Self { + inner: AtomicU64::new(0), + _marker: PhantomData, + } + } + + fn compare_and_exchange_weak( + &self, + current: Option>, + new: Option>, + success: Ordering, + failure: Ordering, + ) -> Result<(), Option>> { + self.inner + .compare_exchange_weak( + current + .map(|pointer| pointer.into_u64()) + .unwrap_or_default(), + new.map(|pointer| pointer.into_u64()).unwrap_or_default(), + success, + failure, + ) + .map(drop) + .map_err(NonNullPtr::from_u64) + } + + fn load(&self, order: Ordering) -> Option> { + NonZeroU64::new(self.inner.load(order)).map(|inner| NonNullPtr { + inner, + _marker: PhantomData, + }) + } + + fn store(&self, value: Option>, order: Ordering) { + self.inner.store( + value.map(|pointer| pointer.into_u64()).unwrap_or_default(), + order, + ) + } +} + +pub struct NonNullPtr +where + N: Node, +{ + inner: NonZeroU64, + _marker: PhantomData<*mut N>, +} + +impl Clone for NonNullPtr +where + N: Node, +{ + fn clone(&self) -> Self { + *self + } +} + +impl Copy for NonNullPtr where N: Node {} + +impl NonNullPtr +where + N: Node, +{ + pub fn as_ptr(&self) -> *mut N { + self.inner.get() as *mut N + } + + pub fn from_static_mut_ref(ref_: &'static mut N) -> 
NonNullPtr { + let non_null = NonNull::from(ref_); + Self::from_non_null(non_null) + } + + fn from_non_null(ptr: NonNull) -> Self { + let address = ptr.as_ptr() as u32; + let tag = initial_tag().get(); + + let value = (u64::from(tag) << 32) | u64::from(address); + + Self { + inner: unsafe { NonZeroU64::new_unchecked(value) }, + _marker: PhantomData, + } + } + + fn from_u64(value: u64) -> Option { + NonZeroU64::new(value).map(|inner| Self { + inner, + _marker: PhantomData, + }) + } + + fn non_null(&self) -> NonNull { + unsafe { NonNull::new_unchecked(self.inner.get() as *mut N) } + } + + fn tag(&self) -> NonZeroU32 { + unsafe { NonZeroU32::new_unchecked((self.inner.get() >> 32) as u32) } + } + + fn into_u64(self) -> u64 { + self.inner.get() + } + + fn increase_tag(&mut self) { + let address = self.as_ptr() as u32; + + let new_tag = self + .tag() + .get() + .checked_add(1) + .map(|val| unsafe { NonZeroU32::new_unchecked(val) }) + .unwrap_or_else(initial_tag) + .get(); + + let value = (u64::from(new_tag) << 32) | u64::from(address); + + self.inner = unsafe { NonZeroU64::new_unchecked(value) }; + } +} + +fn initial_tag() -> NonZeroU32 { + unsafe { NonZeroU32::new_unchecked(1) } +} + +pub unsafe fn push(stack: &Stack, new_top: NonNullPtr) +where + N: Node, +{ + let mut top = stack.top.load(Ordering::Relaxed); + + loop { + new_top + .non_null() + .as_ref() + .next() + .store(top, Ordering::Relaxed); + + if let Err(p) = stack.top.compare_and_exchange_weak( + top, + Some(new_top), + Ordering::Release, + Ordering::Relaxed, + ) { + top = p; + } else { + return; + } + } +} + +pub fn try_pop(stack: &Stack) -> Option> +where + N: Node, +{ + loop { + if let Some(mut top) = stack.top.load(Ordering::Acquire) { + let next = unsafe { top.non_null().as_ref().next().load(Ordering::Relaxed) }; + + if stack + .top + .compare_and_exchange_weak(Some(top), next, Ordering::Release, Ordering::Relaxed) + .is_ok() + { + top.increase_tag(); + + return Some(top); + } + } else { + // stack observed as empty + return None; + } + } +} diff --git a/src/pool/treiber/llsc.rs b/src/pool/treiber/llsc.rs new file mode 100644 index 00000000..59e9d8ac --- /dev/null +++ b/src/pool/treiber/llsc.rs @@ -0,0 +1,145 @@ +use core::{ + cell::UnsafeCell, + ptr::{self, NonNull}, +}; + +use super::{Node, Stack}; + +pub struct AtomicPtr +where + N: Node, +{ + inner: UnsafeCell>>, +} + +impl AtomicPtr +where + N: Node, +{ + pub const fn null() -> Self { + Self { + inner: UnsafeCell::new(None), + } + } +} + +pub struct NonNullPtr +where + N: Node, +{ + inner: NonNull, +} + +impl NonNullPtr +where + N: Node, +{ + pub fn as_ptr(&self) -> *mut N { + self.inner.as_ptr().cast() + } + + pub fn from_static_mut_ref(ref_: &'static mut N) -> Self { + Self { + inner: NonNull::from(ref_), + } + } +} + +impl Clone for NonNullPtr +where + N: Node, +{ + fn clone(&self) -> Self { + Self { inner: self.inner } + } +} + +impl Copy for NonNullPtr where N: Node {} + +pub unsafe fn push(stack: &Stack, mut node: NonNullPtr) +where + N: Node, +{ + let top_addr = ptr::addr_of!(stack.top) as *mut usize; + + loop { + let top = arch::load_link(top_addr); + + node.inner + .as_mut() + .next_mut() + .inner + .get() + .write(NonNull::new(top as *mut _)); + + if arch::store_conditional(node.inner.as_ptr() as usize, top_addr).is_ok() { + break; + } + } +} + +pub fn try_pop(stack: &Stack) -> Option> +where + N: Node, +{ + unsafe { + let top_addr = ptr::addr_of!(stack.top) as *mut usize; + + loop { + let top = arch::load_link(top_addr); + + if let Some(top) = NonNull::new(top as 
*mut N) { + let next = &top.as_ref().next(); + + if arch::store_conditional( + next.inner + .get() + .read() + .map(|non_null| non_null.as_ptr() as usize) + .unwrap_or_default(), + top_addr, + ) + .is_ok() + { + break Some(NonNullPtr { inner: top }); + } + } else { + arch::clear_load_link(); + + break None; + } + } + } +} + +#[cfg(arm_llsc)] +mod arch { + use core::arch::asm; + + #[inline(always)] + pub fn clear_load_link() { + unsafe { asm!("clrex", options(nomem, nostack)) } + } + + /// # Safety + /// - `addr` must be a valid pointer + #[inline(always)] + pub unsafe fn load_link(addr: *const usize) -> usize { + let value; + asm!("ldrex {}, [{}]", out(reg) value, in(reg) addr, options(nostack)); + value + } + + /// # Safety + /// - `addr` must be a valid pointer + #[inline(always)] + pub unsafe fn store_conditional(value: usize, addr: *mut usize) -> Result<(), ()> { + let outcome: usize; + asm!("strex {}, {}, [{}]", out(reg) outcome, in(reg) value, in(reg) addr, options(nostack)); + if outcome == 0 { + Ok(()) + } else { + Err(()) + } + } +}
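The new `pool/treiber` module implements the classic Treiber stack twice: once with a 64-bit tagged compare-and-swap (`treiber/cas.rs`, 32-bit x86) and once with LDREX/STREX load-link/store-conditional (`treiber/llsc.rs`, ARM). For readers unfamiliar with the algorithm, the standalone sketch below (not part of the diff; names and types are mine) shows the same push/pop protocol using `std`'s portable `AtomicPtr`. Note that this naive version is exposed to the ABA problem when nodes are recycled, which is exactly what the tagged-pointer and LL/SC backends in the diff are there to prevent.

```rust
use std::ptr;
use std::sync::atomic::{AtomicPtr, Ordering};

struct Node<T> {
    data: T,
    next: *mut Node<T>,
}

struct Stack<T> {
    top: AtomicPtr<Node<T>>,
}

impl<T> Stack<T> {
    const fn new() -> Self {
        Self {
            top: AtomicPtr::new(ptr::null_mut()),
        }
    }

    /// Push: link `node` in front of the current top, then try to swing `top`
    /// to it; retry if another thread won the race.
    unsafe fn push(&self, node: *mut Node<T>) {
        let mut top = self.top.load(Ordering::Relaxed);
        loop {
            (*node).next = top;
            match self
                .top
                .compare_exchange_weak(top, node, Ordering::Release, Ordering::Relaxed)
            {
                Ok(_) => return,
                Err(actual) => top = actual,
            }
        }
    }

    /// Pop: read the current top and try to swing `top` to `top.next`.
    /// A plain pointer CAS like this can succeed spuriously after a node has
    /// been popped, reused and pushed again (the ABA problem).
    unsafe fn try_pop(&self) -> Option<*mut Node<T>> {
        loop {
            let top = self.top.load(Ordering::Acquire);
            if top.is_null() {
                return None;
            }
            let next = (*top).next;
            if self
                .top
                .compare_exchange_weak(top, next, Ordering::Release, Ordering::Relaxed)
                .is_ok()
            {
                return Some(top);
            }
        }
    }
}

fn main() {
    let stack = Stack::new();
    let node = Box::into_raw(Box::new(Node {
        data: 42u32,
        next: ptr::null_mut(),
    }));

    unsafe {
        stack.push(node);
        let popped = stack.try_pop().expect("stack is not empty");
        assert_eq!((*popped).data, 42);
        drop(Box::from_raw(popped)); // reclaim the heap allocation
    }
}
```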
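On 32-bit x86 the CAS backend sidesteps ABA by packing the 32-bit node address and a 32-bit version tag into a single `AtomicU64` and bumping the tag on every successful pop, so a stale copy of the top-of-stack word never compares equal to the current one. A minimal, hypothetical demonstration of that packing scheme (function names are mine, not part of the diff) follows; it mirrors the layout used by `NonNullPtr` in `treiber/cas.rs`.

```rust
/// Pack a 32-bit node address (low half) and a 32-bit version tag (high half)
/// into one 64-bit word.
fn pack(addr: u32, tag: u32) -> u64 {
    (u64::from(tag) << 32) | u64::from(addr)
}

/// Split a packed word back into (address, tag).
fn unpack(value: u64) -> (u32, u32) {
    (value as u32, (value >> 32) as u32)
}

/// Bump the tag, falling back to the initial tag (1) on overflow, as
/// `increase_tag` does in the diff.
fn bump_tag(value: u64) -> u64 {
    let (addr, tag) = unpack(value);
    pack(addr, tag.checked_add(1).unwrap_or(1))
}

fn main() {
    let word = pack(0x2000_0000, 1);
    assert_eq!(unpack(word), (0x2000_0000, 1));

    let bumped = bump_tag(word);
    assert_eq!(unpack(bumped), (0x2000_0000, 2));

    // A stale copy of `word` no longer matches the current top-of-stack word,
    // so a compare-and-swap against it fails and the caller retries.
    assert_ne!(word, bumped);
}
```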