intern/
intern_slice.rs

1//! Interning of slices, potentially with a header.
2//!
3//! See [`crate::intern`] for an explanation of interning modes. Note that slice interning is currently
4//! available only in GC mode (there is no other need).
5//!
6//! [`InternedSlice`] and [`InternedSliceRef`] are essentially [`Interned<(Header, Box<[SliceType]>)>`][crate::Interned]
7//! and [`InternedRef`][crate::InternedRef] with the same types, but more optimized. There is only one
8//! allocation and the pointer is thin.
9
10use std::{
11    ffi::c_void,
12    fmt::{self, Debug},
13    hash::{BuildHasher, Hash, Hasher},
14    marker::PhantomData,
15    mem::ManuallyDrop,
16    ops::Deref,
17    ptr::{self, NonNull},
18    sync::OnceLock,
19};
20
21use dashmap::{DashMap, SharedValue};
22use hashbrown::raw::RawTable;
23use rustc_hash::FxBuildHasher;
24use triomphe::{HeaderSlice, HeaderWithLength, ThinArc};
25
26type InternMap<T> = DashMap<
27    ThinArc<<T as SliceInternable>::Header, <T as SliceInternable>::SliceType>,
28    (),
29    FxBuildHasher,
30>;
31type Guard<T> = dashmap::RwLockWriteGuard<
32    'static,
33    RawTable<(
34        ThinArc<<T as SliceInternable>::Header, <T as SliceInternable>::SliceType>,
35        SharedValue<()>,
36    )>,
37>;
38type Pointee<T> = HeaderSlice<
39    HeaderWithLength<<T as SliceInternable>::Header>,
40    [<T as SliceInternable>::SliceType],
41>;
42
43pub struct InternedSlice<T: SliceInternable> {
44    arc: ThinArc<T::Header, T::SliceType>,
45}
46
47impl<T: SliceInternable> InternedSlice<T> {
48    #[inline]
49    pub fn from_header_and_slice<'a>(
50        header: T::Header,
51        slice: &[T::SliceType],
52    ) -> InternedSliceRef<'a, T> {
53        const { assert!(T::USE_GC) };
54
55        let storage = T::storage().get();
56        let (mut shard, hash) = Self::select(storage, &header, slice);
57        // Atomically,
58        // - check if `obj` is already in the map
59        //   - if so, clone its `Arc` and return it
60        //   - if not, box it up, insert it, and return a clone
61        // This needs to be atomic (locking the shard) to avoid races with other thread, which could
62        // insert the same object between us looking it up and inserting it.
63        let bucket = match shard.find_or_find_insert_slot(
64            hash,
65            |(other, _)| other.header.header == header && other.slice == *slice,
66            |(x, _)| storage.hasher().hash_one(x),
67        ) {
68            Ok(bucket) => bucket,
69            // SAFETY: The slot came from `find_or_find_insert_slot()`, and the table wasn't modified since then.
70            Err(insert_slot) => unsafe {
71                shard.insert_in_slot(
72                    hash,
73                    insert_slot,
74                    (ThinArc::from_header_and_slice(header, slice), SharedValue::new(())),
75                )
76            },
77        };
78        // SAFETY: We just retrieved/inserted this bucket.
79        // `NonNull::new_unchecked()` is safe because the pointer originates from a `ThinArc`.
80        unsafe {
81            InternedSliceRef {
82                // INVARIANT: We create it from a `ThinArc`.
83                ptr: NonNull::new_unchecked(ThinArc::as_ptr(&bucket.as_ref().0).cast_mut()),
84                _marker: PhantomData,
85            }
86        }
87    }
88
89    #[inline]
90    fn select(
91        storage: &'static InternMap<T>,
92        header: &T::Header,
93        slice: &[T::SliceType],
94    ) -> (Guard<T>, u64) {
95        let hash = Self::hash(storage, header, slice);
96        let shard_idx = storage.determine_shard(hash as usize);
97        let shard = &storage.shards()[shard_idx];
98        (shard.write(), hash)
99    }
100
101    #[inline]
102    fn hash(storage: &'static InternMap<T>, header: &T::Header, slice: &[T::SliceType]) -> u64 {
103        storage.hasher().hash_one(HeaderSlice {
104            header: HeaderWithLength { header, length: slice.len() },
105            slice,
106        })
107    }
108
109    #[inline(always)]
110    fn ptr(&self) -> *const c_void {
111        self.arc.as_ptr()
112    }
113
114    #[inline]
115    pub fn as_ref(&self) -> InternedSliceRef<'_, T> {
116        InternedSliceRef {
117            // SAFETY: `self.ptr` comes from a valid `ThinArc`, so non null.
118            // INVARIANT: We create it from a `ThinArc`.
119            ptr: unsafe { NonNull::new_unchecked(self.ptr().cast_mut()) },
120            _marker: PhantomData,
121        }
122    }
123}
124
125/// Compares interned `Ref`s using pointer equality.
126impl<T: SliceInternable> PartialEq for InternedSlice<T> {
127    // NOTE: No `?Sized` because `ptr_eq` doesn't work right with trait objects.
128
129    #[inline]
130    fn eq(&self, other: &Self) -> bool {
131        self.arc.as_ptr() == other.arc.as_ptr()
132    }
133}
134
135impl<T: SliceInternable> Eq for InternedSlice<T> {}
136
137impl<T: SliceInternable> Hash for InternedSlice<T> {
138    #[inline]
139    fn hash<H: Hasher>(&self, state: &mut H) {
140        state.write_usize(self.ptr().addr())
141    }
142}
143
144impl<T: SliceInternable> Deref for InternedSlice<T> {
145    type Target = Pointee<T>;
146
147    #[inline]
148    fn deref(&self) -> &Self::Target {
149        &self.arc
150    }
151}
152
153impl<T: SliceInternable> Clone for InternedSlice<T> {
154    #[inline]
155    fn clone(&self) -> Self {
156        Self { arc: self.arc.clone() }
157    }
158}
159
160impl<T> Debug for InternedSlice<T>
161where
162    T: SliceInternable,
163    T::SliceType: Debug,
164    T::Header: Debug,
165{
166    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
167        (*self.arc).fmt(f)
168    }
169}
170
171#[repr(transparent)]
172pub struct InternedSliceRef<'a, T> {
173    /// # Invariant
174    ///
175    /// There is no `ThinArcBorrow` unfortunately, so this is basically a `ManuallyDrop<ThinArc>`,
176    /// except that can't be `Copy`, so we store a raw pointer instead.
177    ptr: NonNull<c_void>,
178    _marker: PhantomData<&'a T>,
179}
180
181// SAFETY: This is essentially a `ThinArc`, implemented as a raw pointer because there is no `ThinArcBorrowed`.
182unsafe impl<T: Send + Sync> Send for InternedSliceRef<'_, T> {}
183unsafe impl<T: Send + Sync> Sync for InternedSliceRef<'_, T> {}
184
185impl<'a, T: SliceInternable> InternedSliceRef<'a, T> {
186    #[inline(always)]
187    fn arc(self) -> ManuallyDrop<ThinArc<T::Header, T::SliceType>> {
188        // SAFETY: `self.ptr`'s invariant.
189        unsafe { ManuallyDrop::new(ThinArc::from_raw(self.ptr.as_ptr())) }
190    }
191
192    #[inline]
193    pub fn to_owned(self) -> InternedSlice<T> {
194        InternedSlice { arc: (*self.arc()).clone() }
195    }
196
197    #[inline]
198    pub fn get(self) -> &'a Pointee<T> {
199        // SAFETY: This is a lifetime extension, valid because we live for `'a`.
200        unsafe { &*ptr::from_ref::<Pointee<T>>(&*self.arc()) }
201    }
202
203    /// # Safety
204    ///
205    /// You have to make sure the data is not referenced after the refcount reaches zero; beware the interning
206    /// map also keeps a reference to the value.
207    #[inline]
208    pub unsafe fn decrement_refcount(self) {
209        drop(ManuallyDrop::into_inner(self.arc()));
210    }
211
212    #[inline]
213    pub(crate) fn strong_count(self) -> usize {
214        ThinArc::strong_count(&self.arc())
215    }
216
217    #[inline]
218    pub(crate) fn as_raw(self) -> *const c_void {
219        self.arc().as_ptr()
220    }
221
222    /// **Available only on GC mode**.
223    ///
224    /// Changes the attached lifetime, as in GC mode, the lifetime is more kind of a lint to prevent misuse
225    /// than actual soundness check.
226    #[inline]
227    pub fn change_lifetime<'b>(self) -> InternedSliceRef<'b, T> {
228        const { assert!(T::USE_GC) };
229        // SAFETY: The lifetime on `InternedSliceRef` is essentially advisory only for GCed types.
230        unsafe { std::mem::transmute::<InternedSliceRef<'a, T>, InternedSliceRef<'b, T>>(self) }
231    }
232}
233
234impl<T> Clone for InternedSliceRef<'_, T> {
235    #[inline]
236    fn clone(&self) -> Self {
237        *self
238    }
239}
240
241impl<T> Copy for InternedSliceRef<'_, T> {}
242
243impl<T: SliceInternable> Hash for InternedSliceRef<'_, T> {
244    #[inline]
245    fn hash<H: Hasher>(&self, state: &mut H) {
246        state.write_usize(self.ptr.as_ptr().addr());
247    }
248}
249
250impl<T: SliceInternable> PartialEq for InternedSliceRef<'_, T> {
251    #[inline]
252    fn eq(&self, other: &Self) -> bool {
253        self.ptr == other.ptr
254    }
255}
256
257impl<T: SliceInternable> Eq for InternedSliceRef<'_, T> {}
258
259impl<T: SliceInternable> Deref for InternedSliceRef<'_, T> {
260    type Target = Pointee<T>;
261
262    #[inline]
263    fn deref(&self) -> &Self::Target {
264        self.get()
265    }
266}
267
268impl<T> Debug for InternedSliceRef<'_, T>
269where
270    T: SliceInternable,
271    T::SliceType: Debug,
272    T::Header: Debug,
273{
274    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
275        (**self).fmt(f)
276    }
277}
278
279pub struct InternSliceStorage<T: SliceInternable> {
280    map: OnceLock<InternMap<T>>,
281}
282
283#[allow(
284    clippy::new_without_default,
285    reason = "this a const fn, so it can't be default yet. See <https://github.com/rust-lang/rust/issues/63065>"
286)]
287impl<T: SliceInternable> InternSliceStorage<T> {
288    pub const fn new() -> Self {
289        Self { map: OnceLock::new() }
290    }
291}
292
293impl<T: SliceInternable> InternSliceStorage<T> {
294    pub(crate) fn get(&self) -> &InternMap<T> {
295        self.map.get_or_init(|| {
296            DashMap::with_capacity_and_hasher(
297                (64 * 1024) / std::mem::size_of::<T::SliceType>(),
298                Default::default(),
299            )
300        })
301    }
302}
303
304pub trait SliceInternable: Sized + 'static {
305    const USE_GC: bool;
306    type Header: Eq + Hash + Send + Sync;
307    type SliceType: Eq + Hash + Send + Sync + Copy + 'static;
308    fn storage() -> &'static InternSliceStorage<Self>;
309}
310
311/// Implements `SliceInternable` for a given list of types, making them usable with `InternedSlice`.
312#[macro_export]
313#[doc(hidden)]
314macro_rules! _impl_slice_internable {
315    ( gc; $tag:ident, $h:ty, $t:ty $(,)? ) => {
316        #[allow(unreachable_pub)]
317        pub struct $tag;
318        impl $crate::SliceInternable for $tag {
319            const USE_GC: bool = true;
320            type Header = $h;
321            type SliceType = $t;
322            fn storage() -> &'static $crate::InternSliceStorage<Self> {
323                static STORAGE: $crate::InternSliceStorage<$tag> =
324                    $crate::InternSliceStorage::new();
325                &STORAGE
326            }
327        }
328    };
329}
330pub use crate::_impl_slice_internable as impl_slice_internable;