diff --git a/crates/hir-ty/src/next_solver.rs b/crates/hir-ty/src/next_solver.rs index 539d09a8f5..e91864bd87 100644 --- a/crates/hir-ty/src/next_solver.rs +++ b/crates/hir-ty/src/next_solver.rs @@ -1,5 +1,9 @@ //! Things relevant to the next trait solver. +// Note: in interned types defined in this module, we generally treat the lifetime as advisory +// and transmute it as needed. This is because no real memory unsafety can be caused from an +// incorrect lifetime here. + pub mod abi; mod binder; mod consts; diff --git a/crates/hir-ty/src/next_solver/generic_arg.rs b/crates/hir-ty/src/next_solver/generic_arg.rs index f31b487eae..b600f6000d 100644 --- a/crates/hir-ty/src/next_solver/generic_arg.rs +++ b/crates/hir-ty/src/next_solver/generic_arg.rs @@ -1,4 +1,10 @@ -//! Things related to generic args in the next-trait-solver. +//! Things related to generic args in the next-trait-solver (`GenericArg`, `GenericArgs`, `Term`). +//! +//! Implementations of `GenericArg` and `Term` are pointer-tagged instead of an enum (rustc does +//! the same). This is done to save memory (which also helps speed) - one `GenericArg` is a machine +//! word instead of two, while matching on it is basically as cheap. The implementation for both +//! `GenericArg` and `Term` is shared in [`GenericArgImpl`]. This both simplifies the implementation, +//! as well as enables a noop conversion from `Term` to `GenericArg`. use std::{hint::unreachable_unchecked, marker::PhantomData, ptr::NonNull}; @@ -29,10 +35,14 @@ pub type TermKind<'db> = rustc_type_ir::TermKind>; #[derive(Clone, Copy, PartialEq, Eq, Hash)] struct GenericArgImpl<'db> { + /// # Invariant + /// + /// Contains an [`InternedRef`] of a [`Ty`], [`Const`] or [`Region`], bit-tagged as per the consts below. ptr: NonNull<()>, _marker: PhantomData<(Ty<'db>, Const<'db>, Region<'db>)>, } +// SAFETY: We essentially own the `Ty`, `Const` or `Region`, and they are `Send + Sync`. unsafe impl Send for GenericArgImpl<'_> {} unsafe impl Sync for GenericArgImpl<'_> {} @@ -46,6 +56,7 @@ impl<'db> GenericArgImpl<'db> { #[inline] fn new_ty(ty: Ty<'db>) -> Self { Self { + // SAFETY: We create it from an `InternedRef`, and it's never null. ptr: unsafe { NonNull::new_unchecked( ty.interned @@ -62,6 +73,7 @@ impl<'db> GenericArgImpl<'db> { #[inline] fn new_const(ty: Const<'db>) -> Self { Self { + // SAFETY: We create it from an `InternedRef`, and it's never null. ptr: unsafe { NonNull::new_unchecked( ty.interned @@ -78,6 +90,7 @@ impl<'db> GenericArgImpl<'db> { #[inline] fn new_region(ty: Region<'db>) -> Self { Self { + // SAFETY: We create it from an `InternedRef`, and it's never null. ptr: unsafe { NonNull::new_unchecked( ty.interned @@ -94,6 +107,7 @@ impl<'db> GenericArgImpl<'db> { #[inline] fn kind(self) -> GenericArgKind<'db> { let ptr = self.ptr.as_ptr().map_addr(|addr| addr & Self::PTR_MASK); + // SAFETY: We can only be created from a `Ty`, a `Const` or a `Region`, and the tag will match. unsafe { match self.ptr.addr().get() & Self::KIND_MASK { Self::TY_TAG => GenericArgKind::Type(Ty { @@ -113,6 +127,9 @@ impl<'db> GenericArgImpl<'db> { #[inline] fn term_kind(self) -> TermKind<'db> { let ptr = self.ptr.as_ptr().map_addr(|addr| addr & Self::PTR_MASK); + // SAFETY: We can only be created from a `Ty`, a `Const` or a `Region`, and the tag will match. + // It is the caller's responsibility (encapsulated within this module) to only call this with + // `Term`, which cannot be constructed from a `Region`. unsafe { match self.ptr.addr().get() & Self::KIND_MASK { Self::TY_TAG => { diff --git a/crates/hir-ty/src/next_solver/interner.rs b/crates/hir-ty/src/next_solver/interner.rs index f2dacd16db..6b97d110ee 100644 --- a/crates/hir-ty/src/next_solver/interner.rs +++ b/crates/hir-ty/src/next_solver/interner.rs @@ -230,8 +230,10 @@ macro_rules! impl_stored_interned_slice { } } + // SAFETY: It is safe to store this type in queries (but not `$name`). unsafe impl salsa::Update for $stored_name { unsafe fn maybe_update(old_pointer: *mut Self, new_value: Self) -> bool { + // SAFETY: Comparing by (pointer) equality is safe. unsafe { crate::utils::unsafe_update_eq(old_pointer, new_value) } } } @@ -294,6 +296,8 @@ macro_rules! impl_stored_interned { } pub(crate) use impl_stored_interned; +/// This is a visitor trait that treats any interned thing specifically. Visitables are expected to call +/// the trait's methods when encountering an interned. This is used to implement marking in GC. pub trait WorldExposer { fn on_interned( &mut self, @@ -2613,6 +2617,10 @@ pub unsafe fn collect_ty_garbage() { gc.add_slice_storage::(); gc.add_slice_storage::(); + // SAFETY: + // - By our precondition, there are no unrecorded types. + // - We implement `GcInternedVisit` and `GcInternedSliceVisit` correctly for all types. + // - We added all storages (FIXME: it's too easy to forget to add a new storage here). unsafe { gc.collect() }; } diff --git a/crates/intern/src/gc.rs b/crates/intern/src/gc.rs index e559d1dc57..0d500a9714 100644 --- a/crates/intern/src/gc.rs +++ b/crates/intern/src/gc.rs @@ -1,7 +1,12 @@ //! Garbage collection of interned values. +//! +//! The GC is a simple mark-and-sweep GC: you first mark all storages, then the +//! GC visits them, and each live value they refer, recursively, then removes +//! those not marked. The sweep phase is done in parallel. -use std::{marker::PhantomData, ops::ControlFlow}; +use std::{hash::Hash, marker::PhantomData, ops::ControlFlow}; +use dashmap::DashMap; use hashbrown::raw::RawTable; use rayon::iter::{IntoParallelRefIterator, ParallelIterator}; use rustc_hash::{FxBuildHasher, FxHashSet}; @@ -39,15 +44,7 @@ impl Storage for InternedStorage { fn sweep(&self, gc: &GarbageCollector) { let storage = T::storage().get(); - if cfg!(miri) { - storage.shards().iter().for_each(|shard| { - gc.retain_only_alive(&mut *shard.write(), |item| item.0.as_ptr().addr()) - }); - } else { - storage.shards().par_iter().for_each(|shard| { - gc.retain_only_alive(&mut *shard.write(), |item| item.0.as_ptr().addr()) - }); - } + gc.sweep_storage(storage, |item| item.as_ptr().addr()); } } @@ -74,15 +71,7 @@ impl Storage for InternedSliceStorage fn sweep(&self, gc: &GarbageCollector) { let storage = T::storage().get(); - if cfg!(miri) { - storage.shards().iter().for_each(|shard| { - gc.retain_only_alive(&mut *shard.write(), |item| item.0.as_ptr().addr()) - }); - } else { - storage.shards().par_iter().for_each(|shard| { - gc.retain_only_alive(&mut *shard.write(), |item| item.0.as_ptr().addr()) - }); - } + gc.sweep_storage(storage, |item| item.as_ptr().addr()); } } @@ -116,7 +105,10 @@ impl GarbageCollector { /// # Safety /// - /// This cannot be called if there are some not-yet-recorded type values. + /// - This cannot be called if there are some not-yet-recorded type values. + /// - All relevant storages must have been added; that is, within the full graph of values, + /// the added storages must form a DAG. + /// - [`GcInternedVisit`] and [`GcInternedSliceVisit`] must mark all values reachable from the node. pub unsafe fn collect(mut self) { let total_nodes = self.storages.iter().map(|storage| storage.len()).sum(); self.alive = FxHashSet::with_capacity_and_hasher(total_nodes, FxBuildHasher); @@ -127,6 +119,7 @@ impl GarbageCollector { storage.mark(&mut self); } + // Miri doesn't support rayon. if cfg!(miri) { storages.iter().for_each(|storage| storage.sweep(&self)); } else { @@ -158,8 +151,26 @@ impl GarbageCollector { if !self.alive.insert(addr) { ControlFlow::Break(()) } else { ControlFlow::Continue(()) } } + fn sweep_storage( + &self, + storage: &DashMap, + get_addr: impl Fn(&T) -> usize + Send + Sync, + ) { + // Miri doesn't support rayon. + if cfg!(miri) { + storage.shards().iter().for_each(|shard| { + self.retain_only_alive(&mut *shard.write(), |item| get_addr(&item.0)) + }); + } else { + storage.shards().par_iter().for_each(|shard| { + self.retain_only_alive(&mut *shard.write(), |item| get_addr(&item.0)) + }); + } + } + #[inline] fn retain_only_alive(&self, map: &mut RawTable, mut get_addr: impl FnMut(&T) -> usize) { + // This code was copied from DashMap's retain() - which we can't use because we want to run in parallel. unsafe { // Here we only use `iter` as a temporary, preventing use-after-free for bucket in map.iter() { @@ -218,7 +229,7 @@ mod tests { fn visit_with(&self, _gc: &mut GarbageCollector) {} } - crate::impl_slice_internable!(gc; StringSlice, String, String); + crate::impl_slice_internable!(gc; StringSlice, String, u32); type InternedSlice = crate::InternedSlice; impl GcInternedSliceVisit for StringSlice { @@ -245,19 +256,13 @@ mod tests { assert_ne!(b.as_ref(), c.as_ref()); assert_eq!(c.as_ref().0, "def"); - let d = InternedSlice::from_header_and_slice( - "abc".to_owned(), - &["def".to_owned(), "123".to_owned()], - ); - let e = InternedSlice::from_header_and_slice( - "abc".to_owned(), - &["def".to_owned(), "123".to_owned()], - ); + let d = InternedSlice::from_header_and_slice("abc".to_owned(), &[123, 456]); + let e = InternedSlice::from_header_and_slice("abc".to_owned(), &[123, 456]); assert_eq!(d, e); assert_eq!(d.to_owned(), e.to_owned()); assert_eq!(d.header.length, 2); assert_eq!(d.header.header, "abc"); - assert_eq!(d.slice, ["def", "123"]); + assert_eq!(d.slice, [123, 456]); (a, d.to_owned()) }; @@ -269,7 +274,7 @@ mod tests { assert_eq!(a.0, "abc"); assert_eq!(d.header.length, 2); assert_eq!(d.header.header, "abc"); - assert_eq!(d.slice, ["def", "123"]); + assert_eq!(d.slice, [123, 456]); drop(a); drop(d); diff --git a/crates/intern/src/intern.rs b/crates/intern/src/intern.rs index 5d4d001185..b7acd6624b 100644 --- a/crates/intern/src/intern.rs +++ b/crates/intern/src/intern.rs @@ -1,8 +1,31 @@ //! Interning of single values. +//! +//! Interning supports two modes: GC and non-GC. +//! +//! In non-GC mode, you create [`Interned`]s, and can create `Copy` handles to them +//! that can still be upgraded back to [`Interned`] ([`InternedRef`]) via [`Interned::as_ref`]. +//! Generally, letting the [`InternedRef`] to outlive the [`Interned`] is a soundness bug and can +//! lead to UB. When all [`Interned`]s of some value are dropped, the value is freed (newer interns +//! may re-create it, not necessarily in the same place). +//! +//! In GC mode, you generally operate on [`InternedRef`]s. They are `Copy` and comfortable. To intern +//! a value you call [`Interned::new_gc`], which returns an [`InternedRef`]. Having all [`Interned`]s +//! of some value be dropped will *not* immediately free the value. Instead, a mark-and-sweep GC can +//! be initiated, which will free all values which have no live [`Interned`]s. +//! +//! Generally, in GC mode, you operate on [`InternedRef`], but when you need to store some long-term +//! value (e.g. a Salsa query output), you convert it to an [`Interned`]. This ensures that an eventual +//! GC will not free it as long as it is alive. +//! +//! Making mistakes is hard due to GC [`InternedRef`] wrappers not implementing `salsa::Update`, meaning +//! Salsa will ensure you do not store them in queries or Salsa-interneds. However it's still *possible* +//! without unsafe code (for example, by storing them in a `static`), which is why triggering GC is unsafe. +//! +//! For more information about GC see [`crate::gc`]. use std::{ fmt::{self, Debug, Display}, - hash::{BuildHasher, BuildHasherDefault, Hash, Hasher}, + hash::{BuildHasher, Hash, Hasher}, ops::Deref, ptr, sync::OnceLock, @@ -10,19 +33,16 @@ use std::{ use dashmap::{DashMap, SharedValue}; use hashbrown::raw::RawTable; -use rustc_hash::FxHasher; +use rustc_hash::FxBuildHasher; use triomphe::{Arc, ArcBorrow}; -type InternMap = DashMap, (), BuildHasherDefault>; +type InternMap = DashMap, (), FxBuildHasher>; type Guard = dashmap::RwLockWriteGuard<'static, RawTable<(Arc, SharedValue<()>)>>; pub struct Interned { arc: Arc, } -unsafe impl Send for Interned {} -unsafe impl Sync for Interned {} - impl Interned { #[inline] pub fn new(obj: T) -> Self { @@ -96,6 +116,7 @@ impl Interned { /// The pointer should originate from an `Interned` or an `InternedRef`. #[inline] pub unsafe fn from_raw(ptr: *const T) -> Self { + // SAFETY: Our precondition. Self { arc: unsafe { Arc::from_raw(ptr) } } } @@ -209,6 +230,7 @@ impl<'a, T: Internable> InternedRef<'a, T> { /// The pointer needs to originate from `Interned` or `InternedRef`. #[inline] pub unsafe fn from_raw(ptr: *const T) -> Self { + // SAFETY: Our precondition. Self { arc: unsafe { ArcBorrow::from_ptr(ptr) } } } @@ -228,6 +250,7 @@ impl<'a, T: Internable> InternedRef<'a, T> { /// map also keeps a reference to the value. #[inline] pub unsafe fn decrement_refcount(self) { + // SAFETY: Our precondition. unsafe { drop(Arc::from_raw(self.as_raw())) } } @@ -235,6 +258,17 @@ impl<'a, T: Internable> InternedRef<'a, T> { pub(crate) fn strong_count(self) -> usize { ArcBorrow::strong_count(&self.arc) } + + /// **Available only on GC mode**. + /// + /// Changes the attached lifetime, as in GC mode, the lifetime is more kind of a lint to prevent misuse + /// than actual soundness check. + #[inline] + pub fn change_lifetime<'b>(self) -> InternedRef<'b, T> { + const { assert!(T::USE_GC) }; + // SAFETY: The lifetime on `InternedRef` is essentially advisory only for GCed types. + unsafe { std::mem::transmute::, InternedRef<'b, T>>(self) } + } } impl Clone for InternedRef<'_, T> { diff --git a/crates/intern/src/intern_slice.rs b/crates/intern/src/intern_slice.rs index 7f2159ee66..58de6e17bd 100644 --- a/crates/intern/src/intern_slice.rs +++ b/crates/intern/src/intern_slice.rs @@ -1,10 +1,16 @@ //! Interning of slices, potentially with a header. +//! +//! See [`crate::intern`] for an explanation of interning modes. Note that slice interning is currently +//! available only in GC mode (there is no other need). +//! +//! [`InternedSlice`] and [`InternedSliceRef`] are essentially [`Interned<(Header, Box<[SliceType]>)>`][crate::Interned] +//! and [`InternedRef`][crate::InternedRef] with the same types, but more optimized. There is only one +//! allocation and the pointer is thin. use std::{ - borrow::Borrow, ffi::c_void, fmt::{self, Debug}, - hash::{BuildHasher, BuildHasherDefault, Hash, Hasher}, + hash::{BuildHasher, Hash, Hasher}, marker::PhantomData, mem::ManuallyDrop, ops::Deref, @@ -14,13 +20,13 @@ use std::{ use dashmap::{DashMap, SharedValue}; use hashbrown::raw::RawTable; -use rustc_hash::FxHasher; +use rustc_hash::FxBuildHasher; use triomphe::{HeaderSlice, HeaderWithLength, ThinArc}; type InternMap = DashMap< ThinArc<::Header, ::SliceType>, (), - BuildHasherDefault, + FxBuildHasher, >; type Guard = dashmap::RwLockWriteGuard< 'static, @@ -40,14 +46,14 @@ pub struct InternedSlice { impl InternedSlice { #[inline] - fn new<'a>( + pub fn from_header_and_slice<'a>( header: T::Header, - slice: impl Borrow<[T::SliceType]> - + IntoIterator, + slice: &[T::SliceType], ) -> InternedSliceRef<'a, T> { const { assert!(T::USE_GC) }; + let storage = T::storage().get(); - let (mut shard, hash) = Self::select(storage, &header, slice.borrow()); + let (mut shard, hash) = Self::select(storage, &header, slice); // Atomically, // - check if `obj` is already in the map // - if so, clone its `Arc` and return it @@ -56,7 +62,7 @@ impl InternedSlice { // insert the same object between us looking it up and inserting it. let bucket = match shard.find_or_find_insert_slot( hash, - |(other, _)| other.header.header == header && other.slice == *slice.borrow(), + |(other, _)| other.header.header == header && other.slice == *slice, |(x, _)| storage.hasher().hash_one(x), ) { Ok(bucket) => bucket, @@ -65,51 +71,21 @@ impl InternedSlice { shard.insert_in_slot( hash, insert_slot, - ( - ThinArc::from_header_and_iter(header, slice.into_iter()), - SharedValue::new(()), - ), + (ThinArc::from_header_and_slice(header, slice), SharedValue::new(())), ) }, }; // SAFETY: We just retrieved/inserted this bucket. + // `NonNull::new_unchecked()` is safe because the pointer originates from a `ThinArc`. unsafe { InternedSliceRef { + // INVARIANT: We create it from a `ThinArc`. ptr: NonNull::new_unchecked(ThinArc::as_ptr(&bucket.as_ref().0).cast_mut()), _marker: PhantomData, } } } - #[inline] - pub fn from_header_and_slice<'a>( - header: T::Header, - slice: &[T::SliceType], - ) -> InternedSliceRef<'a, T> - where - T::SliceType: Clone, - { - return Self::new(header, Iter(slice)); - - struct Iter<'a, T>(&'a [T]); - - impl<'a, T: Clone> IntoIterator for Iter<'a, T> { - type IntoIter = std::iter::Cloned>; - type Item = T; - #[inline] - fn into_iter(self) -> Self::IntoIter { - self.0.iter().cloned() - } - } - - impl Borrow<[T]> for Iter<'_, T> { - #[inline] - fn borrow(&self) -> &[T] { - self.0 - } - } - } - #[inline] fn select( storage: &'static InternMap, @@ -132,51 +108,20 @@ impl InternedSlice { #[inline(always)] fn ptr(&self) -> *const c_void { - unsafe { ptr::from_ref(&self.arc).read().into_raw() } + self.arc.as_ptr() } #[inline] pub fn as_ref(&self) -> InternedSliceRef<'_, T> { InternedSliceRef { + // SAFETY: `self.ptr` comes from a valid `ThinArc`, so non null. + // INVARIANT: We create it from a `ThinArc`. ptr: unsafe { NonNull::new_unchecked(self.ptr().cast_mut()) }, _marker: PhantomData, } } } -impl Drop for InternedSlice { - #[inline] - fn drop(&mut self) { - // When the last `Ref` is dropped, remove the object from the global map. - if !T::USE_GC && ThinArc::strong_count(&self.arc) == 2 { - // Only `self` and the global map point to the object. - - self.drop_slow(); - } - } -} - -impl InternedSlice { - #[cold] - fn drop_slow(&mut self) { - let storage = T::storage().get(); - let (mut shard, hash) = Self::select(storage, &self.arc.header.header, &self.arc.slice); - - if ThinArc::strong_count(&self.arc) != 2 { - // Another thread has interned another copy - return; - } - - shard.remove_entry(hash, |(other, _)| **other == *self.arc); - - // Shrink the backing storage if the shard is less than 50% occupied. - if shard.len() * 2 < shard.capacity() { - let len = shard.len(); - shard.shrink_to(len, |(x, _)| storage.hasher().hash_one(x)); - } - } -} - /// Compares interned `Ref`s using pointer equality. impl PartialEq for InternedSlice { // NOTE: No `?Sized` because `ptr_eq` doesn't work right with trait objects. @@ -225,16 +170,22 @@ where #[repr(transparent)] pub struct InternedSliceRef<'a, T> { + /// # Invariant + /// + /// There is no `ThinArcBorrow` unfortunately, so this is basically a `ManuallyDrop`, + /// except that can't be `Copy`, so we store a raw pointer instead. ptr: NonNull, _marker: PhantomData<&'a T>, } +// SAFETY: This is essentially a `ThinArc`, implemented as a raw pointer because there is no `ThinArcBorrowed`. unsafe impl Send for InternedSliceRef<'_, T> {} unsafe impl Sync for InternedSliceRef<'_, T> {} impl<'a, T: SliceInternable> InternedSliceRef<'a, T> { #[inline(always)] fn arc(self) -> ManuallyDrop> { + // SAFETY: `self.ptr`'s invariant. unsafe { ManuallyDrop::new(ThinArc::from_raw(self.ptr.as_ptr())) } } @@ -245,6 +196,7 @@ impl<'a, T: SliceInternable> InternedSliceRef<'a, T> { #[inline] pub fn get(self) -> &'a Pointee { + // SAFETY: This is a lifetime extension, valid because we live for `'a`. unsafe { &*ptr::from_ref::>(&*self.arc()) } } @@ -266,6 +218,17 @@ impl<'a, T: SliceInternable> InternedSliceRef<'a, T> { pub(crate) fn as_raw(self) -> *const c_void { self.arc().as_ptr() } + + /// **Available only on GC mode**. + /// + /// Changes the attached lifetime, as in GC mode, the lifetime is more kind of a lint to prevent misuse + /// than actual soundness check. + #[inline] + pub fn change_lifetime<'b>(self) -> InternedSliceRef<'b, T> { + const { assert!(T::USE_GC) }; + // SAFETY: The lifetime on `InternedSliceRef` is essentially advisory only for GCed types. + unsafe { std::mem::transmute::, InternedSliceRef<'b, T>>(self) } + } } impl Clone for InternedSliceRef<'_, T> { @@ -336,7 +299,7 @@ impl InternSliceStorage { pub trait SliceInternable: Sized + 'static { const USE_GC: bool; type Header: Eq + Hash + Send + Sync; - type SliceType: Eq + Hash + Send + Sync + 'static; + type SliceType: Eq + Hash + Send + Sync + Copy + 'static; fn storage() -> &'static InternSliceStorage; }