From f93c6b2cb05c2be1a80ff8e9dc8b7779f0ec6d11 Mon Sep 17 00:00:00 2001 From: Moulins Date: Tue, 2 Jan 2024 01:13:21 +0100 Subject: [PATCH] feat: Add `SmolStr::new_static` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allows creating `SmolStr`s longer than 23 bytes in constant contexts. This is done by replacing the `Repr::Substring` variant with a more general `Repr::Static(&'static str)` variant, and borrowing from `WS` directly instead of storing two `usize`s. As a bonus, it also simplifies the `as_str` implementation, hopefully saving an extra branch. --- lib/smol_str/README.md | 3 ++- lib/smol_str/src/lib.rs | 39 ++++++++++++++++++++++++--------------- 2 files changed, 26 insertions(+), 16 deletions(-) diff --git a/lib/smol_str/README.md b/lib/smol_str/README.md index 5e3506846f..ce16759e81 100644 --- a/lib/smol_str/README.md +++ b/lib/smol_str/README.md @@ -7,13 +7,14 @@ A `SmolStr` is a string type that has the following properties: -* `size_of::<SmolStr>() == 24 (therefore == size_of::<String>() on 64 bit platforms) +* `size_of::<SmolStr>() == 24` (therefore `== size_of::<String>()` on 64 bit platforms) * `Clone` is `O(1)` * Strings are stack-allocated if they are: * Up to 23 bytes long * Longer than 23 bytes, but substrings of `WS` (see `src/lib.rs`). Such strings consist solely of consecutive newlines, followed by consecutive spaces * If a string does not satisfy the aforementioned conditions, it is heap-allocated +* Additionally, a `SmolStr` can be explicitly created from a `&'static str` without allocation Unlike `String`, however, `SmolStr` is immutable. The primary use case for `SmolStr` is a good enough default storage for tokens of typical programming diff --git a/lib/smol_str/src/lib.rs b/lib/smol_str/src/lib.rs index 692803e447..a27b14c0ad 100644 --- a/lib/smol_str/src/lib.rs +++ b/lib/smol_str/src/lib.rs @@ -26,6 +26,7 @@ use core::{ /// * Longer than 23 bytes, but substrings of `WS` (see below).
Such strings consist /// solely of consecutive newlines, followed by consecutive spaces /// * If a string does not satisfy the aforementioned conditions, it is heap-allocated +/// * Additionally, a `SmolStr` can be explicitly created from a `&'static str` without allocation /// /// Unlike `String`, however, `SmolStr` is immutable. The primary use case for /// `SmolStr` is a good enough default storage for tokens of typical programming @@ -78,6 +79,17 @@ impl SmolStr { }) } + /// Constructs a `SmolStr` from a statically allocated string. + /// + /// This never allocates. + #[inline(always)] + pub const fn new_static(text: &'static str) -> SmolStr { + // NOTE: this never uses the inline storage; if a canonical + // representation is needed, we could check for `len() < INLINE_CAP` + // and call `new_inline`, but this would mean an extra branch. + SmolStr(Repr::Static(text)) + } + pub fn new<T>(text: T) -> SmolStr where T: AsRef<str>, @@ -395,6 +407,11 @@ const N_NEWLINES: usize = 32; const N_SPACES: usize = 128; const WS: &str = "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n "; +const _: () = { + assert!(WS.len() == N_NEWLINES + N_SPACES); + assert!(WS.as_bytes()[N_NEWLINES - 1] == b'\n'); + assert!(WS.as_bytes()[N_NEWLINES] == b' '); +}; #[derive(Clone, Copy, Debug)] #[repr(u8)] @@ -428,18 +445,15 @@ enum InlineSize { #[derive(Clone, Debug)] enum Repr { Heap(Arc<str>), + Static(&'static str), Inline { len: InlineSize, buf: [u8; INLINE_CAP], }, - Substring { - newlines: usize, - spaces: usize, - }, } impl Repr { - /// This function tries to create a new Repr::Inline or Repr::Substring + /// This function tries to create a new Repr::Inline or Repr::Static /// If it isn't possible, this function returns None fn new_on_stack<T>(text: T) -> Option<Self> where @@ -467,7 +481,8 @@ impl Repr { let possible_space_count = len - newlines; if possible_space_count <= N_SPACES && bytes[newlines..].iter().all(|&b| b == b' ') { let spaces = possible_space_count; - return
Some(Repr::Substring { newlines, spaces }); + let substring = &WS[N_NEWLINES - newlines..N_NEWLINES + spaces]; + return Some(Repr::Static(substring)); } } None @@ -484,8 +499,8 @@ impl Repr { fn len(&self) -> usize { match self { Repr::Heap(data) => data.len(), + Repr::Static(data) => data.len(), Repr::Inline { len, .. } => *len as usize, - Repr::Substring { newlines, spaces } => *newlines + *spaces, } } @@ -493,9 +508,8 @@ impl Repr { fn is_empty(&self) -> bool { match self { Repr::Heap(data) => data.is_empty(), + Repr::Static(data) => data.is_empty(), Repr::Inline { len, .. } => *len as u8 == 0, - // A substring isn't created for an empty string. - Repr::Substring { .. } => false, } } @@ -503,17 +517,12 @@ impl Repr { fn as_str(&self) -> &str { match self { Repr::Heap(data) => &*data, + Repr::Static(data) => data, Repr::Inline { len, buf } => { let len = *len as usize; let buf = &buf[..len]; unsafe { ::core::str::from_utf8_unchecked(buf) } } - Repr::Substring { newlines, spaces } => { - let newlines = *newlines; - let spaces = *spaces; - assert!(newlines <= N_NEWLINES && spaces <= N_SPACES); - &WS[N_NEWLINES - newlines..N_NEWLINES + spaces] - } } } }