diff --git a/lib/smol_str/CHANGELOG.md b/lib/smol_str/CHANGELOG.md
index 2577011ffe..c0193f6fcb 100644
--- a/lib/smol_str/CHANGELOG.md
+++ b/lib/smol_str/CHANGELOG.md
@@ -5,6 +5,8 @@
 - Optimise `StrExt::to_ascii_lowercase_smolstr`, `StrExt::to_ascii_uppercase_smolstr`
   ~2x speedup inline, ~4-22x for heap.
 - Optimise `StrExt::to_lowercase_smolstr`, `StrExt::to_uppercase_smolstr`
   ~2x speedup inline, ~5-50x for heap.
+- Optimise `StrExt::replace_smolstr`, `StrExt::replacen_smolstr` for single ascii replace.
+  ~3x speedup inline, ~1.8x for heap (len=50).
 
 ## 0.3.2 - 2024-10-23
diff --git a/lib/smol_str/src/lib.rs b/lib/smol_str/src/lib.rs
index 5ef6260f56..d55ba20522 100644
--- a/lib/smol_str/src/lib.rs
+++ b/lib/smol_str/src/lib.rs
@@ -715,6 +715,11 @@ impl StrExt for str {
 
     #[inline]
     fn replacen_smolstr(&self, from: &str, to: &str, count: usize) -> SmolStr {
+        // Fast path: a one-byte `&str` is a single ASCII char, so swap the bytes directly.
+        if let ([from_u8], [to_u8]) = (from.as_bytes(), to.as_bytes()) {
+            return replacen_1_ascii(self, *from_u8, *to_u8, count);
+        }
+
         let mut result = SmolStrBuilder::new();
         let mut last_end = 0;
         for (start, part) in self.match_indices(from).take(count) {
@@ -731,6 +736,40 @@ impl StrExt for str {
     }
 }
 
+/// Replaces up to `count` occurrences of the byte `from` in `src` with the byte `to`.
+///
+/// Both `from` and `to` must be ASCII: swapping one ASCII byte for another can never
+/// touch a multi-byte UTF-8 sequence, which is what keeps the output valid UTF-8.
+#[inline]
+fn replacen_1_ascii(src: &str, from: u8, to: u8, count: usize) -> SmolStr {
+    debug_assert!(from.is_ascii() && to.is_ascii());
+
+    let mut replaced = 0;
+    let mut ascii_replace = |b: &u8| {
+        if *b == from && replaced != count {
+            replaced += 1;
+            to
+        } else {
+            *b
+        }
+    };
+    if src.len() <= INLINE_CAP {
+        let mut buf = [0u8; INLINE_CAP];
+        for (idx, b) in src.as_bytes().iter().enumerate() {
+            buf[idx] = ascii_replace(b);
+        }
+        SmolStr(Repr::Inline {
+            // SAFETY: `src.len() <= INLINE_CAP` was checked above, so `len` is in bounds.
+            len: unsafe { InlineSize::transmute_from_u8(src.len() as u8) },
+            buf,
+        })
+    } else {
+        let out = src.as_bytes().iter().map(ascii_replace).collect();
+        // SAFETY: We replaced ascii with ascii on valid utf8 strings.
+        unsafe { String::from_utf8_unchecked(out).into() }
+    }
+}
+
 /// Inline version of std fn `convert_while_ascii`. `s` must have len <= 23.
 #[inline]
 fn inline_convert_while_ascii(s: &str, convert: fn(&u8) -> u8) -> ([u8; INLINE_CAP], &str) {
diff --git a/lib/smol_str/tests/test.rs b/lib/smol_str/tests/test.rs
index 0070b3a5ec..8f7d9ec39a 100644
--- a/lib/smol_str/tests/test.rs
+++ b/lib/smol_str/tests/test.rs
@@ -389,6 +389,20 @@ mod test_str_ext {
         assert_eq!(result, "foo_dor_baz");
         assert!(!result.is_heap_allocated());
     }
+
+    #[test]
+    fn replacen_1_ascii() {
+        let result = "foo_bar_baz".replacen_smolstr("o", "u", 1);
+        assert_eq!(result, "fuo_bar_baz");
+        assert!(!result.is_heap_allocated());
+    }
+
+    #[test]
+    fn replacen_1_ascii_heap() {
+        let result = "foo_bar_baz_foo_bar_baz_foo".replacen_smolstr("o", "u", 3);
+        assert_eq!(result, "fuu_bar_baz_fuo_bar_baz_foo");
+        assert!(result.is_heap_allocated());
+    }
 }
 
 #[cfg(feature = "borsh")]