From 89682556085c8e80a15e92a019c2c975a20e663e Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Thu, 16 Aug 2018 23:32:49 +0300 Subject: [PATCH 001/132] initial --- lib/smol_str/.gitignore | 2 + lib/smol_str/.travis.yml | 5 + lib/smol_str/Cargo.toml | 10 ++ lib/smol_str/LICENSE-APACHE | 201 ++++++++++++++++++++++++++++++++++++ lib/smol_str/LICENSE-MIT | 23 +++++ lib/smol_str/README.md | 20 ++++ lib/smol_str/src/lib.rs | 155 +++++++++++++++++++++++++++ lib/smol_str/tests/test.rs | 46 +++++++++ 8 files changed, 462 insertions(+) create mode 100644 lib/smol_str/.gitignore create mode 100644 lib/smol_str/.travis.yml create mode 100644 lib/smol_str/Cargo.toml create mode 100644 lib/smol_str/LICENSE-APACHE create mode 100644 lib/smol_str/LICENSE-MIT create mode 100644 lib/smol_str/README.md create mode 100644 lib/smol_str/src/lib.rs create mode 100644 lib/smol_str/tests/test.rs diff --git a/lib/smol_str/.gitignore b/lib/smol_str/.gitignore new file mode 100644 index 0000000000..4470988469 --- /dev/null +++ b/lib/smol_str/.gitignore @@ -0,0 +1,2 @@ +target/ +Cargo.lock \ No newline at end of file diff --git a/lib/smol_str/.travis.yml b/lib/smol_str/.travis.yml new file mode 100644 index 0000000000..5070d43aa5 --- /dev/null +++ b/lib/smol_str/.travis.yml @@ -0,0 +1,5 @@ +language: rust + +script: + - cargo test + - cargo run --example serde diff --git a/lib/smol_str/Cargo.toml b/lib/smol_str/Cargo.toml new file mode 100644 index 0000000000..a82c8aec85 --- /dev/null +++ b/lib/smol_str/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "smol_str" +version = "0.1.0" +authors = ["Aleksey Kladov "] +repository = "https://github.com/matklad/smol_str" +description = "small-string optimized stirng type with O(1) clone" +license = "MIT OR Apache-2.0" + +[dev-dependencies] +proptest = "0.8.3" diff --git a/lib/smol_str/LICENSE-APACHE b/lib/smol_str/LICENSE-APACHE new file mode 100644 index 0000000000..16fe87b06e --- /dev/null +++ b/lib/smol_str/LICENSE-APACHE @@ -0,0 +1,201 @@ + Apache 
License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. 
For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the 
following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + +Copyright [yyyy] [name of copyright owner] + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. diff --git a/lib/smol_str/LICENSE-MIT b/lib/smol_str/LICENSE-MIT new file mode 100644 index 0000000000..31aa79387f --- /dev/null +++ b/lib/smol_str/LICENSE-MIT @@ -0,0 +1,23 @@ +Permission is hereby granted, free of charge, to any +person obtaining a copy of this software and associated +documentation files (the "Software"), to deal in the +Software without restriction, including without +limitation the rights to use, copy, modify, merge, +publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software +is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice +shall be included in all copies or substantial portions +of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. diff --git a/lib/smol_str/README.md b/lib/smol_str/README.md new file mode 100644 index 0000000000..14215cf54f --- /dev/null +++ b/lib/smol_str/README.md @@ -0,0 +1,20 @@ +# typed_key + +[![Build Status](https://travis-ci.org/matklad/smol_str.svg?branch=master)](https://travis-ci.org/matklad/smol_str) +[![Crates.io](https://img.shields.io/crates/v/smol_str.svg)](https://crates.io/crates/smol_str) +[![API reference](https://docs.rs/smol_str/badge.svg)](https://docs.rs/smol_str/) + + +A `SmolStr` is a string type that has the following properties + + * `size_of::() == size_of::()` + * Strings up to 22 bytes long do not use heap allocations + * Runs of `\n` and space symbols (typical whitespace pattern of indentation + in programming laguages) do not use heap allocations + * `Clone` is `O(1)` + +Unlike `String`, however, `SmolStr` is immutable. The primary use-case for +`SmolStr` is a good enough default storage for tokens of typical programming +languages. A specialized interner might be a better solution for some use-cases. + +Intenrally, `SmolStr` is roughly an `enum { Heap>, Inline([u8; 22]) }`. 
diff --git a/lib/smol_str/src/lib.rs b/lib/smol_str/src/lib.rs new file mode 100644 index 0000000000..a5a99d2271 --- /dev/null +++ b/lib/smol_str/src/lib.rs @@ -0,0 +1,155 @@ +use std::{fmt, ops::Deref, sync::Arc}; + +/// A `SmolStr` is a string type that has the following properties +/// +/// * `size_of::() == size_of::()` +/// * Strings up to 22 bytes long do not use heap allocations +/// * Runs of `\n` and space symbols (typical whitespace pattern of indentation +/// in programming laguages) do not use heap allocations +/// * `Clone` is `O(1)` +/// +/// Unlike `String`, however, `SmolStr` is immutable. The primary use-case for +/// `SmolStr` is a good enough default storage for tokens of typical programming +/// languages. A specialized interner might be a better solution for some use-cases. +/// +/// Intenrally, `SmolStr` is roughly an `enum { Heap>, Inline([u8; 22]) }`. +#[derive(Clone)] +pub struct SmolStr(Repr); + +impl SmolStr { + pub fn new(text: &str) -> SmolStr { + SmolStr(Repr::new(text)) + } + + pub fn as_str(&self) -> &str { + self.0.as_str() + } + + pub fn to_string(&self) -> String { + self.as_str().to_string() + } +} + +impl Deref for SmolStr { + type Target = str; + + fn deref(&self) -> &str { + self.as_str() + } +} + +impl PartialEq for SmolStr { + fn eq(&self, other: &str) -> bool { + self.as_str() == other + } +} + +impl PartialEq for str { + fn eq(&self, other: &SmolStr) -> bool { + other == self + } +} + +impl<'a> PartialEq<&'a str> for SmolStr { + fn eq(&self, other: &&'a str) -> bool { + self == *other + } +} + +impl<'a> PartialEq for &'a str { + fn eq(&self, other: &SmolStr) -> bool { + *self == other + } +} + +impl PartialEq for SmolStr { + fn eq(&self, other: &String) -> bool { + self.as_str() == other + } +} + +impl PartialEq for String { + fn eq(&self, other: &SmolStr) -> bool { + other == self + } +} + +impl<'a> PartialEq<&'a String> for SmolStr { + fn eq(&self, other: &&'a String) -> bool { + self == *other + } +} + +impl<'a> 
PartialEq for &'a String { + fn eq(&self, other: &SmolStr) -> bool { + *self == other + } +} + +impl fmt::Debug for SmolStr { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fmt::Debug::fmt(self.as_str(), f) + } +} + +impl fmt::Display for SmolStr { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fmt::Display::fmt(self.as_str(), f) + } +} + +const INLINE_CAP: usize = 22; +const WS_TAG: u8 = (INLINE_CAP + 1) as u8; +const N_NEWLINES: usize = 32; +const N_SPACES: usize = 128; +const WS: &str = + "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n "; + +#[derive(Clone, Debug)] +enum Repr { + Heap(Arc), + Inline { len: u8, buf: [u8; INLINE_CAP] }, +} + +impl Repr { + fn new(text: &str) -> Repr { + let len = text.len(); + if len <= INLINE_CAP { + let mut buf = [0; INLINE_CAP]; + buf[..len].copy_from_slice(text.as_bytes()); + return Repr::Inline { + len: len as u8, + buf, + }; + } + + let newlines = text.bytes().take_while(|&b| b == b'\n').count(); + let spaces = text[newlines..].bytes().take_while(|&b| b == b' ').count(); + if newlines + spaces == len && newlines <= N_NEWLINES && spaces <= N_SPACES { + let mut buf = [0; INLINE_CAP]; + buf[0] = newlines as u8; + buf[1] = spaces as u8; + return Repr::Inline { len: WS_TAG, buf }; + } + + Repr::Heap(text.to_string().into_boxed_str().into()) + } + + fn as_str(&self) -> &str { + match self { + Repr::Heap(data) => &*data, + Repr::Inline { len, buf } => { + if *len == WS_TAG { + let newlines = buf[0] as usize; + let spaces = buf[1] as usize; + assert!(newlines <= N_NEWLINES && spaces <= N_SPACES); + return &WS[N_NEWLINES - newlines..N_NEWLINES + spaces]; + } + + let len = *len as usize; + let buf = &buf[..len]; + unsafe { ::std::str::from_utf8_unchecked(buf) } + } + } + } +} diff --git a/lib/smol_str/tests/test.rs b/lib/smol_str/tests/test.rs new file mode 100644 index 0000000000..44a0f57e42 --- /dev/null +++ b/lib/smol_str/tests/test.rs @@ -0,0 +1,46 @@ +extern crate smol_str; +#[macro_use] 
+extern crate proptest; + +use smol_str::SmolStr; + +#[test] +#[cfg(target_pointer_width = "64")] +fn smol_str_is_smol() { + assert_eq!( + ::std::mem::size_of::(), + ::std::mem::size_of::(), + ); +} + +#[test] +fn assert_traits() { + fn f() {} + f::(); +} + +proptest! { + #[test] + fn roundtrip(s: String) { + let smol = SmolStr::new(s.as_str()); + prop_assert_eq!(smol.as_str(), s.as_str()); + } + + #[test] + fn roundtrip_spaces(s in r"( )*") { + let smol = SmolStr::new(s.as_str()); + prop_assert_eq!(smol.as_str(), s.as_str()); + } + + #[test] + fn roundtrip_newlines(s in r"\n*") { + let smol = SmolStr::new(s.as_str()); + prop_assert_eq!(smol.as_str(), s.as_str()); + } + + #[test] + fn roundtrip_ws(s in r"( |\n)*") { + let smol = SmolStr::new(s.as_str()); + prop_assert_eq!(smol.as_str(), s.as_str()); + } +} From 63784097f5ea0b9c9d917aec3b617d8692d86920 Mon Sep 17 00:00:00 2001 From: jD91mZM2 Date: Fri, 17 Aug 2018 19:58:10 +0200 Subject: [PATCH 002/132] Implement From<&str> --- lib/smol_str/src/lib.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/lib/smol_str/src/lib.rs b/lib/smol_str/src/lib.rs index a5a99d2271..c4d03dae37 100644 --- a/lib/smol_str/src/lib.rs +++ b/lib/smol_str/src/lib.rs @@ -98,6 +98,12 @@ impl fmt::Display for SmolStr { } } +impl<'a> From<&'a str> for SmolStr { + fn from(text: &'a str) -> Self { + Self::new(text) + } +} + const INLINE_CAP: usize = 22; const WS_TAG: u8 = (INLINE_CAP + 1) as u8; const N_NEWLINES: usize = 32; From 2f2a83ea8481e1472b5af55b4d16e195b170005c Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Fri, 17 Aug 2018 21:01:40 +0300 Subject: [PATCH 003/132] 0.1.1 --- lib/smol_str/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/smol_str/Cargo.toml b/lib/smol_str/Cargo.toml index a82c8aec85..173a149fe3 100644 --- a/lib/smol_str/Cargo.toml +++ b/lib/smol_str/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "smol_str" -version = "0.1.0" +version = "0.1.1" authors = ["Aleksey Kladov "] repository 
= "https://github.com/matklad/smol_str" description = "small-string optimized stirng type with O(1) clone" From f636b5fe901060ef12f9a926768b4882fb4a0d46 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Fri, 17 Aug 2018 22:10:08 +0300 Subject: [PATCH 004/132] fix CI --- lib/smol_str/.travis.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/smol_str/.travis.yml b/lib/smol_str/.travis.yml index 5070d43aa5..d848914cf9 100644 --- a/lib/smol_str/.travis.yml +++ b/lib/smol_str/.travis.yml @@ -2,4 +2,3 @@ language: rust script: - cargo test - - cargo run --example serde From 3b5bc6db04d29fa4839f4c9b6eb02e59be87f1c8 Mon Sep 17 00:00:00 2001 From: jD91mZM2 Date: Fri, 17 Aug 2018 21:15:24 +0200 Subject: [PATCH 005/132] Implement PartialEq and From --- lib/smol_str/src/lib.rs | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/lib/smol_str/src/lib.rs b/lib/smol_str/src/lib.rs index c4d03dae37..eeee48b49c 100644 --- a/lib/smol_str/src/lib.rs +++ b/lib/smol_str/src/lib.rs @@ -38,6 +38,12 @@ impl Deref for SmolStr { } } +impl PartialEq for SmolStr { + fn eq(&self, other: &SmolStr) -> bool { + self.as_str() == other.as_str() + } +} + impl PartialEq for SmolStr { fn eq(&self, other: &str) -> bool { self.as_str() == other @@ -98,6 +104,12 @@ impl fmt::Display for SmolStr { } } +impl From for SmolStr { + fn from(text: String) -> Self { + SmolStr(Repr::new_heap(text)) + } +} + impl<'a> From<&'a str> for SmolStr { fn from(text: &'a str) -> Self { Self::new(text) @@ -118,7 +130,7 @@ enum Repr { } impl Repr { - fn new(text: &str) -> Repr { + fn new(text: &str) -> Self { let len = text.len(); if len <= INLINE_CAP { let mut buf = [0; INLINE_CAP]; @@ -138,7 +150,10 @@ impl Repr { return Repr::Inline { len: WS_TAG, buf }; } - Repr::Heap(text.to_string().into_boxed_str().into()) + Self::new_heap(text.to_string()) + } + fn new_heap(text: String) -> Self { + Repr::Heap(text.into_boxed_str().into()) } fn as_str(&self) -> &str { From 
0ec0da07f4697cd4d707df1f89bf8f47b320b86c Mon Sep 17 00:00:00 2001 From: jD91mZM2 Date: Sat, 18 Aug 2018 10:38:23 +0200 Subject: [PATCH 006/132] More generics! --- lib/smol_str/src/lib.rs | 63 +++++++++++++++++++++-------------------- 1 file changed, 32 insertions(+), 31 deletions(-) diff --git a/lib/smol_str/src/lib.rs b/lib/smol_str/src/lib.rs index eeee48b49c..c9606f9049 100644 --- a/lib/smol_str/src/lib.rs +++ b/lib/smol_str/src/lib.rs @@ -17,7 +17,9 @@ use std::{fmt, ops::Deref, sync::Arc}; pub struct SmolStr(Repr); impl SmolStr { - pub fn new(text: &str) -> SmolStr { + pub fn new(text: T) -> SmolStr + where T: Into + AsRef + { SmolStr(Repr::new(text)) } @@ -104,14 +106,10 @@ impl fmt::Display for SmolStr { } } -impl From for SmolStr { - fn from(text: String) -> Self { - SmolStr(Repr::new_heap(text)) - } -} - -impl<'a> From<&'a str> for SmolStr { - fn from(text: &'a str) -> Self { +impl From for SmolStr + where T: Into + AsRef +{ + fn from(text: T) -> Self { Self::new(text) } } @@ -130,30 +128,33 @@ enum Repr { } impl Repr { - fn new(text: &str) -> Self { - let len = text.len(); - if len <= INLINE_CAP { - let mut buf = [0; INLINE_CAP]; - buf[..len].copy_from_slice(text.as_bytes()); - return Repr::Inline { - len: len as u8, - buf, - }; + fn new(text: T) -> Self + where T: Into + AsRef + { + { + let text = text.as_ref(); + + let len = text.len(); + if len <= INLINE_CAP { + let mut buf = [0; INLINE_CAP]; + buf[..len].copy_from_slice(text.as_bytes()); + return Repr::Inline { + len: len as u8, + buf, + }; + } + + let newlines = text.bytes().take_while(|&b| b == b'\n').count(); + let spaces = text[newlines..].bytes().take_while(|&b| b == b' ').count(); + if newlines + spaces == len && newlines <= N_NEWLINES && spaces <= N_SPACES { + let mut buf = [0; INLINE_CAP]; + buf[0] = newlines as u8; + buf[1] = spaces as u8; + return Repr::Inline { len: WS_TAG, buf }; + } } - let newlines = text.bytes().take_while(|&b| b == b'\n').count(); - let spaces = 
text[newlines..].bytes().take_while(|&b| b == b' ').count(); - if newlines + spaces == len && newlines <= N_NEWLINES && spaces <= N_SPACES { - let mut buf = [0; INLINE_CAP]; - buf[0] = newlines as u8; - buf[1] = spaces as u8; - return Repr::Inline { len: WS_TAG, buf }; - } - - Self::new_heap(text.to_string()) - } - fn new_heap(text: String) -> Self { - Repr::Heap(text.into_boxed_str().into()) + Repr::Heap(text.into().into_boxed_str().into()) } fn as_str(&self) -> &str { From b527777f3f2eafe51925ea9b052afcc3f8c82edd Mon Sep 17 00:00:00 2001 From: jD91mZM2 Date: Sat, 18 Aug 2018 10:41:16 +0200 Subject: [PATCH 007/132] Bump version --- lib/smol_str/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/smol_str/Cargo.toml b/lib/smol_str/Cargo.toml index 173a149fe3..cdda605163 100644 --- a/lib/smol_str/Cargo.toml +++ b/lib/smol_str/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "smol_str" -version = "0.1.1" +version = "0.1.2" authors = ["Aleksey Kladov "] repository = "https://github.com/matklad/smol_str" description = "small-string optimized stirng type with O(1) clone" From f00cf645c1ee1022cffd420a18dcf6cbe65cbea7 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Fri, 31 Aug 2018 15:47:17 +0300 Subject: [PATCH 008/132] smol_str is hash --- lib/smol_str/Cargo.toml | 2 +- lib/smol_str/src/lib.rs | 8 +++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/lib/smol_str/Cargo.toml b/lib/smol_str/Cargo.toml index cdda605163..7cd8cb4ede 100644 --- a/lib/smol_str/Cargo.toml +++ b/lib/smol_str/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "smol_str" -version = "0.1.2" +version = "0.1.3" authors = ["Aleksey Kladov "] repository = "https://github.com/matklad/smol_str" description = "small-string optimized stirng type with O(1) clone" diff --git a/lib/smol_str/src/lib.rs b/lib/smol_str/src/lib.rs index c9606f9049..dc59333190 100644 --- a/lib/smol_str/src/lib.rs +++ b/lib/smol_str/src/lib.rs @@ -1,4 +1,4 @@ -use std::{fmt, ops::Deref, 
sync::Arc}; +use std::{fmt, hash, ops::Deref, sync::Arc}; /// A `SmolStr` is a string type that has the following properties /// @@ -94,6 +94,12 @@ impl<'a> PartialEq for &'a String { } } +impl hash::Hash for SmolStr { + fn hash(&self, hasher: &mut H) { + self.as_str().hash(hasher) + } +} + impl fmt::Debug for SmolStr { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { fmt::Debug::fmt(self.as_str(), f) From 2534906a7fd36d83f572227d3b1f3ec4fc30bc47 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Fri, 31 Aug 2018 15:50:29 +0300 Subject: [PATCH 009/132] smol_str is Eq --- lib/smol_str/Cargo.toml | 2 +- lib/smol_str/src/lib.rs | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/smol_str/Cargo.toml b/lib/smol_str/Cargo.toml index 7cd8cb4ede..2cec274c8f 100644 --- a/lib/smol_str/Cargo.toml +++ b/lib/smol_str/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "smol_str" -version = "0.1.3" +version = "0.1.4" authors = ["Aleksey Kladov "] repository = "https://github.com/matklad/smol_str" description = "small-string optimized stirng type with O(1) clone" diff --git a/lib/smol_str/src/lib.rs b/lib/smol_str/src/lib.rs index dc59333190..1862396a03 100644 --- a/lib/smol_str/src/lib.rs +++ b/lib/smol_str/src/lib.rs @@ -46,6 +46,8 @@ impl PartialEq for SmolStr { } } +impl Eq for SmolStr {} + impl PartialEq for SmolStr { fn eq(&self, other: &str) -> bool { self.as_str() == other From 2fc58bd51c8ef888af3dbae91058dabb1ab9568a Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Sun, 2 Sep 2018 14:01:46 +0300 Subject: [PATCH 010/132] serde --- lib/smol_str/.travis.yml | 2 +- lib/smol_str/Cargo.toml | 6 +++++- lib/smol_str/src/lib.rs | 21 +++++++++++++++++++++ lib/smol_str/tests/test.rs | 10 ++++++++++ 4 files changed, 37 insertions(+), 2 deletions(-) diff --git a/lib/smol_str/.travis.yml b/lib/smol_str/.travis.yml index d848914cf9..56abf36895 100644 --- a/lib/smol_str/.travis.yml +++ b/lib/smol_str/.travis.yml @@ -1,4 +1,4 @@ language: rust script: - - cargo test 
+ - cargo test --all-features diff --git a/lib/smol_str/Cargo.toml b/lib/smol_str/Cargo.toml index 2cec274c8f..0eba48e8c2 100644 --- a/lib/smol_str/Cargo.toml +++ b/lib/smol_str/Cargo.toml @@ -1,10 +1,14 @@ [package] name = "smol_str" -version = "0.1.4" +version = "0.1.5" authors = ["Aleksey Kladov "] repository = "https://github.com/matklad/smol_str" description = "small-string optimized stirng type with O(1) clone" license = "MIT OR Apache-2.0" +[dependencies] +serde = { version = "1", optional = true } + [dev-dependencies] proptest = "0.8.3" +serde_json = "1" diff --git a/lib/smol_str/src/lib.rs b/lib/smol_str/src/lib.rs index 1862396a03..cdf013519d 100644 --- a/lib/smol_str/src/lib.rs +++ b/lib/smol_str/src/lib.rs @@ -183,3 +183,24 @@ impl Repr { } } } + +#[cfg(feature = "serde")] +mod serde { + extern crate serde; + + use SmolStr; + + impl serde::Serialize for SmolStr { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer + { self.as_str().serialize(serializer) } + } + + impl<'de> serde::Deserialize<'de> for SmolStr { + fn deserialize(deserializer: D) -> Result + where + D: serde::Deserializer<'de> + { <&'de str>::deserialize(deserializer).map(SmolStr::from) } + } +} diff --git a/lib/smol_str/tests/test.rs b/lib/smol_str/tests/test.rs index 44a0f57e42..d66d6ab742 100644 --- a/lib/smol_str/tests/test.rs +++ b/lib/smol_str/tests/test.rs @@ -1,4 +1,5 @@ extern crate smol_str; +extern crate serde_json; #[macro_use] extern crate proptest; @@ -44,3 +45,12 @@ proptest! 
{ prop_assert_eq!(smol.as_str(), s.as_str()); } } + +#[test] +fn test_serde() { + let s = SmolStr::new("Hello, World"); + let s = serde_json::to_string(&s).unwrap(); + assert_eq!(s, "\"Hello, World\""); + let s: SmolStr = serde_json::from_str(&s).unwrap(); + assert_eq!(s, "Hello, World"); +} From 47569f04e6cf4433c9418b1f5d06c89db78711cd Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Mon, 3 Sep 2018 21:47:04 +0300 Subject: [PATCH 011/132] add len method --- lib/smol_str/Cargo.toml | 2 +- lib/smol_str/src/lib.rs | 19 +++++++++++++++++++ lib/smol_str/tests/test.rs | 19 +++++++++++-------- 3 files changed, 31 insertions(+), 9 deletions(-) diff --git a/lib/smol_str/Cargo.toml b/lib/smol_str/Cargo.toml index 0eba48e8c2..09978f0a98 100644 --- a/lib/smol_str/Cargo.toml +++ b/lib/smol_str/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "smol_str" -version = "0.1.5" +version = "0.1.6" authors = ["Aleksey Kladov "] repository = "https://github.com/matklad/smol_str" description = "small-string optimized stirng type with O(1) clone" diff --git a/lib/smol_str/src/lib.rs b/lib/smol_str/src/lib.rs index cdf013519d..c41389f38e 100644 --- a/lib/smol_str/src/lib.rs +++ b/lib/smol_str/src/lib.rs @@ -30,6 +30,11 @@ impl SmolStr { pub fn to_string(&self) -> String { self.as_str().to_string() } + + #[inline(always)] + pub fn len(&self) -> usize { + self.0.len() + } } impl Deref for SmolStr { @@ -165,6 +170,20 @@ impl Repr { Repr::Heap(text.into().into_boxed_str().into()) } + fn len(&self) -> usize { + match self { + Repr::Heap(data) => data.len(), + Repr::Inline { len, buf } => { + if *len == WS_TAG { + let newlines = buf[0] as usize; + let spaces = buf[1] as usize; + return newlines + spaces; + } + *len as usize + } + } + } + fn as_str(&self) -> &str { match self { Repr::Heap(data) => &*data, diff --git a/lib/smol_str/tests/test.rs b/lib/smol_str/tests/test.rs index d66d6ab742..56837b4c33 100644 --- a/lib/smol_str/tests/test.rs +++ b/lib/smol_str/tests/test.rs @@ -20,29 +20,32 @@ fn 
assert_traits() { f::(); } +fn check_props(s: &str) -> Result<(), proptest::test_runner::TestCaseError> { + let smol = SmolStr::new(s); + prop_assert_eq!(smol.as_str(), s); + prop_assert_eq!(smol.len(), s.len()); + Ok(()) +} + proptest! { #[test] fn roundtrip(s: String) { - let smol = SmolStr::new(s.as_str()); - prop_assert_eq!(smol.as_str(), s.as_str()); + check_props(s.as_str())?; } #[test] fn roundtrip_spaces(s in r"( )*") { - let smol = SmolStr::new(s.as_str()); - prop_assert_eq!(smol.as_str(), s.as_str()); + check_props(s.as_str())?; } #[test] fn roundtrip_newlines(s in r"\n*") { - let smol = SmolStr::new(s.as_str()); - prop_assert_eq!(smol.as_str(), s.as_str()); + check_props(s.as_str())?; } #[test] fn roundtrip_ws(s in r"( |\n)*") { - let smol = SmolStr::new(s.as_str()); - prop_assert_eq!(smol.as_str(), s.as_str()); + check_props(s.as_str())?; } } From f183a7fea571a18c2191784e45c8214772fa6c28 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adolfo=20Ochagav=C3=ADa?= Date: Tue, 9 Oct 2018 18:29:37 +0200 Subject: [PATCH 012/132] Fix some stuff in the readme I don't understand the sentence that starts with *Runs of `\n` and space symbols*... What do you mean by that? --- lib/smol_str/README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/smol_str/README.md b/lib/smol_str/README.md index 14215cf54f..53b33aa273 100644 --- a/lib/smol_str/README.md +++ b/lib/smol_str/README.md @@ -5,7 +5,7 @@ [![API reference](https://docs.rs/smol_str/badge.svg)](https://docs.rs/smol_str/) -A `SmolStr` is a string type that has the following properties +A `SmolStr` is a string type that has the following properties: * `size_of::() == size_of::()` * Strings up to 22 bytes long do not use heap allocations @@ -15,6 +15,6 @@ A `SmolStr` is a string type that has the following properties Unlike `String`, however, `SmolStr` is immutable. The primary use-case for `SmolStr` is a good enough default storage for tokens of typical programming -languages. 
A specialized interner might be a better solution for some use-cases. +languages. A specialized interner might be a better solution for some use cases. -Intenrally, `SmolStr` is roughly an `enum { Heap<Arc<str>>, Inline([u8; 22]) }`. +Internally, `SmolStr` is roughly an `enum { Heap(Arc<str>), Inline([u8; 22]) }`. From 4702d59ee77cd36cdd255a738b581f25927a4874 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adolfo=20Ochagav=C3=ADa?= Date: Tue, 9 Oct 2018 20:56:18 +0200 Subject: [PATCH 013/132] Clarify docs --- lib/smol_str/README.md | 22 +++++++++-------- lib/smol_str/src/lib.rs | 52 +++++++++++++++++------------------------ 2 files changed, 34 insertions(+), 40 deletions(-) diff --git a/lib/smol_str/README.md b/lib/smol_str/README.md index 53b33aa273..5ba92a637b 100644 --- a/lib/smol_str/README.md +++ b/lib/smol_str/README.md @@ -1,4 +1,4 @@ -# typed_key +# smol_str [![Build Status](https://travis-ci.org/matklad/smol_str.svg?branch=master)](https://travis-ci.org/matklad/smol_str) [![Crates.io](https://img.shields.io/crates/v/smol_str.svg)](https://crates.io/crates/smol_str) @@ -7,14 +7,16 @@ A `SmolStr` is a string type that has the following properties: - * `size_of::<SmolStr>() == size_of::<String>()` - * Strings up to 22 bytes long do not use heap allocations - * Runs of `\n` and space symbols (typical whitespace pattern of indentation - in programming laguages) do not use heap allocations - * `Clone` is `O(1)` +* `size_of::<SmolStr>() == size_of::<String>()` +* `Clone` is `O(1)` +* Strings are stack-allocated if they are: + * Up to 22 bytes long + * Longer than 22 bytes, but substrings of `WS` (see `src/lib.rs`). Such strings consist + solely of consecutive newlines, followed by consecutive spaces +* If a string does not satisfy the aforementioned conditions, it is heap-allocated -Unlike `String`, however, `SmolStr` is immutable. The primary use-case for +Unlike `String`, however, `SmolStr` is immutable. 
The primary use case for `SmolStr` is a good enough default storage for tokens of typical programming -languages. A specialized interner might be a better solution for some use cases. - -Internally, `SmolStr` is roughly an `enum { Heap(Arc<str>), Inline([u8; 22]) }`. +languages. Strings consisting of a series of newlines, followed by a series of +whitespace are a typical pattern in computer programms because of indentation. +Note that a specialized interner might be a better solution for some use cases. diff --git a/lib/smol_str/src/lib.rs b/lib/smol_str/src/lib.rs index c41389f38e..49d26bbf9d 100644 --- a/lib/smol_str/src/lib.rs +++ b/lib/smol_str/src/lib.rs @@ -1,18 +1,20 @@ use std::{fmt, hash, ops::Deref, sync::Arc}; -/// A `SmolStr` is a string type that has the following properties +/// A `SmolStr` is a string type that has the following properties: /// -/// * `size_of::<SmolStr>() == size_of::<String>()` -/// * Strings up to 22 bytes long do not use heap allocations -/// * Runs of `\n` and space symbols (typical whitespace pattern of indentation -/// in programming laguages) do not use heap allocations -/// * `Clone` is `O(1)` +/// * `size_of::<SmolStr>() == size_of::<String>()` +/// * `Clone` is `O(1)` +/// * Strings are stack-allocated if they are: +/// * Up to 22 bytes long +/// * Longer than 22 bytes, but substrings of `WS` (see below). Such strings consist +/// solely of consecutive newlines, followed by consecutive spaces +/// * If a string does not satisfy the aforementioned conditions, it is heap-allocated /// -/// Unlike `String`, however, `SmolStr` is immutable. The primary use-case for +/// Unlike `String`, however, `SmolStr` is immutable. The primary use case for /// `SmolStr` is a good enough default storage for tokens of typical programming -/// languages. A specialized interner might be a better solution for some use-cases. -/// -/// Intenrally, `SmolStr` is roughly an `enum { Heap<Arc<str>>, Inline([u8; 22]) }`. +/// languages. 
Strings consisting of a series of newlines, followed by a series of +/// whitespace are a typical pattern in computer programms because of indentation. +/// Note that a specialized interner might be a better solution for some use cases. #[derive(Clone)] pub struct SmolStr(Repr); @@ -128,7 +130,6 @@ impl From for SmolStr } const INLINE_CAP: usize = 22; -const WS_TAG: u8 = (INLINE_CAP + 1) as u8; const N_NEWLINES: usize = 32; const N_SPACES: usize = 128; const WS: &str = @@ -138,6 +139,7 @@ const WS: &str = enum Repr { Heap(Arc), Inline { len: u8, buf: [u8; INLINE_CAP] }, + Substring { newlines: usize, spaces: usize }, } impl Repr { @@ -160,10 +162,7 @@ impl Repr { let newlines = text.bytes().take_while(|&b| b == b'\n').count(); let spaces = text[newlines..].bytes().take_while(|&b| b == b' ').count(); if newlines + spaces == len && newlines <= N_NEWLINES && spaces <= N_SPACES { - let mut buf = [0; INLINE_CAP]; - buf[0] = newlines as u8; - buf[1] = spaces as u8; - return Repr::Inline { len: WS_TAG, buf }; + return Repr::Substring { newlines, spaces }; } } @@ -173,14 +172,8 @@ impl Repr { fn len(&self) -> usize { match self { Repr::Heap(data) => data.len(), - Repr::Inline { len, buf } => { - if *len == WS_TAG { - let newlines = buf[0] as usize; - let spaces = buf[1] as usize; - return newlines + spaces; - } - *len as usize - } + Repr::Inline { len, .. 
} => *len as usize, + Repr::Substring { newlines, spaces } => *newlines + *spaces } } @@ -188,17 +181,16 @@ impl Repr { match self { Repr::Heap(data) => &*data, Repr::Inline { len, buf } => { - if *len == WS_TAG { - let newlines = buf[0] as usize; - let spaces = buf[1] as usize; - assert!(newlines <= N_NEWLINES && spaces <= N_SPACES); - return &WS[N_NEWLINES - newlines..N_NEWLINES + spaces]; - } - let len = *len as usize; let buf = &buf[..len]; unsafe { ::std::str::from_utf8_unchecked(buf) } } + Repr::Substring { newlines, spaces } => { + let newlines = *newlines; + let spaces = *spaces; + assert!(newlines <= N_NEWLINES && spaces <= N_SPACES); + &WS[N_NEWLINES - newlines..N_NEWLINES + spaces] + } } } } From 48e1ed47e875cbe4ff86550a0882b65cc5106afe Mon Sep 17 00:00:00 2001 From: Bruce Mitchener Date: Fri, 12 Oct 2018 21:17:31 +0700 Subject: [PATCH 014/132] Fix typos. --- lib/smol_str/README.md | 2 +- lib/smol_str/src/lib.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/smol_str/README.md b/lib/smol_str/README.md index 5ba92a637b..949f6e6ebf 100644 --- a/lib/smol_str/README.md +++ b/lib/smol_str/README.md @@ -18,5 +18,5 @@ A `SmolStr` is a string type that has the following properties: Unlike `String`, however, `SmolStr` is immutable. The primary use case for `SmolStr` is a good enough default storage for tokens of typical programming languages. Strings consisting of a series of newlines, followed by a series of -whitespace are a typical pattern in computer programms because of indentation. +whitespace are a typical pattern in computer programs because of indentation. Note that a specialized interner might be a better solution for some use cases. diff --git a/lib/smol_str/src/lib.rs b/lib/smol_str/src/lib.rs index 49d26bbf9d..a62cbddb43 100644 --- a/lib/smol_str/src/lib.rs +++ b/lib/smol_str/src/lib.rs @@ -13,7 +13,7 @@ use std::{fmt, hash, ops::Deref, sync::Arc}; /// Unlike `String`, however, `SmolStr` is immutable. 
The primary use case for /// `SmolStr` is a good enough default storage for tokens of typical programming /// languages. Strings consisting of a series of newlines, followed by a series of -/// whitespace are a typical pattern in computer programms because of indentation. +/// whitespace are a typical pattern in computer programs because of indentation. /// Note that a specialized interner might be a better solution for some use cases. #[derive(Clone)] pub struct SmolStr(Repr); From c8ac40fcd84ab1288c3079e11f472ac7866adce5 Mon Sep 17 00:00:00 2001 From: Bruce Mitchener Date: Fri, 12 Oct 2018 21:18:49 +0700 Subject: [PATCH 015/132] rustfmt with stable. --- lib/smol_str/src/lib.rs | 23 +++++++++++++++-------- lib/smol_str/tests/test.rs | 2 +- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/lib/smol_str/src/lib.rs b/lib/smol_str/src/lib.rs index 49d26bbf9d..c13a19d8f0 100644 --- a/lib/smol_str/src/lib.rs +++ b/lib/smol_str/src/lib.rs @@ -20,7 +20,8 @@ pub struct SmolStr(Repr); impl SmolStr { pub fn new(text: T) -> SmolStr - where T: Into + AsRef + where + T: Into + AsRef, { SmolStr(Repr::new(text)) } @@ -122,7 +123,8 @@ impl fmt::Display for SmolStr { } impl From for SmolStr - where T: Into + AsRef +where + T: Into + AsRef, { fn from(text: T) -> Self { Self::new(text) @@ -144,7 +146,8 @@ enum Repr { impl Repr { fn new(text: T) -> Self - where T: Into + AsRef + where + T: Into + AsRef, { { let text = text.as_ref(); @@ -173,7 +176,7 @@ impl Repr { match self { Repr::Heap(data) => data.len(), Repr::Inline { len, .. 
} => *len as usize, - Repr::Substring { newlines, spaces } => *newlines + *spaces + Repr::Substring { newlines, spaces } => *newlines + *spaces, } } @@ -204,14 +207,18 @@ mod serde { impl serde::Serialize for SmolStr { fn serialize(&self, serializer: S) -> Result where - S: serde::Serializer - { self.as_str().serialize(serializer) } + S: serde::Serializer, + { + self.as_str().serialize(serializer) + } } impl<'de> serde::Deserialize<'de> for SmolStr { fn deserialize(deserializer: D) -> Result where - D: serde::Deserializer<'de> - { <&'de str>::deserialize(deserializer).map(SmolStr::from) } + D: serde::Deserializer<'de>, + { + <&'de str>::deserialize(deserializer).map(SmolStr::from) + } } } diff --git a/lib/smol_str/tests/test.rs b/lib/smol_str/tests/test.rs index 56837b4c33..011a9d733d 100644 --- a/lib/smol_str/tests/test.rs +++ b/lib/smol_str/tests/test.rs @@ -1,5 +1,5 @@ -extern crate smol_str; extern crate serde_json; +extern crate smol_str; #[macro_use] extern crate proptest; From ea731a9259ddf52b34763ae52e033f09d41185b2 Mon Sep 17 00:00:00 2001 From: Bruce Mitchener Date: Fri, 12 Oct 2018 21:25:25 +0700 Subject: [PATCH 016/132] Fix test compilation without serde feature. --- lib/smol_str/tests/test.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/smol_str/tests/test.rs b/lib/smol_str/tests/test.rs index 011a9d733d..eebdc205a0 100644 --- a/lib/smol_str/tests/test.rs +++ b/lib/smol_str/tests/test.rs @@ -49,6 +49,7 @@ proptest! { } } +#[cfg(feature = "serde")] #[test] fn test_serde() { let s = SmolStr::new("Hello, World"); From 497de131f5f29365699f70672d297b4879fe1389 Mon Sep 17 00:00:00 2001 From: Bruce Mitchener Date: Fri, 12 Oct 2018 21:28:44 +0700 Subject: [PATCH 017/132] Fix typo in Cargo.toml. 
--- lib/smol_str/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/smol_str/Cargo.toml b/lib/smol_str/Cargo.toml index 09978f0a98..45a3f2cac8 100644 --- a/lib/smol_str/Cargo.toml +++ b/lib/smol_str/Cargo.toml @@ -3,7 +3,7 @@ name = "smol_str" version = "0.1.6" authors = ["Aleksey Kladov "] repository = "https://github.com/matklad/smol_str" -description = "small-string optimized stirng type with O(1) clone" +description = "small-string optimized string type with O(1) clone" license = "MIT OR Apache-2.0" [dependencies] From 5076180f775f93acac8a2674ed876fa8c16f4052 Mon Sep 17 00:00:00 2001 From: Bruce Mitchener Date: Fri, 12 Oct 2018 21:27:49 +0700 Subject: [PATCH 018/132] clippy: Add (and test) is_empty method. Since there is a `len` method, clippy suggests having an `is_empty` method as well. --- lib/smol_str/src/lib.rs | 14 ++++++++++++++ lib/smol_str/tests/test.rs | 1 + 2 files changed, 15 insertions(+) diff --git a/lib/smol_str/src/lib.rs b/lib/smol_str/src/lib.rs index 5df411a56d..bedb6fb5b3 100644 --- a/lib/smol_str/src/lib.rs +++ b/lib/smol_str/src/lib.rs @@ -38,6 +38,11 @@ impl SmolStr { pub fn len(&self) -> usize { self.0.len() } + + #[inline(always)] + pub fn is_empty(&self) -> bool { + self.0.is_empty() + } } impl Deref for SmolStr { @@ -180,6 +185,15 @@ impl Repr { } } + fn is_empty(&self) -> bool { + match self { + Repr::Heap(data) => data.is_empty(), + Repr::Inline { len, .. } => *len == 0, + // A substring isn't created for an empty string. + Repr::Substring { .. 
} => false, + } + } + fn as_str(&self) -> &str { match self { Repr::Heap(data) => &*data, diff --git a/lib/smol_str/tests/test.rs b/lib/smol_str/tests/test.rs index eebdc205a0..f015f2fe77 100644 --- a/lib/smol_str/tests/test.rs +++ b/lib/smol_str/tests/test.rs @@ -24,6 +24,7 @@ fn check_props(s: &str) -> Result<(), proptest::test_runner::TestCaseError> { let smol = SmolStr::new(s); prop_assert_eq!(smol.as_str(), s); prop_assert_eq!(smol.len(), s.len()); + prop_assert_eq!(smol.is_empty(), s.is_empty()); Ok(()) } From d64d877b808b7eadad8592cd427cbab9d88eef48 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Sat, 13 Oct 2018 11:36:06 +0300 Subject: [PATCH 019/132] Propagate inline to inner wrapper --- lib/smol_str/src/lib.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/smol_str/src/lib.rs b/lib/smol_str/src/lib.rs index bedb6fb5b3..85381b31fd 100644 --- a/lib/smol_str/src/lib.rs +++ b/lib/smol_str/src/lib.rs @@ -177,6 +177,7 @@ impl Repr { Repr::Heap(text.into().into_boxed_str().into()) } + #[inline(always)] fn len(&self) -> usize { match self { Repr::Heap(data) => data.len(), @@ -185,6 +186,7 @@ impl Repr { } } + #[inline(always)] fn is_empty(&self) -> bool { match self { Repr::Heap(data) => data.is_empty(), From c6c487ea31b540ddd27126ab491c234068627141 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Sat, 13 Oct 2018 11:36:56 +0300 Subject: [PATCH 020/132] bump version --- lib/smol_str/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/smol_str/Cargo.toml b/lib/smol_str/Cargo.toml index 45a3f2cac8..051ee6dcff 100644 --- a/lib/smol_str/Cargo.toml +++ b/lib/smol_str/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "smol_str" -version = "0.1.6" +version = "0.1.7" authors = ["Aleksey Kladov "] repository = "https://github.com/matklad/smol_str" description = "small-string optimized string type with O(1) clone" From 373ca5eda7bf809e3c69bb2ab1df3ffd16ae9350 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Fri, 21 Dec 2018 
14:16:23 +0300 Subject: [PATCH 021/132] add From for String --- lib/smol_str/src/lib.rs | 6 ++++++ lib/smol_str/tests/test.rs | 7 +++++++ 2 files changed, 13 insertions(+) diff --git a/lib/smol_str/src/lib.rs b/lib/smol_str/src/lib.rs index 85381b31fd..78b0103ef5 100644 --- a/lib/smol_str/src/lib.rs +++ b/lib/smol_str/src/lib.rs @@ -136,6 +136,12 @@ where } } +impl From for String { + fn from(text: SmolStr) -> Self { + text.to_string() + } +} + const INLINE_CAP: usize = 22; const N_NEWLINES: usize = 32; const N_SPACES: usize = 128; diff --git a/lib/smol_str/tests/test.rs b/lib/smol_str/tests/test.rs index f015f2fe77..94ab66dacb 100644 --- a/lib/smol_str/tests/test.rs +++ b/lib/smol_str/tests/test.rs @@ -20,6 +20,13 @@ fn assert_traits() { f::(); } +#[test] +fn conversions() { + let s: SmolStr = "Hello, World!".into(); + let s: String = s.into(); + assert_eq!(s, "Hello, World!") +} + fn check_props(s: &str) -> Result<(), proptest::test_runner::TestCaseError> { let smol = SmolStr::new(s); prop_assert_eq!(smol.as_str(), s); From 18ed52bd0470497c9c87b003eac019aefd189949 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Fri, 21 Dec 2018 14:16:57 +0300 Subject: [PATCH 022/132] bump version --- lib/smol_str/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/smol_str/Cargo.toml b/lib/smol_str/Cargo.toml index 051ee6dcff..c01c378c2e 100644 --- a/lib/smol_str/Cargo.toml +++ b/lib/smol_str/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "smol_str" -version = "0.1.7" +version = "0.1.8" authors = ["Aleksey Kladov "] repository = "https://github.com/matklad/smol_str" description = "small-string optimized string type with O(1) clone" From 05bebe195b4640f9274b0ca0ad024b8ba4d26651 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Fri, 11 Jan 2019 13:15:38 +0300 Subject: [PATCH 023/132] add Default for SmolStr --- lib/smol_str/Cargo.toml | 2 +- lib/smol_str/src/lib.rs | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git 
a/lib/smol_str/Cargo.toml b/lib/smol_str/Cargo.toml index c01c378c2e..dc99ab007a 100644 --- a/lib/smol_str/Cargo.toml +++ b/lib/smol_str/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "smol_str" -version = "0.1.8" +version = "0.1.9" authors = ["Aleksey Kladov "] repository = "https://github.com/matklad/smol_str" description = "small-string optimized string type with O(1) clone" diff --git a/lib/smol_str/src/lib.rs b/lib/smol_str/src/lib.rs index 78b0103ef5..e355630e47 100644 --- a/lib/smol_str/src/lib.rs +++ b/lib/smol_str/src/lib.rs @@ -45,6 +45,12 @@ impl SmolStr { } } +impl Default for SmolStr { + fn default() -> SmolStr { + SmolStr::new("") + } +} + impl Deref for SmolStr { type Target = str; From 28c8d02439e3302f8f7b17aec4f836b1accaf5b6 Mon Sep 17 00:00:00 2001 From: "Evgeniy A. Dushistov" Date: Sun, 10 Mar 2019 04:27:42 +0300 Subject: [PATCH 024/132] implement Borrow to make possible search str in HashMap for String { } } +impl Borrow for SmolStr { + fn borrow(&self) -> &str { + self.as_str() + } +} + const INLINE_CAP: usize = 22; const N_NEWLINES: usize = 32; const N_SPACES: usize = 128; diff --git a/lib/smol_str/tests/test.rs b/lib/smol_str/tests/test.rs index 94ab66dacb..f5b7cf3daf 100644 --- a/lib/smol_str/tests/test.rs +++ b/lib/smol_str/tests/test.rs @@ -66,3 +66,10 @@ fn test_serde() { let s: SmolStr = serde_json::from_str(&s).unwrap(); assert_eq!(s, "Hello, World"); } + +#[test] +fn test_search_in_hashmap() { + let mut m = ::std::collections::HashMap::::new(); + m.insert("aaa".into(), 17); + assert_eq!(17, *m.get("aaa").unwrap()); +} From 21d82e2c4780e9b0dc176041123cc1ba304fd49d Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Sun, 31 Mar 2019 14:15:00 +0300 Subject: [PATCH 025/132] add is_heap_allocated --- lib/smol_str/src/lib.rs | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/lib/smol_str/src/lib.rs b/lib/smol_str/src/lib.rs index eb05dc67eb..b4240c9efb 100644 --- a/lib/smol_str/src/lib.rs +++ b/lib/smol_str/src/lib.rs @@ -26,10 
+26,12 @@ impl SmolStr { SmolStr(Repr::new(text)) } + #[inline(always)] pub fn as_str(&self) -> &str { self.0.as_str() } + #[inline(always)] pub fn to_string(&self) -> String { self.as_str().to_string() } @@ -43,6 +45,14 @@ impl SmolStr { pub fn is_empty(&self) -> bool { self.0.is_empty() } + + #[inline(always)] + pub fn is_heap_allocated(&self) -> bool { + match self.0 { + Repr::Heap(..) => true, + _ => false + } + } } impl Default for SmolStr { @@ -214,6 +224,7 @@ impl Repr { } } + #[inline] fn as_str(&self) -> &str { match self { Repr::Heap(data) => &*data, From 0894bfa4068ca9d33718cb831d5ccb8aa09d441c Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Sun, 31 Mar 2019 14:15:41 +0300 Subject: [PATCH 026/132] v0.1.10 --- lib/smol_str/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/smol_str/Cargo.toml b/lib/smol_str/Cargo.toml index dc99ab007a..acf5f3cf63 100644 --- a/lib/smol_str/Cargo.toml +++ b/lib/smol_str/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "smol_str" -version = "0.1.9" +version = "0.1.10" authors = ["Aleksey Kladov "] repository = "https://github.com/matklad/smol_str" description = "small-string optimized string type with O(1) clone" From a3eb0a1e9b2ffb9e5c7c6a4a157e52f6cc9a21ec Mon Sep 17 00:00:00 2001 From: Kevin Stenerson Date: Tue, 23 Apr 2019 13:56:07 -0600 Subject: [PATCH 027/132] Implement `FromIterator` for `SmolStr` --- lib/smol_str/src/lib.rs | 23 ++++++++++++++++++++++- lib/smol_str/tests/test.rs | 26 ++++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 1 deletion(-) diff --git a/lib/smol_str/src/lib.rs b/lib/smol_str/src/lib.rs index b4240c9efb..e357872205 100644 --- a/lib/smol_str/src/lib.rs +++ b/lib/smol_str/src/lib.rs @@ -1,4 +1,4 @@ -use std::{borrow::Borrow, fmt, hash, ops::Deref, sync::Arc}; +use std::{borrow::Borrow, fmt, hash, iter, ops::Deref, sync::Arc}; /// A `SmolStr` is a string type that has the following properties: /// @@ -143,6 +143,27 @@ impl fmt::Display for SmolStr { } } 
+impl iter::FromIterator for SmolStr { + fn from_iter>(iter: I) -> SmolStr { + let mut len = 0; + let mut buf = [0u8; INLINE_CAP]; + let mut iter = iter.into_iter(); + while let Some(ch) = iter.next() { + let size = ch.len_utf8(); + if size + len > INLINE_CAP { + let mut heap = String::with_capacity(size + len); + heap.push_str(std::str::from_utf8(&buf[..len]).unwrap()); + heap.push(ch); + heap.extend(iter); + return SmolStr(Repr::Heap(heap.into_boxed_str().into())); + } + ch.encode_utf8(&mut buf[len..]); + len += size; + } + SmolStr(Repr::Inline { len: len as u8, buf }) + } +} + impl From for SmolStr where T: Into + AsRef, diff --git a/lib/smol_str/tests/test.rs b/lib/smol_str/tests/test.rs index f5b7cf3daf..beab0780a8 100644 --- a/lib/smol_str/tests/test.rs +++ b/lib/smol_str/tests/test.rs @@ -73,3 +73,29 @@ fn test_search_in_hashmap() { m.insert("aaa".into(), 17); assert_eq!(17, *m.get("aaa").unwrap()); } + +#[test] +fn test_from_iterator() { + let examples = [ + // Simple keyword-like strings + ("if", false), + ("for", false), + ("impl", false), + + // Strings containing two-byte characters + ("パーティーへ行かないか", true), + ("パーティーへ行か", true), + ("パーティーへ行_", false), + ("和製漢語", false), + ("部落格", false), + ("사회과학원 어학연구소", true), + + // String containin diverse characters + ("表ポあA鷗ŒéB逍Üߪąñ丂㐀𠀀", true), + ]; + for (raw, is_heap) in &examples { + let s: SmolStr = raw.chars().collect(); + assert_eq!(s.as_str(), *raw); + assert_eq!(s.is_heap_allocated(), *is_heap); + } +} From 2b008c4625cbb8381bba14f02ef00d0d7a3a9256 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Tue, 21 May 2019 12:51:58 +0300 Subject: [PATCH 028/132] SmolStr: Ord --- lib/smol_str/Cargo.toml | 2 +- lib/smol_str/src/lib.rs | 20 +++++++++++++++++++- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/lib/smol_str/Cargo.toml b/lib/smol_str/Cargo.toml index acf5f3cf63..ae17a208f4 100644 --- a/lib/smol_str/Cargo.toml +++ b/lib/smol_str/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "smol_str" 
-version = "0.1.10" +version = "0.1.11" authors = ["Aleksey Kladov "] repository = "https://github.com/matklad/smol_str" description = "small-string optimized string type with O(1) clone" diff --git a/lib/smol_str/src/lib.rs b/lib/smol_str/src/lib.rs index e357872205..ca64e9077c 100644 --- a/lib/smol_str/src/lib.rs +++ b/lib/smol_str/src/lib.rs @@ -1,4 +1,10 @@ -use std::{borrow::Borrow, fmt, hash, iter, ops::Deref, sync::Arc}; +use std::{ + fmt, hash, iter, + borrow::Borrow, + cmp::Ordering, + ops::Deref, + sync::Arc, +}; /// A `SmolStr` is a string type that has the following properties: /// @@ -125,6 +131,18 @@ impl<'a> PartialEq for &'a String { } } +impl Ord for SmolStr { + fn cmp(&self, other: &SmolStr) -> Ordering { + self.as_str().cmp(other.as_str()) + } +} + +impl PartialOrd for SmolStr { + fn partial_cmp(&self, other: &SmolStr) -> Option { + Some(self.cmp(other)) + } +} + impl hash::Hash for SmolStr { fn hash(&self, hasher: &mut H) { self.as_str().hash(hasher) From 4f603f4f5d2ff3354b8456981413e9323ddc61b7 Mon Sep 17 00:00:00 2001 From: Pascal Hertleif Date: Sat, 25 May 2019 17:39:32 +0200 Subject: [PATCH 029/132] Add FromIterator with &str/&String/String items --- lib/smol_str/src/lib.rs | 52 ++++++++++++++++++++++++++++++++++++++ lib/smol_str/tests/test.rs | 35 +++++++++++++++++-------- 2 files changed, 76 insertions(+), 11 deletions(-) diff --git a/lib/smol_str/src/lib.rs b/lib/smol_str/src/lib.rs index ca64e9077c..344c4ae051 100644 --- a/lib/smol_str/src/lib.rs +++ b/lib/smol_str/src/lib.rs @@ -182,6 +182,58 @@ impl iter::FromIterator for SmolStr { } } +impl iter::FromIterator for SmolStr { + fn from_iter>(iter: I) -> SmolStr { + use std::io::prelude::*; + + let mut len = 0; + let mut buf = [0u8; INLINE_CAP]; + let mut iter = iter.into_iter(); + while let Some(slice) = iter.next() { + let size = slice.len(); + if size + len > INLINE_CAP { + let mut heap = String::with_capacity(size + len); + heap.push_str(std::str::from_utf8(&buf[..len]).unwrap()); + 
heap.push_str(&slice); + heap.extend(iter); + return SmolStr(Repr::Heap(heap.into_boxed_str().into())); + } + (&mut buf[len..]).write_all(slice.as_bytes()).unwrap(); + len += size; + } + SmolStr(Repr::Inline { len: len as u8, buf }) + } +} + +impl<'a> iter::FromIterator<&'a String> for SmolStr { + fn from_iter>(iter: I) -> SmolStr { + SmolStr::from_iter(iter.into_iter().map(|x| x.as_str())) + } +} + +impl<'a> iter::FromIterator<&'a str> for SmolStr { + fn from_iter>(iter: I) -> SmolStr { + use std::io::prelude::*; + + let mut len = 0; + let mut buf = [0u8; INLINE_CAP]; + let mut iter = iter.into_iter(); + while let Some(slice) = iter.next() { + let size = slice.len(); + if size + len > INLINE_CAP { + let mut heap = String::with_capacity(size + len); + heap.push_str(std::str::from_utf8(&buf[..len]).unwrap()); + heap.push_str(slice); + heap.extend(iter); + return SmolStr(Repr::Heap(heap.into_boxed_str().into())); + } + (&mut buf[len..]).write_all(slice.as_bytes()).unwrap(); + len += size; + } + SmolStr(Repr::Inline { len: len as u8, buf }) + } +} + impl From for SmolStr where T: Into + AsRef, diff --git a/lib/smol_str/tests/test.rs b/lib/smol_str/tests/test.rs index beab0780a8..28081bc3d1 100644 --- a/lib/smol_str/tests/test.rs +++ b/lib/smol_str/tests/test.rs @@ -27,33 +27,46 @@ fn conversions() { assert_eq!(s, "Hello, World!") } -fn check_props(s: &str) -> Result<(), proptest::test_runner::TestCaseError> { - let smol = SmolStr::new(s); - prop_assert_eq!(smol.as_str(), s); - prop_assert_eq!(smol.len(), s.len()); - prop_assert_eq!(smol.is_empty(), s.is_empty()); +fn check_props(std_str: &str, smol: SmolStr) -> Result<(), proptest::test_runner::TestCaseError> { + prop_assert_eq!(smol.as_str(), std_str); + prop_assert_eq!(smol.len(), std_str.len()); + prop_assert_eq!(smol.is_empty(), std_str.is_empty()); Ok(()) } proptest! 
{ #[test] fn roundtrip(s: String) { - check_props(s.as_str())?; + check_props(s.as_str(), SmolStr::new(s.clone()))?; } #[test] fn roundtrip_spaces(s in r"( )*") { - check_props(s.as_str())?; + check_props(s.as_str(), SmolStr::new(s.clone()))?; } #[test] fn roundtrip_newlines(s in r"\n*") { - check_props(s.as_str())?; + check_props(s.as_str(), SmolStr::new(s.clone()))?; } #[test] fn roundtrip_ws(s in r"( |\n)*") { - check_props(s.as_str())?; + check_props(s.as_str(), SmolStr::new(s.clone()))?; + } + + #[test] + fn from_string_iter(slices in proptest::collection::vec(".*", 1..100)) { + let string: String = slices.iter().map(|x| x.as_str()).collect(); + let smol: SmolStr = slices.into_iter().collect(); + check_props(string.as_str(), smol)?; + } + + #[test] + fn from_str_iter(slices in proptest::collection::vec(".*", 1..100)) { + let string: String = slices.iter().map(|x| x.as_str()).collect(); + let smol: SmolStr = slices.iter().collect(); + check_props(string.as_str(), smol)?; } } @@ -75,7 +88,7 @@ fn test_search_in_hashmap() { } #[test] -fn test_from_iterator() { +fn test_from_char_iterator() { let examples = [ // Simple keyword-like strings ("if", false), @@ -90,7 +103,7 @@ fn test_from_iterator() { ("部落格", false), ("사회과학원 어학연구소", true), - // String containin diverse characters + // String containing diverse characters ("表ポあA鷗ŒéB逍Üߪąñ丂㐀𠀀", true), ]; for (raw, is_heap) in &examples { From e1457edb5336472fbf3f21a7bcfe23f7ac974d06 Mon Sep 17 00:00:00 2001 From: Pascal Hertleif Date: Sat, 25 May 2019 21:28:32 +0200 Subject: [PATCH 030/132] Deduplicate FromIterator code Using a private function that is overly generic. 
--- lib/smol_str/src/lib.rs | 63 ++++++++++++++++++----------------------- 1 file changed, 27 insertions(+), 36 deletions(-) diff --git a/lib/smol_str/src/lib.rs b/lib/smol_str/src/lib.rs index 344c4ae051..af8001ced9 100644 --- a/lib/smol_str/src/lib.rs +++ b/lib/smol_str/src/lib.rs @@ -182,26 +182,34 @@ impl iter::FromIterator for SmolStr { } } +fn build_from_str_iter(mut iter: impl Iterator) -> SmolStr +where + T: AsRef, + std::string::String: std::iter::Extend, +{ + use std::io::prelude::*; + + let mut len = 0; + let mut buf = [0u8; INLINE_CAP]; + while let Some(slice) = iter.next() { + let slice = slice.as_ref(); + let size = slice.len(); + if size + len > INLINE_CAP { + let mut heap = String::with_capacity(size + len); + heap.push_str(std::str::from_utf8(&buf[..len]).unwrap()); + heap.push_str(&slice); + heap.extend(iter); + return SmolStr(Repr::Heap(heap.into_boxed_str().into())); + } + (&mut buf[len..]).write_all(slice.as_bytes()).unwrap(); + len += size; + } + SmolStr(Repr::Inline { len: len as u8, buf }) +} + impl iter::FromIterator for SmolStr { fn from_iter>(iter: I) -> SmolStr { - use std::io::prelude::*; - - let mut len = 0; - let mut buf = [0u8; INLINE_CAP]; - let mut iter = iter.into_iter(); - while let Some(slice) = iter.next() { - let size = slice.len(); - if size + len > INLINE_CAP { - let mut heap = String::with_capacity(size + len); - heap.push_str(std::str::from_utf8(&buf[..len]).unwrap()); - heap.push_str(&slice); - heap.extend(iter); - return SmolStr(Repr::Heap(heap.into_boxed_str().into())); - } - (&mut buf[len..]).write_all(slice.as_bytes()).unwrap(); - len += size; - } - SmolStr(Repr::Inline { len: len as u8, buf }) + build_from_str_iter(iter.into_iter()) } } @@ -213,24 +221,7 @@ impl<'a> iter::FromIterator<&'a String> for SmolStr { impl<'a> iter::FromIterator<&'a str> for SmolStr { fn from_iter>(iter: I) -> SmolStr { - use std::io::prelude::*; - - let mut len = 0; - let mut buf = [0u8; INLINE_CAP]; - let mut iter = iter.into_iter(); - 
while let Some(slice) = iter.next() { - let size = slice.len(); - if size + len > INLINE_CAP { - let mut heap = String::with_capacity(size + len); - heap.push_str(std::str::from_utf8(&buf[..len]).unwrap()); - heap.push_str(slice); - heap.extend(iter); - return SmolStr(Repr::Heap(heap.into_boxed_str().into())); - } - (&mut buf[len..]).write_all(slice.as_bytes()).unwrap(); - len += size; - } - SmolStr(Repr::Inline { len: len as u8, buf }) + build_from_str_iter(iter.into_iter()) } } From 03af6342294c3953e0daf294dc746da6dd33fcc2 Mon Sep 17 00:00:00 2001 From: Pascal Hertleif Date: Sun, 26 May 2019 14:16:37 +0200 Subject: [PATCH 031/132] Explicitly use copy_from_slice --- lib/smol_str/src/lib.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/lib/smol_str/src/lib.rs b/lib/smol_str/src/lib.rs index af8001ced9..684cb8d0fd 100644 --- a/lib/smol_str/src/lib.rs +++ b/lib/smol_str/src/lib.rs @@ -187,8 +187,6 @@ where T: AsRef, std::string::String: std::iter::Extend, { - use std::io::prelude::*; - let mut len = 0; let mut buf = [0u8; INLINE_CAP]; while let Some(slice) = iter.next() { @@ -201,7 +199,7 @@ where heap.extend(iter); return SmolStr(Repr::Heap(heap.into_boxed_str().into())); } - (&mut buf[len..]).write_all(slice.as_bytes()).unwrap(); + (&mut buf[len..len + size]).copy_from_slice(slice.as_bytes()); len += size; } SmolStr(Repr::Inline { len: len as u8, buf }) From f093bd4ed67256a26445927a155a186d3d178c68 Mon Sep 17 00:00:00 2001 From: Pascal Hertleif Date: Sun, 26 May 2019 14:16:49 +0200 Subject: [PATCH 032/132] Simplify type names --- lib/smol_str/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/smol_str/src/lib.rs b/lib/smol_str/src/lib.rs index 684cb8d0fd..e075b200aa 100644 --- a/lib/smol_str/src/lib.rs +++ b/lib/smol_str/src/lib.rs @@ -185,7 +185,7 @@ impl iter::FromIterator for SmolStr { fn build_from_str_iter(mut iter: impl Iterator) -> SmolStr where T: AsRef, - std::string::String: std::iter::Extend, + String: 
iter::Extend, { let mut len = 0; let mut buf = [0u8; INLINE_CAP]; From 3b30f08b61509fd188f2c4c52d6fd8bc434e7a8c Mon Sep 17 00:00:00 2001 From: Pascal Hertleif Date: Sun, 26 May 2019 14:17:07 +0200 Subject: [PATCH 033/132] Explicitly test for heap allocation, too --- lib/smol_str/tests/test.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lib/smol_str/tests/test.rs b/lib/smol_str/tests/test.rs index 28081bc3d1..853c0da673 100644 --- a/lib/smol_str/tests/test.rs +++ b/lib/smol_str/tests/test.rs @@ -31,6 +31,9 @@ fn check_props(std_str: &str, smol: SmolStr) -> Result<(), proptest::test_runner prop_assert_eq!(smol.as_str(), std_str); prop_assert_eq!(smol.len(), std_str.len()); prop_assert_eq!(smol.is_empty(), std_str.is_empty()); + if smol.len() <= 22 { + prop_assert!(!smol.is_heap_allocated()); + } Ok(()) } From 6c94863ae9637fbfc7a6753a08eafdc97f00369f Mon Sep 17 00:00:00 2001 From: Pascal Hertleif Date: Sun, 26 May 2019 14:17:13 +0200 Subject: [PATCH 034/132] Add simple benchmark --- lib/smol_str/Cargo.toml | 5 ++++ lib/smol_str/benches/building.rs | 44 ++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+) create mode 100644 lib/smol_str/benches/building.rs diff --git a/lib/smol_str/Cargo.toml b/lib/smol_str/Cargo.toml index ae17a208f4..81f23ff60b 100644 --- a/lib/smol_str/Cargo.toml +++ b/lib/smol_str/Cargo.toml @@ -12,3 +12,8 @@ serde = { version = "1", optional = true } [dev-dependencies] proptest = "0.8.3" serde_json = "1" +criterion = "0.2" + +[[bench]] +name = "building" +harness = false diff --git a/lib/smol_str/benches/building.rs b/lib/smol_str/benches/building.rs new file mode 100644 index 0000000000..1983314676 --- /dev/null +++ b/lib/smol_str/benches/building.rs @@ -0,0 +1,44 @@ +#[macro_use] +extern crate criterion; +extern crate smol_str; + +use criterion::{Criterion, ParameterizedBenchmark, Throughput}; +use smol_str::SmolStr; + +fn from_str_iter(c: &mut Criterion) { + use std::iter::FromIterator; + + const SIZES: &[usize] = &[0, 5, 
10, 15, 20, 2 << 4, 2 << 5, 2 << 6, 2 << 7, 2 << 8]; + + fn test_data(input: &str, size: usize) -> Vec<&str> { + std::iter::repeat(input).take(size / input.len()).collect() + } + + c.bench( + "FromIterator", + ParameterizedBenchmark::new( + "SmolStr, one byte elements", + |b, &&size| { + let src = test_data("x", size); + b.iter(|| SmolStr::from_iter(src.iter().cloned()).len()) + }, + SIZES, + ) + .with_function("SmolStr, five byte elements", |b, &&size| { + let src = test_data("helloo", size); + b.iter(|| SmolStr::from_iter(src.iter().cloned()).len()) + }) + .with_function("String, one byte elements", |b, &&size| { + let src = test_data("x", size); + b.iter(|| String::from_iter(src.iter().cloned()).len()) + }) + .with_function("String, five byte elements", |b, &&size| { + let src = test_data("hello", size); + b.iter(|| String::from_iter(src.iter().cloned()).len()) + }) + .throughput(|elems| Throughput::Bytes(**elems as u32)), + ); +} + +criterion_group!(benches, from_str_iter); +criterion_main!(benches); From 9c77c3ac83ebdbdcb78bead946b163a8aad4d544 Mon Sep 17 00:00:00 2001 From: Pascal Hertleif Date: Sun, 26 May 2019 14:27:46 +0200 Subject: [PATCH 035/132] Simplify slicing (no visible perf impact) --- lib/smol_str/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/smol_str/src/lib.rs b/lib/smol_str/src/lib.rs index e075b200aa..b5892cf455 100644 --- a/lib/smol_str/src/lib.rs +++ b/lib/smol_str/src/lib.rs @@ -199,7 +199,7 @@ where heap.extend(iter); return SmolStr(Repr::Heap(heap.into_boxed_str().into())); } - (&mut buf[len..len + size]).copy_from_slice(slice.as_bytes()); + (&mut buf[len..][..size]).copy_from_slice(slice.as_bytes()); len += size; } SmolStr(Repr::Inline { len: len as u8, buf }) From 2f924f6954486b8d5bd1ce0ec7955a8ea961ab5f Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Mon, 27 May 2019 09:29:17 +0300 Subject: [PATCH 036/132] add bors --- lib/smol_str/bors.toml | 4 ++++ 1 file changed, 4 insertions(+) create mode 
100644 lib/smol_str/bors.toml diff --git a/lib/smol_str/bors.toml b/lib/smol_str/bors.toml new file mode 100644 index 0000000000..574c56320f --- /dev/null +++ b/lib/smol_str/bors.toml @@ -0,0 +1,4 @@ +status = [ + "continuous-integration/travis-ci/push", +] +delete_merged_branches = true From 6e20c9967aa9b22f7e928a36f2da3a9837a0b941 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Sun, 2 Jun 2019 14:38:18 +0300 Subject: [PATCH 037/132] don't use derive --- lib/smol_str/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/smol_str/Cargo.toml b/lib/smol_str/Cargo.toml index 81f23ff60b..5595969665 100644 --- a/lib/smol_str/Cargo.toml +++ b/lib/smol_str/Cargo.toml @@ -7,7 +7,7 @@ description = "small-string optimized string type with O(1) clone" license = "MIT OR Apache-2.0" [dependencies] -serde = { version = "1", optional = true } +serde = { version = "1", optional = true, default_features = false } [dev-dependencies] proptest = "0.8.3" From e717be12fb26ed86fc1b917f57a60030eea9e7ad Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Sun, 7 Jul 2019 13:59:33 +0300 Subject: [PATCH 038/132] add cosnt-fn ctor --- lib/smol_str/src/lib.rs | 48 +++++++++++++++++++++++++++++--------- lib/smol_str/tests/test.rs | 18 ++++++++++++-- 2 files changed, 53 insertions(+), 13 deletions(-) diff --git a/lib/smol_str/src/lib.rs b/lib/smol_str/src/lib.rs index b5892cf455..449c199682 100644 --- a/lib/smol_str/src/lib.rs +++ b/lib/smol_str/src/lib.rs @@ -1,10 +1,4 @@ -use std::{ - fmt, hash, iter, - borrow::Borrow, - cmp::Ordering, - ops::Deref, - sync::Arc, -}; +use std::{borrow::Borrow, cmp::Ordering, fmt, hash, iter, ops::Deref, sync::Arc}; /// A `SmolStr` is a string type that has the following properties: /// @@ -25,6 +19,32 @@ use std::{ pub struct SmolStr(Repr); impl SmolStr { + /// Constructs an inline variant of `SmolStr` at compile time. + /// + /// `len` must be short (<= 22), `bytes` must be ascii. 
If `len` is smaller + /// than the actual len of `bytes`, the string is truncated. + pub const fn new_inline_from_ascii(len: usize, bytes: &[u8]) -> SmolStr { + let _len_is_short = [(); INLINE_CAP + 1][len]; + + const ZEROS: &[u8] = &[0; INLINE_CAP]; + + let mut buf = [0; INLINE_CAP]; + macro_rules! s { + ($($idx:literal),*) => ( $(s!(set $idx);)* ); + (set $idx:literal) => ({ + let src: &[u8] = [ZEROS, bytes][($idx < len) as usize]; + let b = src[$idx]; + let _is_ascii = [(); 128][b as usize]; + buf[$idx] = b + }); + } + s!(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21); + SmolStr(Repr::Inline { + len: len as u8, + buf, + }) + } + pub fn new(text: T) -> SmolStr where T: Into + AsRef, @@ -56,7 +76,7 @@ impl SmolStr { pub fn is_heap_allocated(&self) -> bool { match self.0 { Repr::Heap(..) => true, - _ => false + _ => false, } } } @@ -178,11 +198,14 @@ impl iter::FromIterator for SmolStr { ch.encode_utf8(&mut buf[len..]); len += size; } - SmolStr(Repr::Inline { len: len as u8, buf }) + SmolStr(Repr::Inline { + len: len as u8, + buf, + }) } } -fn build_from_str_iter(mut iter: impl Iterator) -> SmolStr +fn build_from_str_iter(mut iter: impl Iterator) -> SmolStr where T: AsRef, String: iter::Extend, @@ -202,7 +225,10 @@ where (&mut buf[len..][..size]).copy_from_slice(slice.as_bytes()); len += size; } - SmolStr(Repr::Inline { len: len as u8, buf }) + SmolStr(Repr::Inline { + len: len as u8, + buf, + }) } impl iter::FromIterator for SmolStr { diff --git a/lib/smol_str/tests/test.rs b/lib/smol_str/tests/test.rs index 853c0da673..b2a2ea0c7e 100644 --- a/lib/smol_str/tests/test.rs +++ b/lib/smol_str/tests/test.rs @@ -27,6 +27,22 @@ fn conversions() { assert_eq!(s, "Hello, World!") } +#[test] +fn const_fn_ctor() { + const EMPTY: SmolStr = SmolStr::new_inline_from_ascii(0, b""); + const A: SmolStr = SmolStr::new_inline_from_ascii(1, b"A"); + const HELLO: SmolStr = SmolStr::new_inline_from_ascii(5, b"HELLO"); + const LONG: SmolStr = 
SmolStr::new_inline_from_ascii(22, b"ABCDEFGHIZKLMNOPQRSTUV"); + + // const TOO_LONG: SmolStr = SmolStr::new_inline_from_ascii(23, b"ABCDEFGHIZKLMNOPQRSTUVW"); + // const NON_ASCII: SmolStr = SmolStr::new_inline_from_ascii(2, &[209, 139]); + + assert_eq!(EMPTY, SmolStr::from("")); + assert_eq!(A, SmolStr::from("A")); + assert_eq!(HELLO, SmolStr::from("HELLO")); + assert_eq!(LONG, SmolStr::from("ABCDEFGHIZKLMNOPQRSTUV")); +} + fn check_props(std_str: &str, smol: SmolStr) -> Result<(), proptest::test_runner::TestCaseError> { prop_assert_eq!(smol.as_str(), std_str); prop_assert_eq!(smol.len(), std_str.len()); @@ -97,7 +113,6 @@ fn test_from_char_iterator() { ("if", false), ("for", false), ("impl", false), - // Strings containing two-byte characters ("パーティーへ行かないか", true), ("パーティーへ行か", true), @@ -105,7 +120,6 @@ fn test_from_char_iterator() { ("和製漢語", false), ("部落格", false), ("사회과학원 어학연구소", true), - // String containing diverse characters ("表ポあA鷗ŒéB逍Üߪąñ丂㐀𠀀", true), ]; From 9b27bfaddff07cbbc70bd9550462909835262db2 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Sun, 7 Jul 2019 14:00:33 +0300 Subject: [PATCH 039/132] pubish v0.1.12 --- lib/smol_str/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/smol_str/Cargo.toml b/lib/smol_str/Cargo.toml index 5595969665..1fd0841324 100644 --- a/lib/smol_str/Cargo.toml +++ b/lib/smol_str/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "smol_str" -version = "0.1.11" +version = "0.1.12" authors = ["Aleksey Kladov "] repository = "https://github.com/matklad/smol_str" description = "small-string optimized string type with O(1) clone" From a390271aec827249af418af65b01fad81a994570 Mon Sep 17 00:00:00 2001 From: Pascal Hertleif Date: Mon, 22 Jul 2019 13:28:12 +0200 Subject: [PATCH 040/132] new_inline_from_ascii: Docs and compile-fail tests --- lib/smol_str/src/lib.rs | 67 +++++++++++++++++++++++++++++++++++--- lib/smol_str/tests/test.rs | 3 -- 2 files changed, 62 insertions(+), 8 deletions(-) diff --git 
a/lib/smol_str/src/lib.rs b/lib/smol_str/src/lib.rs index 449c199682..c7bbb4a875 100644 --- a/lib/smol_str/src/lib.rs +++ b/lib/smol_str/src/lib.rs @@ -21,8 +21,65 @@ pub struct SmolStr(Repr); impl SmolStr { /// Constructs an inline variant of `SmolStr` at compile time. /// - /// `len` must be short (<= 22), `bytes` must be ascii. If `len` is smaller - /// than the actual len of `bytes`, the string is truncated. + /// # Parameters + /// + /// - `len`: Must be short (≤ 22 bytes) + /// - `bytes`: Must be ASCII bytes, and there must be at least `len` of + /// them. If `len` is smaller than the actual len of `bytes`, the string + /// is truncated. + /// + /// # Returns + /// + /// A constant `SmolStr` with inline data. + /// + /// # Examples + /// + /// ```rust + /// # use smol_str::SmolStr; + /// const IDENT: SmolStr = SmolStr::new_inline_from_ascii(5, b"hello"); + /// ``` + /// + /// Given a `len` smaller than the number of bytes in `bytes`, the string is + /// cut off: + /// + /// ```rust + /// # use smol_str::SmolStr; + /// const SHORT: SmolStr = SmolStr::new_inline_from_ascii(5, b"hello world"); + /// assert_eq!(SHORT.as_str(), "hello"); + /// ``` + /// + /// ## Compile-time errors + /// + /// This will **fail** at compile-time with a message like "index out of + /// bounds" on a `_len_is_short` because the string is too large: + /// + /// ```rust,compile_fail + /// # use smol_str::SmolStr; + /// const IDENT: SmolStr = SmolStr::new_inline_from_ascii( + /// 49, + /// b"hello world, how are you doing this fine morning?", + /// ); + /// ``` + /// + /// Similarly, this will **fail** to compile with "index out of bounds" on + /// an `_is_ascii` binding because it contains non-ASCII characters: + /// + /// ```rust,compile_fail + /// # use smol_str::SmolStr; + /// const IDENT: SmolStr = SmolStr::new_inline_from_ascii( + /// 2, + /// &[209, 139], + /// ); + /// ``` + /// + /// Last but not least, given a `len` that is larger than the number of + /// bytes in `bytes`, it 
will fail to compile with "index out of bounds: the + /// len is 5 but the index is 5" on a binding called `byte`: + /// + /// ```rust,compile_fail + /// # use smol_str::SmolStr; + /// const IDENT: SmolStr = SmolStr::new_inline_from_ascii(10, b"hello"); + /// ``` pub const fn new_inline_from_ascii(len: usize, bytes: &[u8]) -> SmolStr { let _len_is_short = [(); INLINE_CAP + 1][len]; @@ -33,9 +90,9 @@ impl SmolStr { ($($idx:literal),*) => ( $(s!(set $idx);)* ); (set $idx:literal) => ({ let src: &[u8] = [ZEROS, bytes][($idx < len) as usize]; - let b = src[$idx]; - let _is_ascii = [(); 128][b as usize]; - buf[$idx] = b + let byte = src[$idx]; + let _is_ascii = [(); 128][byte as usize]; + buf[$idx] = byte }); } s!(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21); diff --git a/lib/smol_str/tests/test.rs b/lib/smol_str/tests/test.rs index b2a2ea0c7e..13e0f01e2f 100644 --- a/lib/smol_str/tests/test.rs +++ b/lib/smol_str/tests/test.rs @@ -34,9 +34,6 @@ fn const_fn_ctor() { const HELLO: SmolStr = SmolStr::new_inline_from_ascii(5, b"HELLO"); const LONG: SmolStr = SmolStr::new_inline_from_ascii(22, b"ABCDEFGHIZKLMNOPQRSTUV"); - // const TOO_LONG: SmolStr = SmolStr::new_inline_from_ascii(23, b"ABCDEFGHIZKLMNOPQRSTUVW"); - // const NON_ASCII: SmolStr = SmolStr::new_inline_from_ascii(2, &[209, 139]); - assert_eq!(EMPTY, SmolStr::from("")); assert_eq!(A, SmolStr::from("A")); assert_eq!(HELLO, SmolStr::from("HELLO")); From 1bb2d0f36c1b94481cdd5de3df56ca72a1594ac6 Mon Sep 17 00:00:00 2001 From: Brendan Molloy Date: Wed, 25 Sep 2019 14:46:30 +0200 Subject: [PATCH 041/132] Demonstrate bug with serde from_reader --- lib/smol_str/Cargo.toml | 2 + lib/smol_str/tests/test.rs | 94 +++++++++++++++++++++++++++++++++++--- 2 files changed, 89 insertions(+), 7 deletions(-) diff --git a/lib/smol_str/Cargo.toml b/lib/smol_str/Cargo.toml index 1fd0841324..2a795643fa 100644 --- a/lib/smol_str/Cargo.toml +++ b/lib/smol_str/Cargo.toml @@ -12,6 +12,8 @@ serde = { version = 
"1", optional = true, default_features = false } [dev-dependencies] proptest = "0.8.3" serde_json = "1" +serde_derive = "1" +serde = "1" criterion = "0.2" [[bench]] diff --git a/lib/smol_str/tests/test.rs b/lib/smol_str/tests/test.rs index 13e0f01e2f..8814036d1f 100644 --- a/lib/smol_str/tests/test.rs +++ b/lib/smol_str/tests/test.rs @@ -2,6 +2,8 @@ extern crate serde_json; extern crate smol_str; #[macro_use] extern crate proptest; +#[cfg(feature = "serde")] +extern crate serde_derive; use smol_str::SmolStr; @@ -87,13 +89,91 @@ proptest! { } #[cfg(feature = "serde")] -#[test] -fn test_serde() { - let s = SmolStr::new("Hello, World"); - let s = serde_json::to_string(&s).unwrap(); - assert_eq!(s, "\"Hello, World\""); - let s: SmolStr = serde_json::from_str(&s).unwrap(); - assert_eq!(s, "Hello, World"); +mod serde_tests { + use super::*; + use std::collections::HashMap; + use serde_derive::{Serialize, Deserialize}; + + #[derive(Serialize, Deserialize)] + struct SmolStrStruct { + pub(crate) s: SmolStr, + pub(crate) vec: Vec, + pub(crate) map: HashMap + } + + #[test] + fn test_serde() { + let s = SmolStr::new("Hello, World"); + let s = serde_json::to_string(&s).unwrap(); + assert_eq!(s, "\"Hello, World\""); + let s: SmolStr = serde_json::from_str(&s).unwrap(); + assert_eq!(s, "Hello, World"); + } + + #[test] + fn test_serde_reader() { + let s = SmolStr::new("Hello, World"); + let s = serde_json::to_string(&s).unwrap(); + assert_eq!(s, "\"Hello, World\""); + let s: SmolStr = serde_json::from_reader(std::io::Cursor::new(s)).unwrap(); + assert_eq!(s, "Hello, World"); + } + + #[test] + fn test_serde_struct() { + let mut map = HashMap::new(); + map.insert(SmolStr::new("a"), SmolStr::new("ohno")); + let struct_ = SmolStrStruct { + s: SmolStr::new("Hello, World"), + vec: vec![SmolStr::new("Hello, World"), SmolStr::new("Hello, World")], + map, + }; + let s = serde_json::to_string(&struct_).unwrap(); + let _new_struct: SmolStrStruct = serde_json::from_str(&s).unwrap(); + } + + 
#[test] + fn test_serde_struct_reader() { + let mut map = HashMap::new(); + map.insert(SmolStr::new("a"), SmolStr::new("ohno")); + let struct_ = SmolStrStruct { + s: SmolStr::new("Hello, World"), + vec: vec![SmolStr::new("Hello, World"), SmolStr::new("Hello, World")], + map, + }; + let s = serde_json::to_string(&struct_).unwrap(); + let _new_struct: SmolStrStruct = serde_json::from_reader(std::io::Cursor::new(s)).unwrap(); + } + + #[test] + fn test_serde_hashmap() { + let mut map = HashMap::new(); + map.insert(SmolStr::new("a"), SmolStr::new("ohno")); + let s = serde_json::to_string(&map).unwrap(); + let _s: HashMap = serde_json::from_str(&s).unwrap(); + } + + #[test] + fn test_serde_hashmap_reader() { + let mut map = HashMap::new(); + map.insert(SmolStr::new("a"), SmolStr::new("ohno")); + let s = serde_json::to_string(&map).unwrap(); + let _s: HashMap = serde_json::from_reader(std::io::Cursor::new(s)).unwrap(); + } + + #[test] + fn test_serde_vec() { + let vec = vec![SmolStr::new(""), SmolStr::new("b")]; + let s = serde_json::to_string(&vec).unwrap(); + let _s: Vec = serde_json::from_str(&s).unwrap(); + } + + #[test] + fn test_serde_vec_reader() { + let vec = vec![SmolStr::new(""), SmolStr::new("b")]; + let s = serde_json::to_string(&vec).unwrap(); + let _s: Vec = serde_json::from_reader(std::io::Cursor::new(s)).unwrap(); + } } #[test] From 6e6b2059988a643576b8a39716924a59b7924c7d Mon Sep 17 00:00:00 2001 From: Brendan Molloy Date: Thu, 26 Sep 2019 10:17:56 +0200 Subject: [PATCH 042/132] Implement visitor --- lib/smol_str/Cargo.toml | 4 +- lib/smol_str/src/lib.rs | 78 ++++++++++++++++++++++++++++++++++++-- lib/smol_str/tests/test.rs | 11 ++---- 3 files changed, 81 insertions(+), 12 deletions(-) diff --git a/lib/smol_str/Cargo.toml b/lib/smol_str/Cargo.toml index 2a795643fa..8d4373357a 100644 --- a/lib/smol_str/Cargo.toml +++ b/lib/smol_str/Cargo.toml @@ -5,6 +5,7 @@ authors = ["Aleksey Kladov "] repository = "https://github.com/matklad/smol_str" description = 
"small-string optimized string type with O(1) clone" license = "MIT OR Apache-2.0" +edition = "2018" [dependencies] serde = { version = "1", optional = true, default_features = false } @@ -12,8 +13,7 @@ serde = { version = "1", optional = true, default_features = false } [dev-dependencies] proptest = "0.8.3" serde_json = "1" -serde_derive = "1" -serde = "1" +serde = { version = "1", features = [ "derive" ] } criterion = "0.2" [[bench]] diff --git a/lib/smol_str/src/lib.rs b/lib/smol_str/src/lib.rs index c7bbb4a875..9e35158177 100644 --- a/lib/smol_str/src/lib.rs +++ b/lib/smol_str/src/lib.rs @@ -408,9 +408,81 @@ impl Repr { #[cfg(feature = "serde")] mod serde { - extern crate serde; + use ::serde::de::{Deserializer, Error, Unexpected, Visitor}; + use std::fmt; + use super::SmolStr; - use SmolStr; + // https://github.com/serde-rs/serde/blob/629802f2abfd1a54a6072992888fea7ca5bc209f/serde/src/private/de.rs#L56-L125 + fn smol_str<'de: 'a, 'a, D>(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + struct SmolStrVisitor; + + impl<'a> Visitor<'a> for SmolStrVisitor { + type Value = SmolStr; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("a string") + } + + fn visit_str(self, v: &str) -> Result + where + E: Error, + { + Ok(SmolStr::from(v)) + } + + fn visit_borrowed_str(self, v: &'a str) -> Result + where + E: Error, + { + Ok(SmolStr::from(v)) + } + + fn visit_string(self, v: String) -> Result + where + E: Error, + { + Ok(SmolStr::from(v)) + } + + fn visit_bytes(self, v: &[u8]) -> Result + where + E: Error, + { + match std::str::from_utf8(v) { + Ok(s) => Ok(SmolStr::from(s)), + Err(_) => Err(Error::invalid_value(Unexpected::Bytes(v), &self)), + } + } + + fn visit_borrowed_bytes(self, v: &'a [u8]) -> Result + where + E: Error, + { + match std::str::from_utf8(v) { + Ok(s) => Ok(SmolStr::from(s)), + Err(_) => Err(Error::invalid_value(Unexpected::Bytes(v), &self)), + } + } + + fn visit_byte_buf(self, v: Vec) -> 
Result + where + E: Error, + { + match String::from_utf8(v) { + Ok(s) => Ok(SmolStr::from(s)), + Err(e) => Err(Error::invalid_value( + Unexpected::Bytes(&e.into_bytes()), + &self, + )), + } + } + } + + deserializer.deserialize_str(SmolStrVisitor) + } impl serde::Serialize for SmolStr { fn serialize(&self, serializer: S) -> Result @@ -426,7 +498,7 @@ mod serde { where D: serde::Deserializer<'de>, { - <&'de str>::deserialize(deserializer).map(SmolStr::from) + smol_str(deserializer) } } } diff --git a/lib/smol_str/tests/test.rs b/lib/smol_str/tests/test.rs index 8814036d1f..ab2235d3c5 100644 --- a/lib/smol_str/tests/test.rs +++ b/lib/smol_str/tests/test.rs @@ -1,9 +1,5 @@ -extern crate serde_json; -extern crate smol_str; #[macro_use] extern crate proptest; -#[cfg(feature = "serde")] -extern crate serde_derive; use smol_str::SmolStr; @@ -91,14 +87,14 @@ proptest! { #[cfg(feature = "serde")] mod serde_tests { use super::*; + use serde::{Serialize, Deserialize}; use std::collections::HashMap; - use serde_derive::{Serialize, Deserialize}; #[derive(Serialize, Deserialize)] struct SmolStrStruct { pub(crate) s: SmolStr, pub(crate) vec: Vec, - pub(crate) map: HashMap + pub(crate) map: HashMap, } #[test] @@ -158,7 +154,8 @@ mod serde_tests { let mut map = HashMap::new(); map.insert(SmolStr::new("a"), SmolStr::new("ohno")); let s = serde_json::to_string(&map).unwrap(); - let _s: HashMap = serde_json::from_reader(std::io::Cursor::new(s)).unwrap(); + let _s: HashMap = + serde_json::from_reader(std::io::Cursor::new(s)).unwrap(); } #[test] From 53b5fd690f1ee951f66900ed237a5dfdeee54ea2 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Thu, 26 Sep 2019 11:48:37 +0300 Subject: [PATCH 043/132] publish v0.1.13 --- lib/smol_str/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/smol_str/Cargo.toml b/lib/smol_str/Cargo.toml index 8d4373357a..219b95fdf5 100644 --- a/lib/smol_str/Cargo.toml +++ b/lib/smol_str/Cargo.toml @@ -1,6 +1,6 @@ [package] name = 
"smol_str" -version = "0.1.12" +version = "0.1.13" authors = ["Aleksey Kladov "] repository = "https://github.com/matklad/smol_str" description = "small-string optimized string type with O(1) clone" From a9682d10ab338d22ea6727530e1e0a219a3cb5c1 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Wed, 9 Oct 2019 14:24:56 +0300 Subject: [PATCH 044/132] enable std feature for serde --- lib/smol_str/Cargo.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/smol_str/Cargo.toml b/lib/smol_str/Cargo.toml index 219b95fdf5..35b98f0e4d 100644 --- a/lib/smol_str/Cargo.toml +++ b/lib/smol_str/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "smol_str" -version = "0.1.13" +version = "0.1.14" authors = ["Aleksey Kladov "] repository = "https://github.com/matklad/smol_str" description = "small-string optimized string type with O(1) clone" @@ -13,7 +13,7 @@ serde = { version = "1", optional = true, default_features = false } [dev-dependencies] proptest = "0.8.3" serde_json = "1" -serde = { version = "1", features = [ "derive" ] } +serde = { version = "1", features = [ "derive", "std" ] } criterion = "0.2" [[bench]] From 9142ee13b45a3362ef661444acdc72a36fea52c6 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Wed, 9 Oct 2019 14:48:03 +0300 Subject: [PATCH 045/132] actually enabled serde std feature --- lib/smol_str/Cargo.toml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/smol_str/Cargo.toml b/lib/smol_str/Cargo.toml index 35b98f0e4d..b691d2d2bb 100644 --- a/lib/smol_str/Cargo.toml +++ b/lib/smol_str/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "smol_str" -version = "0.1.14" +version = "0.1.15" authors = ["Aleksey Kladov "] repository = "https://github.com/matklad/smol_str" description = "small-string optimized string type with O(1) clone" @@ -8,12 +8,12 @@ license = "MIT OR Apache-2.0" edition = "2018" [dependencies] -serde = { version = "1", optional = true, default_features = false } +serde = { version = "1", optional = true, 
default_features = false, features = [ "std" ] } [dev-dependencies] proptest = "0.8.3" serde_json = "1" -serde = { version = "1", features = [ "derive", "std" ] } +serde = { version = "1", features = [ "derive" ] } criterion = "0.2" [[bench]] From bcb69c61dfdba00e4fbd2ad0f983e8162507383a Mon Sep 17 00:00:00 2001 From: Shotaro Yamada Date: Fri, 10 Jan 2020 00:49:00 +0900 Subject: [PATCH 046/132] Do not count spaces --- lib/smol_str/src/lib.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/lib/smol_str/src/lib.rs b/lib/smol_str/src/lib.rs index 9e35158177..c0c61c80d7 100644 --- a/lib/smol_str/src/lib.rs +++ b/lib/smol_str/src/lib.rs @@ -359,9 +359,11 @@ impl Repr { } let newlines = text.bytes().take_while(|&b| b == b'\n').count(); - let spaces = text[newlines..].bytes().take_while(|&b| b == b' ').count(); - if newlines + spaces == len && newlines <= N_NEWLINES && spaces <= N_SPACES { - return Repr::Substring { newlines, spaces }; + if text[newlines..].bytes().all(|b| b == b' ') { + let spaces = len - newlines; + if newlines <= N_NEWLINES && spaces <= N_SPACES { + return Repr::Substring { newlines, spaces }; + } } } From f7821f55c85cdbfdcd042f393075387bcb8960e5 Mon Sep 17 00:00:00 2001 From: Shotaro Yamada Date: Fri, 10 Jan 2020 00:50:08 +0900 Subject: [PATCH 047/132] Improve `Arc` creation While using `Into` could avoid an allocation in `String` -> `Box`, converting `Box` into `Arc` deallocates and re-allocates anyway. 
--- lib/smol_str/src/lib.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/smol_str/src/lib.rs b/lib/smol_str/src/lib.rs index c0c61c80d7..e4cca47176 100644 --- a/lib/smol_str/src/lib.rs +++ b/lib/smol_str/src/lib.rs @@ -104,7 +104,7 @@ impl SmolStr { pub fn new(text: T) -> SmolStr where - T: Into + AsRef, + T: AsRef, { SmolStr(Repr::new(text)) } @@ -343,7 +343,7 @@ enum Repr { impl Repr { fn new(text: T) -> Self where - T: Into + AsRef, + T: AsRef, { { let text = text.as_ref(); @@ -367,7 +367,7 @@ impl Repr { } } - Repr::Heap(text.into().into_boxed_str().into()) + Repr::Heap(text.as_ref().into()) } #[inline(always)] From 38c343ca5a89ef8265670b4b3ea660bd5cfb04be Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Mon, 6 Jul 2020 15:31:35 +0200 Subject: [PATCH 048/132] Speadup From for String Thanks htpps://github.com/fasterthanlime! --- lib/smol_str/Cargo.toml | 2 +- lib/smol_str/src/lib.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/smol_str/Cargo.toml b/lib/smol_str/Cargo.toml index b691d2d2bb..eeb3b1b092 100644 --- a/lib/smol_str/Cargo.toml +++ b/lib/smol_str/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "smol_str" -version = "0.1.15" +version = "0.1.16" authors = ["Aleksey Kladov "] repository = "https://github.com/matklad/smol_str" description = "small-string optimized string type with O(1) clone" diff --git a/lib/smol_str/src/lib.rs b/lib/smol_str/src/lib.rs index e4cca47176..bdd228b89a 100644 --- a/lib/smol_str/src/lib.rs +++ b/lib/smol_str/src/lib.rs @@ -317,7 +317,7 @@ where impl From for String { fn from(text: SmolStr) -> Self { - text.to_string() + text.as_str().into() } } From 1235f1c11a699d56d8c415d02287157565be6566 Mon Sep 17 00:00:00 2001 From: Zachary Dremann Date: Mon, 6 Jul 2020 13:12:41 -0400 Subject: [PATCH 049/132] Rustfmt --- lib/smol_str/src/lib.rs | 2 +- lib/smol_str/tests/test.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/smol_str/src/lib.rs 
b/lib/smol_str/src/lib.rs index bdd228b89a..45f1cb2542 100644 --- a/lib/smol_str/src/lib.rs +++ b/lib/smol_str/src/lib.rs @@ -410,9 +410,9 @@ impl Repr { #[cfg(feature = "serde")] mod serde { + use super::SmolStr; use ::serde::de::{Deserializer, Error, Unexpected, Visitor}; use std::fmt; - use super::SmolStr; // https://github.com/serde-rs/serde/blob/629802f2abfd1a54a6072992888fea7ca5bc209f/serde/src/private/de.rs#L56-L125 fn smol_str<'de: 'a, 'a, D>(deserializer: D) -> Result diff --git a/lib/smol_str/tests/test.rs b/lib/smol_str/tests/test.rs index ab2235d3c5..57c0e8447f 100644 --- a/lib/smol_str/tests/test.rs +++ b/lib/smol_str/tests/test.rs @@ -87,7 +87,7 @@ proptest! { #[cfg(feature = "serde")] mod serde_tests { use super::*; - use serde::{Serialize, Deserialize}; + use serde::{Deserialize, Serialize}; use std::collections::HashMap; #[derive(Serialize, Deserialize)] From d97525eb5e611dbb1bf4c5bf7133883caabe8549 Mon Sep 17 00:00:00 2001 From: Zachary Dremann Date: Mon, 6 Jul 2020 16:07:29 -0400 Subject: [PATCH 050/132] Avoid checking long strings for matching against whitespace Previously, the string was checked for starting with newlines and ending with spaces, then ensuring that the length of those substrings were short enough to use our constant. Instead, only do the check for as many items as we have in the WS constant. In the worst case, this avoids an O(n) check if the input is a long string of `\n`, possibly followed by a long string of spaces. 
--- lib/smol_str/src/lib.rs | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/lib/smol_str/src/lib.rs b/lib/smol_str/src/lib.rs index 45f1cb2542..7314d270f3 100644 --- a/lib/smol_str/src/lib.rs +++ b/lib/smol_str/src/lib.rs @@ -1,4 +1,10 @@ -use std::{borrow::Borrow, cmp::Ordering, fmt, hash, iter, ops::Deref, sync::Arc}; +use std::{ + borrow::Borrow, + cmp::{self, Ordering}, + fmt, hash, iter, + ops::Deref, + sync::Arc, +}; /// A `SmolStr` is a string type that has the following properties: /// @@ -358,10 +364,17 @@ impl Repr { }; } - let newlines = text.bytes().take_while(|&b| b == b'\n').count(); - if text[newlines..].bytes().all(|b| b == b' ') { - let spaces = len - newlines; - if newlines <= N_NEWLINES && spaces <= N_SPACES { + if len <= N_NEWLINES + N_SPACES { + let bytes = text.as_bytes(); + let possible_newline_count = cmp::min(len, N_NEWLINES); + let newlines = bytes[..possible_newline_count] + .iter() + .take_while(|&&b| b == b'\n') + .count(); + let possible_space_count = len - newlines; + if possible_space_count <= N_SPACES && bytes[newlines..].iter().all(|&b| b == b' ') + { + let spaces = possible_space_count; return Repr::Substring { newlines, spaces }; } } From 1fea4a5b2adf08777184b9db71e3ec3be33d83e8 Mon Sep 17 00:00:00 2001 From: Zachary Dremann Date: Mon, 6 Jul 2020 18:16:33 -0400 Subject: [PATCH 051/132] Avoid possible extra monomorphization By pulling `from_char_iter()` into a function, we can avoid multiple monomorphizations of `FromIterator` when multiple `IntoIterator`s result in the same iterator type. 
--- lib/smol_str/src/lib.rs | 42 ++++++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/lib/smol_str/src/lib.rs b/lib/smol_str/src/lib.rs index bdd228b89a..60fc81e227 100644 --- a/lib/smol_str/src/lib.rs +++ b/lib/smol_str/src/lib.rs @@ -136,6 +136,27 @@ impl SmolStr { _ => false, } } + + fn from_char_iter>(mut iter: I) -> SmolStr { + let mut len = 0; + let mut buf = [0u8; INLINE_CAP]; + while let Some(ch) = iter.next() { + let size = ch.len_utf8(); + if size + len > INLINE_CAP { + let mut heap = String::with_capacity(size + len); + heap.push_str(std::str::from_utf8(&buf[..len]).unwrap()); + heap.push(ch); + heap.extend(iter); + return SmolStr(Repr::Heap(heap.into_boxed_str().into())); + } + ch.encode_utf8(&mut buf[len..]); + len += size; + } + SmolStr(Repr::Inline { + len: len as u8, + buf, + }) + } } impl Default for SmolStr { @@ -240,25 +261,8 @@ impl fmt::Display for SmolStr { impl iter::FromIterator for SmolStr { fn from_iter>(iter: I) -> SmolStr { - let mut len = 0; - let mut buf = [0u8; INLINE_CAP]; - let mut iter = iter.into_iter(); - while let Some(ch) = iter.next() { - let size = ch.len_utf8(); - if size + len > INLINE_CAP { - let mut heap = String::with_capacity(size + len); - heap.push_str(std::str::from_utf8(&buf[..len]).unwrap()); - heap.push(ch); - heap.extend(iter); - return SmolStr(Repr::Heap(heap.into_boxed_str().into())); - } - ch.encode_utf8(&mut buf[len..]); - len += size; - } - SmolStr(Repr::Inline { - len: len as u8, - buf, - }) + let iter = iter.into_iter(); + Self::from_char_iter(iter) } } From 2b15d9c1f5c3346aff261a15758758648d3d51b4 Mon Sep 17 00:00:00 2001 From: Zachary Dremann Date: Mon, 6 Jul 2020 18:23:01 -0400 Subject: [PATCH 052/132] Use the number of remaining elements in the char iter for allocation When collecting from an iterator of chars, when expanding past INLINE_CAP, include extra space for at least one byte per char for any remaining known size. 
--- lib/smol_str/src/lib.rs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/lib/smol_str/src/lib.rs b/lib/smol_str/src/lib.rs index 60fc81e227..c449f3611c 100644 --- a/lib/smol_str/src/lib.rs +++ b/lib/smol_str/src/lib.rs @@ -138,12 +138,18 @@ impl SmolStr { } fn from_char_iter>(mut iter: I) -> SmolStr { + let (min_size, _) = iter.size_hint(); + if min_size > INLINE_CAP { + let heap: String = iter.collect(); + return SmolStr(Repr::Heap(heap.into_boxed_str().into())); + } let mut len = 0; let mut buf = [0u8; INLINE_CAP]; while let Some(ch) = iter.next() { let size = ch.len_utf8(); if size + len > INLINE_CAP { - let mut heap = String::with_capacity(size + len); + let (min_remaining, _) = iter.size_hint(); + let mut heap = String::with_capacity(size + len + min_remaining); heap.push_str(std::str::from_utf8(&buf[..len]).unwrap()); heap.push(ch); heap.extend(iter); From 667c63685d925fbecb1d24673c49f011c26cf112 Mon Sep 17 00:00:00 2001 From: Atul Bhosale Date: Sun, 5 Jul 2020 23:12:18 +0530 Subject: [PATCH 053/132] Add fmt tidy test --- lib/smol_str/tests/tidy.rs | 46 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 lib/smol_str/tests/tidy.rs diff --git a/lib/smol_str/tests/tidy.rs b/lib/smol_str/tests/tidy.rs new file mode 100644 index 0000000000..a716e35b2f --- /dev/null +++ b/lib/smol_str/tests/tidy.rs @@ -0,0 +1,46 @@ +use std::{ + env, + path::{Path, PathBuf}, + process::{Command, Stdio}, +}; + +fn project_root() -> PathBuf { + PathBuf::from( + env::var("CARGO_MANIFEST_DIR").unwrap_or_else(|_| env!("CARGO_MANIFEST_DIR").to_owned()), + ) +} + +fn run(cmd: &str, dir: impl AsRef) -> Result<(), ()> { + let mut args: Vec<_> = cmd.split_whitespace().collect(); + let bin = args.remove(0); + println!("> {}", cmd); + let output = Command::new(bin) + .args(args) + .current_dir(dir) + .stdin(Stdio::null()) + .stdout(Stdio::piped()) + .stderr(Stdio::inherit()) + .output() + .map_err(drop)?; + if 
output.status.success() { + Ok(()) + } else { + let stdout = String::from_utf8(output.stdout).map_err(drop)?; + print!("{}", stdout); + Err(()) + } +} + +#[test] +fn check_code_formatting() { + let dir = project_root(); + if run("rustfmt +stable --version", &dir).is_err() { + panic!( + "failed to run rustfmt from toolchain 'stable'; \ + please run `rustup component add rustfmt --toolchain stable` to install it.", + ); + } + if run("cargo +stable fmt -- --check", &dir).is_err() { + panic!("code is not properly formatted; please format the code by running `cargo fmt`") + } +} From d40b0bc1b53e67aa2cbb9ca076278a0d3f88f935 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Sun, 20 Sep 2020 09:43:24 +0200 Subject: [PATCH 054/132] Add new_inline const-fn constructor --- lib/smol_str/src/lib.rs | 79 +++++++++----------------------------- lib/smol_str/tests/test.rs | 14 +++++++ 2 files changed, 32 insertions(+), 61 deletions(-) diff --git a/lib/smol_str/src/lib.rs b/lib/smol_str/src/lib.rs index 945bbc9d70..6136878b3e 100644 --- a/lib/smol_str/src/lib.rs +++ b/lib/smol_str/src/lib.rs @@ -25,67 +25,7 @@ use std::{ pub struct SmolStr(Repr); impl SmolStr { - /// Constructs an inline variant of `SmolStr` at compile time. - /// - /// # Parameters - /// - /// - `len`: Must be short (≤ 22 bytes) - /// - `bytes`: Must be ASCII bytes, and there must be at least `len` of - /// them. If `len` is smaller than the actual len of `bytes`, the string - /// is truncated. - /// - /// # Returns - /// - /// A constant `SmolStr` with inline data. 
- /// - /// # Examples - /// - /// ```rust - /// # use smol_str::SmolStr; - /// const IDENT: SmolStr = SmolStr::new_inline_from_ascii(5, b"hello"); - /// ``` - /// - /// Given a `len` smaller than the number of bytes in `bytes`, the string is - /// cut off: - /// - /// ```rust - /// # use smol_str::SmolStr; - /// const SHORT: SmolStr = SmolStr::new_inline_from_ascii(5, b"hello world"); - /// assert_eq!(SHORT.as_str(), "hello"); - /// ``` - /// - /// ## Compile-time errors - /// - /// This will **fail** at compile-time with a message like "index out of - /// bounds" on a `_len_is_short` because the string is too large: - /// - /// ```rust,compile_fail - /// # use smol_str::SmolStr; - /// const IDENT: SmolStr = SmolStr::new_inline_from_ascii( - /// 49, - /// b"hello world, how are you doing this fine morning?", - /// ); - /// ``` - /// - /// Similarly, this will **fail** to compile with "index out of bounds" on - /// an `_is_ascii` binding because it contains non-ASCII characters: - /// - /// ```rust,compile_fail - /// # use smol_str::SmolStr; - /// const IDENT: SmolStr = SmolStr::new_inline_from_ascii( - /// 2, - /// &[209, 139], - /// ); - /// ``` - /// - /// Last but not least, given a `len` that is larger than the number of - /// bytes in `bytes`, it will fail to compile with "index out of bounds: the - /// len is 5 but the index is 5" on a binding called `byte`: - /// - /// ```rust,compile_fail - /// # use smol_str::SmolStr; - /// const IDENT: SmolStr = SmolStr::new_inline_from_ascii(10, b"hello"); - /// ``` + #[deprecated = "Use `new_inline` instead"] pub const fn new_inline_from_ascii(len: usize, bytes: &[u8]) -> SmolStr { let _len_is_short = [(); INLINE_CAP + 1][len]; @@ -108,6 +48,23 @@ impl SmolStr { }) } + /// Constructs inline variant of `SmolStr`. + /// + /// Panics if `text.len() > 22`. 
+ #[inline] + pub const fn new_inline(text: &str) -> SmolStr { + let mut buf = [0; INLINE_CAP]; + let mut i = 0; + while i < text.len() { + buf[i] = text.as_bytes()[i]; + i += 1 + } + SmolStr(Repr::Inline { + len: text.len() as u8, + buf, + }) + } + pub fn new(text: T) -> SmolStr where T: AsRef, diff --git a/lib/smol_str/tests/test.rs b/lib/smol_str/tests/test.rs index 57c0e8447f..b067e00904 100644 --- a/lib/smol_str/tests/test.rs +++ b/lib/smol_str/tests/test.rs @@ -27,6 +27,20 @@ fn conversions() { #[test] fn const_fn_ctor() { + const EMPTY: SmolStr = SmolStr::new_inline(""); + const A: SmolStr = SmolStr::new_inline("A"); + const HELLO: SmolStr = SmolStr::new_inline("HELLO"); + const LONG: SmolStr = SmolStr::new_inline("ABCDEFGHIZKLMNOPQRSTUV"); + + assert_eq!(EMPTY, SmolStr::from("")); + assert_eq!(A, SmolStr::from("A")); + assert_eq!(HELLO, SmolStr::from("HELLO")); + assert_eq!(LONG, SmolStr::from("ABCDEFGHIZKLMNOPQRSTUV")); +} + +#[allow(deprecated)] +#[test] +fn old_const_fn_ctor() { const EMPTY: SmolStr = SmolStr::new_inline_from_ascii(0, b""); const A: SmolStr = SmolStr::new_inline_from_ascii(1, b"A"); const HELLO: SmolStr = SmolStr::new_inline_from_ascii(5, b"HELLO"); From e412a0199fad66dc1d193aa974c21d4f6d8b1e24 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Sun, 20 Sep 2020 09:49:57 +0200 Subject: [PATCH 055/132] :arrow_up: proptest --- lib/smol_str/Cargo.toml | 2 +- lib/smol_str/tests/test.rs | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/lib/smol_str/Cargo.toml b/lib/smol_str/Cargo.toml index eeb3b1b092..8f760011d9 100644 --- a/lib/smol_str/Cargo.toml +++ b/lib/smol_str/Cargo.toml @@ -11,7 +11,7 @@ edition = "2018" serde = { version = "1", optional = true, default_features = false, features = [ "std" ] } [dev-dependencies] -proptest = "0.8.3" +proptest = "0.10" serde_json = "1" serde = { version = "1", features = [ "derive" ] } criterion = "0.2" diff --git a/lib/smol_str/tests/test.rs b/lib/smol_str/tests/test.rs index 
b067e00904..537df8ddb2 100644 --- a/lib/smol_str/tests/test.rs +++ b/lib/smol_str/tests/test.rs @@ -1,5 +1,4 @@ -#[macro_use] -extern crate proptest; +use proptest::{prop_assert, prop_assert_eq, proptest}; use smol_str::SmolStr; From 3d5b7e3476f91280aedda3900c13838b00d64a9d Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Sun, 20 Sep 2020 09:54:54 +0200 Subject: [PATCH 056/132] Switch CI to actions --- lib/smol_str/.github/ci.rs | 116 +++++++++++++++++++++++++ lib/smol_str/.github/workflows/ci.yaml | 38 ++++++++ lib/smol_str/.gitignore | 3 +- lib/smol_str/.travis.yml | 4 - lib/smol_str/bors.toml | 4 +- 5 files changed, 157 insertions(+), 8 deletions(-) create mode 100644 lib/smol_str/.github/ci.rs create mode 100644 lib/smol_str/.github/workflows/ci.yaml delete mode 100644 lib/smol_str/.travis.yml diff --git a/lib/smol_str/.github/ci.rs b/lib/smol_str/.github/ci.rs new file mode 100644 index 0000000000..b293ebbcb7 --- /dev/null +++ b/lib/smol_str/.github/ci.rs @@ -0,0 +1,116 @@ +use std::{ + env, fs, + process::{self, Command, ExitStatus, Stdio}, + time::Instant, +}; + +type Error = Box<dyn std::error::Error>; +type Result<T> = std::result::Result<T, Error>; + +fn main() { + if let Err(err) = try_main() { + eprintln!("{}", err); + process::exit(1); + } +} + +fn try_main() -> Result<()> { + let cwd = env::current_dir()?; + let cargo_toml = cwd.join("Cargo.toml"); + assert!( + cargo_toml.exists(), + "Cargo.toml not found, cwd: {}", + cwd.display() + ); + + { + let _s = Section::new("BUILD"); + shell("cargo test --all-features --workspace --no-run")?; + } + + { + let _s = Section::new("TEST"); + shell("cargo test --all-features --workspace")?; + } + + let current_branch = shell_output("git branch --show-current")?; + if &current_branch == "master" { + let _s = Section::new("PUBLISH"); + let manifest = fs::read_to_string(&cargo_toml)?; + let version = get_field(&manifest, "version")?; + let tag = format!("v{}", version); + let tags = shell_output("git tag --list")?; + + if !tags.contains(&tag) { + let token
= env::var("CRATES_IO_TOKEN").unwrap(); + shell(&format!("git tag v{}", version))?; + shell(&format!("cargo publish --token {}", token))?; + shell("git push --tags")?; + } + } + Ok(()) +} + +fn get_field<'a>(text: &'a str, name: &str) -> Result<&'a str> { + for line in text.lines() { + let words = line.split_ascii_whitespace().collect::<Vec<_>>(); + match words.as_slice() { + [n, "=", v, ..] if n.trim() == name => { + assert!(v.starts_with('"') && v.ends_with('"')); + return Ok(&v[1..v.len() - 1]); + } + _ => (), + } + } + Err(format!("can't find `{}` in\n----\n{}\n----\n", name, text))? +} + +fn shell(cmd: &str) -> Result<()> { + let status = command(cmd).status()?; + check_status(status) +} + +fn shell_output(cmd: &str) -> Result<String> { + let output = command(cmd).stderr(Stdio::inherit()).output()?; + check_status(output.status)?; + let res = String::from_utf8(output.stdout)?; + let res = res.trim().to_string(); + println!("{}", res); + Ok(res) +} + +fn command(cmd: &str) -> Command { + eprintln!("> {}", cmd); + let words = cmd.split_ascii_whitespace().collect::<Vec<_>>(); + let (cmd, args) = words.split_first().unwrap(); + let mut res = Command::new(cmd); + res.args(args); + res +} + +fn check_status(status: ExitStatus) -> Result<()> { + if !status.success() { + Err(format!("$status: {}", status))?; + } + Ok(()) +} + +struct Section { + name: &'static str, + start: Instant, +} + +impl Section { + fn new(name: &'static str) -> Section { + println!("::group::{}", name); + let start = Instant::now(); + Section { name, start } + } +} + +impl Drop for Section { + fn drop(&mut self) { + eprintln!("{}: {:.2?}", self.name, self.start.elapsed()); + println!("::endgroup::"); + } +} diff --git a/lib/smol_str/.github/workflows/ci.yaml b/lib/smol_str/.github/workflows/ci.yaml new file mode 100644 index 0000000000..b1bc2175ca --- /dev/null +++ b/lib/smol_str/.github/workflows/ci.yaml @@ -0,0 +1,38 @@ +name: CI +on: + pull_request: + push: + branches: + - master + - staging + - trying + +env: +
CARGO_INCREMENTAL: 0 + CARGO_NET_RETRY: 10 + CI: 1 + RUST_BACKTRACE: short + RUSTFLAGS: -D warnings + RUSTUP_MAX_RETRIES: 10 + +jobs: + rust: + name: Rust + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v2 + with: + fetch-depth: 0 + + - name: Install Rust toolchain + uses: actions-rs/toolchain@v1 + with: + toolchain: stable + profile: minimal + override: true + + - run: rustc ./.github/ci.rs && ./ci + env: + CRATES_IO_TOKEN: ${{ secrets.CRATES_IO_TOKEN }} diff --git a/lib/smol_str/.gitignore b/lib/smol_str/.gitignore index 4470988469..6b500aacba 100644 --- a/lib/smol_str/.gitignore +++ b/lib/smol_str/.gitignore @@ -1,2 +1,3 @@ -target/ +/target +/ci Cargo.lock \ No newline at end of file diff --git a/lib/smol_str/.travis.yml b/lib/smol_str/.travis.yml deleted file mode 100644 index 56abf36895..0000000000 --- a/lib/smol_str/.travis.yml +++ /dev/null @@ -1,4 +0,0 @@ -language: rust - -script: - - cargo test --all-features diff --git a/lib/smol_str/bors.toml b/lib/smol_str/bors.toml index 574c56320f..b92b99ac30 100644 --- a/lib/smol_str/bors.toml +++ b/lib/smol_str/bors.toml @@ -1,4 +1,2 @@ -status = [ - "continuous-integration/travis-ci/push", -] +status = [ "Rust" ] delete_merged_branches = true From 6c8f7ce3cc7c3c906783337d4fcd3f111b8a6b00 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Sun, 20 Sep 2020 09:58:53 +0200 Subject: [PATCH 057/132] Bump major version --- lib/smol_str/Cargo.toml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/smol_str/Cargo.toml b/lib/smol_str/Cargo.toml index 8f760011d9..683aeddd55 100644 --- a/lib/smol_str/Cargo.toml +++ b/lib/smol_str/Cargo.toml @@ -1,10 +1,10 @@ [package] name = "smol_str" -version = "0.1.16" -authors = ["Aleksey Kladov "] -repository = "https://github.com/matklad/smol_str" +version = "0.1.17" description = "small-string optimized string type with O(1) clone" license = "MIT OR Apache-2.0" +repository = "https://github.com/matklad/smol_str" 
+authors = ["Aleksey Kladov "] edition = "2018" [dependencies] From 4ff9ad2b27832d51f7819700b06fd538f3453240 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Sun, 20 Sep 2020 09:59:31 +0200 Subject: [PATCH 058/132] Drop benchmarking I don't really look at the results of the benchmarks anyway, so having them in the repo creates a false sense of benchmarkdness. If I get to implementing proper benchmarking, I'd probably stay away from criterion -- we need something much much simpler for this crate. --- lib/smol_str/Cargo.toml | 5 ---- lib/smol_str/benches/building.rs | 44 -------------------------------- 2 files changed, 49 deletions(-) delete mode 100644 lib/smol_str/benches/building.rs diff --git a/lib/smol_str/Cargo.toml b/lib/smol_str/Cargo.toml index 683aeddd55..fee00ec2ba 100644 --- a/lib/smol_str/Cargo.toml +++ b/lib/smol_str/Cargo.toml @@ -14,8 +14,3 @@ serde = { version = "1", optional = true, default_features = false, features = [ proptest = "0.10" serde_json = "1" serde = { version = "1", features = [ "derive" ] } -criterion = "0.2" - -[[bench]] -name = "building" -harness = false diff --git a/lib/smol_str/benches/building.rs b/lib/smol_str/benches/building.rs deleted file mode 100644 index 1983314676..0000000000 --- a/lib/smol_str/benches/building.rs +++ /dev/null @@ -1,44 +0,0 @@ -#[macro_use] -extern crate criterion; -extern crate smol_str; - -use criterion::{Criterion, ParameterizedBenchmark, Throughput}; -use smol_str::SmolStr; - -fn from_str_iter(c: &mut Criterion) { - use std::iter::FromIterator; - - const SIZES: &[usize] = &[0, 5, 10, 15, 20, 2 << 4, 2 << 5, 2 << 6, 2 << 7, 2 << 8]; - - fn test_data(input: &str, size: usize) -> Vec<&str> { - std::iter::repeat(input).take(size / input.len()).collect() - } - - c.bench( - "FromIterator", - ParameterizedBenchmark::new( - "SmolStr, one byte elements", - |b, &&size| { - let src = test_data("x", size); - b.iter(|| SmolStr::from_iter(src.iter().cloned()).len()) - }, - SIZES, - ) - 
.with_function("SmolStr, five byte elements", |b, &&size| { - let src = test_data("helloo", size); - b.iter(|| SmolStr::from_iter(src.iter().cloned()).len()) - }) - .with_function("String, one byte elements", |b, &&size| { - let src = test_data("x", size); - b.iter(|| String::from_iter(src.iter().cloned()).len()) - }) - .with_function("String, five byte elements", |b, &&size| { - let src = test_data("hello", size); - b.iter(|| String::from_iter(src.iter().cloned()).len()) - }) - .throughput(|elems| Throughput::Bytes(**elems as u32)), - ); -} - -criterion_group!(benches, from_str_iter); -criterion_main!(benches); From 4966a00ca0bad48d17cdddae138b7890fff2b20b Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Mon, 21 Sep 2020 17:15:24 +0200 Subject: [PATCH 059/132] Document MSRV --- lib/smol_str/README.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/lib/smol_str/README.md b/lib/smol_str/README.md index 949f6e6ebf..ad57a10f90 100644 --- a/lib/smol_str/README.md +++ b/lib/smol_str/README.md @@ -20,3 +20,9 @@ Unlike `String`, however, `SmolStr` is immutable. The primary use case for languages. Strings consisting of a series of newlines, followed by a series of whitespace are a typical pattern in computer programs because of indentation. Note that a specialized interner might be a better solution for some use cases. + +## MSRV Policy + +Minimal Supported Rust Version: latest stable. + +Bumping MSRV is not considered a semver-breaking change. 
From 1f5f91a64d161238edf055635d794de5eb072f8e Mon Sep 17 00:00:00 2001 From: Daniel Johnson Date: Tue, 6 Oct 2020 19:43:15 -0700 Subject: [PATCH 060/132] Update CI badge in readme to point to Github Actions The Travis workflow was deleted in 3d5b7e3476f91280aedda3900c13838b00d64a9d --- lib/smol_str/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/smol_str/README.md b/lib/smol_str/README.md index ad57a10f90..2e61b9ee42 100644 --- a/lib/smol_str/README.md +++ b/lib/smol_str/README.md @@ -1,6 +1,6 @@ # smol_str -[![Build Status](https://travis-ci.org/matklad/smol_str.svg?branch=master)](https://travis-ci.org/matklad/smol_str) +[![CI](https://github.com/rust-analyzer/smol_str/workflows/CI/badge.svg)](https://github.com/rust-analyzer/smol_str/actions?query=branch%3Amaster+workflow%3ACI) [![Crates.io](https://img.shields.io/crates/v/smol_str.svg)](https://crates.io/crates/smol_str) [![API reference](https://docs.rs/smol_str/badge.svg)](https://docs.rs/smol_str/) From cfe22778a1b5011cea1d105b9a2d3fd8156aa9de Mon Sep 17 00:00:00 2001 From: Andrew Jeffery Date: Sat, 26 Jun 2021 08:27:17 +0100 Subject: [PATCH 061/132] Implement arbitrary behind a feature flag --- lib/smol_str/Cargo.toml | 3 ++- lib/smol_str/src/lib.rs | 8 ++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/lib/smol_str/Cargo.toml b/lib/smol_str/Cargo.toml index fee00ec2ba..fe2497cfd6 100644 --- a/lib/smol_str/Cargo.toml +++ b/lib/smol_str/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "smol_str" -version = "0.1.17" +version = "0.1.18" description = "small-string optimized string type with O(1) clone" license = "MIT OR Apache-2.0" repository = "https://github.com/matklad/smol_str" @@ -9,6 +9,7 @@ edition = "2018" [dependencies] serde = { version = "1", optional = true, default_features = false, features = [ "std" ] } +arbitrary = { version = "1", optional = true } [dev-dependencies] proptest = "0.10" diff --git a/lib/smol_str/src/lib.rs b/lib/smol_str/src/lib.rs 
index 6136878b3e..1583dfe077 100644 --- a/lib/smol_str/src/lib.rs +++ b/lib/smol_str/src/lib.rs @@ -300,6 +300,14 @@ impl Borrow for SmolStr { } } +#[cfg(feature = "arbitrary")] +impl<'a> arbitrary::Arbitrary<'a> for SmolStr { + fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> Result { + let s = <&str>::arbitrary(u)?; + Ok(SmolStr::new(s)) + } +} + const INLINE_CAP: usize = 22; const N_NEWLINES: usize = 32; const N_SPACES: usize = 128; From d5bf6c8dab7259a5112085c093e8c694721a9a86 Mon Sep 17 00:00:00 2001 From: Arsenii Lyashenko Date: Thu, 12 Aug 2021 15:00:02 +0300 Subject: [PATCH 062/132] Add `#![no_std]` support --- lib/smol_str/.github/ci.rs | 5 +++++ lib/smol_str/Cargo.toml | 6 +++++- lib/smol_str/src/lib.rs | 16 ++++++++++++++++ 3 files changed, 26 insertions(+), 1 deletion(-) diff --git a/lib/smol_str/.github/ci.rs b/lib/smol_str/.github/ci.rs index b293ebbcb7..98017ad97f 100644 --- a/lib/smol_str/.github/ci.rs +++ b/lib/smol_str/.github/ci.rs @@ -23,6 +23,11 @@ fn try_main() -> Result<()> { cwd.display() ); + { + let _s = Section::new("BUILD_NO_DEFAULT_FEATURES"); + shell("cargo test --all-features --workspace --no-run --no-default-features")?; + } + { let _s = Section::new("BUILD"); shell("cargo test --all-features --workspace --no-run")?; diff --git a/lib/smol_str/Cargo.toml b/lib/smol_str/Cargo.toml index fe2497cfd6..f9c3a350f2 100644 --- a/lib/smol_str/Cargo.toml +++ b/lib/smol_str/Cargo.toml @@ -8,10 +8,14 @@ authors = ["Aleksey Kladov "] edition = "2018" [dependencies] -serde = { version = "1", optional = true, default_features = false, features = [ "std" ] } +serde = { version = "1", optional = true, default_features = false } arbitrary = { version = "1", optional = true } [dev-dependencies] proptest = "0.10" serde_json = "1" serde = { version = "1", features = [ "derive" ] } + +[features] +default = ["std"] +std = ["serde/std"] diff --git a/lib/smol_str/src/lib.rs b/lib/smol_str/src/lib.rs index 1583dfe077..9f99153f2a 100644 --- 
a/lib/smol_str/src/lib.rs +++ b/lib/smol_str/src/lib.rs @@ -1,11 +1,27 @@ +#![cfg_attr(not(feature = "std"), no_std)] + +#[cfg(not(feature = "std"))] +extern crate core as std; + +#[cfg(not(feature = "std"))] +extern crate alloc; + use std::{ borrow::Borrow, cmp::{self, Ordering}, fmt, hash, iter, ops::Deref, +}; + +#[cfg(not(feature = "std"))] +use alloc::{ + string::{String, ToString}, sync::Arc, }; +#[cfg(feature = "std")] +use std::sync::Arc; + /// A `SmolStr` is a string type that has the following properties: /// /// * `size_of::() == size_of::()` From 14baf0779de1ff4da71fb05107402faa9587235b Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Mon, 1 Nov 2021 14:33:22 +0300 Subject: [PATCH 063/132] implement FromStr closes rust-analyzer/smol_str#31 --- lib/smol_str/Cargo.toml | 2 +- lib/smol_str/src/lib.rs | 11 +++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/lib/smol_str/Cargo.toml b/lib/smol_str/Cargo.toml index f9c3a350f2..d00ca31123 100644 --- a/lib/smol_str/Cargo.toml +++ b/lib/smol_str/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "smol_str" -version = "0.1.18" +version = "0.1.19" description = "small-string optimized string type with O(1) clone" license = "MIT OR Apache-2.0" repository = "https://github.com/matklad/smol_str" diff --git a/lib/smol_str/src/lib.rs b/lib/smol_str/src/lib.rs index 9f99153f2a..d819fe2dd9 100644 --- a/lib/smol_str/src/lib.rs +++ b/lib/smol_str/src/lib.rs @@ -6,11 +6,13 @@ extern crate core as std; #[cfg(not(feature = "std"))] extern crate alloc; +use core::convert::Infallible; use std::{ borrow::Borrow, cmp::{self, Ordering}, fmt, hash, iter, ops::Deref, + str::FromStr, }; #[cfg(not(feature = "std"))] @@ -316,6 +318,15 @@ impl Borrow for SmolStr { } } +impl FromStr for SmolStr { + type Err = Infallible; + + #[inline] + fn from_str(s: &str) -> Result { + Ok(SmolStr::from(s)) + } +} + #[cfg(feature = "arbitrary")] impl<'a> arbitrary::Arbitrary<'a> for SmolStr { fn arbitrary(u: &mut 
arbitrary::Unstructured<'a>) -> Result { From 661ca5b452838ecd887f07082b0e487361f84686 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Mon, 1 Nov 2021 16:04:40 +0300 Subject: [PATCH 064/132] fix no_std support --- lib/smol_str/.github/ci.rs | 1 + lib/smol_str/Cargo.toml | 2 +- lib/smol_str/src/lib.rs | 47 ++++++++++++++++---------------------- 3 files changed, 22 insertions(+), 28 deletions(-) diff --git a/lib/smol_str/.github/ci.rs b/lib/smol_str/.github/ci.rs index 98017ad97f..21c8584fb9 100644 --- a/lib/smol_str/.github/ci.rs +++ b/lib/smol_str/.github/ci.rs @@ -36,6 +36,7 @@ fn try_main() -> Result<()> { { let _s = Section::new("TEST"); shell("cargo test --all-features --workspace")?; + shell("cargo test --no-default-features --workspace")?; } let current_branch = shell_output("git branch --show-current")?; diff --git a/lib/smol_str/Cargo.toml b/lib/smol_str/Cargo.toml index d00ca31123..912b7f1345 100644 --- a/lib/smol_str/Cargo.toml +++ b/lib/smol_str/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "smol_str" -version = "0.1.19" +version = "0.1.20" description = "small-string optimized string type with O(1) clone" license = "MIT OR Apache-2.0" repository = "https://github.com/matklad/smol_str" diff --git a/lib/smol_str/src/lib.rs b/lib/smol_str/src/lib.rs index d819fe2dd9..c542fe639f 100644 --- a/lib/smol_str/src/lib.rs +++ b/lib/smol_str/src/lib.rs @@ -1,28 +1,18 @@ -#![cfg_attr(not(feature = "std"), no_std)] - -#[cfg(not(feature = "std"))] -extern crate core as std; - -#[cfg(not(feature = "std"))] +#![no_std] extern crate alloc; -use core::convert::Infallible; -use std::{ - borrow::Borrow, - cmp::{self, Ordering}, - fmt, hash, iter, - ops::Deref, - str::FromStr, -}; - -#[cfg(not(feature = "std"))] use alloc::{ string::{String, ToString}, sync::Arc, }; - -#[cfg(feature = "std")] -use std::sync::Arc; +use core::{ + borrow::Borrow, + cmp::{self, Ordering}, + convert::Infallible, + fmt, hash, iter, + ops::Deref, + str::FromStr, +}; /// A `SmolStr` is a string 
type that has the following properties: /// @@ -131,7 +121,7 @@ impl SmolStr { if size + len > INLINE_CAP { let (min_remaining, _) = iter.size_hint(); let mut heap = String::with_capacity(size + len + min_remaining); - heap.push_str(std::str::from_utf8(&buf[..len]).unwrap()); + heap.push_str(core::str::from_utf8(&buf[..len]).unwrap()); heap.push(ch); heap.extend(iter); return SmolStr(Repr::Heap(heap.into_boxed_str().into())); @@ -265,7 +255,7 @@ where let size = slice.len(); if size + len > INLINE_CAP { let mut heap = String::with_capacity(size + len); - heap.push_str(std::str::from_utf8(&buf[..len]).unwrap()); + heap.push_str(core::str::from_utf8(&buf[..len]).unwrap()); heap.push_str(&slice); heap.extend(iter); return SmolStr(Repr::Heap(heap.into_boxed_str().into())); @@ -411,7 +401,7 @@ impl Repr { Repr::Inline { len, buf } => { let len = *len as usize; let buf = &buf[..len]; - unsafe { ::std::str::from_utf8_unchecked(buf) } + unsafe { ::core::str::from_utf8_unchecked(buf) } } Repr::Substring { newlines, spaces } => { let newlines = *newlines; @@ -425,9 +415,12 @@ impl Repr { #[cfg(feature = "serde")] mod serde { - use super::SmolStr; - use ::serde::de::{Deserializer, Error, Unexpected, Visitor}; - use std::fmt; + use alloc::{string::String, vec::Vec}; + use core::fmt; + + use serde::de::{Deserializer, Error, Unexpected, Visitor}; + + use crate::SmolStr; // https://github.com/serde-rs/serde/blob/629802f2abfd1a54a6072992888fea7ca5bc209f/serde/src/private/de.rs#L56-L125 fn smol_str<'de: 'a, 'a, D>(deserializer: D) -> Result @@ -468,7 +461,7 @@ mod serde { where E: Error, { - match std::str::from_utf8(v) { + match core::str::from_utf8(v) { Ok(s) => Ok(SmolStr::from(s)), Err(_) => Err(Error::invalid_value(Unexpected::Bytes(v), &self)), } @@ -478,7 +471,7 @@ mod serde { where E: Error, { - match std::str::from_utf8(v) { + match core::str::from_utf8(v) { Ok(s) => Ok(SmolStr::from(s)), Err(_) => Err(Error::invalid_value(Unexpected::Bytes(v), &self)), } From 
7771472941a63112c8b7f863ba77d41d13032968 Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Thu, 4 Nov 2021 18:42:22 +0100 Subject: [PATCH 065/132] Remove unnecessary Into bound from From impl --- lib/smol_str/Cargo.toml | 6 +++--- lib/smol_str/src/lib.rs | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/smol_str/Cargo.toml b/lib/smol_str/Cargo.toml index 912b7f1345..4efaaf90ef 100644 --- a/lib/smol_str/Cargo.toml +++ b/lib/smol_str/Cargo.toml @@ -1,9 +1,9 @@ [package] name = "smol_str" -version = "0.1.20" +version = "0.1.21" description = "small-string optimized string type with O(1) clone" license = "MIT OR Apache-2.0" -repository = "https://github.com/matklad/smol_str" +repository = "https://github.com/rust-analyzer/smol_str" authors = ["Aleksey Kladov "] edition = "2018" @@ -14,7 +14,7 @@ arbitrary = { version = "1", optional = true } [dev-dependencies] proptest = "0.10" serde_json = "1" -serde = { version = "1", features = [ "derive" ] } +serde = { version = "1", features = [ "derive" ] } [features] default = ["std"] diff --git a/lib/smol_str/src/lib.rs b/lib/smol_str/src/lib.rs index c542fe639f..45ec174a20 100644 --- a/lib/smol_str/src/lib.rs +++ b/lib/smol_str/src/lib.rs @@ -289,7 +289,7 @@ impl<'a> iter::FromIterator<&'a str> for SmolStr { impl From for SmolStr where - T: Into + AsRef, + T: AsRef, { fn from(text: T) -> Self { Self::new(text) From a1cbd1feeedee153f06f646b600eccc2b0b34fd9 Mon Sep 17 00:00:00 2001 From: Zachary Dremann Date: Tue, 7 Jul 2020 09:57:19 -0400 Subject: [PATCH 066/132] Add a new test for bad size hint Changes in PR rust-analyzer/smol_str#20 allow for an incorrect size hint to create a non-canonical SmolStr. Add a new test which will fail if we ever rely on SmolStrs to be canonical when comparing for equality. 
--- lib/smol_str/tests/test.rs | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/lib/smol_str/tests/test.rs b/lib/smol_str/tests/test.rs index 537df8ddb2..cdcc9bf670 100644 --- a/lib/smol_str/tests/test.rs +++ b/lib/smol_str/tests/test.rs @@ -209,6 +209,8 @@ fn test_from_char_iterator() { ("사회과학원 어학연구소", true), // String containing diverse characters ("表ポあA鷗ŒéB逍Üߪąñ丂㐀𠀀", true), + // String which has too many characters to even consider inlining + ("☺☺☺☺☺☺☺☺☺☺☺☺☺☺☺☺☺☺☺☺☺☺☺☺☺☺☺☺☺☺☺☺", true), ]; for (raw, is_heap) in &examples { let s: SmolStr = raw.chars().collect(); @@ -216,3 +218,32 @@ fn test_from_char_iterator() { assert_eq!(s.is_heap_allocated(), *is_heap); } } + +#[test] +fn test_bad_size_hint_char_iter() { + struct BadSizeHint(I); + + impl> Iterator for BadSizeHint { + type Item = T; + + fn next(&mut self) -> Option { + self.0.next() + } + + fn size_hint(&self) -> (usize, Option) { + (1024, None) + } + } + + let data = "testing"; + let collected: SmolStr = BadSizeHint(data.chars()).collect(); + let new = SmolStr::new(data); + + // Because of the bad size hint, `collected` will be heap allocated, but `new` will be inline + + // If we try to use the type of the string (inline/heap) to quickly test for equality, we need to ensure + // `collected` is inline allocated instead + assert!(collected.is_heap_allocated()); + assert!(!new.is_heap_allocated()); + assert_eq!(new, collected); +} From 77c6c2b2ae448f8cccb213d6cd179cf9482e7c7f Mon Sep 17 00:00:00 2001 From: Zachary Dremann Date: Thu, 10 Mar 2022 19:20:59 -0500 Subject: [PATCH 067/132] Use ASCII to get the "too big" char iterator Additionally, make the construction of the string mechanical --- lib/smol_str/tests/test.rs | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/lib/smol_str/tests/test.rs b/lib/smol_str/tests/test.rs index cdcc9bf670..934cfa3c05 100644 --- a/lib/smol_str/tests/test.rs +++ b/lib/smol_str/tests/test.rs @@ -209,14 +209,19 @@ fn 
test_from_char_iterator() { ("사회과학원 어학연구소", true), // String containing diverse characters ("表ポあA鷗ŒéB逍Üߪąñ丂㐀𠀀", true), - // String which has too many characters to even consider inlining - ("☺☺☺☺☺☺☺☺☺☺☺☺☺☺☺☺☺☺☺☺☺☺☺☺☺☺☺☺☺☺☺☺", true), ]; for (raw, is_heap) in &examples { let s: SmolStr = raw.chars().collect(); assert_eq!(s.as_str(), *raw); assert_eq!(s.is_heap_allocated(), *is_heap); } + // String which has too many characters to even consider inlining: Chars::size_hint uses + // (`len` + 3) / 4. With `len` = 89, this results in 23, so `from_iter` will immediately + // heap allocate + let raw: String = std::iter::repeat('a').take(22 * 4 + 1).collect(); + let s: SmolStr = raw.chars().collect(); + assert_eq!(s.as_str(), raw); + assert!(s.is_heap_allocated()); } #[test] From a9562451715c1ae3bee0a2c93e5b74c60dc23c01 Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Sun, 3 Apr 2022 16:10:03 +0200 Subject: [PATCH 068/132] Clarify what `WS` is --- lib/smol_str/src/lib.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/smol_str/src/lib.rs b/lib/smol_str/src/lib.rs index c542fe639f..96d464fec3 100644 --- a/lib/smol_str/src/lib.rs +++ b/lib/smol_str/src/lib.rs @@ -29,6 +29,8 @@ use core::{ /// languages. Strings consisting of a series of newlines, followed by a series of /// whitespace are a typical pattern in computer programs because of indentation. /// Note that a specialized interner might be a better solution for some use cases. +/// +/// `WS`: A string of 32 newlines followed by 128 spaces. 
#[derive(Clone)] pub struct SmolStr(Repr); From fa9d3154e9760e524238af762617b2a50bd21da5 Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Sun, 3 Apr 2022 16:17:08 +0200 Subject: [PATCH 069/132] Bump dependencies --- lib/smol_str/Cargo.toml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/lib/smol_str/Cargo.toml b/lib/smol_str/Cargo.toml index 4efaaf90ef..5bfe18cf1f 100644 --- a/lib/smol_str/Cargo.toml +++ b/lib/smol_str/Cargo.toml @@ -8,13 +8,13 @@ authors = ["Aleksey Kladov "] edition = "2018" [dependencies] -serde = { version = "1", optional = true, default_features = false } -arbitrary = { version = "1", optional = true } +serde = { version = "1.0.136", optional = true, default_features = false } +arbitrary = { version = "1.1.0", optional = true } [dev-dependencies] -proptest = "0.10" -serde_json = "1" -serde = { version = "1", features = [ "derive" ] } +proptest = "1.0.0" +serde_json = "1.0.79" +serde = { version = "1.0.136", features = ["derive"] } [features] default = ["std"] From 844ee7798d4d5f51c892200fb002bf34d6943709 Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Fri, 8 Apr 2022 21:04:22 +0200 Subject: [PATCH 070/132] Use new optional dependency feature syntax making serde truly optional --- lib/smol_str/Cargo.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/smol_str/Cargo.toml b/lib/smol_str/Cargo.toml index 5bfe18cf1f..ea4c8c09f6 100644 --- a/lib/smol_str/Cargo.toml +++ b/lib/smol_str/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "smol_str" -version = "0.1.21" +version = "0.1.22" description = "small-string optimized string type with O(1) clone" license = "MIT OR Apache-2.0" repository = "https://github.com/rust-analyzer/smol_str" @@ -18,4 +18,4 @@ serde = { version = "1.0.136", features = ["derive"] } [features] default = ["std"] -std = ["serde/std"] +std = ["serde?/std"] From 25d7fa7105e7aeaba6a3d27737ed3ddc15975d9e Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Tue, 26 Apr 2022 23:57:07 
+0200 Subject: [PATCH 071/132] Revert usage of optional dependency feature syntax --- lib/smol_str/Cargo.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/smol_str/Cargo.toml b/lib/smol_str/Cargo.toml index ea4c8c09f6..b7bd8f7314 100644 --- a/lib/smol_str/Cargo.toml +++ b/lib/smol_str/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "smol_str" -version = "0.1.22" +version = "0.1.23" description = "small-string optimized string type with O(1) clone" license = "MIT OR Apache-2.0" repository = "https://github.com/rust-analyzer/smol_str" @@ -18,4 +18,4 @@ serde = { version = "1.0.136", features = ["derive"] } [features] default = ["std"] -std = ["serde?/std"] +std = ["serde/std"] From c5a4d2475e58b31b619256c9bcdb606f299d8e11 Mon Sep 17 00:00:00 2001 From: austaras Date: Sun, 22 Jan 2023 23:55:58 +0800 Subject: [PATCH 072/132] One more byte for inlined --- lib/smol_str/src/lib.rs | 62 ++++++++++++++++++++++++++++++-------- lib/smol_str/tests/test.rs | 8 ++--- 2 files changed, 53 insertions(+), 17 deletions(-) diff --git a/lib/smol_str/src/lib.rs b/lib/smol_str/src/lib.rs index 8c92e51e1c..296945d4d3 100644 --- a/lib/smol_str/src/lib.rs +++ b/lib/smol_str/src/lib.rs @@ -10,6 +10,7 @@ use core::{ cmp::{self, Ordering}, convert::Infallible, fmt, hash, iter, + mem::transmute, ops::Deref, str::FromStr, }; @@ -19,8 +20,8 @@ use core::{ /// * `size_of::() == size_of::()` /// * `Clone` is `O(1)` /// * Strings are stack-allocated if they are: -/// * Up to 22 bytes long -/// * Longer than 22 bytes, but substrings of `WS` (see below). Such strings consist +/// * Up to 23 bytes long +/// * Longer than 23 bytes, but substrings of `WS` (see below). 
Such strings consist /// solely of consecutive newlines, followed by consecutive spaces /// * If a string does not satisfy the aforementioned conditions, it is heap-allocated /// @@ -51,16 +52,16 @@ impl SmolStr { buf[$idx] = byte }); } - s!(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21); + s!(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22); SmolStr(Repr::Inline { - len: len as u8, + len: unsafe { transmute(len as u8) }, buf, }) } /// Constructs inline variant of `SmolStr`. /// - /// Panics if `text.len() > 22`. + /// Panics if `text.len() > 23`. #[inline] pub const fn new_inline(text: &str) -> SmolStr { let mut buf = [0; INLINE_CAP]; @@ -70,7 +71,7 @@ impl SmolStr { i += 1 } SmolStr(Repr::Inline { - len: text.len() as u8, + len: unsafe { transmute(text.len() as u8) }, buf, }) } @@ -132,7 +133,7 @@ impl SmolStr { len += size; } SmolStr(Repr::Inline { - len: len as u8, + len: unsafe { transmute(len as u8) }, buf, }) } @@ -266,7 +267,7 @@ where len += size; } SmolStr(Repr::Inline { - len: len as u8, + len: unsafe { transmute(len as u8) }, buf, }) } @@ -327,17 +328,52 @@ impl<'a> arbitrary::Arbitrary<'a> for SmolStr { } } -const INLINE_CAP: usize = 22; +const INLINE_CAP: usize = 23; const N_NEWLINES: usize = 32; const N_SPACES: usize = 128; const WS: &str = "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n "; +#[derive(Clone, Copy, Debug)] +#[repr(u8)] +enum InlineSize { + _V0 = 0, + _V1 = 1, + _V2 = 2, + _V3 = 3, + _V4 = 4, + _V5 = 5, + _V6 = 6, + _V7 = 7, + _V8 = 8, + _V9 = 9, + _V10 = 10, + _V11 = 11, + _V12 = 12, + _V13 = 13, + _V14 = 14, + _V15 = 15, + _V16 = 16, + _V17 = 17, + _V18 = 18, + _V19 = 19, + _V20 = 20, + _V21 = 21, + _V22 = 22, + _V23 = 23, +} + #[derive(Clone, Debug)] enum Repr { Heap(Arc), - Inline { len: u8, buf: [u8; INLINE_CAP] }, - Substring { newlines: usize, spaces: usize }, + Inline { + len: InlineSize, + buf: [u8; INLINE_CAP], + }, + Substring { + newlines: 
usize, + spaces: usize, + }, } impl Repr { @@ -353,7 +389,7 @@ impl Repr { let mut buf = [0; INLINE_CAP]; buf[..len].copy_from_slice(text.as_bytes()); return Repr::Inline { - len: len as u8, + len: unsafe { transmute(len as u8) }, buf, }; } @@ -390,7 +426,7 @@ impl Repr { fn is_empty(&self) -> bool { match self { Repr::Heap(data) => data.is_empty(), - Repr::Inline { len, .. } => *len == 0, + Repr::Inline { len, .. } => *len as u8 == 0, // A substring isn't created for an empty string. Repr::Substring { .. } => false, } diff --git a/lib/smol_str/tests/test.rs b/lib/smol_str/tests/test.rs index 934cfa3c05..609a8f7e6e 100644 --- a/lib/smol_str/tests/test.rs +++ b/lib/smol_str/tests/test.rs @@ -29,12 +29,12 @@ fn const_fn_ctor() { const EMPTY: SmolStr = SmolStr::new_inline(""); const A: SmolStr = SmolStr::new_inline("A"); const HELLO: SmolStr = SmolStr::new_inline("HELLO"); - const LONG: SmolStr = SmolStr::new_inline("ABCDEFGHIZKLMNOPQRSTUV"); + const LONG: SmolStr = SmolStr::new_inline("ABCDEFGHIZKLMNOPQRSTUVW"); assert_eq!(EMPTY, SmolStr::from("")); assert_eq!(A, SmolStr::from("A")); assert_eq!(HELLO, SmolStr::from("HELLO")); - assert_eq!(LONG, SmolStr::from("ABCDEFGHIZKLMNOPQRSTUV")); + assert_eq!(LONG, SmolStr::from("ABCDEFGHIZKLMNOPQRSTUVW")); } #[allow(deprecated)] @@ -43,12 +43,12 @@ fn old_const_fn_ctor() { const EMPTY: SmolStr = SmolStr::new_inline_from_ascii(0, b""); const A: SmolStr = SmolStr::new_inline_from_ascii(1, b"A"); const HELLO: SmolStr = SmolStr::new_inline_from_ascii(5, b"HELLO"); - const LONG: SmolStr = SmolStr::new_inline_from_ascii(22, b"ABCDEFGHIZKLMNOPQRSTUV"); + const LONG: SmolStr = SmolStr::new_inline_from_ascii(23, b"ABCDEFGHIZKLMNOPQRSTUVW"); assert_eq!(EMPTY, SmolStr::from("")); assert_eq!(A, SmolStr::from("A")); assert_eq!(HELLO, SmolStr::from("HELLO")); - assert_eq!(LONG, SmolStr::from("ABCDEFGHIZKLMNOPQRSTUV")); + assert_eq!(LONG, SmolStr::from("ABCDEFGHIZKLMNOPQRSTUVW")); } fn check_props(std_str: &str, smol: SmolStr) -> Result<(), 
proptest::test_runner::TestCaseError> { From ec90e593f3cd44b2ff7a76cfdee110912d8a610f Mon Sep 17 00:00:00 2001 From: austaras Date: Mon, 23 Jan 2023 18:57:29 +0800 Subject: [PATCH 073/132] 0.1.24 --- lib/smol_str/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/smol_str/Cargo.toml b/lib/smol_str/Cargo.toml index b7bd8f7314..e46b14fa83 100644 --- a/lib/smol_str/Cargo.toml +++ b/lib/smol_str/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "smol_str" -version = "0.1.23" +version = "0.1.24" description = "small-string optimized string type with O(1) clone" license = "MIT OR Apache-2.0" repository = "https://github.com/rust-analyzer/smol_str" From 8eced95f00c52e595c7d5cf80de1ad83fac310e8 Mon Sep 17 00:00:00 2001 From: austaras Date: Mon, 23 Jan 2023 21:19:58 +0800 Subject: [PATCH 074/132] Update README.MD --- lib/smol_str/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/smol_str/README.md b/lib/smol_str/README.md index 2e61b9ee42..0cc1910181 100644 --- a/lib/smol_str/README.md +++ b/lib/smol_str/README.md @@ -10,8 +10,8 @@ A `SmolStr` is a string type that has the following properties: * `size_of::() == size_of::()` * `Clone` is `O(1)` * Strings are stack-allocated if they are: - * Up to 22 bytes long - * Longer than 22 bytes, but substrings of `WS` (see `src/lib.rs`). Such strings consist + * Up to 23 bytes long + * Longer than 23 bytes, but substrings of `WS` (see `src/lib.rs`). 
Such strings consist solely of consecutive newlines, followed by consecutive spaces * If a string does not satisfy the aforementioned conditions, it is heap-allocated From 46e5bd0097e967bc999aeda410778e9c14f10ae1 Mon Sep 17 00:00:00 2001 From: austaras Date: Tue, 24 Jan 2023 18:03:45 +0800 Subject: [PATCH 075/132] Remove redundant enum value --- lib/smol_str/src/lib.rs | 46 ++++++++++++++++++++--------------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/lib/smol_str/src/lib.rs b/lib/smol_str/src/lib.rs index 296945d4d3..775c5d8f22 100644 --- a/lib/smol_str/src/lib.rs +++ b/lib/smol_str/src/lib.rs @@ -338,29 +338,29 @@ const WS: &str = #[repr(u8)] enum InlineSize { _V0 = 0, - _V1 = 1, - _V2 = 2, - _V3 = 3, - _V4 = 4, - _V5 = 5, - _V6 = 6, - _V7 = 7, - _V8 = 8, - _V9 = 9, - _V10 = 10, - _V11 = 11, - _V12 = 12, - _V13 = 13, - _V14 = 14, - _V15 = 15, - _V16 = 16, - _V17 = 17, - _V18 = 18, - _V19 = 19, - _V20 = 20, - _V21 = 21, - _V22 = 22, - _V23 = 23, + _V1, + _V2, + _V3, + _V4, + _V5, + _V6, + _V7, + _V8, + _V9, + _V10, + _V11, + _V12, + _V13, + _V14, + _V15, + _V16, + _V17, + _V18, + _V19, + _V20, + _V21, + _V22, + _V23, } #[derive(Clone, Debug)] From 5f367d76ae191654512d7c47e1d503fe95380038 Mon Sep 17 00:00:00 2001 From: austaras Date: Sun, 5 Feb 2023 08:13:08 +0800 Subject: [PATCH 076/132] Update test --- lib/smol_str/tests/test.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/smol_str/tests/test.rs b/lib/smol_str/tests/test.rs index 609a8f7e6e..187b39f001 100644 --- a/lib/smol_str/tests/test.rs +++ b/lib/smol_str/tests/test.rs @@ -55,7 +55,7 @@ fn check_props(std_str: &str, smol: SmolStr) -> Result<(), proptest::test_runner prop_assert_eq!(smol.as_str(), std_str); prop_assert_eq!(smol.len(), std_str.len()); prop_assert_eq!(smol.is_empty(), std_str.is_empty()); - if smol.len() <= 22 { + if smol.len() <= 23 { prop_assert!(!smol.is_heap_allocated()); } Ok(()) @@ -218,7 +218,7 @@ fn test_from_char_iterator() { // 
String which has too many characters to even consider inlining: Chars::size_hint uses // (`len` + 3) / 4. With `len` = 89, this results in 23, so `from_iter` will immediately // heap allocate - let raw: String = std::iter::repeat('a').take(22 * 4 + 1).collect(); + let raw: String = std::iter::repeat('a').take(23 * 4 + 1).collect(); let s: SmolStr = raw.chars().collect(); assert_eq!(s.as_str(), raw); assert!(s.is_heap_allocated()); From ae67412164266fb90315c5eeb83372121581f84f Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Thu, 30 Mar 2023 17:36:41 +0200 Subject: [PATCH 077/132] Use optional dependency feature syntax to make serde actually optional --- lib/smol_str/Cargo.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/smol_str/Cargo.toml b/lib/smol_str/Cargo.toml index e46b14fa83..aa729865aa 100644 --- a/lib/smol_str/Cargo.toml +++ b/lib/smol_str/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "smol_str" -version = "0.1.24" +version = "0.1.25" description = "small-string optimized string type with O(1) clone" license = "MIT OR Apache-2.0" repository = "https://github.com/rust-analyzer/smol_str" @@ -18,4 +18,4 @@ serde = { version = "1.0.136", features = ["derive"] } [features] default = ["std"] -std = ["serde/std"] +std = ["serde?/std"] From 8f9fefd8a0d880eaea12df9d1ac24b6e3adc50e8 Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Fri, 31 Mar 2023 07:32:55 +0200 Subject: [PATCH 078/132] Implement AsRef in favor of generic From impls --- lib/smol_str/src/lib.rs | 54 +++++++++++++++++++++++++++++++++++++---- 1 file changed, 49 insertions(+), 5 deletions(-) diff --git a/lib/smol_str/src/lib.rs b/lib/smol_str/src/lib.rs index 775c5d8f22..a67a7fc096 100644 --- a/lib/smol_str/src/lib.rs +++ b/lib/smol_str/src/lib.rs @@ -2,6 +2,8 @@ extern crate alloc; use alloc::{ + borrow::Cow, + boxed::Box, string::{String, ToString}, sync::Arc, }; @@ -290,22 +292,64 @@ impl<'a> iter::FromIterator<&'a str> for SmolStr { } } -impl From for SmolStr -where - T: 
AsRef, -{ - fn from(text: T) -> Self { +impl AsRef for SmolStr { + #[inline(always)] + fn as_ref(&self) -> &str { + self.as_str() + } +} + +impl From<&str> for SmolStr { + #[inline] + fn from(s: &str) -> SmolStr { + SmolStr::new(s) + } +} + +impl From<&mut str> for SmolStr { + #[inline] + fn from(s: &mut str) -> SmolStr { + SmolStr::new(s) + } +} + +impl From<&String> for SmolStr { + #[inline] + fn from(s: &String) -> SmolStr { + SmolStr::new(s) + } +} + +impl From for SmolStr { + #[inline(always)] + fn from(text: String) -> Self { Self::new(text) } } +impl From> for SmolStr { + #[inline] + fn from(s: Box) -> SmolStr { + SmolStr::new(s) + } +} + +impl<'a> From> for SmolStr { + #[inline] + fn from(s: Cow<'a, str>) -> SmolStr { + SmolStr::new(s) + } +} + impl From for String { + #[inline(always)] fn from(text: SmolStr) -> Self { text.as_str().into() } } impl Borrow for SmolStr { + #[inline(always)] fn borrow(&self) -> &str { self.as_str() } From 4ad02f720fda86c9a2caf867f674892ec99b9962 Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Fri, 31 Mar 2023 07:37:08 +0200 Subject: [PATCH 079/132] Clarify size of SmolStr better --- lib/smol_str/README.md | 2 +- lib/smol_str/src/lib.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/smol_str/README.md b/lib/smol_str/README.md index 0cc1910181..610726a216 100644 --- a/lib/smol_str/README.md +++ b/lib/smol_str/README.md @@ -7,7 +7,7 @@ A `SmolStr` is a string type that has the following properties: -* `size_of::() == size_of::()` +* `size_of::() == 24 (therefor == size_of::() on 64 bit platforms) * `Clone` is `O(1)` * Strings are stack-allocated if they are: * Up to 23 bytes long diff --git a/lib/smol_str/src/lib.rs b/lib/smol_str/src/lib.rs index a67a7fc096..91dc6252cc 100644 --- a/lib/smol_str/src/lib.rs +++ b/lib/smol_str/src/lib.rs @@ -19,7 +19,7 @@ use core::{ /// A `SmolStr` is a string type that has the following properties: /// -/// * `size_of::() == size_of::()` +/// * `size_of::() == 24 
(therefor == size_of::() on 64 bit platforms) /// * `Clone` is `O(1)` /// * Strings are stack-allocated if they are: /// * Up to 23 bytes long From ea478f81f15cb7a6006fbf436ae23901d5154fa1 Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Fri, 31 Mar 2023 07:37:30 +0200 Subject: [PATCH 080/132] Release 0.2.0 --- lib/smol_str/Cargo.toml | 2 +- lib/smol_str/README.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/smol_str/Cargo.toml b/lib/smol_str/Cargo.toml index aa729865aa..c7a646e527 100644 --- a/lib/smol_str/Cargo.toml +++ b/lib/smol_str/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "smol_str" -version = "0.1.25" +version = "0.2.0" description = "small-string optimized string type with O(1) clone" license = "MIT OR Apache-2.0" repository = "https://github.com/rust-analyzer/smol_str" diff --git a/lib/smol_str/README.md b/lib/smol_str/README.md index 610726a216..5e3506846f 100644 --- a/lib/smol_str/README.md +++ b/lib/smol_str/README.md @@ -7,7 +7,7 @@ A `SmolStr` is a string type that has the following properties: -* `size_of::() == 24 (therefor == size_of::() on 64 bit platforms) +* `size_of::() == 24 (therefore == size_of::() on 64 bit platforms) * `Clone` is `O(1)` * Strings are stack-allocated if they are: * Up to 23 bytes long From ced7e87db15f30c53ab2bbb2ff55e730e45ffd8d Mon Sep 17 00:00:00 2001 From: Scott Driggers Date: Fri, 25 Aug 2023 09:39:14 -0400 Subject: [PATCH 081/132] Implementing `From> for SmolStr` and `From for Arc` Also adding one test to verify --- lib/smol_str/src/lib.rs | 17 +++++++++++++++++ lib/smol_str/tests/test.rs | 8 +++++++- 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/lib/smol_str/src/lib.rs b/lib/smol_str/src/lib.rs index 91dc6252cc..f09d0010c4 100644 --- a/lib/smol_str/src/lib.rs +++ b/lib/smol_str/src/lib.rs @@ -334,6 +334,13 @@ impl From> for SmolStr { } } +impl From> for SmolStr { + #[inline] + fn from(s: Arc) -> SmolStr { + SmolStr(Repr::Heap(s)) + } +} + impl<'a> From> for SmolStr { 
#[inline] fn from(s: Cow<'a, str>) -> SmolStr { @@ -341,6 +348,16 @@ impl<'a> From> for SmolStr { } } +impl From for Arc { + #[inline(always)] + fn from(text: SmolStr) -> Self { + match text.0 { + Repr::Heap(data) => data, + _ => text.as_str().into(), + } + } +} + impl From for String { #[inline(always)] fn from(text: SmolStr) -> Self { diff --git a/lib/smol_str/tests/test.rs b/lib/smol_str/tests/test.rs index 187b39f001..1fbe7d667d 100644 --- a/lib/smol_str/tests/test.rs +++ b/lib/smol_str/tests/test.rs @@ -1,3 +1,5 @@ +use std::sync::Arc; + use proptest::{prop_assert, prop_assert_eq, proptest}; use smol_str::SmolStr; @@ -21,7 +23,11 @@ fn assert_traits() { fn conversions() { let s: SmolStr = "Hello, World!".into(); let s: String = s.into(); - assert_eq!(s, "Hello, World!") + assert_eq!(s, "Hello, World!"); + + let s: SmolStr = Arc::::from("Hello, World!").into(); + let s: Arc = s.into(); + assert_eq!(s.as_ref(), "Hello, World!"); } #[test] From 0eed716dccc9c4f8cf9af4e1c0381c1766c0a46e Mon Sep 17 00:00:00 2001 From: Scott Driggers Date: Tue, 5 Sep 2023 14:45:32 -0400 Subject: [PATCH 082/132] Enforcing stack if can be put on stack --- lib/smol_str/src/lib.rs | 68 ++++++++++++++++++++++------------------- 1 file changed, 37 insertions(+), 31 deletions(-) diff --git a/lib/smol_str/src/lib.rs b/lib/smol_str/src/lib.rs index f09d0010c4..5f0431d419 100644 --- a/lib/smol_str/src/lib.rs +++ b/lib/smol_str/src/lib.rs @@ -337,7 +337,8 @@ impl From> for SmolStr { impl From> for SmolStr { #[inline] fn from(s: Arc) -> SmolStr { - SmolStr(Repr::Heap(s)) + let repr = Repr::new_on_stack(s.as_ref()).unwrap_or_else(|| Repr::Heap(s)); + Self(repr) } } @@ -438,40 +439,45 @@ enum Repr { } impl Repr { + /// This function tries to create a new Repr::Inline or Repr::Substring + /// If it isn't possible, this function returns None + fn new_on_stack(text: T) -> Option + where + T: AsRef, + { + let text = text.as_ref(); + + let len = text.len(); + if len <= INLINE_CAP { + let mut buf = [0; 
INLINE_CAP]; + buf[..len].copy_from_slice(text.as_bytes()); + return Some(Repr::Inline { + len: unsafe { transmute(len as u8) }, + buf, + }); + } + + if len <= N_NEWLINES + N_SPACES { + let bytes = text.as_bytes(); + let possible_newline_count = cmp::min(len, N_NEWLINES); + let newlines = bytes[..possible_newline_count] + .iter() + .take_while(|&&b| b == b'\n') + .count(); + let possible_space_count = len - newlines; + if possible_space_count <= N_SPACES && bytes[newlines..].iter().all(|&b| b == b' ') { + let spaces = possible_space_count; + return Some(Repr::Substring { newlines, spaces }); + } + } + None + } + fn new(text: T) -> Self where T: AsRef, { - { - let text = text.as_ref(); - - let len = text.len(); - if len <= INLINE_CAP { - let mut buf = [0; INLINE_CAP]; - buf[..len].copy_from_slice(text.as_bytes()); - return Repr::Inline { - len: unsafe { transmute(len as u8) }, - buf, - }; - } - - if len <= N_NEWLINES + N_SPACES { - let bytes = text.as_bytes(); - let possible_newline_count = cmp::min(len, N_NEWLINES); - let newlines = bytes[..possible_newline_count] - .iter() - .take_while(|&&b| b == b'\n') - .count(); - let possible_space_count = len - newlines; - if possible_space_count <= N_SPACES && bytes[newlines..].iter().all(|&b| b == b' ') - { - let spaces = possible_space_count; - return Repr::Substring { newlines, spaces }; - } - } - } - - Repr::Heap(text.as_ref().into()) + Self::new_on_stack(text.as_ref()).unwrap_or_else(|| Repr::Heap(text.as_ref().into())) } #[inline(always)] From 8797f4fdfba8efcba5525fd123be7ad064ca3ee4 Mon Sep 17 00:00:00 2001 From: MultisampledNight Date: Wed, 20 Sep 2023 18:26:08 +0200 Subject: [PATCH 083/132] docs: fix missing codeblock backtick --- lib/smol_str/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/smol_str/src/lib.rs b/lib/smol_str/src/lib.rs index 5f0431d419..692803e447 100644 --- a/lib/smol_str/src/lib.rs +++ b/lib/smol_str/src/lib.rs @@ -19,7 +19,7 @@ use core::{ /// A `SmolStr` is a 
string type that has the following properties: /// -/// * `size_of::() == 24 (therefor == size_of::() on 64 bit platforms) +/// * `size_of::() == 24` (therefor `== size_of::()` on 64 bit platforms) /// * `Clone` is `O(1)` /// * Strings are stack-allocated if they are: /// * Up to 23 bytes long From f93c6b2cb05c2be1a80ff8e9dc8b7779f0ec6d11 Mon Sep 17 00:00:00 2001 From: Moulins Date: Tue, 2 Jan 2024 01:13:21 +0100 Subject: [PATCH 084/132] feat: Add `SmolStr::from_static` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allows creating `SmolStr`s longer than 23 bytes in constant contexts. This is done by replacing the `Repr::Substring` variant by a more general `Repr::Static(&'static str)` variant, and borrowing from ̀`WS` directly instead of storing two `usize`s. As a bonus, it also simplifies the `as_str` implementation, hopefully saving an extra branch. --- lib/smol_str/README.md | 3 ++- lib/smol_str/src/lib.rs | 39 ++++++++++++++++++++++++--------------- 2 files changed, 26 insertions(+), 16 deletions(-) diff --git a/lib/smol_str/README.md b/lib/smol_str/README.md index 5e3506846f..ce16759e81 100644 --- a/lib/smol_str/README.md +++ b/lib/smol_str/README.md @@ -7,13 +7,14 @@ A `SmolStr` is a string type that has the following properties: -* `size_of::() == 24 (therefore == size_of::() on 64 bit platforms) +* `size_of::() == 24` (therefore `== size_of::()` on 64 bit platforms) * `Clone` is `O(1)` * Strings are stack-allocated if they are: * Up to 23 bytes long * Longer than 23 bytes, but substrings of `WS` (see `src/lib.rs`). Such strings consist solely of consecutive newlines, followed by consecutive spaces * If a string does not satisfy the aforementioned conditions, it is heap-allocated +* Additionally, a `SmolStr` can be explicitly created from a `&'static str` without allocation Unlike `String`, however, `SmolStr` is immutable. 
The primary use case for `SmolStr` is a good enough default storage for tokens of typical programming diff --git a/lib/smol_str/src/lib.rs b/lib/smol_str/src/lib.rs index 692803e447..a27b14c0ad 100644 --- a/lib/smol_str/src/lib.rs +++ b/lib/smol_str/src/lib.rs @@ -26,6 +26,7 @@ use core::{ /// * Longer than 23 bytes, but substrings of `WS` (see below). Such strings consist /// solely of consecutive newlines, followed by consecutive spaces /// * If a string does not satisfy the aforementioned conditions, it is heap-allocated +/// * Additionally, a `SmolStr` can be explicitely created from a `&'static str` without allocation /// /// Unlike `String`, however, `SmolStr` is immutable. The primary use case for /// `SmolStr` is a good enough default storage for tokens of typical programming @@ -78,6 +79,17 @@ impl SmolStr { }) } + /// Constructs a `SmolStr` from a statically allocated string. + /// + /// This never allocates. + #[inline(always)] + pub const fn new_static(text: &'static str) -> SmolStr { + // NOTE: this never uses the inline storage; if a canonical + // representation is needed, we could check for `len() < INLINE_CAP` + // and call `new_inline`, but this would mean an extra branch. 
+ SmolStr(Repr::Static(text)) + } + pub fn new(text: T) -> SmolStr where T: AsRef, @@ -395,6 +407,11 @@ const N_NEWLINES: usize = 32; const N_SPACES: usize = 128; const WS: &str = "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n "; +const _: () = { + assert!(WS.len() == N_NEWLINES + N_SPACES); + assert!(WS.as_bytes()[N_NEWLINES - 1] == b'\n'); + assert!(WS.as_bytes()[N_NEWLINES] == b' '); +}; #[derive(Clone, Copy, Debug)] #[repr(u8)] @@ -428,18 +445,15 @@ enum InlineSize { #[derive(Clone, Debug)] enum Repr { Heap(Arc), + Static(&'static str), Inline { len: InlineSize, buf: [u8; INLINE_CAP], }, - Substring { - newlines: usize, - spaces: usize, - }, } impl Repr { - /// This function tries to create a new Repr::Inline or Repr::Substring + /// This function tries to create a new Repr::Inline or Repr::Static /// If it isn't possible, this function returns None fn new_on_stack(text: T) -> Option where @@ -467,7 +481,8 @@ impl Repr { let possible_space_count = len - newlines; if possible_space_count <= N_SPACES && bytes[newlines..].iter().all(|&b| b == b' ') { let spaces = possible_space_count; - return Some(Repr::Substring { newlines, spaces }); + let substring = &WS[N_NEWLINES - newlines..N_NEWLINES + spaces]; + return Some(Repr::Static(substring)); } } None @@ -484,8 +499,8 @@ impl Repr { fn len(&self) -> usize { match self { Repr::Heap(data) => data.len(), + Repr::Static(data) => data.len(), Repr::Inline { len, .. } => *len as usize, - Repr::Substring { newlines, spaces } => *newlines + *spaces, } } @@ -493,9 +508,8 @@ impl Repr { fn is_empty(&self) -> bool { match self { Repr::Heap(data) => data.is_empty(), + Repr::Static(data) => data.is_empty(), Repr::Inline { len, .. } => *len as u8 == 0, - // A substring isn't created for an empty string. - Repr::Substring { .. 
} => false, } } @@ -503,17 +517,12 @@ impl Repr { fn as_str(&self) -> &str { match self { Repr::Heap(data) => &*data, + Repr::Static(data) => data, Repr::Inline { len, buf } => { let len = *len as usize; let buf = &buf[..len]; unsafe { ::core::str::from_utf8_unchecked(buf) } } - Repr::Substring { newlines, spaces } => { - let newlines = *newlines; - let spaces = *spaces; - assert!(newlines <= N_NEWLINES && spaces <= N_SPACES); - &WS[N_NEWLINES - newlines..N_NEWLINES + spaces] - } } } } From 39257f6b057a02a50d344d148d1d86ec7d5279dd Mon Sep 17 00:00:00 2001 From: novacrazy Date: Mon, 15 Jan 2024 17:35:11 -0600 Subject: [PATCH 085/132] Add Writer and ToSmolStr --- lib/smol_str/src/lib.rs | 89 ++++++++++++++++++++++++++++++++++++++ lib/smol_str/tests/test.rs | 12 +++++ 2 files changed, 101 insertions(+) diff --git a/lib/smol_str/src/lib.rs b/lib/smol_str/src/lib.rs index a27b14c0ad..c81d6ed5e5 100644 --- a/lib/smol_str/src/lib.rs +++ b/lib/smol_str/src/lib.rs @@ -527,6 +527,95 @@ impl Repr { } } +/// Convert value to [`SmolStr`] using [`fmt::Display`], potentially without allocating. +/// +/// Almost identical to [`ToString`], but converts to `SmolStr` instead. +pub trait ToSmolStr { + fn to_smolstr(&self) -> SmolStr; +} + +/// Formats arguments to a [`SmolStr`], potentially without allocating. +/// +/// See [`alloc::format!`] or [`format_args!`] for syntax documentation. +#[macro_export] +macro_rules! 
format_smolstr { + ($($tt:tt)*) => {{ + use ::core::fmt::Write; + let mut w = $crate::Writer::new(); + w.write_fmt(format_args!($($tt)*)).expect("a formatting trait implementation returned an error"); + $crate::SmolStr::from(w) + }}; +} + +#[doc(hidden)] +pub struct Writer { + inline: [u8; INLINE_CAP], + heap: String, + len: usize, +} + +impl Writer { + pub const fn new() -> Self { + Writer { + inline: [0; INLINE_CAP], + heap: String::new(), + len: 0, + } + } +} + +impl fmt::Write for Writer { + fn write_str(&mut self, s: &str) -> fmt::Result { + // if currently on the stack + if self.len <= INLINE_CAP { + let old_len = self.len; + self.len += s.len(); + + // if the new length will fit on the stack (even if it fills it entirely) + if self.len <= INLINE_CAP { + self.inline[old_len..self.len].copy_from_slice(s.as_bytes()); + + return Ok(()); // skip the heap push below + } else { + self.heap.reserve(self.len); + + // copy existing inline bytes over to the heap + unsafe { + self.heap + .as_mut_vec() + .extend_from_slice(&self.inline[..old_len]); + } + } + } + + self.heap.push_str(s); + + Ok(()) + } +} + +impl From for SmolStr { + fn from(value: Writer) -> Self { + SmolStr(if value.len <= INLINE_CAP { + Repr::Inline { + len: unsafe { transmute(value.len as u8) }, + buf: value.inline, + } + } else { + Repr::new(value.heap) + }) + } +} + +impl ToSmolStr for T +where + T: fmt::Display + ?Sized, +{ + fn to_smolstr(&self) -> SmolStr { + format_smolstr!("{}", self) + } +} + #[cfg(feature = "serde")] mod serde { use alloc::{string::String, vec::Vec}; diff --git a/lib/smol_str/tests/test.rs b/lib/smol_str/tests/test.rs index 1fbe7d667d..ef5749ac9c 100644 --- a/lib/smol_str/tests/test.rs +++ b/lib/smol_str/tests/test.rs @@ -258,3 +258,15 @@ fn test_bad_size_hint_char_iter() { assert!(!new.is_heap_allocated()); assert_eq!(new, collected); } + +#[test] +fn test_to_smolstr() { + use smol_str::ToSmolStr; + + for i in 0..26 { + let a = &"abcdefghijklmnopqrstuvwxyz"[i..]; + + 
assert_eq!(a, a.to_smolstr()); + assert_eq!(a, smol_str::format_smolstr!("{}", a)); + } +} From 5559e23a505fb5a4607548139ab4f27bb52ba188 Mon Sep 17 00:00:00 2001 From: novacrazy Date: Tue, 16 Jan 2024 03:30:06 -0600 Subject: [PATCH 086/132] Cleanup --- lib/smol_str/src/lib.rs | 53 +++++++++++++++++++++-------------------- 1 file changed, 27 insertions(+), 26 deletions(-) diff --git a/lib/smol_str/src/lib.rs b/lib/smol_str/src/lib.rs index c81d6ed5e5..37151adcdd 100644 --- a/lib/smol_str/src/lib.rs +++ b/lib/smol_str/src/lib.rs @@ -1,12 +1,7 @@ #![no_std] extern crate alloc; -use alloc::{ - borrow::Cow, - boxed::Box, - string::{String, ToString}, - sync::Arc, -}; +use alloc::{borrow::Cow, boxed::Box, string::String, sync::Arc}; use core::{ borrow::Borrow, cmp::{self, Ordering}, @@ -41,7 +36,7 @@ pub struct SmolStr(Repr); impl SmolStr { #[deprecated = "Use `new_inline` instead"] pub const fn new_inline_from_ascii(len: usize, bytes: &[u8]) -> SmolStr { - let _len_is_short = [(); INLINE_CAP + 1][len]; + assert!(len <= INLINE_CAP); const ZEROS: &[u8] = &[0; INLINE_CAP]; @@ -102,9 +97,12 @@ impl SmolStr { self.0.as_str() } + #[allow(clippy::inherent_to_string_shadow_display)] #[inline(always)] pub fn to_string(&self) -> String { - self.as_str().to_string() + use alloc::borrow::ToOwned; + + self.as_str().to_owned() } #[inline(always)] @@ -118,11 +116,8 @@ impl SmolStr { } #[inline(always)] - pub fn is_heap_allocated(&self) -> bool { - match self.0 { - Repr::Heap(..) 
=> true, - _ => false, - } + pub const fn is_heap_allocated(&self) -> bool { + matches!(self.0, Repr::Heap(..)) } fn from_char_iter>(mut iter: I) -> SmolStr { @@ -154,14 +149,19 @@ impl SmolStr { } impl Default for SmolStr { + #[inline(always)] fn default() -> SmolStr { - SmolStr::new("") + SmolStr(Repr::Inline { + len: InlineSize::_V0, + buf: [0; INLINE_CAP], + }) } } impl Deref for SmolStr { type Target = str; + #[inline(always)] fn deref(&self) -> &str { self.as_str() } @@ -237,7 +237,7 @@ impl PartialOrd for SmolStr { impl hash::Hash for SmolStr { fn hash(&self, hasher: &mut H) { - self.as_str().hash(hasher) + self.as_str().hash(hasher); } } @@ -273,11 +273,11 @@ where if size + len > INLINE_CAP { let mut heap = String::with_capacity(size + len); heap.push_str(core::str::from_utf8(&buf[..len]).unwrap()); - heap.push_str(&slice); + heap.push_str(slice); heap.extend(iter); return SmolStr(Repr::Heap(heap.into_boxed_str().into())); } - (&mut buf[len..][..size]).copy_from_slice(slice.as_bytes()); + buf[len..][..size].copy_from_slice(slice.as_bytes()); len += size; } SmolStr(Repr::Inline { @@ -516,7 +516,7 @@ impl Repr { #[inline] fn as_str(&self) -> &str { match self { - Repr::Heap(data) => &*data, + Repr::Heap(data) => data, Repr::Static(data) => data, Repr::Inline { len, buf } => { let len = *len as usize; @@ -555,6 +555,7 @@ pub struct Writer { } impl Writer { + #[must_use] pub const fn new() -> Self { Writer { inline: [0; INLINE_CAP], @@ -576,15 +577,15 @@ impl fmt::Write for Writer { self.inline[old_len..self.len].copy_from_slice(s.as_bytes()); return Ok(()); // skip the heap push below - } else { - self.heap.reserve(self.len); + } - // copy existing inline bytes over to the heap - unsafe { - self.heap - .as_mut_vec() - .extend_from_slice(&self.inline[..old_len]); - } + self.heap.reserve(self.len); + + // copy existing inline bytes over to the heap + unsafe { + self.heap + .as_mut_vec() + .extend_from_slice(&self.inline[..old_len]); } } From 
ebb96d259dd7213ba4df0313d91534a4f986e8f2 Mon Sep 17 00:00:00 2001 From: novacrazy Date: Tue, 16 Jan 2024 03:37:11 -0600 Subject: [PATCH 087/132] Fix new_inline codegen --- lib/smol_str/src/lib.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/smol_str/src/lib.rs b/lib/smol_str/src/lib.rs index 37151adcdd..24b3254fd4 100644 --- a/lib/smol_str/src/lib.rs +++ b/lib/smol_str/src/lib.rs @@ -62,6 +62,8 @@ impl SmolStr { /// Panics if `text.len() > 23`. #[inline] pub const fn new_inline(text: &str) -> SmolStr { + assert!(text.len() <= INLINE_CAP); // avoids checks in loop + let mut buf = [0; INLINE_CAP]; let mut i = 0; while i < text.len() { From 13cac19135facf35b06d4b6caf31351b93e9a518 Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Tue, 16 Jan 2024 10:55:38 +0100 Subject: [PATCH 088/132] Publish 0.2.1 --- lib/smol_str/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/smol_str/Cargo.toml b/lib/smol_str/Cargo.toml index c7a646e527..1d04d43767 100644 --- a/lib/smol_str/Cargo.toml +++ b/lib/smol_str/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "smol_str" -version = "0.2.0" +version = "0.2.1" description = "small-string optimized string type with O(1) clone" license = "MIT OR Apache-2.0" repository = "https://github.com/rust-analyzer/smol_str" From 471f1b6137d79b0cca8d32ea127bc6c93fb28103 Mon Sep 17 00:00:00 2001 From: Anton Sol Date: Wed, 17 Jan 2024 19:30:57 +0100 Subject: [PATCH 089/132] add benchmarks This regressed from a previous attempt. The worst of the old results were in the range 450.000 current: test bench::bench_derive_clone ... bench: 1,653,247 ns/iter (+/- 32,781) test bench::bench_match_clone ... bench: 1,716,482 ns/iter (+/- 34,192) test bench::bench_new_clone ... 
bench: 1,717,985 ns/iter (+/- 52,137) --- lib/smol_str/src/lib.rs | 57 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/lib/smol_str/src/lib.rs b/lib/smol_str/src/lib.rs index 24b3254fd4..e5521944c3 100644 --- a/lib/smol_str/src/lib.rs +++ b/lib/smol_str/src/lib.rs @@ -1,3 +1,4 @@ +#![feature(core_intrinsics, test)] #![no_std] extern crate alloc; @@ -33,7 +34,63 @@ use core::{ #[derive(Clone)] pub struct SmolStr(Repr); +mod bench { + extern crate test; + use test::Bencher; + fn test_strings() -> [crate::SmolStr; 200] { + [0; 200].map(|_| crate::SmolStr::new("0123456780")) + } + #[bench] + fn bench_derive_clone(b: &mut Bencher) { + let it = test::black_box(test_strings()); + b.iter(|| { + (0..1000) + .map(|_| it.iter().map(|e| e.clone())) + .flatten() + .filter(|o| o.is_heap_allocated()) + .count() + }) + } + #[bench] + fn bench_new_clone(b: &mut Bencher) { + let it = test::black_box(test_strings()); + b.iter(|| { + (0..1000) + .map(|_| it.iter().map(|e| e.new_clone())) + .flatten() + .filter(|o| o.is_heap_allocated()) + .count() + }) + } + #[bench] + fn bench_match_clone(b: &mut Bencher) { + let it = test::black_box(test_strings()); + b.iter(|| { + (0..1000) + .map(|_| it.iter().map(|e| e.match_clone())) + .flatten() + .filter(|o| o.is_heap_allocated()) + .count() + }) + } +} + impl SmolStr { + + #[inline(always)] + pub fn new_clone(&self) -> Self { + if !self.is_heap_allocated() { + return unsafe { core::mem::transmute_copy(self) }; + } + Self(self.0.clone()) + } + #[inline(always)] + pub fn match_clone(&self) -> Self { + match &self.0 { + Repr::Heap(h) => return Self(Repr::Heap(h.clone())), + _ => unsafe { core::mem::transmute_copy(self) }, + } + } #[deprecated = "Use `new_inline` instead"] pub const fn new_inline_from_ascii(len: usize, bytes: &[u8]) -> SmolStr { assert!(len <= INLINE_CAP); From 6df2f06299939c5fe8c485cff96c362c7c9d78c1 Mon Sep 17 00:00:00 2001 From: Anton Sol Date: Wed, 17 Jan 2024 19:31:42 +0100 Subject: 
[PATCH 090/132] fix clone regression test bench::bench_derive_clone ... bench: 454,318 ns/iter (+/- 11,401) test bench::bench_match_clone ... bench: 183,570 ns/iter (+/- 10,652) test bench::bench_new_clone ... bench: 177,907 ns/iter (+/- 2,234) --- lib/smol_str/src/lib.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/smol_str/src/lib.rs b/lib/smol_str/src/lib.rs index e5521944c3..654ed660ef 100644 --- a/lib/smol_str/src/lib.rs +++ b/lib/smol_str/src/lib.rs @@ -503,12 +503,12 @@ enum InlineSize { #[derive(Clone, Debug)] enum Repr { - Heap(Arc), - Static(&'static str), Inline { len: InlineSize, buf: [u8; INLINE_CAP], }, + Static(&'static str), + Heap(Arc), } impl Repr { From 1f338f7d8c5e8543935ff654eb43100714be70b5 Mon Sep 17 00:00:00 2001 From: Anton Sol Date: Wed, 17 Jan 2024 19:40:14 +0100 Subject: [PATCH 091/132] Manually impl clone --- lib/smol_str/src/lib.rs | 60 +++++------------------------------------ 1 file changed, 7 insertions(+), 53 deletions(-) diff --git a/lib/smol_str/src/lib.rs b/lib/smol_str/src/lib.rs index 654ed660ef..eda9aa3203 100644 --- a/lib/smol_str/src/lib.rs +++ b/lib/smol_str/src/lib.rs @@ -31,66 +31,20 @@ use core::{ /// Note that a specialized interner might be a better solution for some use cases. /// /// `WS`: A string of 32 newlines followed by 128 spaces. 
-#[derive(Clone)] pub struct SmolStr(Repr); -mod bench { - extern crate test; - use test::Bencher; - fn test_strings() -> [crate::SmolStr; 200] { - [0; 200].map(|_| crate::SmolStr::new("0123456780")) - } - #[bench] - fn bench_derive_clone(b: &mut Bencher) { - let it = test::black_box(test_strings()); - b.iter(|| { - (0..1000) - .map(|_| it.iter().map(|e| e.clone())) - .flatten() - .filter(|o| o.is_heap_allocated()) - .count() - }) - } - #[bench] - fn bench_new_clone(b: &mut Bencher) { - let it = test::black_box(test_strings()); - b.iter(|| { - (0..1000) - .map(|_| it.iter().map(|e| e.new_clone())) - .flatten() - .filter(|o| o.is_heap_allocated()) - .count() - }) - } - #[bench] - fn bench_match_clone(b: &mut Bencher) { - let it = test::black_box(test_strings()); - b.iter(|| { - (0..1000) - .map(|_| it.iter().map(|e| e.match_clone())) - .flatten() - .filter(|o| o.is_heap_allocated()) - .count() - }) +impl Clone for SmolStr{ + #[inline] + fn clone(&self) -> Self { + if !self.is_heap_allocated() { + return unsafe { core::ptr::read(self as *const SmolStr) }; + } + Self(self.0.clone()) } } impl SmolStr { - #[inline(always)] - pub fn new_clone(&self) -> Self { - if !self.is_heap_allocated() { - return unsafe { core::mem::transmute_copy(self) }; - } - Self(self.0.clone()) - } - #[inline(always)] - pub fn match_clone(&self) -> Self { - match &self.0 { - Repr::Heap(h) => return Self(Repr::Heap(h.clone())), - _ => unsafe { core::mem::transmute_copy(self) }, - } - } #[deprecated = "Use `new_inline` instead"] pub const fn new_inline_from_ascii(len: usize, bytes: &[u8]) -> SmolStr { assert!(len <= INLINE_CAP); From 257ece4d7354db26bf4406e6214ba1dc1423a122 Mon Sep 17 00:00:00 2001 From: Anton Sol Date: Wed, 17 Jan 2024 20:01:51 +0100 Subject: [PATCH 092/132] remove nightly benchmark feature --- lib/smol_str/src/lib.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/smol_str/src/lib.rs b/lib/smol_str/src/lib.rs index eda9aa3203..b78a86b1bc 100644 --- a/lib/smol_str/src/lib.rs 
+++ b/lib/smol_str/src/lib.rs @@ -1,4 +1,3 @@ -#![feature(core_intrinsics, test)] #![no_std] extern crate alloc; From 22359392971997fbf5259c20eaa1dd0d7e41a356 Mon Sep 17 00:00:00 2001 From: Anton Sol Date: Wed, 31 Jan 2024 17:40:49 +0100 Subject: [PATCH 093/132] reformat --- lib/smol_str/src/lib.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/smol_str/src/lib.rs b/lib/smol_str/src/lib.rs index b78a86b1bc..e403233694 100644 --- a/lib/smol_str/src/lib.rs +++ b/lib/smol_str/src/lib.rs @@ -32,7 +32,7 @@ use core::{ /// `WS`: A string of 32 newlines followed by 128 spaces. pub struct SmolStr(Repr); -impl Clone for SmolStr{ +impl Clone for SmolStr { #[inline] fn clone(&self) -> Self { if !self.is_heap_allocated() { @@ -43,7 +43,6 @@ impl Clone for SmolStr{ } impl SmolStr { - #[deprecated = "Use `new_inline` instead"] pub const fn new_inline_from_ascii(len: usize, bytes: &[u8]) -> SmolStr { assert!(len <= INLINE_CAP); From 79bb5e39b5e655dc391c47c692865cf018e5243d Mon Sep 17 00:00:00 2001 From: Alex Butler Date: Tue, 30 Jan 2024 01:04:53 +0000 Subject: [PATCH 094/132] Add StrExt, to_lowercase_smolstr & friends --- lib/smol_str/src/lib.rs | 55 ++++++++++++++++++++++++++++++++++++++ lib/smol_str/tests/test.rs | 45 ++++++++++++++++++++++++++++++- 2 files changed, 99 insertions(+), 1 deletion(-) diff --git a/lib/smol_str/src/lib.rs b/lib/smol_str/src/lib.rs index e403233694..79a22b6021 100644 --- a/lib/smol_str/src/lib.rs +++ b/lib/smol_str/src/lib.rs @@ -545,6 +545,61 @@ pub trait ToSmolStr { fn to_smolstr(&self) -> SmolStr; } +/// [`str`] methods producing [`SmolStr`]s. +pub trait StrExt: private::Sealed { + /// Returns the lowercase equivalent of this string slice as a new [`SmolStr`], + /// potentially without allocating. + /// + /// See [`str::to_lowercase`]. + fn to_lowercase_smolstr(&self) -> SmolStr; + + /// Returns the uppercase equivalent of this string slice as a new [`SmolStr`], + /// potentially without allocating. 
+ /// + /// See [`str::to_uppercase`]. + fn to_uppercase_smolstr(&self) -> SmolStr; + + /// Returns the ASCII lowercase equivalent of this string slice as a new [`SmolStr`], + /// potentially without allocating. + /// + /// See [`str::to_ascii_lowercase`]. + fn to_ascii_lowercase_smolstr(&self) -> SmolStr; + + /// Returns the ASCII uppercase equivalent of this string slice as a new [`SmolStr`], + /// potentially without allocating. + /// + /// See [`str::to_ascii_uppercase`]. + fn to_ascii_uppercase_smolstr(&self) -> SmolStr; +} + +impl StrExt for str { + #[inline] + fn to_lowercase_smolstr(&self) -> SmolStr { + SmolStr::from_char_iter(self.chars().flat_map(|c| c.to_lowercase())) + } + + #[inline] + fn to_uppercase_smolstr(&self) -> SmolStr { + SmolStr::from_char_iter(self.chars().flat_map(|c| c.to_uppercase())) + } + + #[inline] + fn to_ascii_lowercase_smolstr(&self) -> SmolStr { + SmolStr::from_char_iter(self.chars().map(|c| c.to_ascii_lowercase())) + } + + #[inline] + fn to_ascii_uppercase_smolstr(&self) -> SmolStr { + SmolStr::from_char_iter(self.chars().map(|c| c.to_ascii_uppercase())) + } +} + +mod private { + /// No downstream impls allowed. + pub trait Sealed {} + impl Sealed for str {} +} + /// Formats arguments to a [`SmolStr`], potentially without allocating. /// /// See [`alloc::format!`] or [`format_args!`] for syntax documentation. diff --git a/lib/smol_str/tests/test.rs b/lib/smol_str/tests/test.rs index ef5749ac9c..11b7df710a 100644 --- a/lib/smol_str/tests/test.rs +++ b/lib/smol_str/tests/test.rs @@ -224,7 +224,7 @@ fn test_from_char_iterator() { // String which has too many characters to even consider inlining: Chars::size_hint uses // (`len` + 3) / 4. 
With `len` = 89, this results in 23, so `from_iter` will immediately // heap allocate - let raw: String = std::iter::repeat('a').take(23 * 4 + 1).collect(); + let raw = "a".repeat(23 * 4 + 1); let s: SmolStr = raw.chars().collect(); assert_eq!(s.as_str(), raw); assert!(s.is_heap_allocated()); @@ -270,3 +270,46 @@ fn test_to_smolstr() { assert_eq!(a, smol_str::format_smolstr!("{}", a)); } } + +#[cfg(test)] +mod test_str_ext { + use smol_str::StrExt; + + #[test] + fn large() { + let lowercase = "aaaaaaAAAAAaaaaaaaaaaaaaaaaaaaaaAAAAaaaaaaaaaaaaaa".to_lowercase_smolstr(); + assert_eq!( + lowercase, + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + ); + assert!(lowercase.is_heap_allocated()); + } + + #[test] + fn to_lowercase() { + let lowercase = "aßΔC".to_lowercase_smolstr(); + assert_eq!(lowercase, "aßδc"); + assert!(!lowercase.is_heap_allocated()); + } + + #[test] + fn to_uppercase() { + let uppercase = "aßΔC".to_uppercase_smolstr(); + assert_eq!(uppercase, "ASSΔC"); + assert!(!uppercase.is_heap_allocated()); + } + + #[test] + fn to_ascii_lowercase() { + let uppercase = "aßΔC".to_ascii_lowercase_smolstr(); + assert_eq!(uppercase, "aßΔc"); + assert!(!uppercase.is_heap_allocated()); + } + + #[test] + fn to_ascii_uppercase() { + let uppercase = "aßΔC".to_ascii_uppercase_smolstr(); + assert_eq!(uppercase, "AßΔC"); + assert!(!uppercase.is_heap_allocated()); + } +} From da4eb11ce953e41a8b53678e257d30da747c7136 Mon Sep 17 00:00:00 2001 From: Alex Butler Date: Thu, 8 Feb 2024 19:02:39 +0000 Subject: [PATCH 095/132] Add must_use to StrExt methods --- lib/smol_str/src/lib.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lib/smol_str/src/lib.rs b/lib/smol_str/src/lib.rs index 79a22b6021..192f972353 100644 --- a/lib/smol_str/src/lib.rs +++ b/lib/smol_str/src/lib.rs @@ -551,24 +551,28 @@ pub trait StrExt: private::Sealed { /// potentially without allocating. /// /// See [`str::to_lowercase`]. 
+ #[must_use = "this returns a new SmolStr without modifying the original"] fn to_lowercase_smolstr(&self) -> SmolStr; /// Returns the uppercase equivalent of this string slice as a new [`SmolStr`], /// potentially without allocating. /// /// See [`str::to_uppercase`]. + #[must_use = "this returns a new SmolStr without modifying the original"] fn to_uppercase_smolstr(&self) -> SmolStr; /// Returns the ASCII lowercase equivalent of this string slice as a new [`SmolStr`], /// potentially without allocating. /// /// See [`str::to_ascii_lowercase`]. + #[must_use = "this returns a new SmolStr without modifying the original"] fn to_ascii_lowercase_smolstr(&self) -> SmolStr; /// Returns the ASCII uppercase equivalent of this string slice as a new [`SmolStr`], /// potentially without allocating. /// /// See [`str::to_ascii_uppercase`]. + #[must_use = "this returns a new SmolStr without modifying the original"] fn to_ascii_uppercase_smolstr(&self) -> SmolStr; } From 29f5ba6318454b601d50630f3b0f853a2232469a Mon Sep 17 00:00:00 2001 From: Arpad Borsos Date: Wed, 17 Jan 2024 21:55:59 +0100 Subject: [PATCH 096/132] Add `PartialEq` shortcut for `ptr_eq` strings This first compares the `Repr` before falling back to actually comparing the raw `as_str` itself. In some micro-benchmarks, this speeds up inline and heap string comparisons when equal by ~70%. There is a tiny hit in the non-equal case however. It is also noteworthy that the assembly generated for `Repr` is horrible, and looks like its above the inlining threshold now. 
--- lib/smol_str/src/lib.rs | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/lib/smol_str/src/lib.rs b/lib/smol_str/src/lib.rs index 192f972353..375a4a5b6b 100644 --- a/lib/smol_str/src/lib.rs +++ b/lib/smol_str/src/lib.rs @@ -180,7 +180,7 @@ impl Deref for SmolStr { impl PartialEq for SmolStr { fn eq(&self, other: &SmolStr) -> bool { - self.as_str() == other.as_str() + self.0.ptr_eq(&other.0) || self.as_str() == other.as_str() } } @@ -424,7 +424,7 @@ const _: () = { assert!(WS.as_bytes()[N_NEWLINES] == b' '); }; -#[derive(Clone, Copy, Debug)] +#[derive(Clone, Copy, Debug, PartialEq)] #[repr(u8)] enum InlineSize { _V0 = 0, @@ -536,6 +536,24 @@ impl Repr { } } } + + fn ptr_eq(&self, other: &Self) -> bool { + match (self, other) { + (Self::Heap(l0), Self::Heap(r0)) => Arc::ptr_eq(l0, r0), + (Self::Static(l0), Self::Static(r0)) => core::ptr::eq(l0, r0), + ( + Self::Inline { + len: l_len, + buf: l_buf, + }, + Self::Inline { + len: r_len, + buf: r_buf, + }, + ) => l_len == r_len && l_buf == r_buf, + _ => false, + } + } } /// Convert value to [`SmolStr`] using [`fmt::Display`], potentially without allocating. From 860343ab8501ef8a5422c5505347a535dc04e75e Mon Sep 17 00:00:00 2001 From: Alex Butler Date: Thu, 8 Feb 2024 19:00:25 +0000 Subject: [PATCH 097/132] Add StrExt::replace_smolstr, replacen_smolstr --- lib/smol_str/src/lib.rs | 47 ++++++++++++++++++++++++++++++++++---- lib/smol_str/tests/test.rs | 14 ++++++++++++ 2 files changed, 56 insertions(+), 5 deletions(-) diff --git a/lib/smol_str/src/lib.rs b/lib/smol_str/src/lib.rs index 375a4a5b6b..9afe2a932c 100644 --- a/lib/smol_str/src/lib.rs +++ b/lib/smol_str/src/lib.rs @@ -592,6 +592,22 @@ pub trait StrExt: private::Sealed { /// See [`str::to_ascii_uppercase`]. 
#[must_use = "this returns a new SmolStr without modifying the original"] fn to_ascii_uppercase_smolstr(&self) -> SmolStr; + + /// Replaces all matches of a &str with another &str returning a new [`SmolStr`], + /// potentially without allocating. + /// + /// See [`str::replace`]. + // TODO: Use `Pattern` when stable. + #[must_use = "this returns a new SmolStr without modifying the original"] + fn replace_smolstr(&self, from: &str, to: &str) -> SmolStr; + + /// Replaces first N matches of a &str with another &str returning a new [`SmolStr`], + /// potentially without allocating. + /// + /// See [`str::replacen`]. + // TODO: Use `Pattern` when stable. + #[must_use = "this returns a new SmolStr without modifying the original"] + fn replacen_smolstr(&self, from: &str, to: &str, count: usize) -> SmolStr; } impl StrExt for str { @@ -614,6 +630,24 @@ impl StrExt for str { fn to_ascii_uppercase_smolstr(&self) -> SmolStr { SmolStr::from_char_iter(self.chars().map(|c| c.to_ascii_uppercase())) } + + #[inline] + fn replace_smolstr(&self, from: &str, to: &str) -> SmolStr { + self.replacen_smolstr(from, to, usize::MAX) + } + + #[inline] + fn replacen_smolstr(&self, from: &str, to: &str, count: usize) -> SmolStr { + let mut result = Writer::new(); + let mut last_end = 0; + for (start, part) in self.match_indices(from).take(count) { + result.push_str(unsafe { self.get_unchecked(last_end..start) }); + result.push_str(to); + last_end = start + part.len(); + } + result.push_str(unsafe { self.get_unchecked(last_end..self.len()) }); + SmolStr::from(result) + } } mod private { @@ -651,10 +685,8 @@ impl Writer { len: 0, } } -} -impl fmt::Write for Writer { - fn write_str(&mut self, s: &str) -> fmt::Result { + fn push_str(&mut self, s: &str) { // if currently on the stack if self.len <= INLINE_CAP { let old_len = self.len; @@ -663,8 +695,7 @@ impl fmt::Write for Writer { // if the new length will fit on the stack (even if it fills it entirely) if self.len <= INLINE_CAP { 
self.inline[old_len..self.len].copy_from_slice(s.as_bytes()); - - return Ok(()); // skip the heap push below + return; // skip the heap push below } self.heap.reserve(self.len); @@ -678,7 +709,13 @@ impl fmt::Write for Writer { } self.heap.push_str(s); + } +} +impl fmt::Write for Writer { + #[inline] + fn write_str(&mut self, s: &str) -> fmt::Result { + self.push_str(s); Ok(()) } } diff --git a/lib/smol_str/tests/test.rs b/lib/smol_str/tests/test.rs index 11b7df710a..655f30cbb0 100644 --- a/lib/smol_str/tests/test.rs +++ b/lib/smol_str/tests/test.rs @@ -312,4 +312,18 @@ mod test_str_ext { assert_eq!(uppercase, "AßΔC"); assert!(!uppercase.is_heap_allocated()); } + + #[test] + fn replace() { + let result = "foo_bar_baz".replace_smolstr("ba", "do"); + assert_eq!(result, "foo_dor_doz"); + assert!(!result.is_heap_allocated()); + } + + #[test] + fn replacen() { + let result = "foo_bar_baz".replacen_smolstr("ba", "do", 1); + assert_eq!(result, "foo_dor_baz"); + assert!(!result.is_heap_allocated()); + } } From 89a555b9b5529a91ef9d5c5e97a677ed1e3877d9 Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Tue, 14 May 2024 10:09:33 +0200 Subject: [PATCH 098/132] Document unsafe blocks --- lib/smol_str/src/lib.rs | 38 +++++++++++++++++++++++++++++++------- 1 file changed, 31 insertions(+), 7 deletions(-) diff --git a/lib/smol_str/src/lib.rs b/lib/smol_str/src/lib.rs index 9afe2a932c..78c4e9a74e 100644 --- a/lib/smol_str/src/lib.rs +++ b/lib/smol_str/src/lib.rs @@ -61,7 +61,9 @@ impl SmolStr { } s!(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22); SmolStr(Repr::Inline { - len: unsafe { transmute(len as u8) }, + // SAFETY: We know that `len` is less than or equal to the maximum value of `InlineSize` + // as we asserted it. 
+ len: unsafe { InlineSize::transmute_from_u8(len as u8) }, buf, }) } @@ -80,7 +82,9 @@ impl SmolStr { i += 1 } SmolStr(Repr::Inline { - len: unsafe { transmute(text.len() as u8) }, + // SAFETY: We know that `len` is less than or equal to the maximum value of `InlineSize` + // as we asserted it. + len: unsafe { InlineSize::transmute_from_u8(text.len() as u8) }, buf, }) } @@ -153,7 +157,9 @@ impl SmolStr { len += size; } SmolStr(Repr::Inline { - len: unsafe { transmute(len as u8) }, + // SAFETY: We know that `len` is less than or equal to the maximum value of `InlineSize` + // as we otherwise return early. + len: unsafe { InlineSize::transmute_from_u8(len as u8) }, buf, }) } @@ -292,7 +298,9 @@ where len += size; } SmolStr(Repr::Inline { - len: unsafe { transmute(len as u8) }, + // SAFETY: We know that `len` is less than or equal to the maximum value of `InlineSize` + // as we otherwise return early. + len: unsafe { InlineSize::transmute_from_u8(len as u8) }, buf, }) } @@ -413,7 +421,7 @@ impl<'a> arbitrary::Arbitrary<'a> for SmolStr { } } -const INLINE_CAP: usize = 23; +const INLINE_CAP: usize = InlineSize::_V23 as usize; const N_NEWLINES: usize = 32; const N_SPACES: usize = 128; const WS: &str = @@ -453,6 +461,14 @@ enum InlineSize { _V23, } +impl InlineSize { + #[inline(always)] + const unsafe fn transmute_from_u8(value: u8) -> Self { + debug_assert!(value <= InlineSize::_V23 as u8); + unsafe { transmute::(value) } + } +} + #[derive(Clone, Debug)] enum Repr { Inline { @@ -477,7 +493,8 @@ impl Repr { let mut buf = [0; INLINE_CAP]; buf[..len].copy_from_slice(text.as_bytes()); return Some(Repr::Inline { - len: unsafe { transmute(len as u8) }, + // SAFETY: We know that `len` is less than or equal to the maximum value of `InlineSize` + len: unsafe { InlineSize::transmute_from_u8(len as u8) }, buf, }); } @@ -532,6 +549,7 @@ impl Repr { Repr::Inline { len, buf } => { let len = *len as usize; let buf = &buf[..len]; + // SAFETY: buf is guaranteed to be valid utf8 for 
..len bytes unsafe { ::core::str::from_utf8_unchecked(buf) } } } @@ -641,10 +659,14 @@ impl StrExt for str { let mut result = Writer::new(); let mut last_end = 0; for (start, part) in self.match_indices(from).take(count) { + // SAFETY: `start` is guaranteed to be within the bounds of `self` as per + // `match_indices` and last_end is always less than or equal to `start` result.push_str(unsafe { self.get_unchecked(last_end..start) }); result.push_str(to); last_end = start + part.len(); } + // SAFETY: `self.len()` is guaranteed to be within the bounds of `self` and last_end is + // always less than or equal to `self.len()` result.push_str(unsafe { self.get_unchecked(last_end..self.len()) }); SmolStr::from(result) } @@ -701,6 +723,7 @@ impl Writer { self.heap.reserve(self.len); // copy existing inline bytes over to the heap + // SAFETY: inline data is guaranteed to be valid utf8 for `old_len` bytes unsafe { self.heap .as_mut_vec() @@ -724,7 +747,8 @@ impl From for SmolStr { fn from(value: Writer) -> Self { SmolStr(if value.len <= INLINE_CAP { Repr::Inline { - len: unsafe { transmute(value.len as u8) }, + // SAFETY: We know that `value.len` is less than or equal to the maximum value of `InlineSize` + len: unsafe { InlineSize::transmute_from_u8(value.len as u8) }, buf: value.inline, } } else { From 7cb89f14c62c15608acd24438d9d58719bcdd5f4 Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Tue, 14 May 2024 10:15:12 +0200 Subject: [PATCH 099/132] Publish 0.2.2 --- lib/smol_str/Cargo.toml | 2 +- lib/smol_str/tests/test.rs | 3 +++ lib/smol_str/tests/tidy.rs | 1 + 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/lib/smol_str/Cargo.toml b/lib/smol_str/Cargo.toml index 1d04d43767..dcefb03231 100644 --- a/lib/smol_str/Cargo.toml +++ b/lib/smol_str/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "smol_str" -version = "0.2.1" +version = "0.2.2" description = "small-string optimized string type with O(1) clone" license = "MIT OR Apache-2.0" repository = 
"https://github.com/rust-analyzer/smol_str" diff --git a/lib/smol_str/tests/test.rs b/lib/smol_str/tests/test.rs index 655f30cbb0..2e2914d797 100644 --- a/lib/smol_str/tests/test.rs +++ b/lib/smol_str/tests/test.rs @@ -1,5 +1,6 @@ use std::sync::Arc; +#[cfg(not(miri))] use proptest::{prop_assert, prop_assert_eq, proptest}; use smol_str::SmolStr; @@ -57,6 +58,7 @@ fn old_const_fn_ctor() { assert_eq!(LONG, SmolStr::from("ABCDEFGHIZKLMNOPQRSTUVW")); } +#[cfg(not(miri))] fn check_props(std_str: &str, smol: SmolStr) -> Result<(), proptest::test_runner::TestCaseError> { prop_assert_eq!(smol.as_str(), std_str); prop_assert_eq!(smol.len(), std_str.len()); @@ -67,6 +69,7 @@ fn check_props(std_str: &str, smol: SmolStr) -> Result<(), proptest::test_runner Ok(()) } +#[cfg(not(miri))] proptest! { #[test] fn roundtrip(s: String) { diff --git a/lib/smol_str/tests/tidy.rs b/lib/smol_str/tests/tidy.rs index a716e35b2f..e2d809e40f 100644 --- a/lib/smol_str/tests/tidy.rs +++ b/lib/smol_str/tests/tidy.rs @@ -1,3 +1,4 @@ +#![cfg(not(miri))] use std::{ env, path::{Path, PathBuf}, From 5146c928fdd77afc296c5526389edf23035a9735 Mon Sep 17 00:00:00 2001 From: Rob Ede Date: Sat, 1 Jun 2024 05:41:34 +0100 Subject: [PATCH 100/132] Document crate feature guards --- lib/smol_str/Cargo.toml | 6 +++++- lib/smol_str/src/lib.rs | 4 +++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/lib/smol_str/Cargo.toml b/lib/smol_str/Cargo.toml index dcefb03231..b04a6f8e5b 100644 --- a/lib/smol_str/Cargo.toml +++ b/lib/smol_str/Cargo.toml @@ -7,8 +7,12 @@ repository = "https://github.com/rust-analyzer/smol_str" authors = ["Aleksey Kladov "] edition = "2018" +[package.metadata.docs.rs] +rustdoc-args = ["--cfg", "docsrs"] +all-features = true + [dependencies] -serde = { version = "1.0.136", optional = true, default_features = false } +serde = { version = "1.0.136", optional = true, default-features = false } arbitrary = { version = "1.1.0", optional = true } [dev-dependencies] diff --git 
a/lib/smol_str/src/lib.rs b/lib/smol_str/src/lib.rs index 78c4e9a74e..f49cfbfe40 100644 --- a/lib/smol_str/src/lib.rs +++ b/lib/smol_str/src/lib.rs @@ -1,4 +1,6 @@ #![no_std] +#![cfg_attr(docsrs, feature(doc_auto_cfg))] + extern crate alloc; use alloc::{borrow::Cow, boxed::Box, string::String, sync::Arc}; @@ -21,7 +23,7 @@ use core::{ /// * Longer than 23 bytes, but substrings of `WS` (see below). Such strings consist /// solely of consecutive newlines, followed by consecutive spaces /// * If a string does not satisfy the aforementioned conditions, it is heap-allocated -/// * Additionally, a `SmolStr` can be explicitely created from a `&'static str` without allocation +/// * Additionally, a `SmolStr` can be explicitly created from a `&'static str` without allocation /// /// Unlike `String`, however, `SmolStr` is immutable. The primary use case for /// `SmolStr` is a good enough default storage for tokens of typical programming From c1327b21ba893b9a1a4fd57450d6bfa50dd95b0a Mon Sep 17 00:00:00 2001 From: Rob Ede Date: Sat, 1 Jun 2024 05:47:05 +0100 Subject: [PATCH 101/132] Migrate CI to actions-rust-lang/setup-rust-toolchain --- lib/smol_str/.github/workflows/ci.yaml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/lib/smol_str/.github/workflows/ci.yaml b/lib/smol_str/.github/workflows/ci.yaml index b1bc2175ca..1c2e347374 100644 --- a/lib/smol_str/.github/workflows/ci.yaml +++ b/lib/smol_str/.github/workflows/ci.yaml @@ -27,11 +27,9 @@ jobs: fetch-depth: 0 - name: Install Rust toolchain - uses: actions-rs/toolchain@v1 + uses: actions-rust-lang/setup-rust-toolchain@v1 with: - toolchain: stable - profile: minimal - override: true + cache: false - run: rustc ./.github/ci.rs && ./ci env: From 28b9403e1579b847041a3cbe39b07019cafe1bec Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Mon, 2 Sep 2024 20:40:49 +0200 Subject: [PATCH 102/132] Bump deps --- lib/smol_str/.gitignore | 3 +- lib/smol_str/Cargo.toml | 13 ++--- lib/smol_str/src/lib.rs | 99 
+-------------------------------------- lib/smol_str/src/serde.rs | 96 +++++++++++++++++++++++++++++++++++++ 4 files changed, 106 insertions(+), 105 deletions(-) create mode 100644 lib/smol_str/src/serde.rs diff --git a/lib/smol_str/.gitignore b/lib/smol_str/.gitignore index 6b500aacba..0c8227b253 100644 --- a/lib/smol_str/.gitignore +++ b/lib/smol_str/.gitignore @@ -1,3 +1,4 @@ /target /ci -Cargo.lock \ No newline at end of file +/.vscode +Cargo.lock diff --git a/lib/smol_str/Cargo.toml b/lib/smol_str/Cargo.toml index b04a6f8e5b..659c797f5f 100644 --- a/lib/smol_str/Cargo.toml +++ b/lib/smol_str/Cargo.toml @@ -5,21 +5,22 @@ description = "small-string optimized string type with O(1) clone" license = "MIT OR Apache-2.0" repository = "https://github.com/rust-analyzer/smol_str" authors = ["Aleksey Kladov "] -edition = "2018" +edition = "2021" [package.metadata.docs.rs] rustdoc-args = ["--cfg", "docsrs"] all-features = true [dependencies] -serde = { version = "1.0.136", optional = true, default-features = false } -arbitrary = { version = "1.1.0", optional = true } +serde = { version = "1.0", optional = true, default-features = false } +arbitrary = { version = "1.3", optional = true } [dev-dependencies] -proptest = "1.0.0" -serde_json = "1.0.79" -serde = { version = "1.0.136", features = ["derive"] } +proptest = "1.5" +serde_json = "1.0" +serde = { version = "1.0", features = ["derive"] } [features] default = ["std"] std = ["serde?/std"] +serde = ["dep:serde"] diff --git a/lib/smol_str/src/lib.rs b/lib/smol_str/src/lib.rs index f49cfbfe40..ca9944ce9b 100644 --- a/lib/smol_str/src/lib.rs +++ b/lib/smol_str/src/lib.rs @@ -769,101 +769,4 @@ where } #[cfg(feature = "serde")] -mod serde { - use alloc::{string::String, vec::Vec}; - use core::fmt; - - use serde::de::{Deserializer, Error, Unexpected, Visitor}; - - use crate::SmolStr; - - // https://github.com/serde-rs/serde/blob/629802f2abfd1a54a6072992888fea7ca5bc209f/serde/src/private/de.rs#L56-L125 - fn smol_str<'de: 'a, 
'a, D>(deserializer: D) -> Result - where - D: Deserializer<'de>, - { - struct SmolStrVisitor; - - impl<'a> Visitor<'a> for SmolStrVisitor { - type Value = SmolStr; - - fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - formatter.write_str("a string") - } - - fn visit_str(self, v: &str) -> Result - where - E: Error, - { - Ok(SmolStr::from(v)) - } - - fn visit_borrowed_str(self, v: &'a str) -> Result - where - E: Error, - { - Ok(SmolStr::from(v)) - } - - fn visit_string(self, v: String) -> Result - where - E: Error, - { - Ok(SmolStr::from(v)) - } - - fn visit_bytes(self, v: &[u8]) -> Result - where - E: Error, - { - match core::str::from_utf8(v) { - Ok(s) => Ok(SmolStr::from(s)), - Err(_) => Err(Error::invalid_value(Unexpected::Bytes(v), &self)), - } - } - - fn visit_borrowed_bytes(self, v: &'a [u8]) -> Result - where - E: Error, - { - match core::str::from_utf8(v) { - Ok(s) => Ok(SmolStr::from(s)), - Err(_) => Err(Error::invalid_value(Unexpected::Bytes(v), &self)), - } - } - - fn visit_byte_buf(self, v: Vec) -> Result - where - E: Error, - { - match String::from_utf8(v) { - Ok(s) => Ok(SmolStr::from(s)), - Err(e) => Err(Error::invalid_value( - Unexpected::Bytes(&e.into_bytes()), - &self, - )), - } - } - } - - deserializer.deserialize_str(SmolStrVisitor) - } - - impl serde::Serialize for SmolStr { - fn serialize(&self, serializer: S) -> Result - where - S: serde::Serializer, - { - self.as_str().serialize(serializer) - } - } - - impl<'de> serde::Deserialize<'de> for SmolStr { - fn deserialize(deserializer: D) -> Result - where - D: serde::Deserializer<'de>, - { - smol_str(deserializer) - } - } -} +mod serde; diff --git a/lib/smol_str/src/serde.rs b/lib/smol_str/src/serde.rs new file mode 100644 index 0000000000..05b8fecacc --- /dev/null +++ b/lib/smol_str/src/serde.rs @@ -0,0 +1,96 @@ +use alloc::{string::String, vec::Vec}; +use core::fmt; + +use serde::de::{Deserializer, Error, Unexpected, Visitor}; + +use crate::SmolStr; + +// 
https://github.com/serde-rs/serde/blob/629802f2abfd1a54a6072992888fea7ca5bc209f/serde/src/private/de.rs#L56-L125 +fn smol_str<'de: 'a, 'a, D>(deserializer: D) -> Result +where + D: Deserializer<'de>, +{ + struct SmolStrVisitor; + + impl<'a> Visitor<'a> for SmolStrVisitor { + type Value = SmolStr; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("a string") + } + + fn visit_str(self, v: &str) -> Result + where + E: Error, + { + Ok(SmolStr::from(v)) + } + + fn visit_borrowed_str(self, v: &'a str) -> Result + where + E: Error, + { + Ok(SmolStr::from(v)) + } + + fn visit_string(self, v: String) -> Result + where + E: Error, + { + Ok(SmolStr::from(v)) + } + + fn visit_bytes(self, v: &[u8]) -> Result + where + E: Error, + { + match core::str::from_utf8(v) { + Ok(s) => Ok(SmolStr::from(s)), + Err(_) => Err(Error::invalid_value(Unexpected::Bytes(v), &self)), + } + } + + fn visit_borrowed_bytes(self, v: &'a [u8]) -> Result + where + E: Error, + { + match core::str::from_utf8(v) { + Ok(s) => Ok(SmolStr::from(s)), + Err(_) => Err(Error::invalid_value(Unexpected::Bytes(v), &self)), + } + } + + fn visit_byte_buf(self, v: Vec) -> Result + where + E: Error, + { + match String::from_utf8(v) { + Ok(s) => Ok(SmolStr::from(s)), + Err(e) => Err(Error::invalid_value( + Unexpected::Bytes(&e.into_bytes()), + &self, + )), + } + } + } + + deserializer.deserialize_str(SmolStrVisitor) +} + +impl serde::Serialize for SmolStr { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + self.as_str().serialize(serializer) + } +} + +impl<'de> serde::Deserialize<'de> for SmolStr { + fn deserialize(deserializer: D) -> Result + where + D: serde::Deserializer<'de>, + { + smol_str(deserializer) + } +} From 6b0d9ffdb51c920b1ed45f8935e03d938de85d22 Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Mon, 2 Sep 2024 21:36:34 +0200 Subject: [PATCH 103/132] Various cleanups --- lib/smol_str/Cargo.toml | 1 - lib/smol_str/src/lib.rs | 200 
+++++++++++++++++-------------------- lib/smol_str/tests/test.rs | 20 +--- 3 files changed, 95 insertions(+), 126 deletions(-) diff --git a/lib/smol_str/Cargo.toml b/lib/smol_str/Cargo.toml index 659c797f5f..c1e34e7d7b 100644 --- a/lib/smol_str/Cargo.toml +++ b/lib/smol_str/Cargo.toml @@ -23,4 +23,3 @@ serde = { version = "1.0", features = ["derive"] } [features] default = ["std"] std = ["serde?/std"] -serde = ["dep:serde"] diff --git a/lib/smol_str/src/lib.rs b/lib/smol_str/src/lib.rs index ca9944ce9b..9845e4da05 100644 --- a/lib/smol_str/src/lib.rs +++ b/lib/smol_str/src/lib.rs @@ -8,9 +8,7 @@ use core::{ borrow::Borrow, cmp::{self, Ordering}, convert::Infallible, - fmt, hash, iter, - mem::transmute, - ops::Deref, + fmt, hash, iter, mem, ops, str::FromStr, }; @@ -34,53 +32,23 @@ use core::{ /// `WS`: A string of 32 newlines followed by 128 spaces. pub struct SmolStr(Repr); -impl Clone for SmolStr { - #[inline] - fn clone(&self) -> Self { - if !self.is_heap_allocated() { - return unsafe { core::ptr::read(self as *const SmolStr) }; - } - Self(self.0.clone()) - } -} - impl SmolStr { - #[deprecated = "Use `new_inline` instead"] - pub const fn new_inline_from_ascii(len: usize, bytes: &[u8]) -> SmolStr { - assert!(len <= INLINE_CAP); - - const ZEROS: &[u8] = &[0; INLINE_CAP]; - - let mut buf = [0; INLINE_CAP]; - macro_rules! s { - ($($idx:literal),*) => ( $(s!(set $idx);)* ); - (set $idx:literal) => ({ - let src: &[u8] = [ZEROS, bytes][($idx < len) as usize]; - let byte = src[$idx]; - let _is_ascii = [(); 128][byte as usize]; - buf[$idx] = byte - }); - } - s!(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22); - SmolStr(Repr::Inline { - // SAFETY: We know that `len` is less than or equal to the maximum value of `InlineSize` - // as we asserted it. - len: unsafe { InlineSize::transmute_from_u8(len as u8) }, - buf, - }) - } - - /// Constructs inline variant of `SmolStr`. + /// Constructs an inline variant of `SmolStr`. 
+ /// + /// This never allocates. + /// + /// # Panics /// /// Panics if `text.len() > 23`. #[inline] pub const fn new_inline(text: &str) -> SmolStr { - assert!(text.len() <= INLINE_CAP); // avoids checks in loop + assert!(text.len() <= INLINE_CAP); // avoids bounds checks in loop + let text = text.as_bytes(); let mut buf = [0; INLINE_CAP]; let mut i = 0; while i < text.len() { - buf[i] = text.as_bytes()[i]; + buf[i] = text[i]; i += 1 } SmolStr(Repr::Inline { @@ -102,68 +70,45 @@ impl SmolStr { SmolStr(Repr::Static(text)) } - pub fn new(text: T) -> SmolStr - where - T: AsRef, - { - SmolStr(Repr::new(text)) + /// Constructs a `SmolStr` from a `str`, heap-allocating if necessary. + #[inline(always)] + pub fn new(text: impl AsRef) -> SmolStr { + SmolStr(Repr::new(text.as_ref())) } + /// Returns a `&str` slice of this `SmolStr`. #[inline(always)] pub fn as_str(&self) -> &str { self.0.as_str() } - #[allow(clippy::inherent_to_string_shadow_display)] - #[inline(always)] - pub fn to_string(&self) -> String { - use alloc::borrow::ToOwned; - - self.as_str().to_owned() - } - + /// Returns the length of `self` in bytes. #[inline(always)] pub fn len(&self) -> usize { self.0.len() } + /// Returns `true` if `self` has a length of zero bytes. #[inline(always)] pub fn is_empty(&self) -> bool { self.0.is_empty() } + /// Returns `true` if `self` is heap-allocated. 
#[inline(always)] pub const fn is_heap_allocated(&self) -> bool { matches!(self.0, Repr::Heap(..)) } +} - fn from_char_iter>(mut iter: I) -> SmolStr { - let (min_size, _) = iter.size_hint(); - if min_size > INLINE_CAP { - let heap: String = iter.collect(); - return SmolStr(Repr::Heap(heap.into_boxed_str().into())); +impl Clone for SmolStr { + #[inline] + fn clone(&self) -> Self { + if !self.is_heap_allocated() { + // SAFETY: We verified that the payload of `Repr` is a POD + return unsafe { core::ptr::read(self as *const SmolStr) }; } - let mut len = 0; - let mut buf = [0u8; INLINE_CAP]; - while let Some(ch) = iter.next() { - let size = ch.len_utf8(); - if size + len > INLINE_CAP { - let (min_remaining, _) = iter.size_hint(); - let mut heap = String::with_capacity(size + len + min_remaining); - heap.push_str(core::str::from_utf8(&buf[..len]).unwrap()); - heap.push(ch); - heap.extend(iter); - return SmolStr(Repr::Heap(heap.into_boxed_str().into())); - } - ch.encode_utf8(&mut buf[len..]); - len += size; - } - SmolStr(Repr::Inline { - // SAFETY: We know that `len` is less than or equal to the maximum value of `InlineSize` - // as we otherwise return early. 
- len: unsafe { InlineSize::transmute_from_u8(len as u8) }, - buf, - }) + Self(self.0.clone()) } } @@ -177,7 +122,7 @@ impl Default for SmolStr { } } -impl Deref for SmolStr { +impl ops::Deref for SmolStr { type Target = str; #[inline(always)] @@ -186,61 +131,71 @@ impl Deref for SmolStr { } } +// region: PartialEq implementations + +impl Eq for SmolStr {} impl PartialEq for SmolStr { fn eq(&self, other: &SmolStr) -> bool { self.0.ptr_eq(&other.0) || self.as_str() == other.as_str() } } -impl Eq for SmolStr {} - impl PartialEq for SmolStr { + #[inline(always)] fn eq(&self, other: &str) -> bool { self.as_str() == other } } impl PartialEq for str { + #[inline(always)] fn eq(&self, other: &SmolStr) -> bool { other == self } } impl<'a> PartialEq<&'a str> for SmolStr { + #[inline(always)] fn eq(&self, other: &&'a str) -> bool { self == *other } } impl<'a> PartialEq for &'a str { + #[inline(always)] fn eq(&self, other: &SmolStr) -> bool { *self == other } } impl PartialEq for SmolStr { + #[inline(always)] fn eq(&self, other: &String) -> bool { self.as_str() == other } } impl PartialEq for String { + #[inline(always)] fn eq(&self, other: &SmolStr) -> bool { other == self } } impl<'a> PartialEq<&'a String> for SmolStr { + #[inline(always)] fn eq(&self, other: &&'a String) -> bool { self == *other } } impl<'a> PartialEq for &'a String { + #[inline(always)] fn eq(&self, other: &SmolStr) -> bool { *self == other } } +// endregion: PartialEq implementations impl Ord for SmolStr { fn cmp(&self, other: &SmolStr) -> Ordering { @@ -274,11 +229,43 @@ impl fmt::Display for SmolStr { impl iter::FromIterator for SmolStr { fn from_iter>(iter: I) -> SmolStr { - let iter = iter.into_iter(); - Self::from_char_iter(iter) + from_char_iter(iter.into_iter()) } } +fn from_char_iter(mut iter: impl Iterator) -> SmolStr { + let (min_size, _) = iter.size_hint(); + if min_size > INLINE_CAP { + let heap: String = iter.collect(); + if heap.len() <= INLINE_CAP { + // size hint lied + return 
SmolStr::new_inline(&heap); + } + return SmolStr(Repr::Heap(heap.into_boxed_str().into())); + } + let mut len = 0; + let mut buf = [0u8; INLINE_CAP]; + while let Some(ch) = iter.next() { + let size = ch.len_utf8(); + if size + len > INLINE_CAP { + let (min_remaining, _) = iter.size_hint(); + let mut heap = String::with_capacity(size + len + min_remaining); + heap.push_str(core::str::from_utf8(&buf[..len]).unwrap()); + heap.push(ch); + heap.extend(iter); + return SmolStr(Repr::Heap(heap.into_boxed_str().into())); + } + ch.encode_utf8(&mut buf[len..]); + len += size; + } + SmolStr(Repr::Inline { + // SAFETY: We know that `len` is less than or equal to the maximum value of `InlineSize` + // as we otherwise return early. + len: unsafe { InlineSize::transmute_from_u8(len as u8) }, + buf, + }) +} + fn build_from_str_iter(mut iter: impl Iterator) -> SmolStr where T: AsRef, @@ -415,14 +402,6 @@ impl FromStr for SmolStr { } } -#[cfg(feature = "arbitrary")] -impl<'a> arbitrary::Arbitrary<'a> for SmolStr { - fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> Result { - let s = <&str>::arbitrary(u)?; - Ok(SmolStr::new(s)) - } -} - const INLINE_CAP: usize = InlineSize::_V23 as usize; const N_NEWLINES: usize = 32; const N_SPACES: usize = 128; @@ -434,6 +413,7 @@ const _: () = { assert!(WS.as_bytes()[N_NEWLINES] == b' '); }; +/// A [`u8`] with a bunch of niches. 
#[derive(Clone, Copy, Debug, PartialEq)] #[repr(u8)] enum InlineSize { @@ -464,10 +444,12 @@ enum InlineSize { } impl InlineSize { + /// SAFETY: `value` must be less than or equal to [`INLINE_CAP`] #[inline(always)] const unsafe fn transmute_from_u8(value: u8) -> Self { debug_assert!(value <= InlineSize::_V23 as u8); - unsafe { transmute::(value) } + // SAFETY: The caller is responsible to uphold this invariant + unsafe { mem::transmute::(value) } } } @@ -518,11 +500,8 @@ impl Repr { None } - fn new(text: T) -> Self - where - T: AsRef, - { - Self::new_on_stack(text.as_ref()).unwrap_or_else(|| Repr::Heap(text.as_ref().into())) + fn new(text: &str) -> Self { + Self::new_on_stack(text).unwrap_or_else(|| Repr::Heap(Arc::from(text))) } #[inline(always)] @@ -539,7 +518,7 @@ impl Repr { match self { Repr::Heap(data) => data.is_empty(), Repr::Static(data) => data.is_empty(), - Repr::Inline { len, .. } => *len as u8 == 0, + &Repr::Inline { len, .. } => len as u8 == 0, } } @@ -550,7 +529,8 @@ impl Repr { Repr::Static(data) => data, Repr::Inline { len, buf } => { let len = *len as usize; - let buf = &buf[..len]; + // SAFETY: len is guaranteed to be <= INLINE_CAP + let buf = unsafe { buf.get_unchecked(..len) }; // SAFETY: buf is guaranteed to be valid utf8 for ..len bytes unsafe { ::core::str::from_utf8_unchecked(buf) } } @@ -633,22 +613,22 @@ pub trait StrExt: private::Sealed { impl StrExt for str { #[inline] fn to_lowercase_smolstr(&self) -> SmolStr { - SmolStr::from_char_iter(self.chars().flat_map(|c| c.to_lowercase())) + from_char_iter(self.chars().flat_map(|c| c.to_lowercase())) } #[inline] fn to_uppercase_smolstr(&self) -> SmolStr { - SmolStr::from_char_iter(self.chars().flat_map(|c| c.to_uppercase())) + from_char_iter(self.chars().flat_map(|c| c.to_uppercase())) } #[inline] fn to_ascii_lowercase_smolstr(&self) -> SmolStr { - SmolStr::from_char_iter(self.chars().map(|c| c.to_ascii_lowercase())) + from_char_iter(self.chars().map(|c| c.to_ascii_lowercase())) } #[inline] fn 
to_ascii_uppercase_smolstr(&self) -> SmolStr { - SmolStr::from_char_iter(self.chars().map(|c| c.to_ascii_uppercase())) + from_char_iter(self.chars().map(|c| c.to_ascii_uppercase())) } #[inline] @@ -754,7 +734,7 @@ impl From for SmolStr { buf: value.inline, } } else { - Repr::new(value.heap) + Repr::new(&value.heap) }) } } @@ -768,5 +748,13 @@ where } } +#[cfg(feature = "arbitrary")] +impl<'a> arbitrary::Arbitrary<'a> for SmolStr { + fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> Result { + let s = <&str>::arbitrary(u)?; + Ok(SmolStr::new(s)) + } +} + #[cfg(feature = "serde")] mod serde; diff --git a/lib/smol_str/tests/test.rs b/lib/smol_str/tests/test.rs index 2e2914d797..631f7d78bf 100644 --- a/lib/smol_str/tests/test.rs +++ b/lib/smol_str/tests/test.rs @@ -44,20 +44,6 @@ fn const_fn_ctor() { assert_eq!(LONG, SmolStr::from("ABCDEFGHIZKLMNOPQRSTUVW")); } -#[allow(deprecated)] -#[test] -fn old_const_fn_ctor() { - const EMPTY: SmolStr = SmolStr::new_inline_from_ascii(0, b""); - const A: SmolStr = SmolStr::new_inline_from_ascii(1, b"A"); - const HELLO: SmolStr = SmolStr::new_inline_from_ascii(5, b"HELLO"); - const LONG: SmolStr = SmolStr::new_inline_from_ascii(23, b"ABCDEFGHIZKLMNOPQRSTUVW"); - - assert_eq!(EMPTY, SmolStr::from("")); - assert_eq!(A, SmolStr::from("A")); - assert_eq!(HELLO, SmolStr::from("HELLO")); - assert_eq!(LONG, SmolStr::from("ABCDEFGHIZKLMNOPQRSTUVW")); -} - #[cfg(not(miri))] fn check_props(std_str: &str, smol: SmolStr) -> Result<(), proptest::test_runner::TestCaseError> { prop_assert_eq!(smol.as_str(), std_str); @@ -253,11 +239,7 @@ fn test_bad_size_hint_char_iter() { let collected: SmolStr = BadSizeHint(data.chars()).collect(); let new = SmolStr::new(data); - // Because of the bad size hint, `collected` will be heap allocated, but `new` will be inline - - // If we try to use the type of the string (inline/heap) to quickly test for equality, we need to ensure - // `collected` is inline allocated instead - 
assert!(collected.is_heap_allocated()); + assert!(!collected.is_heap_allocated()); assert!(!new.is_heap_allocated()); assert_eq!(new, collected); } From a2163274c478f5496146e994c2d880172848bca1 Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Mon, 2 Sep 2024 21:45:25 +0200 Subject: [PATCH 104/132] Add more trait impls --- lib/smol_str/src/lib.rs | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/lib/smol_str/src/lib.rs b/lib/smol_str/src/lib.rs index 9845e4da05..73e757bcad 100644 --- a/lib/smol_str/src/lib.rs +++ b/lib/smol_str/src/lib.rs @@ -1,4 +1,4 @@ -#![no_std] +#![cfg_attr(not(feature = "std"), no_std)] #![cfg_attr(docsrs, feature(doc_auto_cfg))] extern crate alloc; @@ -319,6 +319,29 @@ impl AsRef for SmolStr { } } +impl AsRef<[u8]> for SmolStr { + #[inline(always)] + fn as_ref(&self) -> &[u8] { + self.as_str().as_bytes() + } +} + +#[cfg(feature = "std")] +impl AsRef for SmolStr { + #[inline(always)] + fn as_ref(&self) -> &std::ffi::OsStr { + AsRef::::as_ref(self.as_str()) + } +} + +#[cfg(feature = "std")] +impl AsRef for SmolStr { + #[inline(always)] + fn as_ref(&self) -> &std::path::Path { + AsRef::::as_ref(self.as_str()) + } +} + impl From<&str> for SmolStr { #[inline] fn from(s: &str) -> SmolStr { From aa63570498b0aa4fc3caae2855d55fbfc2fe0219 Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Tue, 3 Sep 2024 09:11:57 +0200 Subject: [PATCH 105/132] Expose SmolStrBuilder --- lib/smol_str/src/lib.rs | 130 +++++++++++++++++++++---------------- lib/smol_str/tests/test.rs | 38 ++++++++++- 2 files changed, 110 insertions(+), 58 deletions(-) diff --git a/lib/smol_str/src/lib.rs b/lib/smol_str/src/lib.rs index 73e757bcad..448315c338 100644 --- a/lib/smol_str/src/lib.rs +++ b/lib/smol_str/src/lib.rs @@ -620,7 +620,6 @@ pub trait StrExt: private::Sealed { /// potentially without allocating. /// /// See [`str::replace`]. - // TODO: Use `Pattern` when stable. 
#[must_use = "this returns a new SmolStr without modifying the original"] fn replace_smolstr(&self, from: &str, to: &str) -> SmolStr; @@ -628,7 +627,6 @@ pub trait StrExt: private::Sealed { /// potentially without allocating. /// /// See [`str::replacen`]. - // TODO: Use `Pattern` when stable. #[must_use = "this returns a new SmolStr without modifying the original"] fn replacen_smolstr(&self, from: &str, to: &str, count: usize) -> SmolStr; } @@ -661,7 +659,7 @@ impl StrExt for str { #[inline] fn replacen_smolstr(&self, from: &str, to: &str, count: usize) -> SmolStr { - let mut result = Writer::new(); + let mut result = SmolStrBuilder::new(); let mut last_end = 0; for (start, part) in self.match_indices(from).take(count) { // SAFETY: `start` is guaranteed to be within the bounds of `self` as per @@ -677,6 +675,15 @@ impl StrExt for str { } } +impl ToSmolStr for T +where + T: fmt::Display + ?Sized, +{ + fn to_smolstr(&self) -> SmolStr { + format_smolstr!("{}", self) + } +} + mod private { /// No downstream impls allowed. pub trait Sealed {} @@ -689,58 +696,84 @@ mod private { #[macro_export] macro_rules! format_smolstr { ($($tt:tt)*) => {{ - use ::core::fmt::Write; - let mut w = $crate::Writer::new(); - w.write_fmt(format_args!($($tt)*)).expect("a formatting trait implementation returned an error"); - $crate::SmolStr::from(w) + let mut w = $crate::SmolStrBuilder::new(); + ::core::fmt::Write::write_fmt(&mut w, format_args!($($tt)*)).expect("a formatting trait implementation returned an error"); + w.finish() }}; } -#[doc(hidden)] -pub struct Writer { - inline: [u8; INLINE_CAP], - heap: String, - len: usize, +/// A builder that can be used to efficiently build a [`SmolStr`]. +/// +/// This won't allocate if the final string fits into the inline buffer. 
+#[derive(Clone, Debug, PartialEq, Eq)] +pub enum SmolStrBuilder { + Inline { len: usize, buf: [u8; INLINE_CAP] }, + Heap(String), } -impl Writer { +impl Default for SmolStrBuilder { + #[inline] + fn default() -> Self { + Self::new() + } +} + +impl SmolStrBuilder { + /// Creates a new empty [`SmolStrBuilder`]. #[must_use] pub const fn new() -> Self { - Writer { - inline: [0; INLINE_CAP], - heap: String::new(), + SmolStrBuilder::Inline { + buf: [0; INLINE_CAP], len: 0, } } - fn push_str(&mut self, s: &str) { + /// Builds a [`SmolStr`] from `self`. + #[must_use] + pub fn finish(&self) -> SmolStr { + SmolStr(match self { + &SmolStrBuilder::Inline { len, buf } => { + debug_assert!(len <= INLINE_CAP); + Repr::Inline { + // SAFETY: We know that `value.len` is less than or equal to the maximum value of `InlineSize` + len: unsafe { InlineSize::transmute_from_u8(len as u8) }, + buf, + } + } + SmolStrBuilder::Heap(heap) => Repr::new(heap), + }) + } + + /// Appends a given string slice onto the end of `self`'s buffer. 
+ pub fn push_str(&mut self, s: &str) { // if currently on the stack - if self.len <= INLINE_CAP { - let old_len = self.len; - self.len += s.len(); + match self { + Self::Inline { len, buf } => { + let old_len = *len; + *len += s.len(); - // if the new length will fit on the stack (even if it fills it entirely) - if self.len <= INLINE_CAP { - self.inline[old_len..self.len].copy_from_slice(s.as_bytes()); - return; // skip the heap push below - } - - self.heap.reserve(self.len); - - // copy existing inline bytes over to the heap - // SAFETY: inline data is guaranteed to be valid utf8 for `old_len` bytes - unsafe { - self.heap - .as_mut_vec() - .extend_from_slice(&self.inline[..old_len]); + // if the new length will fit on the stack (even if it fills it entirely) + if *len <= INLINE_CAP { + buf[old_len..*len].copy_from_slice(s.as_bytes()); + return; // skip the heap push below + } + + let mut heap = String::with_capacity(*len); + + // copy existing inline bytes over to the heap + // SAFETY: inline data is guaranteed to be valid utf8 for `old_len` bytes + unsafe { + heap.as_mut_vec().extend_from_slice(&buf[..old_len]); + } + heap.push_str(s); + *self = SmolStrBuilder::Heap(heap); } + SmolStrBuilder::Heap(heap) => heap.push_str(s), } - - self.heap.push_str(s); } } -impl fmt::Write for Writer { +impl fmt::Write for SmolStrBuilder { #[inline] fn write_str(&mut self, s: &str) -> fmt::Result { self.push_str(s); @@ -748,26 +781,9 @@ impl fmt::Write for Writer { } } -impl From for SmolStr { - fn from(value: Writer) -> Self { - SmolStr(if value.len <= INLINE_CAP { - Repr::Inline { - // SAFETY: We know that `value.len` is less than or equal to the maximum value of `InlineSize` - len: unsafe { InlineSize::transmute_from_u8(value.len as u8) }, - buf: value.inline, - } - } else { - Repr::new(&value.heap) - }) - } -} - -impl ToSmolStr for T -where - T: fmt::Display + ?Sized, -{ - fn to_smolstr(&self) -> SmolStr { - format_smolstr!("{}", self) +impl From for SmolStr { + fn 
from(value: SmolStrBuilder) -> Self { + value.finish() } } diff --git a/lib/smol_str/tests/test.rs b/lib/smol_str/tests/test.rs index 631f7d78bf..0d553caabc 100644 --- a/lib/smol_str/tests/test.rs +++ b/lib/smol_str/tests/test.rs @@ -3,7 +3,7 @@ use std::sync::Arc; #[cfg(not(miri))] use proptest::{prop_assert, prop_assert_eq, proptest}; -use smol_str::SmolStr; +use smol_str::{SmolStr, SmolStrBuilder}; #[test] #[cfg(target_pointer_width = "64")] @@ -255,6 +255,42 @@ fn test_to_smolstr() { assert_eq!(a, smol_str::format_smolstr!("{}", a)); } } +#[test] +fn test_builder() { + //empty + let builder = SmolStrBuilder::new(); + assert_eq!("", builder.finish()); + + // inline push + let mut builder = SmolStrBuilder::new(); + builder.push_str("a"); + builder.push_str("b"); + let s = builder.finish(); + assert!(!s.is_heap_allocated()); + assert_eq!("ab", s); + + // inline max push + let mut builder = SmolStrBuilder::new(); + builder.push_str(&"a".repeat(23)); + let s = builder.finish(); + assert!(!s.is_heap_allocated()); + assert_eq!("a".repeat(23), s); + + // heap push immediate + let mut builder = SmolStrBuilder::new(); + builder.push_str(&"a".repeat(24)); + let s = builder.finish(); + assert!(s.is_heap_allocated()); + assert_eq!("a".repeat(24), s); + + // heap push succession + let mut builder = SmolStrBuilder::new(); + builder.push_str(&"a".repeat(23)); + builder.push_str(&"a".repeat(23)); + let s = builder.finish(); + assert!(s.is_heap_allocated()); + assert_eq!("a".repeat(46), s); +} #[cfg(test)] mod test_str_ext { From bf431789d3b5f982ac6eba379f3945791572906a Mon Sep 17 00:00:00 2001 From: Corvin Paul Date: Wed, 17 Apr 2024 12:28:14 +0100 Subject: [PATCH 106/132] Add support for borsh --- lib/smol_str/Cargo.toml | 3 +- lib/smol_str/src/borsh.rs | 58 ++++++++++++++++++++++++++++++++++++++ lib/smol_str/src/lib.rs | 2 ++ lib/smol_str/tests/test.rs | 52 ++++++++++++++++++++++++++++++++++ 4 files changed, 114 insertions(+), 1 deletion(-) create mode 100644 
lib/smol_str/src/borsh.rs diff --git a/lib/smol_str/Cargo.toml b/lib/smol_str/Cargo.toml index c1e34e7d7b..7dd7a5f9bb 100644 --- a/lib/smol_str/Cargo.toml +++ b/lib/smol_str/Cargo.toml @@ -13,6 +13,7 @@ all-features = true [dependencies] serde = { version = "1.0", optional = true, default-features = false } +borsh = { version = "1.4.0", optional = true, default-features = false } arbitrary = { version = "1.3", optional = true } [dev-dependencies] @@ -22,4 +23,4 @@ serde = { version = "1.0", features = ["derive"] } [features] default = ["std"] -std = ["serde?/std"] +std = ["serde?/std", "borsh?/std"] diff --git a/lib/smol_str/src/borsh.rs b/lib/smol_str/src/borsh.rs new file mode 100644 index 0000000000..12580cb4f2 --- /dev/null +++ b/lib/smol_str/src/borsh.rs @@ -0,0 +1,58 @@ +use crate::{Repr, SmolStr, INLINE_CAP}; +use alloc::string::{String, ToString}; +use borsh::io::{Error, ErrorKind, Read, Write}; +use borsh::{BorshDeserialize, BorshSerialize}; +use core::intrinsics::transmute; + +impl BorshSerialize for SmolStr { + fn serialize(&self, writer: &mut W) -> borsh::io::Result<()> { + self.as_str().serialize(writer) + } +} + +impl BorshDeserialize for SmolStr { + #[inline] + fn deserialize_reader(reader: &mut R) -> borsh::io::Result { + let len = u32::deserialize_reader(reader)?; + if (len as usize) < INLINE_CAP { + let mut buf = [0u8; INLINE_CAP]; + reader.read_exact(&mut buf[..len as usize])?; + _ = core::str::from_utf8(&buf[..len as usize]).map_err(|err| { + let msg = err.to_string(); + Error::new(ErrorKind::InvalidData, msg) + })?; + Ok(SmolStr(Repr::Inline { + len: unsafe { transmute(len as u8) }, + buf, + })) + } else { + // u8::vec_from_reader always returns Some on success in current implementation + let vec = u8::vec_from_reader(len, reader)?.ok_or_else(|| { + Error::new( + ErrorKind::Other, + "u8::vec_from_reader unexpectedly returned None".to_string(), + ) + })?; + Ok(SmolStr::from(String::from_utf8(vec).map_err(|err| { + let msg = err.to_string(); + 
Error::new(ErrorKind::InvalidData, msg) + })?)) + } + } +} + +#[cfg(feature = "borsh/unstable__schema")] +mod schema { + use alloc::collections::BTreeMap; + use borsh::schema::{Declaration, Definition}; + use borsh::BorshSchema; + impl BorshSchema for SmolStr { + fn add_definitions_recursively(definitions: &mut BTreeMap) { + str::add_definitions_recursively(definitions) + } + + fn declaration() -> Declaration { + str::declaration() + } + } +} diff --git a/lib/smol_str/src/lib.rs b/lib/smol_str/src/lib.rs index 448315c338..cc8612ee45 100644 --- a/lib/smol_str/src/lib.rs +++ b/lib/smol_str/src/lib.rs @@ -795,5 +795,7 @@ impl<'a> arbitrary::Arbitrary<'a> for SmolStr { } } +#[cfg(feature = "borsh")] +mod borsh; #[cfg(feature = "serde")] mod serde; diff --git a/lib/smol_str/tests/test.rs b/lib/smol_str/tests/test.rs index 0d553caabc..22b9df2afd 100644 --- a/lib/smol_str/tests/test.rs +++ b/lib/smol_str/tests/test.rs @@ -348,3 +348,55 @@ mod test_str_ext { assert!(!result.is_heap_allocated()); } } +#[cfg(feature = "borsh")] + +mod borsh_tests { + use borsh::BorshDeserialize; + use smol_str::{SmolStr, ToSmolStr}; + use std::io::Cursor; + + #[test] + fn borsh_serialize_stack() { + let smolstr_on_stack = "aßΔCaßδc".to_smolstr(); + let mut buffer = Vec::new(); + borsh::BorshSerialize::serialize(&smolstr_on_stack, &mut buffer).unwrap(); + let mut cursor = Cursor::new(buffer); + let decoded: SmolStr = borsh::BorshDeserialize::deserialize_reader(&mut cursor).unwrap(); + assert_eq!(smolstr_on_stack, decoded); + } + #[test] + fn borsh_serialize_heap() { + let smolstr_on_heap = "aßΔCaßδcaßΔCaßδcaßΔCaßδcaßΔCaßδcaßΔCaßδcaßΔCaßδcaßΔCaßδcaßΔCaßδcaßΔCaßδcaßΔCaßδcaßΔCaßδc".to_smolstr(); + let mut buffer = Vec::new(); + borsh::BorshSerialize::serialize(&smolstr_on_heap, &mut buffer).unwrap(); + let mut cursor = Cursor::new(buffer); + let decoded: SmolStr = borsh::BorshDeserialize::deserialize_reader(&mut cursor).unwrap(); + assert_eq!(smolstr_on_heap, decoded); + } + #[test] + fn 
borsh_non_utf8_stack() { + let invalid_utf8: Vec = vec![0xF0, 0x9F, 0x8F]; // Incomplete UTF-8 sequence + + let wrong_utf8 = SmolStr::from(unsafe { String::from_utf8_unchecked(invalid_utf8) }); + let mut buffer = Vec::new(); + borsh::BorshSerialize::serialize(&wrong_utf8, &mut buffer).unwrap(); + let mut cursor = Cursor::new(buffer); + let result = SmolStr::deserialize_reader(&mut cursor); + assert!(result.is_err()); + } + + #[test] + fn borsh_non_utf8_heap() { + let invalid_utf8: Vec = vec![ + 0xC1, 0x8A, 0x5F, 0xE2, 0x3A, 0x9E, 0x3B, 0xAA, 0x01, 0x08, 0x6F, 0x2F, 0xC0, 0x32, + 0xAB, 0xE1, 0x9A, 0x2F, 0x4A, 0x3F, 0x25, 0x0D, 0x8A, 0x2A, 0x19, 0x11, 0xF0, 0x7F, + 0x0E, 0x80, + ]; + let wrong_utf8 = SmolStr::from(unsafe { String::from_utf8_unchecked(invalid_utf8) }); + let mut buffer = Vec::new(); + borsh::BorshSerialize::serialize(&wrong_utf8, &mut buffer).unwrap(); + let mut cursor = Cursor::new(buffer); + let result = SmolStr::deserialize_reader(&mut cursor); + assert!(result.is_err()); + } +} From cf33d6ddb51a3170b23b9751fbab6abab537f7b9 Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Tue, 3 Sep 2024 09:18:31 +0200 Subject: [PATCH 107/132] Drop unstable__schema --- lib/smol_str/src/borsh.rs | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/lib/smol_str/src/borsh.rs b/lib/smol_str/src/borsh.rs index 12580cb4f2..5617bce371 100644 --- a/lib/smol_str/src/borsh.rs +++ b/lib/smol_str/src/borsh.rs @@ -40,19 +40,3 @@ impl BorshDeserialize for SmolStr { } } } - -#[cfg(feature = "borsh/unstable__schema")] -mod schema { - use alloc::collections::BTreeMap; - use borsh::schema::{Declaration, Definition}; - use borsh::BorshSchema; - impl BorshSchema for SmolStr { - fn add_definitions_recursively(definitions: &mut BTreeMap) { - str::add_definitions_recursively(definitions) - } - - fn declaration() -> Declaration { - str::declaration() - } - } -} From 6216c056908e718dfe4b48da740b915f55feab20 Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Tue, 3 Sep 2024 
09:27:28 +0200 Subject: [PATCH 108/132] SmolStrBuilder::push --- lib/smol_str/src/lib.rs | 27 +++++++++++++++++++++++---- lib/smol_str/tests/test.rs | 35 ++++++++++++++++++++++++++++++++++- 2 files changed, 57 insertions(+), 5 deletions(-) diff --git a/lib/smol_str/src/lib.rs b/lib/smol_str/src/lib.rs index cc8612ee45..9cc21ec780 100644 --- a/lib/smol_str/src/lib.rs +++ b/lib/smol_str/src/lib.rs @@ -744,9 +744,30 @@ impl SmolStrBuilder { }) } + /// Appends the given [`char`] to the end of `self`'s buffer. + pub fn push(&mut self, c: char) { + match self { + SmolStrBuilder::Inline { len, buf } => { + let char_len = c.len_utf8(); + let new_len = *len + char_len; + if new_len <= INLINE_CAP { + c.encode_utf8(&mut buf[*len..]); + *len += char_len; + } else { + let mut heap = String::with_capacity(new_len); + // copy existing inline bytes over to the heap + // SAFETY: inline data is guaranteed to be valid utf8 for `old_len` bytes + unsafe { heap.as_mut_vec().extend_from_slice(buf) }; + heap.push(c); + *self = SmolStrBuilder::Heap(heap); + } + } + SmolStrBuilder::Heap(h) => h.push(c), + } + } + /// Appends a given string slice onto the end of `self`'s buffer. 
pub fn push_str(&mut self, s: &str) { - // if currently on the stack match self { Self::Inline { len, buf } => { let old_len = *len; @@ -762,9 +783,7 @@ impl SmolStrBuilder { // copy existing inline bytes over to the heap // SAFETY: inline data is guaranteed to be valid utf8 for `old_len` bytes - unsafe { - heap.as_mut_vec().extend_from_slice(&buf[..old_len]); - } + unsafe { heap.as_mut_vec().extend_from_slice(&buf[..old_len]) }; heap.push_str(s); *self = SmolStrBuilder::Heap(heap); } diff --git a/lib/smol_str/tests/test.rs b/lib/smol_str/tests/test.rs index 22b9df2afd..81bccf106e 100644 --- a/lib/smol_str/tests/test.rs +++ b/lib/smol_str/tests/test.rs @@ -256,7 +256,7 @@ fn test_to_smolstr() { } } #[test] -fn test_builder() { +fn test_builder_push_str() { //empty let builder = SmolStrBuilder::new(); assert_eq!("", builder.finish()); @@ -292,6 +292,39 @@ fn test_builder() { assert_eq!("a".repeat(46), s); } +#[test] +fn test_builder_push() { + //empty + let builder = SmolStrBuilder::new(); + assert_eq!("", builder.finish()); + + // inline push + let mut builder = SmolStrBuilder::new(); + builder.push('a'); + builder.push('b'); + let s = builder.finish(); + assert!(!s.is_heap_allocated()); + assert_eq!("ab", s); + + // inline max push + let mut builder = SmolStrBuilder::new(); + for _ in 0..23 { + builder.push('a'); + } + let s = builder.finish(); + assert!(!s.is_heap_allocated()); + assert_eq!("a".repeat(23), s); + + // heap push + let mut builder = SmolStrBuilder::new(); + for _ in 0..24 { + builder.push('a'); + } + let s = builder.finish(); + assert!(s.is_heap_allocated()); + assert_eq!("a".repeat(24), s); +} + #[cfg(test)] mod test_str_ext { use smol_str::StrExt; From e172e4ee256cca5d1f4dbf99ffdac88718636fcd Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Tue, 3 Sep 2024 09:30:04 +0200 Subject: [PATCH 109/132] Publish 0.3.0 --- lib/smol_str/Cargo.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/smol_str/Cargo.toml 
b/lib/smol_str/Cargo.toml index 7dd7a5f9bb..1f6b0f72d7 100644 --- a/lib/smol_str/Cargo.toml +++ b/lib/smol_str/Cargo.toml @@ -1,10 +1,10 @@ [package] name = "smol_str" -version = "0.2.2" +version = "0.3.0" description = "small-string optimized string type with O(1) clone" license = "MIT OR Apache-2.0" repository = "https://github.com/rust-analyzer/smol_str" -authors = ["Aleksey Kladov "] +authors = ["Aleksey Kladov ", "Lukas Wirth "] edition = "2021" [package.metadata.docs.rs] From 4c0d45e59bd47e4dac9621c11c5e1d45b6f945e7 Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Tue, 3 Sep 2024 09:39:50 +0200 Subject: [PATCH 110/132] Make SmolStrBuilder fields private --- lib/smol_str/src/lib.rs | 38 ++++++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/lib/smol_str/src/lib.rs b/lib/smol_str/src/lib.rs index 9cc21ec780..e3a8ef8b8a 100644 --- a/lib/smol_str/src/lib.rs +++ b/lib/smol_str/src/lib.rs @@ -705,16 +705,22 @@ macro_rules! format_smolstr { /// A builder that can be used to efficiently build a [`SmolStr`]. /// /// This won't allocate if the final string fits into the inline buffer. +#[derive(Clone, Default, Debug, PartialEq, Eq)] +pub struct SmolStrBuilder(SmolStrBuilderRepr); + #[derive(Clone, Debug, PartialEq, Eq)] -pub enum SmolStrBuilder { +enum SmolStrBuilderRepr { Inline { len: usize, buf: [u8; INLINE_CAP] }, Heap(String), } -impl Default for SmolStrBuilder { +impl Default for SmolStrBuilderRepr { #[inline] fn default() -> Self { - Self::new() + SmolStrBuilderRepr::Inline { + buf: [0; INLINE_CAP], + len: 0, + } } } @@ -722,17 +728,17 @@ impl SmolStrBuilder { /// Creates a new empty [`SmolStrBuilder`]. #[must_use] pub const fn new() -> Self { - SmolStrBuilder::Inline { + Self(SmolStrBuilderRepr::Inline { buf: [0; INLINE_CAP], len: 0, - } + }) } /// Builds a [`SmolStr`] from `self`. 
#[must_use] pub fn finish(&self) -> SmolStr { - SmolStr(match self { - &SmolStrBuilder::Inline { len, buf } => { + SmolStr(match &self.0 { + &SmolStrBuilderRepr::Inline { len, buf } => { debug_assert!(len <= INLINE_CAP); Repr::Inline { // SAFETY: We know that `value.len` is less than or equal to the maximum value of `InlineSize` @@ -740,14 +746,14 @@ impl SmolStrBuilder { buf, } } - SmolStrBuilder::Heap(heap) => Repr::new(heap), + SmolStrBuilderRepr::Heap(heap) => Repr::new(heap), }) } /// Appends the given [`char`] to the end of `self`'s buffer. pub fn push(&mut self, c: char) { - match self { - SmolStrBuilder::Inline { len, buf } => { + match &mut self.0 { + SmolStrBuilderRepr::Inline { len, buf } => { let char_len = c.len_utf8(); let new_len = *len + char_len; if new_len <= INLINE_CAP { @@ -759,17 +765,17 @@ impl SmolStrBuilder { // SAFETY: inline data is guaranteed to be valid utf8 for `old_len` bytes unsafe { heap.as_mut_vec().extend_from_slice(buf) }; heap.push(c); - *self = SmolStrBuilder::Heap(heap); + self.0 = SmolStrBuilderRepr::Heap(heap); } } - SmolStrBuilder::Heap(h) => h.push(c), + SmolStrBuilderRepr::Heap(h) => h.push(c), } } /// Appends a given string slice onto the end of `self`'s buffer. 
pub fn push_str(&mut self, s: &str) { - match self { - Self::Inline { len, buf } => { + match &mut self.0 { + SmolStrBuilderRepr::Inline { len, buf } => { let old_len = *len; *len += s.len(); @@ -785,9 +791,9 @@ impl SmolStrBuilder { // SAFETY: inline data is guaranteed to be valid utf8 for `old_len` bytes unsafe { heap.as_mut_vec().extend_from_slice(&buf[..old_len]) }; heap.push_str(s); - *self = SmolStrBuilder::Heap(heap); + self.0 = SmolStrBuilderRepr::Heap(heap); } - SmolStrBuilder::Heap(heap) => heap.push_str(s), + SmolStrBuilderRepr::Heap(heap) => heap.push_str(s), } } } From 6db8eda235c41cafd3cb083d3bdfc6d6e20e8a9f Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Tue, 3 Sep 2024 09:40:01 +0200 Subject: [PATCH 111/132] Publish 0.3.1 --- lib/smol_str/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/smol_str/Cargo.toml b/lib/smol_str/Cargo.toml index 1f6b0f72d7..18506059d8 100644 --- a/lib/smol_str/Cargo.toml +++ b/lib/smol_str/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "smol_str" -version = "0.3.0" +version = "0.3.1" description = "small-string optimized string type with O(1) clone" license = "MIT OR Apache-2.0" repository = "https://github.com/rust-analyzer/smol_str" From 5e0ab10e12c5170c1775e663dd1d4713b79042e8 Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Tue, 3 Sep 2024 10:03:47 +0200 Subject: [PATCH 112/132] Add CHANGELOG.md --- lib/smol_str/CHANGELOG.md | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 lib/smol_str/CHANGELOG.md diff --git a/lib/smol_str/CHANGELOG.md b/lib/smol_str/CHANGELOG.md new file mode 100644 index 0000000000..82e66e1ed4 --- /dev/null +++ b/lib/smol_str/CHANGELOG.md @@ -0,0 +1,14 @@ +# Changelog + +## 0.3.1 - 2024-09-04 + +- Fix `SmolStrBuilder` leaking implementation details + +## 0.3.0 - 2024-09-04 + +- Removed deprecated `SmolStr::new_inline_from_ascii` function +- Removed `SmolStr::to_string` in favor of `ToString::to_string` +- Added `impl AsRef<[u8]> for SmolStr` impl 
+- Added `impl AsRef<OsStr> for SmolStr` impl +- Added `impl AsRef<Path> for SmolStr` impl +- Added `SmolStrBuilder` From 5ca72343dc34d0651bd971be9cfa20100779793a Mon Sep 17 00:00:00 2001 From: Alex Butler Date: Tue, 3 Sep 2024 09:21:42 +0100 Subject: [PATCH 113/132] Add 0.2.2 changelog & fix lints --- lib/smol_str/CHANGELOG.md | 17 +++++++++++------ lib/smol_str/src/borsh.rs | 2 +- lib/smol_str/src/lib.rs | 2 +- 3 files changed, 13 insertions(+), 8 deletions(-) diff --git a/lib/smol_str/CHANGELOG.md b/lib/smol_str/CHANGELOG.md index 82e66e1ed4..f407bc11a5 100644 --- a/lib/smol_str/CHANGELOG.md +++ b/lib/smol_str/CHANGELOG.md @@ -6,9 +6,14 @@ ## 0.3.0 - 2024-09-04 -- Removed deprecated `SmolStr::new_inline_from_ascii` function -- Removed `SmolStr::to_string` in favor of `ToString::to_string` -- Added `impl AsRef<[u8]> for SmolStr` impl -- Added `impl AsRef<OsStr> for SmolStr` impl -- Added `impl AsRef<Path> for SmolStr` impl -- Added `SmolStrBuilder` +- Remove deprecated `SmolStr::new_inline_from_ascii` function +- Remove `SmolStr::to_string` in favor of `ToString::to_string` +- Add `impl AsRef<[u8]> for SmolStr` impl +- Add `impl AsRef<OsStr> for SmolStr` impl +- Add `impl AsRef<Path> for SmolStr` impl +- Add `SmolStrBuilder` + +## 0.2.2 - 2024-05-14 + +- Add `StrExt` trait providing `to_lowercase_smolstr`, `replace_smolstr` and similar +- Add `PartialEq` optimisation for `ptr_eq`-able representations diff --git a/lib/smol_str/src/borsh.rs b/lib/smol_str/src/borsh.rs index 5617bce371..362c288d01 100644 --- a/lib/smol_str/src/borsh.rs +++ b/lib/smol_str/src/borsh.rs @@ -22,7 +22,7 @@ impl BorshDeserialize for SmolStr { Error::new(ErrorKind::InvalidData, msg) })?; Ok(SmolStr(Repr::Inline { - len: unsafe { transmute(len as u8) }, + len: unsafe { transmute::<u8, crate::InlineSize>(len as u8) }, buf, })) } else { diff --git a/lib/smol_str/src/lib.rs b/lib/smol_str/src/lib.rs index e3a8ef8b8a..d00ec98915 100644 --- a/lib/smol_str/src/lib.rs +++ b/lib/smol_str/src/lib.rs @@ -19,7 +19,7 @@ use core::{ /// * Strings are
stack-allocated if they are: /// * Up to 23 bytes long /// * Longer than 23 bytes, but substrings of `WS` (see below). Such strings consist -/// solely of consecutive newlines, followed by consecutive spaces +/// solely of consecutive newlines, followed by consecutive spaces /// * If a string does not satisfy the aforementioned conditions, it is heap-allocated /// * Additionally, a `SmolStr` can be explicitly created from a `&'static str` without allocation /// From b5d68d17eb4ae181d08cf72c71e633a59d0720b8 Mon Sep 17 00:00:00 2001 From: Alex Butler Date: Tue, 3 Sep 2024 09:47:53 +0100 Subject: [PATCH 114/132] Update CHANGELOG.md MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Laurențiu Nicola --- lib/smol_str/CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/smol_str/CHANGELOG.md b/lib/smol_str/CHANGELOG.md index f407bc11a5..c1346a28a4 100644 --- a/lib/smol_str/CHANGELOG.md +++ b/lib/smol_str/CHANGELOG.md @@ -16,4 +16,4 @@ ## 0.2.2 - 2024-05-14 - Add `StrExt` trait providing `to_lowercase_smolstr`, `replace_smolstr` and similar -- Add `PartialEq` optimisation for `ptr_eq`-able representations +- Add `PartialEq` optimization for `ptr_eq`-able representations From 037436ef16b20d6eb5124e5db98daf40dbcd2135 Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Wed, 23 Oct 2024 22:53:32 +0200 Subject: [PATCH 115/132] Fix SmoLStrBuilder pushing null bytes on heap spill --- lib/smol_str/CHANGELOG.md | 5 +++++ lib/smol_str/src/lib.rs | 2 +- lib/smol_str/tests/test.rs | 9 +++++++++ 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/lib/smol_str/CHANGELOG.md b/lib/smol_str/CHANGELOG.md index c1346a28a4..41f1377430 100644 --- a/lib/smol_str/CHANGELOG.md +++ b/lib/smol_str/CHANGELOG.md @@ -1,5 +1,10 @@ # Changelog +## 0.3.2 - 2024-10-23 + +- Fix `SmolStrBuilder::push` incorrectly padding null bytes when spilling onto the heap on a + multibyte character push + ## 0.3.1 - 2024-09-04 - 
Fix `SmolStrBuilder` leaking implementation details diff --git a/lib/smol_str/src/lib.rs b/lib/smol_str/src/lib.rs index d00ec98915..bf88f57cf8 100644 --- a/lib/smol_str/src/lib.rs +++ b/lib/smol_str/src/lib.rs @@ -763,7 +763,7 @@ impl SmolStrBuilder { let mut heap = String::with_capacity(new_len); // copy existing inline bytes over to the heap // SAFETY: inline data is guaranteed to be valid utf8 for `old_len` bytes - unsafe { heap.as_mut_vec().extend_from_slice(buf) }; + unsafe { heap.as_mut_vec().extend_from_slice(&buf[..*len]) }; heap.push(c); self.0 = SmolStrBuilderRepr::Heap(heap); } diff --git a/lib/smol_str/tests/test.rs b/lib/smol_str/tests/test.rs index 81bccf106e..96b8b8f7f0 100644 --- a/lib/smol_str/tests/test.rs +++ b/lib/smol_str/tests/test.rs @@ -255,6 +255,7 @@ fn test_to_smolstr() { assert_eq!(a, smol_str::format_smolstr!("{}", a)); } } + #[test] fn test_builder_push_str() { //empty @@ -290,6 +291,14 @@ fn test_builder_push_str() { let s = builder.finish(); assert!(s.is_heap_allocated()); assert_eq!("a".repeat(46), s); + + // heap push on multibyte char + let mut builder = SmolStrBuilder::new(); + builder.push_str("ohnonononononononono!"); + builder.push('🤯'); + let s = builder.finish(); + assert!(s.is_heap_allocated()); + assert_eq!("ohnonononononononono!🤯", s); } #[test] From 076e315d60a53fe3d431b85d12344a7a0228bc72 Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Wed, 23 Oct 2024 22:54:26 +0200 Subject: [PATCH 116/132] Publish 0.3.2 --- lib/smol_str/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/smol_str/Cargo.toml b/lib/smol_str/Cargo.toml index 18506059d8..e89e0e8e02 100644 --- a/lib/smol_str/Cargo.toml +++ b/lib/smol_str/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "smol_str" -version = "0.3.1" +version = "0.3.2" description = "small-string optimized string type with O(1) clone" license = "MIT OR Apache-2.0" repository = "https://github.com/rust-analyzer/smol_str" From 3e6863eeb16b6d27f18e7bdc6dba24b9d54aa240 
Mon Sep 17 00:00:00 2001 From: Alex Butler Date: Sun, 14 Sep 2025 12:34:12 +0100 Subject: [PATCH 117/132] Fix lints --- lib/smol_str/src/borsh.rs | 16 +++++++--------- lib/smol_str/src/lib.rs | 6 +++--- lib/smol_str/tests/test.rs | 2 +- 3 files changed, 11 insertions(+), 13 deletions(-) diff --git a/lib/smol_str/src/borsh.rs b/lib/smol_str/src/borsh.rs index 362c288d01..ebb20d71a0 100644 --- a/lib/smol_str/src/borsh.rs +++ b/lib/smol_str/src/borsh.rs @@ -1,8 +1,10 @@ use crate::{Repr, SmolStr, INLINE_CAP}; use alloc::string::{String, ToString}; -use borsh::io::{Error, ErrorKind, Read, Write}; -use borsh::{BorshDeserialize, BorshSerialize}; -use core::intrinsics::transmute; +use borsh::{ + io::{Error, ErrorKind, Read, Write}, + BorshDeserialize, BorshSerialize, +}; +use core::mem::transmute; impl BorshSerialize for SmolStr { fn serialize(&self, writer: &mut W) -> borsh::io::Result<()> { @@ -27,12 +29,8 @@ impl BorshDeserialize for SmolStr { })) } else { // u8::vec_from_reader always returns Some on success in current implementation - let vec = u8::vec_from_reader(len, reader)?.ok_or_else(|| { - Error::new( - ErrorKind::Other, - "u8::vec_from_reader unexpectedly returned None".to_string(), - ) - })?; + let vec = u8::vec_from_reader(len, reader)? 
+ .ok_or_else(|| Error::other("u8::vec_from_reader unexpectedly returned None"))?; Ok(SmolStr::from(String::from_utf8(vec).map_err(|err| { let msg = err.to_string(); Error::new(ErrorKind::InvalidData, msg) diff --git a/lib/smol_str/src/lib.rs b/lib/smol_str/src/lib.rs index bf88f57cf8..d76f029dbe 100644 --- a/lib/smol_str/src/lib.rs +++ b/lib/smol_str/src/lib.rs @@ -161,7 +161,7 @@ impl<'a> PartialEq<&'a str> for SmolStr { } } -impl<'a> PartialEq for &'a str { +impl PartialEq for &str { #[inline(always)] fn eq(&self, other: &SmolStr) -> bool { *self == other @@ -189,7 +189,7 @@ impl<'a> PartialEq<&'a String> for SmolStr { } } -impl<'a> PartialEq for &'a String { +impl PartialEq for &String { #[inline(always)] fn eq(&self, other: &SmolStr) -> bool { *self == other @@ -380,7 +380,7 @@ impl From> for SmolStr { impl From> for SmolStr { #[inline] fn from(s: Arc) -> SmolStr { - let repr = Repr::new_on_stack(s.as_ref()).unwrap_or_else(|| Repr::Heap(s)); + let repr = Repr::new_on_stack(s.as_ref()).unwrap_or(Repr::Heap(s)); Self(repr) } } diff --git a/lib/smol_str/tests/test.rs b/lib/smol_str/tests/test.rs index 96b8b8f7f0..0070b3a5ec 100644 --- a/lib/smol_str/tests/test.rs +++ b/lib/smol_str/tests/test.rs @@ -390,8 +390,8 @@ mod test_str_ext { assert!(!result.is_heap_allocated()); } } -#[cfg(feature = "borsh")] +#[cfg(feature = "borsh")] mod borsh_tests { use borsh::BorshDeserialize; use smol_str::{SmolStr, ToSmolStr}; From 4cdc1b1544aae94c0ff487bea8ffba91f0bbae31 Mon Sep 17 00:00:00 2001 From: james7132 Date: Mon, 22 Sep 2025 01:29:52 -0700 Subject: [PATCH 118/132] Use serde_core over serde --- lib/smol_str/Cargo.toml | 5 +++-- lib/smol_str/src/serde.rs | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/lib/smol_str/Cargo.toml b/lib/smol_str/Cargo.toml index e89e0e8e02..d938e40ac2 100644 --- a/lib/smol_str/Cargo.toml +++ b/lib/smol_str/Cargo.toml @@ -12,7 +12,7 @@ rustdoc-args = ["--cfg", "docsrs"] all-features = true [dependencies] -serde = { version = 
"1.0", optional = true, default-features = false } +serde_core = { version = "1.0.220", optional = true, default-features = false } borsh = { version = "1.4.0", optional = true, default-features = false } arbitrary = { version = "1.3", optional = true } @@ -23,4 +23,5 @@ serde = { version = "1.0", features = ["derive"] } [features] default = ["std"] -std = ["serde?/std", "borsh?/std"] +std = ["serde_core?/std", "borsh?/std"] +serde = ["dep:serde_core"] diff --git a/lib/smol_str/src/serde.rs b/lib/smol_str/src/serde.rs index 05b8fecacc..4f08b444c5 100644 --- a/lib/smol_str/src/serde.rs +++ b/lib/smol_str/src/serde.rs @@ -2,6 +2,7 @@ use alloc::{string::String, vec::Vec}; use core::fmt; use serde::de::{Deserializer, Error, Unexpected, Visitor}; +use serde_core as serde; use crate::SmolStr; From a267e0905e8ed844ecad8e40d804adf662a2b11c Mon Sep 17 00:00:00 2001 From: Alex Butler Date: Sun, 14 Sep 2025 00:36:31 +0100 Subject: [PATCH 119/132] Optimise to_ascii_{upper,lower}case_smolstr --- lib/smol_str/src/lib.rs | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/lib/smol_str/src/lib.rs b/lib/smol_str/src/lib.rs index d76f029dbe..ff25651f54 100644 --- a/lib/smol_str/src/lib.rs +++ b/lib/smol_str/src/lib.rs @@ -644,12 +644,36 @@ impl StrExt for str { #[inline] fn to_ascii_lowercase_smolstr(&self) -> SmolStr { - from_char_iter(self.chars().map(|c| c.to_ascii_lowercase())) + let len = self.len(); + if len <= INLINE_CAP { + let mut buf = [0u8; INLINE_CAP]; + buf[..len].copy_from_slice(self.as_bytes()); + buf[..len].make_ascii_lowercase(); + SmolStr(Repr::Inline { + // SAFETY: `len` is in bounds + len: unsafe { InlineSize::transmute_from_u8(len as u8) }, + buf, + }) + } else { + self.to_ascii_lowercase().into() + } } #[inline] fn to_ascii_uppercase_smolstr(&self) -> SmolStr { - from_char_iter(self.chars().map(|c| c.to_ascii_uppercase())) + let len = self.len(); + if len <= INLINE_CAP { + let mut buf = [0u8; INLINE_CAP]; + 
buf[..len].copy_from_slice(self.as_bytes()); + buf[..len].make_ascii_uppercase(); + SmolStr(Repr::Inline { + // SAFETY: `len` is in bounds + len: unsafe { InlineSize::transmute_from_u8(len as u8) }, + buf, + }) + } else { + self.to_ascii_uppercase().into() + } } #[inline] From f8612bc59685172a93d1664789e07b82197656e1 Mon Sep 17 00:00:00 2001 From: Alex Butler Date: Sun, 14 Sep 2025 01:38:41 +0100 Subject: [PATCH 120/132] Update changelog --- lib/smol_str/CHANGELOG.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/lib/smol_str/CHANGELOG.md b/lib/smol_str/CHANGELOG.md index 41f1377430..190d6e8309 100644 --- a/lib/smol_str/CHANGELOG.md +++ b/lib/smol_str/CHANGELOG.md @@ -1,5 +1,10 @@ # Changelog +## Unreleased + +- Optimise `StrExt::to_ascii_lowercase_smolstr`, `StrExt::to_ascii_uppercase_smolstr` + ~2x speedup inline, ~4-22x for heap. + ## 0.3.2 - 2024-10-23 - Fix `SmolStrBuilder::push` incorrectly padding null bytes when spilling onto the heap on a From 09ecb4609499ac3286ac4bd870abc357f6b7e6bf Mon Sep 17 00:00:00 2001 From: Alex Butler Date: Sat, 13 Sep 2025 16:34:55 +0100 Subject: [PATCH 121/132] Add SmolStr vs String benchmarks --- lib/smol_str/Cargo.toml | 9 ++ lib/smol_str/README.md | 6 ++ lib/smol_str/benches/bench.rs | 157 ++++++++++++++++++++++++++++++++++ 3 files changed, 172 insertions(+) create mode 100644 lib/smol_str/benches/bench.rs diff --git a/lib/smol_str/Cargo.toml b/lib/smol_str/Cargo.toml index d938e40ac2..e6f10a2715 100644 --- a/lib/smol_str/Cargo.toml +++ b/lib/smol_str/Cargo.toml @@ -20,8 +20,17 @@ arbitrary = { version = "1.3", optional = true } proptest = "1.5" serde_json = "1.0" serde = { version = "1.0", features = ["derive"] } +criterion = "0.7" +rand = "0.9.2" [features] default = ["std"] std = ["serde_core?/std", "borsh?/std"] serde = ["dep:serde_core"] + +[[bench]] +name = "bench" +harness = false + +[profile.bench] +lto = "fat" diff --git a/lib/smol_str/README.md b/lib/smol_str/README.md index ce16759e81..56296fb53f 100644 --- 
a/lib/smol_str/README.md +++ b/lib/smol_str/README.md @@ -22,6 +22,12 @@ languages. Strings consisting of a series of newlines, followed by a series of whitespace are a typical pattern in computer programs because of indentation. Note that a specialized interner might be a better solution for some use cases. +## Benchmarks +Run criterion benches with +```sh +cargo bench --bench \* -- --quick +``` + ## MSRV Policy Minimal Supported Rust Version: latest stable. diff --git a/lib/smol_str/benches/bench.rs b/lib/smol_str/benches/bench.rs new file mode 100644 index 0000000000..fa4c58832d --- /dev/null +++ b/lib/smol_str/benches/bench.rs @@ -0,0 +1,157 @@ +//! SmolStr vs String benchmarks. +use criterion::{criterion_group, criterion_main, Criterion}; +use rand::distr::{Alphanumeric, SampleString}; +use smol_str::{format_smolstr, SmolStr, StrExt, ToSmolStr}; +use std::hint::black_box; + +/// 12: small (inline) +/// 50: medium (heap) +/// 1000: large (heap) +const TEST_LENS: [usize; 3] = [12, 50, 1000]; + +fn format_bench(c: &mut Criterion) { + for len in TEST_LENS { + let n = rand::random_range(10000..99999); + let str_len = len.checked_sub(n.to_smolstr().len()).unwrap(); + let str = Alphanumeric.sample_string(&mut rand::rng(), str_len); + + c.bench_function(&format!("SmolStr format_smolstr! len={len}"), |b| { + let mut v = <_>::default(); + b.iter(|| v = format_smolstr!("{str}-{n}")); + assert_eq!(v, format!("{str}-{n}")); + }); + c.bench_function(&format!("std format! 
len={len}"), |b| { + let mut v = <_>::default(); + b.iter(|| v = format!("{str}-{n}")); + assert_eq!(v, format!("{str}-{n}")); + }); + } +} + +fn from_str_bench(c: &mut Criterion) { + for len in TEST_LENS { + let str = Alphanumeric.sample_string(&mut rand::rng(), len); + + c.bench_function(&format!("SmolStr::from len={len}"), |b| { + let mut v = <_>::default(); + b.iter(|| v = SmolStr::from(black_box(&str))); + assert_eq!(v, str); + }); + c.bench_function(&format!("std String::from len={len}"), |b| { + let mut v = <_>::default(); + b.iter(|| v = String::from(black_box(&str))); + assert_eq!(v, str); + }); + } +} + +fn clone_bench(c: &mut Criterion) { + for len in TEST_LENS { + let str = Alphanumeric.sample_string(&mut rand::rng(), len); + let smolstr = SmolStr::new(&str); + + c.bench_function(&format!("SmolStr::clone len={len}"), |b| { + let mut v = <_>::default(); + b.iter(|| v = smolstr.clone()); + assert_eq!(v, str); + }); + c.bench_function(&format!("std String::clone len={len}"), |b| { + let mut v = <_>::default(); + b.iter(|| v = str.clone()); + assert_eq!(v, str); + }); + } +} + +fn eq_bench(c: &mut Criterion) { + for len in TEST_LENS { + let str = Alphanumeric.sample_string(&mut rand::rng(), len); + let smolstr = SmolStr::new(&str); + + c.bench_function(&format!("SmolStr::eq len={len}"), |b| { + let mut v = false; + b.iter(|| v = smolstr == black_box(&str)); + assert!(v); + }); + c.bench_function(&format!("std String::eq len={len}"), |b| { + let mut v = false; + b.iter(|| v = &str == black_box(&str)); + assert!(v); + }); + } +} + +fn to_lowercase_bench(c: &mut Criterion) { + const END_CHAR: char = 'İ'; + + for len in TEST_LENS { + // mostly ascii seq with some non-ascii at the end + let mut str = Alphanumeric.sample_string(&mut rand::rng(), len - END_CHAR.len_utf8()); + str.push(END_CHAR); + let str = str.as_str(); + + c.bench_function(&format!("SmolStr to_lowercase_smolstr len={len}"), |b| { + let mut v = <_>::default(); + b.iter(|| v = 
str.to_lowercase_smolstr()); + assert_eq!(v, str.to_lowercase()); + }); + c.bench_function(&format!("std to_lowercase len={len}"), |b| { + let mut v = <_>::default(); + b.iter(|| v = str.to_lowercase()); + assert_eq!(v, str.to_lowercase()); + }); + } +} + +fn to_ascii_lowercase_bench(c: &mut Criterion) { + for len in TEST_LENS { + let str = Alphanumeric.sample_string(&mut rand::rng(), len); + let str = str.as_str(); + + c.bench_function( + &format!("SmolStr to_ascii_lowercase_smolstr len={len}"), + |b| { + let mut v = <_>::default(); + b.iter(|| v = str.to_ascii_lowercase_smolstr()); + assert_eq!(v, str.to_ascii_lowercase()); + }, + ); + c.bench_function(&format!("std to_ascii_lowercase len={len}"), |b| { + let mut v = <_>::default(); + b.iter(|| v = str.to_ascii_lowercase()); + assert_eq!(v, str.to_ascii_lowercase()); + }); + } +} + +fn replace_bench(c: &mut Criterion) { + for len in TEST_LENS { + let s_dash_s = Alphanumeric.sample_string(&mut rand::rng(), len / 2) + + "-" + + &Alphanumeric.sample_string(&mut rand::rng(), len - 1 - len / 2); + let str = s_dash_s.as_str(); + + c.bench_function(&format!("SmolStr replace_smolstr len={len}"), |b| { + let mut v = <_>::default(); + b.iter(|| v = str.replace_smolstr("-", "_")); + assert_eq!(v, str.replace("-", "_")); + }); + c.bench_function(&format!("std replace len={len}"), |b| { + let mut v = <_>::default(); + b.iter(|| v = str.replace("-", "_")); + assert_eq!(v, str.replace("-", "_")); + }); + } +} + +criterion_group!( + benches, + format_bench, + from_str_bench, + clone_bench, + eq_bench, + to_lowercase_bench, + to_ascii_lowercase_bench, + replace_bench, +); +criterion_main!(benches); From 2cb99fb4186c58237c7bff1528eae623a1119146 Mon Sep 17 00:00:00 2001 From: Alex Butler Date: Sun, 14 Sep 2025 00:30:49 +0100 Subject: [PATCH 122/132] Only bench smol_str stuff --- lib/smol_str/benches/bench.rs | 49 ++++------------------------------- 1 file changed, 5 insertions(+), 44 deletions(-) diff --git 
a/lib/smol_str/benches/bench.rs b/lib/smol_str/benches/bench.rs index fa4c58832d..2643b02557 100644 --- a/lib/smol_str/benches/bench.rs +++ b/lib/smol_str/benches/bench.rs @@ -1,4 +1,3 @@ -//! SmolStr vs String benchmarks. use criterion::{criterion_group, criterion_main, Criterion}; use rand::distr::{Alphanumeric, SampleString}; use smol_str::{format_smolstr, SmolStr, StrExt, ToSmolStr}; @@ -15,16 +14,11 @@ fn format_bench(c: &mut Criterion) { let str_len = len.checked_sub(n.to_smolstr().len()).unwrap(); let str = Alphanumeric.sample_string(&mut rand::rng(), str_len); - c.bench_function(&format!("SmolStr format_smolstr! len={len}"), |b| { + c.bench_function(&format!("format_smolstr! len={len}"), |b| { let mut v = <_>::default(); b.iter(|| v = format_smolstr!("{str}-{n}")); assert_eq!(v, format!("{str}-{n}")); }); - c.bench_function(&format!("std format! len={len}"), |b| { - let mut v = <_>::default(); - b.iter(|| v = format!("{str}-{n}")); - assert_eq!(v, format!("{str}-{n}")); - }); } } @@ -37,11 +31,6 @@ fn from_str_bench(c: &mut Criterion) { b.iter(|| v = SmolStr::from(black_box(&str))); assert_eq!(v, str); }); - c.bench_function(&format!("std String::from len={len}"), |b| { - let mut v = <_>::default(); - b.iter(|| v = String::from(black_box(&str))); - assert_eq!(v, str); - }); } } @@ -55,11 +44,6 @@ fn clone_bench(c: &mut Criterion) { b.iter(|| v = smolstr.clone()); assert_eq!(v, str); }); - c.bench_function(&format!("std String::clone len={len}"), |b| { - let mut v = <_>::default(); - b.iter(|| v = str.clone()); - assert_eq!(v, str); - }); } } @@ -73,11 +57,6 @@ fn eq_bench(c: &mut Criterion) { b.iter(|| v = smolstr == black_box(&str)); assert!(v); }); - c.bench_function(&format!("std String::eq len={len}"), |b| { - let mut v = false; - b.iter(|| v = &str == black_box(&str)); - assert!(v); - }); } } @@ -90,16 +69,11 @@ fn to_lowercase_bench(c: &mut Criterion) { str.push(END_CHAR); let str = str.as_str(); - c.bench_function(&format!("SmolStr 
to_lowercase_smolstr len={len}"), |b| { + c.bench_function(&format!("to_lowercase_smolstr len={len}"), |b| { let mut v = <_>::default(); b.iter(|| v = str.to_lowercase_smolstr()); assert_eq!(v, str.to_lowercase()); }); - c.bench_function(&format!("std to_lowercase len={len}"), |b| { - let mut v = <_>::default(); - b.iter(|| v = str.to_lowercase()); - assert_eq!(v, str.to_lowercase()); - }); } } @@ -108,17 +82,9 @@ fn to_ascii_lowercase_bench(c: &mut Criterion) { let str = Alphanumeric.sample_string(&mut rand::rng(), len); let str = str.as_str(); - c.bench_function( - &format!("SmolStr to_ascii_lowercase_smolstr len={len}"), - |b| { - let mut v = <_>::default(); - b.iter(|| v = str.to_ascii_lowercase_smolstr()); - assert_eq!(v, str.to_ascii_lowercase()); - }, - ); - c.bench_function(&format!("std to_ascii_lowercase len={len}"), |b| { + c.bench_function(&format!("to_ascii_lowercase_smolstr len={len}"), |b| { let mut v = <_>::default(); - b.iter(|| v = str.to_ascii_lowercase()); + b.iter(|| v = str.to_ascii_lowercase_smolstr()); assert_eq!(v, str.to_ascii_lowercase()); }); } @@ -131,16 +97,11 @@ fn replace_bench(c: &mut Criterion) { + &Alphanumeric.sample_string(&mut rand::rng(), len - 1 - len / 2); let str = s_dash_s.as_str(); - c.bench_function(&format!("SmolStr replace_smolstr len={len}"), |b| { + c.bench_function(&format!("replace_smolstr len={len}"), |b| { let mut v = <_>::default(); b.iter(|| v = str.replace_smolstr("-", "_")); assert_eq!(v, str.replace("-", "_")); }); - c.bench_function(&format!("std replace len={len}"), |b| { - let mut v = <_>::default(); - b.iter(|| v = str.replace("-", "_")); - assert_eq!(v, str.replace("-", "_")); - }); } } From eabb2482949ccd7e4021287f8562528f35500292 Mon Sep 17 00:00:00 2001 From: Alex Butler Date: Sun, 14 Sep 2025 12:42:41 +0100 Subject: [PATCH 123/132] CI: Add TEST_BENCHES --- lib/smol_str/.github/ci.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/lib/smol_str/.github/ci.rs b/lib/smol_str/.github/ci.rs index 
21c8584fb9..c594e8973c 100644 --- a/lib/smol_str/.github/ci.rs +++ b/lib/smol_str/.github/ci.rs @@ -39,6 +39,11 @@ fn try_main() -> Result<()> { shell("cargo test --no-default-features --workspace")?; } + { + let _s = Section::new("TEST_BENCHES"); + shell("cargo test --benches --all-features")?; + } + let current_branch = shell_output("git branch --show-current")?; if &current_branch == "master" { let _s = Section::new("PUBLISH"); From bc69b025744a7e7de88a32ade30abee43519b863 Mon Sep 17 00:00:00 2001 From: Alex Butler Date: Sun, 14 Sep 2025 01:23:48 +0100 Subject: [PATCH 124/132] Optimise `to_{lower,upper}case_smolstr` --- lib/smol_str/src/lib.rs | 104 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 98 insertions(+), 6 deletions(-) diff --git a/lib/smol_str/src/lib.rs b/lib/smol_str/src/lib.rs index ff25651f54..f2f021a7b5 100644 --- a/lib/smol_str/src/lib.rs +++ b/lib/smol_str/src/lib.rs @@ -233,8 +233,17 @@ impl iter::FromIterator<char> for SmolStr { } } -fn from_char_iter(mut iter: impl Iterator<Item = char>) -> SmolStr { - let (min_size, _) = iter.size_hint(); +#[inline] +fn from_char_iter(iter: impl Iterator<Item = char>) -> SmolStr { + from_buf_and_chars([0; _], 0, iter) +} + +fn from_buf_and_chars( + mut buf: [u8; INLINE_CAP], + buf_len: usize, + mut iter: impl Iterator<Item = char>, +) -> SmolStr { + let min_size = iter.size_hint().0 + buf_len; if min_size > INLINE_CAP { let heap: String = iter.collect(); if heap.len() <= INLINE_CAP { @@ -243,8 +252,7 @@ fn from_char_iter(mut iter: impl Iterator<Item = char>) -> SmolStr { } return SmolStr(Repr::Heap(heap.into_boxed_str().into())); } - let mut len = 0; - let mut buf = [0u8; INLINE_CAP]; + let mut len = buf_len; while let Some(ch) = iter.next() { let size = ch.len_utf8(); if size + len > INLINE_CAP { @@ -634,12 +642,32 @@ pub trait StrExt: private::Sealed { impl StrExt for str { #[inline] fn to_lowercase_smolstr(&self) -> SmolStr { - from_char_iter(self.chars().flat_map(|c| c.to_lowercase())) + let len = self.len(); + if len <= INLINE_CAP { + let (buf, rest) =
inline_convert_while_ascii(self, u8::to_ascii_lowercase); + from_buf_and_chars( + buf, + len - rest.len(), + rest.chars().flat_map(|c| c.to_lowercase()), + ) + } else { + self.to_lowercase().into() + } } #[inline] fn to_uppercase_smolstr(&self) -> SmolStr { - from_char_iter(self.chars().flat_map(|c| c.to_uppercase())) + let len = self.len(); + if len <= INLINE_CAP { + let (buf, rest) = inline_convert_while_ascii(self, u8::to_ascii_uppercase); + from_buf_and_chars( + buf, + len - rest.len(), + rest.chars().flat_map(|c| c.to_uppercase()), + ) + } else { + self.to_uppercase().into() + } } #[inline] @@ -699,6 +727,70 @@ impl StrExt for str { } } +/// Inline version of std fn `convert_while_ascii`. `s` must have len <= 23. +#[inline] +fn inline_convert_while_ascii(s: &str, convert: fn(&u8) -> u8) -> ([u8; INLINE_CAP], &str) { + // Process the input in chunks of 16 bytes to enable auto-vectorization. + // Previously the chunk size depended on the size of `usize`, + // but on 32-bit platforms with sse or neon is also the better choice. + // The only downside on other platforms would be a bit more loop-unrolling. + const N: usize = 16; + + debug_assert!(s.len() <= INLINE_CAP, "only for inline-able strings"); + + let mut slice = s.as_bytes(); + let mut out = [0u8; INLINE_CAP]; + let mut out_slice = &mut out[..slice.len()]; + let mut is_ascii = [false; N]; + + while slice.len() >= N { + // SAFETY: checked in loop condition + let chunk = unsafe { slice.get_unchecked(..N) }; + // SAFETY: out_slice has at least same length as input slice and gets sliced with the same offsets + let out_chunk = unsafe { out_slice.get_unchecked_mut(..N) }; + + for j in 0..N { + is_ascii[j] = chunk[j] <= 127; + } + + // Auto-vectorization for this check is a bit fragile, sum and comparing against the chunk + // size gives the best result, specifically a pmovmsk instruction on x86. 
+ // See https://github.com/llvm/llvm-project/issues/96395 for why llvm currently does not + // currently recognize other similar idioms. + if is_ascii.iter().map(|x| *x as u8).sum::<u8>() as usize != N { + break; + } + + for j in 0..N { + out_chunk[j] = convert(&chunk[j]); + } + + slice = unsafe { slice.get_unchecked(N..) }; + out_slice = unsafe { out_slice.get_unchecked_mut(N..) }; + } + + // handle the remainder as individual bytes + while !slice.is_empty() { + let byte = slice[0]; + if byte > 127 { + break; + } + // SAFETY: out_slice has at least same length as input slice + unsafe { + *out_slice.get_unchecked_mut(0) = convert(&byte); + } + slice = unsafe { slice.get_unchecked(1..) }; + out_slice = unsafe { out_slice.get_unchecked_mut(1..) }; + } + + unsafe { + // SAFETY: we know this is a valid char boundary + // since we only skipped over leading ascii bytes + let rest = core::str::from_utf8_unchecked(slice); + (out, rest) + } +} + impl<T> ToSmolStr for T where T: fmt::Display + ?Sized, From c9cd0324a7d127b6ee737de3c3bd34de10928070 Mon Sep 17 00:00:00 2001 From: Alex Butler Date: Sun, 14 Sep 2025 01:36:41 +0100 Subject: [PATCH 125/132] Update changelog --- lib/smol_str/CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/smol_str/CHANGELOG.md b/lib/smol_str/CHANGELOG.md index 190d6e8309..2577011ffe 100644 --- a/lib/smol_str/CHANGELOG.md +++ b/lib/smol_str/CHANGELOG.md @@ -4,6 +4,7 @@ - Optimise `StrExt::to_ascii_lowercase_smolstr`, `StrExt::to_ascii_uppercase_smolstr` ~2x speedup inline, ~4-22x for heap. +- Optimise `StrExt::to_lowercase_smolstr`, `StrExt::to_uppercase_smolstr` ~2x speedup inline, ~5-50x for heap.
## 0.3.2 - 2024-10-23 From b4ce6529fad5afcac9613e9b7415e2fb1940490e Mon Sep 17 00:00:00 2001 From: Alex Butler Date: Mon, 15 Sep 2025 09:39:19 +0100 Subject: [PATCH 126/132] Add test from_buf_and_chars_size_hinted_heap & fix --- lib/smol_str/src/lib.rs | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/lib/smol_str/src/lib.rs b/lib/smol_str/src/lib.rs index f2f021a7b5..5ef6260f56 100644 --- a/lib/smol_str/src/lib.rs +++ b/lib/smol_str/src/lib.rs @@ -245,7 +245,11 @@ fn from_buf_and_chars( ) -> SmolStr { let min_size = iter.size_hint().0 + buf_len; if min_size > INLINE_CAP { - let heap: String = iter.collect(); + let heap: String = core::str::from_utf8(&buf[..buf_len]) + .unwrap() + .chars() + .chain(iter) + .collect(); if heap.len() <= INLINE_CAP { // size hint lied return SmolStr::new_inline(&heap); @@ -940,3 +944,14 @@ impl<'a> arbitrary::Arbitrary<'a> for SmolStr { mod borsh; #[cfg(feature = "serde")] mod serde; + +#[test] +fn from_buf_and_chars_size_hinted_heap() { + let str = from_buf_and_chars( + *b"abcdefghijklmnopqr00000", + 18, + "_0x1x2x3x4x5x6x7x8x9x10x11x12x13".chars(), + ); + + assert_eq!(str, "abcdefghijklmnopqr_0x1x2x3x4x5x6x7x8x9x10x11x12x13"); +} From a2b3d25dbf215eb3f7236a93d4cab0bd51d3ab63 Mon Sep 17 00:00:00 2001 From: Alex Butler Date: Mon, 15 Sep 2025 20:26:43 +0100 Subject: [PATCH 127/132] Optimise replacen_smolstr for single ascii replace --- lib/smol_str/CHANGELOG.md | 2 ++ lib/smol_str/src/lib.rs | 35 +++++++++++++++++++++++++++++++++++ lib/smol_str/tests/test.rs | 7 +++++++ 3 files changed, 44 insertions(+) diff --git a/lib/smol_str/CHANGELOG.md b/lib/smol_str/CHANGELOG.md index 2577011ffe..c0193f6fcb 100644 --- a/lib/smol_str/CHANGELOG.md +++ b/lib/smol_str/CHANGELOG.md @@ -5,6 +5,8 @@ - Optimise `StrExt::to_ascii_lowercase_smolstr`, `StrExt::to_ascii_uppercase_smolstr` ~2x speedup inline, ~4-22x for heap. 
- Optimise `StrExt::to_lowercase_smolstr`, `StrExt::to_uppercase_smolstr` ~2x speedup inline, ~5-50x for heap. +- Optimise `StrExt::replace_smolstr`, `StrExt::replacen_smolstr` for single ascii replace. + ~3x speedup inline, ~1.8x for heap (len=50). ## 0.3.2 - 2024-10-23 diff --git a/lib/smol_str/src/lib.rs b/lib/smol_str/src/lib.rs index 5ef6260f56..d55ba20522 100644 --- a/lib/smol_str/src/lib.rs +++ b/lib/smol_str/src/lib.rs @@ -715,6 +715,13 @@ impl StrExt for str { #[inline] fn replacen_smolstr(&self, from: &str, to: &str, count: usize) -> SmolStr { + // Fast path for replacing a single ASCII character with another inline. + if let [from_u8] = from.as_bytes() { + if let [to_u8] = to.as_bytes() { + return replacen_1_ascii(self, *from_u8, *to_u8, count); + } + } + let mut result = SmolStrBuilder::new(); let mut last_end = 0; for (start, part) in self.match_indices(from).take(count) { @@ -731,6 +738,34 @@ impl StrExt for str { } } +#[inline] +fn replacen_1_ascii(src: &str, from: u8, to: u8, count: usize) -> SmolStr { + let mut replaced = 0; + let mut ascii_replace = |b: &u8| { + if *b == from && replaced != count { + replaced += 1; + to + } else { + *b + } + }; + if src.len() <= INLINE_CAP { + let mut buf = [0u8; INLINE_CAP]; + for (idx, b) in src.as_bytes().iter().enumerate() { + buf[idx] = ascii_replace(b); + } + SmolStr(Repr::Inline { + // SAFETY: `len` is in bounds + len: unsafe { InlineSize::transmute_from_u8(src.len() as u8) }, + buf, + }) + } else { + let out = src.as_bytes().iter().map(ascii_replace).collect(); + // SAFETY: We replaced ascii with ascii on valid utf8 strings. + unsafe { String::from_utf8_unchecked(out).into() } + } +} + /// Inline version of std fn `convert_while_ascii`. `s` must have len <= 23. 
#[inline] fn inline_convert_while_ascii(s: &str, convert: fn(&u8) -> u8) -> ([u8; INLINE_CAP], &str) { diff --git a/lib/smol_str/tests/test.rs b/lib/smol_str/tests/test.rs index 0070b3a5ec..8f7d9ec39a 100644 --- a/lib/smol_str/tests/test.rs +++ b/lib/smol_str/tests/test.rs @@ -389,6 +389,13 @@ mod test_str_ext { assert_eq!(result, "foo_dor_baz"); assert!(!result.is_heap_allocated()); } + + #[test] + fn replacen_1_ascii() { + let result = "foo_bar_baz".replacen_smolstr("o", "u", 1); + assert_eq!(result, "fuo_bar_baz"); + assert!(!result.is_heap_allocated()); + } } #[cfg(feature = "borsh")] From 25cb3f6c560bfca93bd9c32315dcfc17f41b0763 Mon Sep 17 00:00:00 2001 From: Alex Butler Date: Mon, 15 Sep 2025 20:46:47 +0100 Subject: [PATCH 128/132] Optimise replacen 1-ascii when count >= len --- lib/smol_str/CHANGELOG.md | 2 +- lib/smol_str/src/lib.rs | 29 +++++++++++++++-------------- 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/lib/smol_str/CHANGELOG.md b/lib/smol_str/CHANGELOG.md index c0193f6fcb..1dff469b8a 100644 --- a/lib/smol_str/CHANGELOG.md +++ b/lib/smol_str/CHANGELOG.md @@ -6,7 +6,7 @@ ~2x speedup inline, ~4-22x for heap. - Optimise `StrExt::to_lowercase_smolstr`, `StrExt::to_uppercase_smolstr` ~2x speedup inline, ~5-50x for heap. - Optimise `StrExt::replace_smolstr`, `StrExt::replacen_smolstr` for single ascii replace. - ~3x speedup inline, ~1.8x for heap (len=50). + ~3.7x speedup inline, ~2.4x for heap. ## 0.3.2 - 2024-10-23 diff --git a/lib/smol_str/src/lib.rs b/lib/smol_str/src/lib.rs index d55ba20522..3a6442eaab 100644 --- a/lib/smol_str/src/lib.rs +++ b/lib/smol_str/src/lib.rs @@ -714,11 +714,21 @@ impl StrExt for str { } #[inline] - fn replacen_smolstr(&self, from: &str, to: &str, count: usize) -> SmolStr { + fn replacen_smolstr(&self, from: &str, to: &str, mut count: usize) -> SmolStr { // Fast path for replacing a single ASCII character with another inline. 
if let [from_u8] = from.as_bytes() { if let [to_u8] = to.as_bytes() { - return replacen_1_ascii(self, *from_u8, *to_u8, count); + return match self.len() <= count { + true => replacen_1_ascii(self, |b| if b == *from_u8 { *to_u8 } else { b }), + _ => replacen_1_ascii(self, |b| { + if b == *from_u8 && count != 0 { + count -= 1; + *to_u8 + } else { + b + } + }), + }; } } @@ -739,20 +749,11 @@ impl StrExt for str { } #[inline] -fn replacen_1_ascii(src: &str, from: u8, to: u8, count: usize) -> SmolStr { - let mut replaced = 0; - let mut ascii_replace = |b: &u8| { - if *b == from && replaced != count { - replaced += 1; - to - } else { - *b - } - }; +fn replacen_1_ascii(src: &str, mut map: impl FnMut(u8) -> u8) -> SmolStr { if src.len() <= INLINE_CAP { let mut buf = [0u8; INLINE_CAP]; for (idx, b) in src.as_bytes().iter().enumerate() { - buf[idx] = ascii_replace(b); + buf[idx] = map(*b); } SmolStr(Repr::Inline { // SAFETY: `len` is in bounds @@ -760,7 +761,7 @@ fn replacen_1_ascii(src: &str, from: u8, to: u8, count: usize) -> SmolStr { buf, }) } else { - let out = src.as_bytes().iter().map(ascii_replace).collect(); + let out = src.as_bytes().iter().map(|b| map(*b)).collect(); // SAFETY: We replaced ascii with ascii on valid utf8 strings. unsafe { String::from_utf8_unchecked(out).into() } } From 6ca3f3812a1afa61032266510c9ad7f00c6e6814 Mon Sep 17 00:00:00 2001 From: Alex Butler Date: Mon, 15 Sep 2025 21:22:01 +0100 Subject: [PATCH 129/132] Mark replacen_1_ascii as unsafe --- lib/smol_str/src/lib.rs | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/lib/smol_str/src/lib.rs b/lib/smol_str/src/lib.rs index 3a6442eaab..ded07c61c6 100644 --- a/lib/smol_str/src/lib.rs +++ b/lib/smol_str/src/lib.rs @@ -718,16 +718,20 @@ impl StrExt for str { // Fast path for replacing a single ASCII character with another inline. 
if let [from_u8] = from.as_bytes() { if let [to_u8] = to.as_bytes() { - return match self.len() <= count { - true => replacen_1_ascii(self, |b| if b == *from_u8 { *to_u8 } else { b }), - _ => replacen_1_ascii(self, |b| { - if b == *from_u8 && count != 0 { - count -= 1; - *to_u8 - } else { - b - } - }), + return if self.len() <= count { + // SAFETY: `from_u8` & `to_u8` are ascii + unsafe { replacen_1_ascii(self, |b| if b == from_u8 { *to_u8 } else { *b }) } + } else { + unsafe { + replacen_1_ascii(self, |b| { + if b == from_u8 && count != 0 { + count -= 1; + *to_u8 + } else { + *b + } + }) + } }; } } @@ -748,12 +752,13 @@ impl StrExt for str { } } +/// SAFETY: `map` fn must only replace ascii with ascii or return unchanged bytes. #[inline] -fn replacen_1_ascii(src: &str, mut map: impl FnMut(u8) -> u8) -> SmolStr { +unsafe fn replacen_1_ascii(src: &str, mut map: impl FnMut(&u8) -> u8) -> SmolStr { if src.len() <= INLINE_CAP { let mut buf = [0u8; INLINE_CAP]; for (idx, b) in src.as_bytes().iter().enumerate() { - buf[idx] = map(*b); + buf[idx] = map(b); } SmolStr(Repr::Inline { // SAFETY: `len` is in bounds @@ -761,7 +766,7 @@ fn replacen_1_ascii(src: &str, mut map: impl FnMut(u8) -> u8) -> SmolStr { buf, }) } else { - let out = src.as_bytes().iter().map(|b| map(*b)).collect(); + let out = src.as_bytes().iter().map(map).collect(); // SAFETY: We replaced ascii with ascii on valid utf8 strings. 
unsafe { String::from_utf8_unchecked(out).into() } } From b1acd45fce7b5ae8ae19b275ee3031ee37b8bde8 Mon Sep 17 00:00:00 2001 From: Alex Butler Date: Mon, 15 Sep 2025 21:30:34 +0100 Subject: [PATCH 130/132] Update changelog --- lib/smol_str/CHANGELOG.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/smol_str/CHANGELOG.md b/lib/smol_str/CHANGELOG.md index 1dff469b8a..c46000eb73 100644 --- a/lib/smol_str/CHANGELOG.md +++ b/lib/smol_str/CHANGELOG.md @@ -5,8 +5,8 @@ - Optimise `StrExt::to_ascii_lowercase_smolstr`, `StrExt::to_ascii_uppercase_smolstr` ~2x speedup inline, ~4-22x for heap. - Optimise `StrExt::to_lowercase_smolstr`, `StrExt::to_uppercase_smolstr` ~2x speedup inline, ~5-50x for heap. -- Optimise `StrExt::replace_smolstr`, `StrExt::replacen_smolstr` for single ascii replace. - ~3.7x speedup inline, ~2.4x for heap. +- Optimise `StrExt::replace_smolstr`, `StrExt::replacen_smolstr` for single ascii replace, + ~3x speedup inline & heap. ## 0.3.2 - 2024-10-23 From 1af327e78a58cbef5b7c74a1698b593dc4c8bbde Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Thu, 23 Oct 2025 13:15:01 +0200 Subject: [PATCH 131/132] Publish 0.3.3 --- lib/smol_str/CHANGELOG.md | 6 ++++-- lib/smol_str/Cargo.toml | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/lib/smol_str/CHANGELOG.md b/lib/smol_str/CHANGELOG.md index c46000eb73..bd29acc6a6 100644 --- a/lib/smol_str/CHANGELOG.md +++ b/lib/smol_str/CHANGELOG.md @@ -1,8 +1,10 @@ # Changelog -## Unreleased +## Unreleased -- Optimise `StrExt::to_ascii_lowercase_smolstr`, `StrExt::to_ascii_uppercase_smolstr` +## 0.3.3 - 2025-10-23 + +- Optimise `StrExt::to_ascii_lowercase_smolstr`, `StrExt::to_ascii_uppercase_smolstr` ~2x speedup inline, ~4-22x for heap. - Optimise `StrExt::to_lowercase_smolstr`, `StrExt::to_uppercase_smolstr` ~2x speedup inline, ~5-50x for heap. 
- Optimise `StrExt::replace_smolstr`, `StrExt::replacen_smolstr` for single ascii replace, diff --git a/lib/smol_str/Cargo.toml b/lib/smol_str/Cargo.toml index e6f10a2715..277bb0b98c 100644 --- a/lib/smol_str/Cargo.toml +++ b/lib/smol_str/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "smol_str" -version = "0.3.2" +version = "0.3.3" description = "small-string optimized string type with O(1) clone" license = "MIT OR Apache-2.0" repository = "https://github.com/rust-analyzer/smol_str" From 2fcccbbe293975e8ff694d77ea104616ffb8654d Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Thu, 23 Oct 2025 17:10:05 +0200 Subject: [PATCH 132/132] Publish 0.3.4 --- lib/smol_str/CHANGELOG.md | 4 ++++ lib/smol_str/Cargo.toml | 3 ++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/lib/smol_str/CHANGELOG.md b/lib/smol_str/CHANGELOG.md index bd29acc6a6..fb65d88ad1 100644 --- a/lib/smol_str/CHANGELOG.md +++ b/lib/smol_str/CHANGELOG.md @@ -2,6 +2,10 @@ ## Unreleased +## 0.3.4 - 2025-10-23 + +- Added `rust-version` field to `Cargo.toml` + ## 0.3.3 - 2025-10-23 - Optimise `StrExt::to_ascii_lowercase_smolstr`, `StrExt::to_ascii_uppercase_smolstr` diff --git a/lib/smol_str/Cargo.toml b/lib/smol_str/Cargo.toml index 277bb0b98c..4752a84ed4 100644 --- a/lib/smol_str/Cargo.toml +++ b/lib/smol_str/Cargo.toml @@ -1,11 +1,12 @@ [package] name = "smol_str" -version = "0.3.3" +version = "0.3.4" description = "small-string optimized string type with O(1) clone" license = "MIT OR Apache-2.0" repository = "https://github.com/rust-analyzer/smol_str" authors = ["Aleksey Kladov ", "Lukas Wirth "] edition = "2021" +rust-version = "1.89" [package.metadata.docs.rs] rustdoc-args = ["--cfg", "docsrs"]