diff --git a/lib/smol_str/.github/ci.rs b/lib/smol_str/.github/ci.rs new file mode 100644 index 0000000000..c594e8973c --- /dev/null +++ b/lib/smol_str/.github/ci.rs @@ -0,0 +1,127 @@ +use std::{ + env, fs, + process::{self, Command, ExitStatus, Stdio}, + time::Instant, +}; + +type Error = Box<dyn std::error::Error>; +type Result<T> = std::result::Result<T, Error>; + +fn main() { + if let Err(err) = try_main() { + eprintln!("{}", err); + process::exit(1); + } +} + +fn try_main() -> Result<()> { + let cwd = env::current_dir()?; + let cargo_toml = cwd.join("Cargo.toml"); + assert!( + cargo_toml.exists(), + "Cargo.toml not found, cwd: {}", + cwd.display() + ); + + { + let _s = Section::new("BUILD_NO_DEFAULT_FEATURES"); + shell("cargo test --all-features --workspace --no-run --no-default-features")?; + } + + { + let _s = Section::new("BUILD"); + shell("cargo test --all-features --workspace --no-run")?; + } + + { + let _s = Section::new("TEST"); + shell("cargo test --all-features --workspace")?; + shell("cargo test --no-default-features --workspace")?; + } + + { + let _s = Section::new("TEST_BENCHES"); + shell("cargo test --benches --all-features")?; + } + + let current_branch = shell_output("git branch --show-current")?; + if &current_branch == "master" { + let _s = Section::new("PUBLISH"); + let manifest = fs::read_to_string(&cargo_toml)?; + let version = get_field(&manifest, "version")?; + let tag = format!("v{}", version); + let tags = shell_output("git tag --list")?; + + if !tags.contains(&tag) { + let token = env::var("CRATES_IO_TOKEN").unwrap(); + shell(&format!("git tag v{}", version))?; + shell(&format!("cargo publish --token {}", token))?; + shell("git push --tags")?; + } + } + Ok(()) +} + +fn get_field<'a>(text: &'a str, name: &str) -> Result<&'a str> { + for line in text.lines() { + let words = line.split_ascii_whitespace().collect::<Vec<_>>(); + match words.as_slice() { + [n, "=", v, ..] 
if n.trim() == name => { + assert!(v.starts_with('"') && v.ends_with('"')); + return Ok(&v[1..v.len() - 1]); + } + _ => (), + } + } + Err(format!("can't find `{}` in\n----\n{}\n----\n", name, text))? +} + +fn shell(cmd: &str) -> Result<()> { + let status = command(cmd).status()?; + check_status(status) +} + +fn shell_output(cmd: &str) -> Result<String> { + let output = command(cmd).stderr(Stdio::inherit()).output()?; + check_status(output.status)?; + let res = String::from_utf8(output.stdout)?; + let res = res.trim().to_string(); + println!("{}", res); + Ok(res) +} + +fn command(cmd: &str) -> Command { + eprintln!("> {}", cmd); + let words = cmd.split_ascii_whitespace().collect::<Vec<_>>(); + let (cmd, args) = words.split_first().unwrap(); + let mut res = Command::new(cmd); + res.args(args); + res +} + +fn check_status(status: ExitStatus) -> Result<()> { + if !status.success() { + Err(format!("$status: {}", status))?; + } + Ok(()) +} + +struct Section { + name: &'static str, + start: Instant, +} + +impl Section { + fn new(name: &'static str) -> Section { + println!("::group::{}", name); + let start = Instant::now(); + Section { name, start } + } +} + +impl Drop for Section { + fn drop(&mut self) { + eprintln!("{}: {:.2?}", self.name, self.start.elapsed()); + println!("::endgroup::"); + } +} diff --git a/lib/smol_str/.github/workflows/ci.yaml b/lib/smol_str/.github/workflows/ci.yaml new file mode 100644 index 0000000000..1c2e347374 --- /dev/null +++ b/lib/smol_str/.github/workflows/ci.yaml @@ -0,0 +1,36 @@ +name: CI +on: + pull_request: + push: + branches: + - master + - staging + - trying + +env: + CARGO_INCREMENTAL: 0 + CARGO_NET_RETRY: 10 + CI: 1 + RUST_BACKTRACE: short + RUSTFLAGS: -D warnings + RUSTUP_MAX_RETRIES: 10 + +jobs: + rust: + name: Rust + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v2 + with: + fetch-depth: 0 + + - name: Install Rust toolchain + uses: actions-rust-lang/setup-rust-toolchain@v1 + with: + cache: false + + - 
run: rustc ./.github/ci.rs && ./ci + env: + CRATES_IO_TOKEN: ${{ secrets.CRATES_IO_TOKEN }} diff --git a/lib/smol_str/.gitignore b/lib/smol_str/.gitignore new file mode 100644 index 0000000000..0c8227b253 --- /dev/null +++ b/lib/smol_str/.gitignore @@ -0,0 +1,4 @@ +/target +/ci +/.vscode +Cargo.lock diff --git a/lib/smol_str/CHANGELOG.md b/lib/smol_str/CHANGELOG.md new file mode 100644 index 0000000000..fb65d88ad1 --- /dev/null +++ b/lib/smol_str/CHANGELOG.md @@ -0,0 +1,38 @@ +# Changelog + +## Unreleased + +## 0.3.4 - 2025-10-23 + +- Added `rust-version` field to `Cargo.toml` + +## 0.3.3 - 2025-10-23 + +- Optimise `StrExt::to_ascii_lowercase_smolstr`, `StrExt::to_ascii_uppercase_smolstr` + ~2x speedup inline, ~4-22x for heap. +- Optimise `StrExt::to_lowercase_smolstr`, `StrExt::to_uppercase_smolstr` ~2x speedup inline, ~5-50x for heap. +- Optimise `StrExt::replace_smolstr`, `StrExt::replacen_smolstr` for single ascii replace, + ~3x speedup inline & heap. + +## 0.3.2 - 2024-10-23 + +- Fix `SmolStrBuilder::push` incorrectly padding null bytes when spilling onto the heap on a + multibyte character push + +## 0.3.1 - 2024-09-04 + +- Fix `SmolStrBuilder` leaking implementation details + +## 0.3.0 - 2024-09-04 + +- Remove deprecated `SmolStr::new_inline_from_ascii` function +- Remove `SmolStr::to_string` in favor of `ToString::to_string` +- Add `impl AsRef<[u8]> for SmolStr` impl +- Add `impl AsRef<OsStr> for SmolStr` impl +- Add `impl AsRef<Path> for SmolStr` impl +- Add `SmolStrBuilder` + +## 0.2.2 - 2024-05-14 + +- Add `StrExt` trait providing `to_lowercase_smolstr`, `replace_smolstr` and similar +- Add `PartialEq` optimization for `ptr_eq`-able representations diff --git a/lib/smol_str/Cargo.toml b/lib/smol_str/Cargo.toml new file mode 100644 index 0000000000..4752a84ed4 --- /dev/null +++ b/lib/smol_str/Cargo.toml @@ -0,0 +1,37 @@ +[package] +name = "smol_str" +version = "0.3.4" +description = "small-string optimized string type with O(1) clone" +license = "MIT OR Apache-2.0" 
+repository = "https://github.com/rust-analyzer/smol_str" +authors = ["Aleksey Kladov ", "Lukas Wirth "] +edition = "2021" +rust-version = "1.89" + +[package.metadata.docs.rs] +rustdoc-args = ["--cfg", "docsrs"] +all-features = true + +[dependencies] +serde_core = { version = "1.0.220", optional = true, default-features = false } +borsh = { version = "1.4.0", optional = true, default-features = false } +arbitrary = { version = "1.3", optional = true } + +[dev-dependencies] +proptest = "1.5" +serde_json = "1.0" +serde = { version = "1.0", features = ["derive"] } +criterion = "0.7" +rand = "0.9.2" + +[features] +default = ["std"] +std = ["serde_core?/std", "borsh?/std"] +serde = ["dep:serde_core"] + +[[bench]] +name = "bench" +harness = false + +[profile.bench] +lto = "fat" diff --git a/lib/smol_str/LICENSE-APACHE b/lib/smol_str/LICENSE-APACHE new file mode 100644 index 0000000000..16fe87b06e --- /dev/null +++ b/lib/smol_str/LICENSE-APACHE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. 
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ +Copyright [yyyy] [name of copyright owner] + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. diff --git a/lib/smol_str/LICENSE-MIT b/lib/smol_str/LICENSE-MIT new file mode 100644 index 0000000000..31aa79387f --- /dev/null +++ b/lib/smol_str/LICENSE-MIT @@ -0,0 +1,23 @@ +Permission is hereby granted, free of charge, to any +person obtaining a copy of this software and associated +documentation files (the "Software"), to deal in the +Software without restriction, including without +limitation the rights to use, copy, modify, merge, +publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software +is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice +shall be included in all copies or substantial portions +of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. 
diff --git a/lib/smol_str/README.md b/lib/smol_str/README.md new file mode 100644 index 0000000000..56296fb53f --- /dev/null +++ b/lib/smol_str/README.md @@ -0,0 +1,35 @@ +# smol_str + +[![CI](https://github.com/rust-analyzer/smol_str/workflows/CI/badge.svg)](https://github.com/rust-analyzer/smol_str/actions?query=branch%3Amaster+workflow%3ACI) +[![Crates.io](https://img.shields.io/crates/v/smol_str.svg)](https://crates.io/crates/smol_str) +[![API reference](https://docs.rs/smol_str/badge.svg)](https://docs.rs/smol_str/) + + +A `SmolStr` is a string type that has the following properties: + +* `size_of::<SmolStr>() == 24` (therefore `== size_of::<String>()` on 64 bit platforms) +* `Clone` is `O(1)` +* Strings are stack-allocated if they are: + * Up to 23 bytes long + * Longer than 23 bytes, but substrings of `WS` (see `src/lib.rs`). Such strings consist + solely of consecutive newlines, followed by consecutive spaces +* If a string does not satisfy the aforementioned conditions, it is heap-allocated +* Additionally, a `SmolStr` can be explicitly created from a `&'static str` without allocation + +Unlike `String`, however, `SmolStr` is immutable. The primary use case for +`SmolStr` is a good enough default storage for tokens of typical programming +languages. Strings consisting of a series of newlines, followed by a series of +whitespace are a typical pattern in computer programs because of indentation. +Note that a specialized interner might be a better solution for some use cases. + +## Benchmarks +Run criterion benches with +```sh +cargo bench --bench \* -- --quick +``` + +## MSRV Policy + +Minimal Supported Rust Version: latest stable. + +Bumping MSRV is not considered a semver-breaking change. 
diff --git a/lib/smol_str/benches/bench.rs b/lib/smol_str/benches/bench.rs new file mode 100644 index 0000000000..2643b02557 --- /dev/null +++ b/lib/smol_str/benches/bench.rs @@ -0,0 +1,118 @@ +use criterion::{criterion_group, criterion_main, Criterion}; +use rand::distr::{Alphanumeric, SampleString}; +use smol_str::{format_smolstr, SmolStr, StrExt, ToSmolStr}; +use std::hint::black_box; + +/// 12: small (inline) +/// 50: medium (heap) +/// 1000: large (heap) +const TEST_LENS: [usize; 3] = [12, 50, 1000]; + +fn format_bench(c: &mut Criterion) { + for len in TEST_LENS { + let n = rand::random_range(10000..99999); + let str_len = len.checked_sub(n.to_smolstr().len()).unwrap(); + let str = Alphanumeric.sample_string(&mut rand::rng(), str_len); + + c.bench_function(&format!("format_smolstr! len={len}"), |b| { + let mut v = <_>::default(); + b.iter(|| v = format_smolstr!("{str}-{n}")); + assert_eq!(v, format!("{str}-{n}")); + }); + } +} + +fn from_str_bench(c: &mut Criterion) { + for len in TEST_LENS { + let str = Alphanumeric.sample_string(&mut rand::rng(), len); + + c.bench_function(&format!("SmolStr::from len={len}"), |b| { + let mut v = <_>::default(); + b.iter(|| v = SmolStr::from(black_box(&str))); + assert_eq!(v, str); + }); + } +} + +fn clone_bench(c: &mut Criterion) { + for len in TEST_LENS { + let str = Alphanumeric.sample_string(&mut rand::rng(), len); + let smolstr = SmolStr::new(&str); + + c.bench_function(&format!("SmolStr::clone len={len}"), |b| { + let mut v = <_>::default(); + b.iter(|| v = smolstr.clone()); + assert_eq!(v, str); + }); + } +} + +fn eq_bench(c: &mut Criterion) { + for len in TEST_LENS { + let str = Alphanumeric.sample_string(&mut rand::rng(), len); + let smolstr = SmolStr::new(&str); + + c.bench_function(&format!("SmolStr::eq len={len}"), |b| { + let mut v = false; + b.iter(|| v = smolstr == black_box(&str)); + assert!(v); + }); + } +} + +fn to_lowercase_bench(c: &mut Criterion) { + const END_CHAR: char = 'İ'; + + for len in TEST_LENS { 
+ // mostly ascii seq with some non-ascii at the end + let mut str = Alphanumeric.sample_string(&mut rand::rng(), len - END_CHAR.len_utf8()); + str.push(END_CHAR); + let str = str.as_str(); + + c.bench_function(&format!("to_lowercase_smolstr len={len}"), |b| { + let mut v = <_>::default(); + b.iter(|| v = str.to_lowercase_smolstr()); + assert_eq!(v, str.to_lowercase()); + }); + } +} + +fn to_ascii_lowercase_bench(c: &mut Criterion) { + for len in TEST_LENS { + let str = Alphanumeric.sample_string(&mut rand::rng(), len); + let str = str.as_str(); + + c.bench_function(&format!("to_ascii_lowercase_smolstr len={len}"), |b| { + let mut v = <_>::default(); + b.iter(|| v = str.to_ascii_lowercase_smolstr()); + assert_eq!(v, str.to_ascii_lowercase()); + }); + } +} + +fn replace_bench(c: &mut Criterion) { + for len in TEST_LENS { + let s_dash_s = Alphanumeric.sample_string(&mut rand::rng(), len / 2) + + "-" + + &Alphanumeric.sample_string(&mut rand::rng(), len - 1 - len / 2); + let str = s_dash_s.as_str(); + + c.bench_function(&format!("replace_smolstr len={len}"), |b| { + let mut v = <_>::default(); + b.iter(|| v = str.replace_smolstr("-", "_")); + assert_eq!(v, str.replace("-", "_")); + }); + } +} + +criterion_group!( + benches, + format_bench, + from_str_bench, + clone_bench, + eq_bench, + to_lowercase_bench, + to_ascii_lowercase_bench, + replace_bench, +); +criterion_main!(benches); diff --git a/lib/smol_str/bors.toml b/lib/smol_str/bors.toml new file mode 100644 index 0000000000..b92b99ac30 --- /dev/null +++ b/lib/smol_str/bors.toml @@ -0,0 +1,2 @@ +status = [ "Rust" ] +delete_merged_branches = true diff --git a/lib/smol_str/src/borsh.rs b/lib/smol_str/src/borsh.rs new file mode 100644 index 0000000000..ebb20d71a0 --- /dev/null +++ b/lib/smol_str/src/borsh.rs @@ -0,0 +1,40 @@ +use crate::{Repr, SmolStr, INLINE_CAP}; +use alloc::string::{String, ToString}; +use borsh::{ + io::{Error, ErrorKind, Read, Write}, + BorshDeserialize, BorshSerialize, +}; +use 
core::mem::transmute; + +impl BorshSerialize for SmolStr { + fn serialize<W: Write>(&self, writer: &mut W) -> borsh::io::Result<()> { + self.as_str().serialize(writer) + } +} + +impl BorshDeserialize for SmolStr { + #[inline] + fn deserialize_reader<R: Read>(reader: &mut R) -> borsh::io::Result<Self> { + let len = u32::deserialize_reader(reader)?; + if (len as usize) < INLINE_CAP { + let mut buf = [0u8; INLINE_CAP]; + reader.read_exact(&mut buf[..len as usize])?; + _ = core::str::from_utf8(&buf[..len as usize]).map_err(|err| { + let msg = err.to_string(); + Error::new(ErrorKind::InvalidData, msg) + })?; + Ok(SmolStr(Repr::Inline { + len: unsafe { transmute::<u8, crate::InlineSize>(len as u8) }, + buf, + })) + } else { + // u8::vec_from_reader always returns Some on success in current implementation + let vec = u8::vec_from_reader(len, reader)? + .ok_or_else(|| Error::other("u8::vec_from_reader unexpectedly returned None"))?; + Ok(SmolStr::from(String::from_utf8(vec).map_err(|err| { + let msg = err.to_string(); + Error::new(ErrorKind::InvalidData, msg) + })?)) + } + } +} diff --git a/lib/smol_str/src/lib.rs b/lib/smol_str/src/lib.rs new file mode 100644 index 0000000000..ded07c61c6 --- /dev/null +++ b/lib/smol_str/src/lib.rs @@ -0,0 +1,998 @@ +#![cfg_attr(not(feature = "std"), no_std)] +#![cfg_attr(docsrs, feature(doc_auto_cfg))] + +extern crate alloc; + +use alloc::{borrow::Cow, boxed::Box, string::String, sync::Arc}; +use core::{ + borrow::Borrow, + cmp::{self, Ordering}, + convert::Infallible, + fmt, hash, iter, mem, ops, + str::FromStr, +}; + +/// A `SmolStr` is a string type that has the following properties: +/// +/// * `size_of::<SmolStr>() == 24` (therefore `== size_of::<String>()` on 64 bit platforms) +/// * `Clone` is `O(1)` +/// * Strings are stack-allocated if they are: +/// * Up to 23 bytes long +/// * Longer than 23 bytes, but substrings of `WS` (see below). 
Such strings consist +/// solely of consecutive newlines, followed by consecutive spaces +/// * If a string does not satisfy the aforementioned conditions, it is heap-allocated +/// * Additionally, a `SmolStr` can be explicitly created from a `&'static str` without allocation +/// +/// Unlike `String`, however, `SmolStr` is immutable. The primary use case for +/// `SmolStr` is a good enough default storage for tokens of typical programming +/// languages. Strings consisting of a series of newlines, followed by a series of +/// whitespace are a typical pattern in computer programs because of indentation. +/// Note that a specialized interner might be a better solution for some use cases. +/// +/// `WS`: A string of 32 newlines followed by 128 spaces. +pub struct SmolStr(Repr); + +impl SmolStr { + /// Constructs an inline variant of `SmolStr`. + /// + /// This never allocates. + /// + /// # Panics + /// + /// Panics if `text.len() > 23`. + #[inline] + pub const fn new_inline(text: &str) -> SmolStr { + assert!(text.len() <= INLINE_CAP); // avoids bounds checks in loop + + let text = text.as_bytes(); + let mut buf = [0; INLINE_CAP]; + let mut i = 0; + while i < text.len() { + buf[i] = text[i]; + i += 1 + } + SmolStr(Repr::Inline { + // SAFETY: We know that `len` is less than or equal to the maximum value of `InlineSize` + // as we asserted it. + len: unsafe { InlineSize::transmute_from_u8(text.len() as u8) }, + buf, + }) + } + + /// Constructs a `SmolStr` from a statically allocated string. + /// + /// This never allocates. + #[inline(always)] + pub const fn new_static(text: &'static str) -> SmolStr { + // NOTE: this never uses the inline storage; if a canonical + // representation is needed, we could check for `len() < INLINE_CAP` + // and call `new_inline`, but this would mean an extra branch. + SmolStr(Repr::Static(text)) + } + + /// Constructs a `SmolStr` from a `str`, heap-allocating if necessary. 
+ #[inline(always)] + pub fn new(text: impl AsRef<str>) -> SmolStr { + SmolStr(Repr::new(text.as_ref())) + } + + /// Returns a `&str` slice of this `SmolStr`. + #[inline(always)] + pub fn as_str(&self) -> &str { + self.0.as_str() + } + + /// Returns the length of `self` in bytes. + #[inline(always)] + pub fn len(&self) -> usize { + self.0.len() + } + + /// Returns `true` if `self` has a length of zero bytes. + #[inline(always)] + pub fn is_empty(&self) -> bool { + self.0.is_empty() + } + + /// Returns `true` if `self` is heap-allocated. + #[inline(always)] + pub const fn is_heap_allocated(&self) -> bool { + matches!(self.0, Repr::Heap(..)) + } +} + +impl Clone for SmolStr { + #[inline] + fn clone(&self) -> Self { + if !self.is_heap_allocated() { + // SAFETY: We verified that the payload of `Repr` is a POD + return unsafe { core::ptr::read(self as *const SmolStr) }; + } + Self(self.0.clone()) + } +} + +impl Default for SmolStr { + #[inline(always)] + fn default() -> SmolStr { + SmolStr(Repr::Inline { + len: InlineSize::_V0, + buf: [0; INLINE_CAP], + }) + } +} + +impl ops::Deref for SmolStr { + type Target = str; + + #[inline(always)] + fn deref(&self) -> &str { + self.as_str() + } +} + +// region: PartialEq implementations + +impl Eq for SmolStr {} +impl PartialEq for SmolStr { + fn eq(&self, other: &SmolStr) -> bool { + self.0.ptr_eq(&other.0) || self.as_str() == other.as_str() + } +} + +impl PartialEq<str> for SmolStr { + #[inline(always)] + fn eq(&self, other: &str) -> bool { + self.as_str() == other + } +} + +impl PartialEq<SmolStr> for str { + #[inline(always)] + fn eq(&self, other: &SmolStr) -> bool { + other == self + } +} + +impl<'a> PartialEq<&'a str> for SmolStr { + #[inline(always)] + fn eq(&self, other: &&'a str) -> bool { + self == *other + } +} + +impl PartialEq<SmolStr> for &str { + #[inline(always)] + fn eq(&self, other: &SmolStr) -> bool { + *self == other + } +} + +impl PartialEq<String> for SmolStr { + #[inline(always)] + fn eq(&self, other: &String) -> bool { + self.as_str() == 
other + } +} + +impl PartialEq<SmolStr> for String { + #[inline(always)] + fn eq(&self, other: &SmolStr) -> bool { + other == self + } +} + +impl<'a> PartialEq<&'a String> for SmolStr { + #[inline(always)] + fn eq(&self, other: &&'a String) -> bool { + self == *other + } +} + +impl PartialEq<SmolStr> for &String { + #[inline(always)] + fn eq(&self, other: &SmolStr) -> bool { + *self == other + } +} +// endregion: PartialEq implementations + +impl Ord for SmolStr { + fn cmp(&self, other: &SmolStr) -> Ordering { + self.as_str().cmp(other.as_str()) + } +} + +impl PartialOrd for SmolStr { + fn partial_cmp(&self, other: &SmolStr) -> Option<Ordering> { + Some(self.cmp(other)) + } +} + +impl hash::Hash for SmolStr { + fn hash<H: hash::Hasher>(&self, hasher: &mut H) { + self.as_str().hash(hasher); + } +} + +impl fmt::Debug for SmolStr { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fmt::Debug::fmt(self.as_str(), f) + } +} + +impl fmt::Display for SmolStr { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fmt::Display::fmt(self.as_str(), f) + } +} + +impl iter::FromIterator<char> for SmolStr { + fn from_iter<I: iter::IntoIterator<Item = char>>(iter: I) -> SmolStr { + from_char_iter(iter.into_iter()) + } +} + +#[inline] +fn from_char_iter(iter: impl Iterator<Item = char>) -> SmolStr { + from_buf_and_chars([0; _], 0, iter) +} + +fn from_buf_and_chars( + mut buf: [u8; INLINE_CAP], + buf_len: usize, + mut iter: impl Iterator<Item = char>, +) -> SmolStr { + let min_size = iter.size_hint().0 + buf_len; + if min_size > INLINE_CAP { + let heap: String = core::str::from_utf8(&buf[..buf_len]) + .unwrap() + .chars() + .chain(iter) + .collect(); + if heap.len() <= INLINE_CAP { + // size hint lied + return SmolStr::new_inline(&heap); + } + return SmolStr(Repr::Heap(heap.into_boxed_str().into())); + } + let mut len = buf_len; + while let Some(ch) = iter.next() { + let size = ch.len_utf8(); + if size + len > INLINE_CAP { + let (min_remaining, _) = iter.size_hint(); + let mut heap = String::with_capacity(size + len + min_remaining); + 
heap.push_str(core::str::from_utf8(&buf[..len]).unwrap()); + heap.push(ch); + heap.extend(iter); + return SmolStr(Repr::Heap(heap.into_boxed_str().into())); + } + ch.encode_utf8(&mut buf[len..]); + len += size; + } + SmolStr(Repr::Inline { + // SAFETY: We know that `len` is less than or equal to the maximum value of `InlineSize` + // as we otherwise return early. + len: unsafe { InlineSize::transmute_from_u8(len as u8) }, + buf, + }) +} + +fn build_from_str_iter(mut iter: impl Iterator) -> SmolStr +where + T: AsRef, + String: iter::Extend, +{ + let mut len = 0; + let mut buf = [0u8; INLINE_CAP]; + while let Some(slice) = iter.next() { + let slice = slice.as_ref(); + let size = slice.len(); + if size + len > INLINE_CAP { + let mut heap = String::with_capacity(size + len); + heap.push_str(core::str::from_utf8(&buf[..len]).unwrap()); + heap.push_str(slice); + heap.extend(iter); + return SmolStr(Repr::Heap(heap.into_boxed_str().into())); + } + buf[len..][..size].copy_from_slice(slice.as_bytes()); + len += size; + } + SmolStr(Repr::Inline { + // SAFETY: We know that `len` is less than or equal to the maximum value of `InlineSize` + // as we otherwise return early. 
+ len: unsafe { InlineSize::transmute_from_u8(len as u8) }, + buf, + }) +} + +impl iter::FromIterator for SmolStr { + fn from_iter>(iter: I) -> SmolStr { + build_from_str_iter(iter.into_iter()) + } +} + +impl<'a> iter::FromIterator<&'a String> for SmolStr { + fn from_iter>(iter: I) -> SmolStr { + SmolStr::from_iter(iter.into_iter().map(|x| x.as_str())) + } +} + +impl<'a> iter::FromIterator<&'a str> for SmolStr { + fn from_iter>(iter: I) -> SmolStr { + build_from_str_iter(iter.into_iter()) + } +} + +impl AsRef for SmolStr { + #[inline(always)] + fn as_ref(&self) -> &str { + self.as_str() + } +} + +impl AsRef<[u8]> for SmolStr { + #[inline(always)] + fn as_ref(&self) -> &[u8] { + self.as_str().as_bytes() + } +} + +#[cfg(feature = "std")] +impl AsRef for SmolStr { + #[inline(always)] + fn as_ref(&self) -> &std::ffi::OsStr { + AsRef::::as_ref(self.as_str()) + } +} + +#[cfg(feature = "std")] +impl AsRef for SmolStr { + #[inline(always)] + fn as_ref(&self) -> &std::path::Path { + AsRef::::as_ref(self.as_str()) + } +} + +impl From<&str> for SmolStr { + #[inline] + fn from(s: &str) -> SmolStr { + SmolStr::new(s) + } +} + +impl From<&mut str> for SmolStr { + #[inline] + fn from(s: &mut str) -> SmolStr { + SmolStr::new(s) + } +} + +impl From<&String> for SmolStr { + #[inline] + fn from(s: &String) -> SmolStr { + SmolStr::new(s) + } +} + +impl From for SmolStr { + #[inline(always)] + fn from(text: String) -> Self { + Self::new(text) + } +} + +impl From> for SmolStr { + #[inline] + fn from(s: Box) -> SmolStr { + SmolStr::new(s) + } +} + +impl From> for SmolStr { + #[inline] + fn from(s: Arc) -> SmolStr { + let repr = Repr::new_on_stack(s.as_ref()).unwrap_or(Repr::Heap(s)); + Self(repr) + } +} + +impl<'a> From> for SmolStr { + #[inline] + fn from(s: Cow<'a, str>) -> SmolStr { + SmolStr::new(s) + } +} + +impl From for Arc { + #[inline(always)] + fn from(text: SmolStr) -> Self { + match text.0 { + Repr::Heap(data) => data, + _ => text.as_str().into(), + } + } +} + +impl From for 
String { + #[inline(always)] + fn from(text: SmolStr) -> Self { + text.as_str().into() + } +} + +impl Borrow for SmolStr { + #[inline(always)] + fn borrow(&self) -> &str { + self.as_str() + } +} + +impl FromStr for SmolStr { + type Err = Infallible; + + #[inline] + fn from_str(s: &str) -> Result { + Ok(SmolStr::from(s)) + } +} + +const INLINE_CAP: usize = InlineSize::_V23 as usize; +const N_NEWLINES: usize = 32; +const N_SPACES: usize = 128; +const WS: &str = + "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n "; +const _: () = { + assert!(WS.len() == N_NEWLINES + N_SPACES); + assert!(WS.as_bytes()[N_NEWLINES - 1] == b'\n'); + assert!(WS.as_bytes()[N_NEWLINES] == b' '); +}; + +/// A [`u8`] with a bunch of niches. +#[derive(Clone, Copy, Debug, PartialEq)] +#[repr(u8)] +enum InlineSize { + _V0 = 0, + _V1, + _V2, + _V3, + _V4, + _V5, + _V6, + _V7, + _V8, + _V9, + _V10, + _V11, + _V12, + _V13, + _V14, + _V15, + _V16, + _V17, + _V18, + _V19, + _V20, + _V21, + _V22, + _V23, +} + +impl InlineSize { + /// SAFETY: `value` must be less than or equal to [`INLINE_CAP`] + #[inline(always)] + const unsafe fn transmute_from_u8(value: u8) -> Self { + debug_assert!(value <= InlineSize::_V23 as u8); + // SAFETY: The caller is responsible to uphold this invariant + unsafe { mem::transmute::(value) } + } +} + +#[derive(Clone, Debug)] +enum Repr { + Inline { + len: InlineSize, + buf: [u8; INLINE_CAP], + }, + Static(&'static str), + Heap(Arc), +} + +impl Repr { + /// This function tries to create a new Repr::Inline or Repr::Static + /// If it isn't possible, this function returns None + fn new_on_stack(text: T) -> Option + where + T: AsRef, + { + let text = text.as_ref(); + + let len = text.len(); + if len <= INLINE_CAP { + let mut buf = [0; INLINE_CAP]; + buf[..len].copy_from_slice(text.as_bytes()); + return Some(Repr::Inline { + // SAFETY: We know that `len` is less than or equal to the maximum value of `InlineSize` + len: unsafe { InlineSize::transmute_from_u8(len as 
u8) }, + buf, + }); + } + + if len <= N_NEWLINES + N_SPACES { + let bytes = text.as_bytes(); + let possible_newline_count = cmp::min(len, N_NEWLINES); + let newlines = bytes[..possible_newline_count] + .iter() + .take_while(|&&b| b == b'\n') + .count(); + let possible_space_count = len - newlines; + if possible_space_count <= N_SPACES && bytes[newlines..].iter().all(|&b| b == b' ') { + let spaces = possible_space_count; + let substring = &WS[N_NEWLINES - newlines..N_NEWLINES + spaces]; + return Some(Repr::Static(substring)); + } + } + None + } + + fn new(text: &str) -> Self { + Self::new_on_stack(text).unwrap_or_else(|| Repr::Heap(Arc::from(text))) + } + + #[inline(always)] + fn len(&self) -> usize { + match self { + Repr::Heap(data) => data.len(), + Repr::Static(data) => data.len(), + Repr::Inline { len, .. } => *len as usize, + } + } + + #[inline(always)] + fn is_empty(&self) -> bool { + match self { + Repr::Heap(data) => data.is_empty(), + Repr::Static(data) => data.is_empty(), + &Repr::Inline { len, .. } => len as u8 == 0, + } + } + + #[inline] + fn as_str(&self) -> &str { + match self { + Repr::Heap(data) => data, + Repr::Static(data) => data, + Repr::Inline { len, buf } => { + let len = *len as usize; + // SAFETY: len is guaranteed to be <= INLINE_CAP + let buf = unsafe { buf.get_unchecked(..len) }; + // SAFETY: buf is guaranteed to be valid utf8 for ..len bytes + unsafe { ::core::str::from_utf8_unchecked(buf) } + } + } + } + + fn ptr_eq(&self, other: &Self) -> bool { + match (self, other) { + (Self::Heap(l0), Self::Heap(r0)) => Arc::ptr_eq(l0, r0), + (Self::Static(l0), Self::Static(r0)) => core::ptr::eq(l0, r0), + ( + Self::Inline { + len: l_len, + buf: l_buf, + }, + Self::Inline { + len: r_len, + buf: r_buf, + }, + ) => l_len == r_len && l_buf == r_buf, + _ => false, + } + } +} + +/// Convert value to [`SmolStr`] using [`fmt::Display`], potentially without allocating. +/// +/// Almost identical to [`ToString`], but converts to `SmolStr` instead. 
+pub trait ToSmolStr { + fn to_smolstr(&self) -> SmolStr; +} + +/// [`str`] methods producing [`SmolStr`]s. +pub trait StrExt: private::Sealed { + /// Returns the lowercase equivalent of this string slice as a new [`SmolStr`], + /// potentially without allocating. + /// + /// See [`str::to_lowercase`]. + #[must_use = "this returns a new SmolStr without modifying the original"] + fn to_lowercase_smolstr(&self) -> SmolStr; + + /// Returns the uppercase equivalent of this string slice as a new [`SmolStr`], + /// potentially without allocating. + /// + /// See [`str::to_uppercase`]. + #[must_use = "this returns a new SmolStr without modifying the original"] + fn to_uppercase_smolstr(&self) -> SmolStr; + + /// Returns the ASCII lowercase equivalent of this string slice as a new [`SmolStr`], + /// potentially without allocating. + /// + /// See [`str::to_ascii_lowercase`]. + #[must_use = "this returns a new SmolStr without modifying the original"] + fn to_ascii_lowercase_smolstr(&self) -> SmolStr; + + /// Returns the ASCII uppercase equivalent of this string slice as a new [`SmolStr`], + /// potentially without allocating. + /// + /// See [`str::to_ascii_uppercase`]. + #[must_use = "this returns a new SmolStr without modifying the original"] + fn to_ascii_uppercase_smolstr(&self) -> SmolStr; + + /// Replaces all matches of a &str with another &str returning a new [`SmolStr`], + /// potentially without allocating. + /// + /// See [`str::replace`]. + #[must_use = "this returns a new SmolStr without modifying the original"] + fn replace_smolstr(&self, from: &str, to: &str) -> SmolStr; + + /// Replaces first N matches of a &str with another &str returning a new [`SmolStr`], + /// potentially without allocating. + /// + /// See [`str::replacen`]. 
+ #[must_use = "this returns a new SmolStr without modifying the original"] + fn replacen_smolstr(&self, from: &str, to: &str, count: usize) -> SmolStr; +} + +impl StrExt for str { + #[inline] + fn to_lowercase_smolstr(&self) -> SmolStr { + let len = self.len(); + if len <= INLINE_CAP { + let (buf, rest) = inline_convert_while_ascii(self, u8::to_ascii_lowercase); + from_buf_and_chars( + buf, + len - rest.len(), + rest.chars().flat_map(|c| c.to_lowercase()), + ) + } else { + self.to_lowercase().into() + } + } + + #[inline] + fn to_uppercase_smolstr(&self) -> SmolStr { + let len = self.len(); + if len <= INLINE_CAP { + let (buf, rest) = inline_convert_while_ascii(self, u8::to_ascii_uppercase); + from_buf_and_chars( + buf, + len - rest.len(), + rest.chars().flat_map(|c| c.to_uppercase()), + ) + } else { + self.to_uppercase().into() + } + } + + #[inline] + fn to_ascii_lowercase_smolstr(&self) -> SmolStr { + let len = self.len(); + if len <= INLINE_CAP { + let mut buf = [0u8; INLINE_CAP]; + buf[..len].copy_from_slice(self.as_bytes()); + buf[..len].make_ascii_lowercase(); + SmolStr(Repr::Inline { + // SAFETY: `len` is in bounds + len: unsafe { InlineSize::transmute_from_u8(len as u8) }, + buf, + }) + } else { + self.to_ascii_lowercase().into() + } + } + + #[inline] + fn to_ascii_uppercase_smolstr(&self) -> SmolStr { + let len = self.len(); + if len <= INLINE_CAP { + let mut buf = [0u8; INLINE_CAP]; + buf[..len].copy_from_slice(self.as_bytes()); + buf[..len].make_ascii_uppercase(); + SmolStr(Repr::Inline { + // SAFETY: `len` is in bounds + len: unsafe { InlineSize::transmute_from_u8(len as u8) }, + buf, + }) + } else { + self.to_ascii_uppercase().into() + } + } + + #[inline] + fn replace_smolstr(&self, from: &str, to: &str) -> SmolStr { + self.replacen_smolstr(from, to, usize::MAX) + } + + #[inline] + fn replacen_smolstr(&self, from: &str, to: &str, mut count: usize) -> SmolStr { + // Fast path for replacing a single ASCII character with another inline. 
+ if let [from_u8] = from.as_bytes() { + if let [to_u8] = to.as_bytes() { + return if self.len() <= count { + // SAFETY: `from_u8` & `to_u8` are ascii + unsafe { replacen_1_ascii(self, |b| if b == from_u8 { *to_u8 } else { *b }) } + } else { + unsafe { + replacen_1_ascii(self, |b| { + if b == from_u8 && count != 0 { + count -= 1; + *to_u8 + } else { + *b + } + }) + } + }; + } + } + + let mut result = SmolStrBuilder::new(); + let mut last_end = 0; + for (start, part) in self.match_indices(from).take(count) { + // SAFETY: `start` is guaranteed to be within the bounds of `self` as per + // `match_indices` and last_end is always less than or equal to `start` + result.push_str(unsafe { self.get_unchecked(last_end..start) }); + result.push_str(to); + last_end = start + part.len(); + } + // SAFETY: `self.len()` is guaranteed to be within the bounds of `self` and last_end is + // always less than or equal to `self.len()` + result.push_str(unsafe { self.get_unchecked(last_end..self.len()) }); + SmolStr::from(result) + } +} + +/// SAFETY: `map` fn must only replace ascii with ascii or return unchanged bytes. +#[inline] +unsafe fn replacen_1_ascii(src: &str, mut map: impl FnMut(&u8) -> u8) -> SmolStr { + if src.len() <= INLINE_CAP { + let mut buf = [0u8; INLINE_CAP]; + for (idx, b) in src.as_bytes().iter().enumerate() { + buf[idx] = map(b); + } + SmolStr(Repr::Inline { + // SAFETY: `len` is in bounds + len: unsafe { InlineSize::transmute_from_u8(src.len() as u8) }, + buf, + }) + } else { + let out = src.as_bytes().iter().map(map).collect(); + // SAFETY: We replaced ascii with ascii on valid utf8 strings. + unsafe { String::from_utf8_unchecked(out).into() } + } +} + +/// Inline version of std fn `convert_while_ascii`. `s` must have len <= 23. +#[inline] +fn inline_convert_while_ascii(s: &str, convert: fn(&u8) -> u8) -> ([u8; INLINE_CAP], &str) { + // Process the input in chunks of 16 bytes to enable auto-vectorization. 
+ // Previously the chunk size depended on the size of `usize`, + // but on 32-bit platforms with sse or neon is also the better choice. + // The only downside on other platforms would be a bit more loop-unrolling. + const N: usize = 16; + + debug_assert!(s.len() <= INLINE_CAP, "only for inline-able strings"); + + let mut slice = s.as_bytes(); + let mut out = [0u8; INLINE_CAP]; + let mut out_slice = &mut out[..slice.len()]; + let mut is_ascii = [false; N]; + + while slice.len() >= N { + // SAFETY: checked in loop condition + let chunk = unsafe { slice.get_unchecked(..N) }; + // SAFETY: out_slice has at least same length as input slice and gets sliced with the same offsets + let out_chunk = unsafe { out_slice.get_unchecked_mut(..N) }; + + for j in 0..N { + is_ascii[j] = chunk[j] <= 127; + } + + // Auto-vectorization for this check is a bit fragile, sum and comparing against the chunk + // size gives the best result, specifically a pmovmsk instruction on x86. + // See https://github.com/llvm/llvm-project/issues/96395 for why llvm currently does not + // currently recognize other similar idioms. + if is_ascii.iter().map(|x| *x as u8).sum::() as usize != N { + break; + } + + for j in 0..N { + out_chunk[j] = convert(&chunk[j]); + } + + slice = unsafe { slice.get_unchecked(N..) }; + out_slice = unsafe { out_slice.get_unchecked_mut(N..) }; + } + + // handle the remainder as individual bytes + while !slice.is_empty() { + let byte = slice[0]; + if byte > 127 { + break; + } + // SAFETY: out_slice has at least same length as input slice + unsafe { + *out_slice.get_unchecked_mut(0) = convert(&byte); + } + slice = unsafe { slice.get_unchecked(1..) }; + out_slice = unsafe { out_slice.get_unchecked_mut(1..) 
}; + } + + unsafe { + // SAFETY: we know this is a valid char boundary + // since we only skipped over leading ascii bytes + let rest = core::str::from_utf8_unchecked(slice); + (out, rest) + } +} + +impl ToSmolStr for T +where + T: fmt::Display + ?Sized, +{ + fn to_smolstr(&self) -> SmolStr { + format_smolstr!("{}", self) + } +} + +mod private { + /// No downstream impls allowed. + pub trait Sealed {} + impl Sealed for str {} +} + +/// Formats arguments to a [`SmolStr`], potentially without allocating. +/// +/// See [`alloc::format!`] or [`format_args!`] for syntax documentation. +#[macro_export] +macro_rules! format_smolstr { + ($($tt:tt)*) => {{ + let mut w = $crate::SmolStrBuilder::new(); + ::core::fmt::Write::write_fmt(&mut w, format_args!($($tt)*)).expect("a formatting trait implementation returned an error"); + w.finish() + }}; +} + +/// A builder that can be used to efficiently build a [`SmolStr`]. +/// +/// This won't allocate if the final string fits into the inline buffer. +#[derive(Clone, Default, Debug, PartialEq, Eq)] +pub struct SmolStrBuilder(SmolStrBuilderRepr); + +#[derive(Clone, Debug, PartialEq, Eq)] +enum SmolStrBuilderRepr { + Inline { len: usize, buf: [u8; INLINE_CAP] }, + Heap(String), +} + +impl Default for SmolStrBuilderRepr { + #[inline] + fn default() -> Self { + SmolStrBuilderRepr::Inline { + buf: [0; INLINE_CAP], + len: 0, + } + } +} + +impl SmolStrBuilder { + /// Creates a new empty [`SmolStrBuilder`]. + #[must_use] + pub const fn new() -> Self { + Self(SmolStrBuilderRepr::Inline { + buf: [0; INLINE_CAP], + len: 0, + }) + } + + /// Builds a [`SmolStr`] from `self`. 
+ #[must_use] + pub fn finish(&self) -> SmolStr { + SmolStr(match &self.0 { + &SmolStrBuilderRepr::Inline { len, buf } => { + debug_assert!(len <= INLINE_CAP); + Repr::Inline { + // SAFETY: We know that `value.len` is less than or equal to the maximum value of `InlineSize` + len: unsafe { InlineSize::transmute_from_u8(len as u8) }, + buf, + } + } + SmolStrBuilderRepr::Heap(heap) => Repr::new(heap), + }) + } + + /// Appends the given [`char`] to the end of `self`'s buffer. + pub fn push(&mut self, c: char) { + match &mut self.0 { + SmolStrBuilderRepr::Inline { len, buf } => { + let char_len = c.len_utf8(); + let new_len = *len + char_len; + if new_len <= INLINE_CAP { + c.encode_utf8(&mut buf[*len..]); + *len += char_len; + } else { + let mut heap = String::with_capacity(new_len); + // copy existing inline bytes over to the heap + // SAFETY: inline data is guaranteed to be valid utf8 for `old_len` bytes + unsafe { heap.as_mut_vec().extend_from_slice(&buf[..*len]) }; + heap.push(c); + self.0 = SmolStrBuilderRepr::Heap(heap); + } + } + SmolStrBuilderRepr::Heap(h) => h.push(c), + } + } + + /// Appends a given string slice onto the end of `self`'s buffer. 
+ pub fn push_str(&mut self, s: &str) { + match &mut self.0 { + SmolStrBuilderRepr::Inline { len, buf } => { + let old_len = *len; + *len += s.len(); + + // if the new length will fit on the stack (even if it fills it entirely) + if *len <= INLINE_CAP { + buf[old_len..*len].copy_from_slice(s.as_bytes()); + return; // skip the heap push below + } + + let mut heap = String::with_capacity(*len); + + // copy existing inline bytes over to the heap + // SAFETY: inline data is guaranteed to be valid utf8 for `old_len` bytes + unsafe { heap.as_mut_vec().extend_from_slice(&buf[..old_len]) }; + heap.push_str(s); + self.0 = SmolStrBuilderRepr::Heap(heap); + } + SmolStrBuilderRepr::Heap(heap) => heap.push_str(s), + } + } +} + +impl fmt::Write for SmolStrBuilder { + #[inline] + fn write_str(&mut self, s: &str) -> fmt::Result { + self.push_str(s); + Ok(()) + } +} + +impl From for SmolStr { + fn from(value: SmolStrBuilder) -> Self { + value.finish() + } +} + +#[cfg(feature = "arbitrary")] +impl<'a> arbitrary::Arbitrary<'a> for SmolStr { + fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> Result { + let s = <&str>::arbitrary(u)?; + Ok(SmolStr::new(s)) + } +} + +#[cfg(feature = "borsh")] +mod borsh; +#[cfg(feature = "serde")] +mod serde; + +#[test] +fn from_buf_and_chars_size_hinted_heap() { + let str = from_buf_and_chars( + *b"abcdefghijklmnopqr00000", + 18, + "_0x1x2x3x4x5x6x7x8x9x10x11x12x13".chars(), + ); + + assert_eq!(str, "abcdefghijklmnopqr_0x1x2x3x4x5x6x7x8x9x10x11x12x13"); +} diff --git a/lib/smol_str/src/serde.rs b/lib/smol_str/src/serde.rs new file mode 100644 index 0000000000..4f08b444c5 --- /dev/null +++ b/lib/smol_str/src/serde.rs @@ -0,0 +1,97 @@ +use alloc::{string::String, vec::Vec}; +use core::fmt; + +use serde::de::{Deserializer, Error, Unexpected, Visitor}; +use serde_core as serde; + +use crate::SmolStr; + +// https://github.com/serde-rs/serde/blob/629802f2abfd1a54a6072992888fea7ca5bc209f/serde/src/private/de.rs#L56-L125 +fn smol_str<'de: 'a, 'a, 
D>(deserializer: D) -> Result +where + D: Deserializer<'de>, +{ + struct SmolStrVisitor; + + impl<'a> Visitor<'a> for SmolStrVisitor { + type Value = SmolStr; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("a string") + } + + fn visit_str(self, v: &str) -> Result + where + E: Error, + { + Ok(SmolStr::from(v)) + } + + fn visit_borrowed_str(self, v: &'a str) -> Result + where + E: Error, + { + Ok(SmolStr::from(v)) + } + + fn visit_string(self, v: String) -> Result + where + E: Error, + { + Ok(SmolStr::from(v)) + } + + fn visit_bytes(self, v: &[u8]) -> Result + where + E: Error, + { + match core::str::from_utf8(v) { + Ok(s) => Ok(SmolStr::from(s)), + Err(_) => Err(Error::invalid_value(Unexpected::Bytes(v), &self)), + } + } + + fn visit_borrowed_bytes(self, v: &'a [u8]) -> Result + where + E: Error, + { + match core::str::from_utf8(v) { + Ok(s) => Ok(SmolStr::from(s)), + Err(_) => Err(Error::invalid_value(Unexpected::Bytes(v), &self)), + } + } + + fn visit_byte_buf(self, v: Vec) -> Result + where + E: Error, + { + match String::from_utf8(v) { + Ok(s) => Ok(SmolStr::from(s)), + Err(e) => Err(Error::invalid_value( + Unexpected::Bytes(&e.into_bytes()), + &self, + )), + } + } + } + + deserializer.deserialize_str(SmolStrVisitor) +} + +impl serde::Serialize for SmolStr { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + self.as_str().serialize(serializer) + } +} + +impl<'de> serde::Deserialize<'de> for SmolStr { + fn deserialize(deserializer: D) -> Result + where + D: serde::Deserializer<'de>, + { + smol_str(deserializer) + } +} diff --git a/lib/smol_str/tests/test.rs b/lib/smol_str/tests/test.rs new file mode 100644 index 0000000000..8f7d9ec39a --- /dev/null +++ b/lib/smol_str/tests/test.rs @@ -0,0 +1,451 @@ +use std::sync::Arc; + +#[cfg(not(miri))] +use proptest::{prop_assert, prop_assert_eq, proptest}; + +use smol_str::{SmolStr, SmolStrBuilder}; + +#[test] +#[cfg(target_pointer_width = 
"64")] +fn smol_str_is_smol() { + assert_eq!( + ::std::mem::size_of::(), + ::std::mem::size_of::(), + ); +} + +#[test] +fn assert_traits() { + fn f() {} + f::(); +} + +#[test] +fn conversions() { + let s: SmolStr = "Hello, World!".into(); + let s: String = s.into(); + assert_eq!(s, "Hello, World!"); + + let s: SmolStr = Arc::::from("Hello, World!").into(); + let s: Arc = s.into(); + assert_eq!(s.as_ref(), "Hello, World!"); +} + +#[test] +fn const_fn_ctor() { + const EMPTY: SmolStr = SmolStr::new_inline(""); + const A: SmolStr = SmolStr::new_inline("A"); + const HELLO: SmolStr = SmolStr::new_inline("HELLO"); + const LONG: SmolStr = SmolStr::new_inline("ABCDEFGHIZKLMNOPQRSTUVW"); + + assert_eq!(EMPTY, SmolStr::from("")); + assert_eq!(A, SmolStr::from("A")); + assert_eq!(HELLO, SmolStr::from("HELLO")); + assert_eq!(LONG, SmolStr::from("ABCDEFGHIZKLMNOPQRSTUVW")); +} + +#[cfg(not(miri))] +fn check_props(std_str: &str, smol: SmolStr) -> Result<(), proptest::test_runner::TestCaseError> { + prop_assert_eq!(smol.as_str(), std_str); + prop_assert_eq!(smol.len(), std_str.len()); + prop_assert_eq!(smol.is_empty(), std_str.is_empty()); + if smol.len() <= 23 { + prop_assert!(!smol.is_heap_allocated()); + } + Ok(()) +} + +#[cfg(not(miri))] +proptest! 
{ + #[test] + fn roundtrip(s: String) { + check_props(s.as_str(), SmolStr::new(s.clone()))?; + } + + #[test] + fn roundtrip_spaces(s in r"( )*") { + check_props(s.as_str(), SmolStr::new(s.clone()))?; + } + + #[test] + fn roundtrip_newlines(s in r"\n*") { + check_props(s.as_str(), SmolStr::new(s.clone()))?; + } + + #[test] + fn roundtrip_ws(s in r"( |\n)*") { + check_props(s.as_str(), SmolStr::new(s.clone()))?; + } + + #[test] + fn from_string_iter(slices in proptest::collection::vec(".*", 1..100)) { + let string: String = slices.iter().map(|x| x.as_str()).collect(); + let smol: SmolStr = slices.into_iter().collect(); + check_props(string.as_str(), smol)?; + } + + #[test] + fn from_str_iter(slices in proptest::collection::vec(".*", 1..100)) { + let string: String = slices.iter().map(|x| x.as_str()).collect(); + let smol: SmolStr = slices.iter().collect(); + check_props(string.as_str(), smol)?; + } +} + +#[cfg(feature = "serde")] +mod serde_tests { + use super::*; + use serde::{Deserialize, Serialize}; + use std::collections::HashMap; + + #[derive(Serialize, Deserialize)] + struct SmolStrStruct { + pub(crate) s: SmolStr, + pub(crate) vec: Vec, + pub(crate) map: HashMap, + } + + #[test] + fn test_serde() { + let s = SmolStr::new("Hello, World"); + let s = serde_json::to_string(&s).unwrap(); + assert_eq!(s, "\"Hello, World\""); + let s: SmolStr = serde_json::from_str(&s).unwrap(); + assert_eq!(s, "Hello, World"); + } + + #[test] + fn test_serde_reader() { + let s = SmolStr::new("Hello, World"); + let s = serde_json::to_string(&s).unwrap(); + assert_eq!(s, "\"Hello, World\""); + let s: SmolStr = serde_json::from_reader(std::io::Cursor::new(s)).unwrap(); + assert_eq!(s, "Hello, World"); + } + + #[test] + fn test_serde_struct() { + let mut map = HashMap::new(); + map.insert(SmolStr::new("a"), SmolStr::new("ohno")); + let struct_ = SmolStrStruct { + s: SmolStr::new("Hello, World"), + vec: vec![SmolStr::new("Hello, World"), SmolStr::new("Hello, World")], + map, + }; + let s 
= serde_json::to_string(&struct_).unwrap(); + let _new_struct: SmolStrStruct = serde_json::from_str(&s).unwrap(); + } + + #[test] + fn test_serde_struct_reader() { + let mut map = HashMap::new(); + map.insert(SmolStr::new("a"), SmolStr::new("ohno")); + let struct_ = SmolStrStruct { + s: SmolStr::new("Hello, World"), + vec: vec![SmolStr::new("Hello, World"), SmolStr::new("Hello, World")], + map, + }; + let s = serde_json::to_string(&struct_).unwrap(); + let _new_struct: SmolStrStruct = serde_json::from_reader(std::io::Cursor::new(s)).unwrap(); + } + + #[test] + fn test_serde_hashmap() { + let mut map = HashMap::new(); + map.insert(SmolStr::new("a"), SmolStr::new("ohno")); + let s = serde_json::to_string(&map).unwrap(); + let _s: HashMap = serde_json::from_str(&s).unwrap(); + } + + #[test] + fn test_serde_hashmap_reader() { + let mut map = HashMap::new(); + map.insert(SmolStr::new("a"), SmolStr::new("ohno")); + let s = serde_json::to_string(&map).unwrap(); + let _s: HashMap = + serde_json::from_reader(std::io::Cursor::new(s)).unwrap(); + } + + #[test] + fn test_serde_vec() { + let vec = vec![SmolStr::new(""), SmolStr::new("b")]; + let s = serde_json::to_string(&vec).unwrap(); + let _s: Vec = serde_json::from_str(&s).unwrap(); + } + + #[test] + fn test_serde_vec_reader() { + let vec = vec![SmolStr::new(""), SmolStr::new("b")]; + let s = serde_json::to_string(&vec).unwrap(); + let _s: Vec = serde_json::from_reader(std::io::Cursor::new(s)).unwrap(); + } +} + +#[test] +fn test_search_in_hashmap() { + let mut m = ::std::collections::HashMap::::new(); + m.insert("aaa".into(), 17); + assert_eq!(17, *m.get("aaa").unwrap()); +} + +#[test] +fn test_from_char_iterator() { + let examples = [ + // Simple keyword-like strings + ("if", false), + ("for", false), + ("impl", false), + // Strings containing two-byte characters + ("パーティーへ行かないか", true), + ("パーティーへ行か", true), + ("パーティーへ行_", false), + ("和製漢語", false), + ("部落格", false), + ("사회과학원 어학연구소", true), + // String containing 
diverse characters + ("表ポあA鷗ŒéB逍Üߪąñ丂㐀𠀀", true), + ]; + for (raw, is_heap) in &examples { + let s: SmolStr = raw.chars().collect(); + assert_eq!(s.as_str(), *raw); + assert_eq!(s.is_heap_allocated(), *is_heap); + } + // String which has too many characters to even consider inlining: Chars::size_hint uses + // (`len` + 3) / 4. With `len` = 89, this results in 23, so `from_iter` will immediately + // heap allocate + let raw = "a".repeat(23 * 4 + 1); + let s: SmolStr = raw.chars().collect(); + assert_eq!(s.as_str(), raw); + assert!(s.is_heap_allocated()); +} + +#[test] +fn test_bad_size_hint_char_iter() { + struct BadSizeHint(I); + + impl> Iterator for BadSizeHint { + type Item = T; + + fn next(&mut self) -> Option { + self.0.next() + } + + fn size_hint(&self) -> (usize, Option) { + (1024, None) + } + } + + let data = "testing"; + let collected: SmolStr = BadSizeHint(data.chars()).collect(); + let new = SmolStr::new(data); + + assert!(!collected.is_heap_allocated()); + assert!(!new.is_heap_allocated()); + assert_eq!(new, collected); +} + +#[test] +fn test_to_smolstr() { + use smol_str::ToSmolStr; + + for i in 0..26 { + let a = &"abcdefghijklmnopqrstuvwxyz"[i..]; + + assert_eq!(a, a.to_smolstr()); + assert_eq!(a, smol_str::format_smolstr!("{}", a)); + } +} + +#[test] +fn test_builder_push_str() { + //empty + let builder = SmolStrBuilder::new(); + assert_eq!("", builder.finish()); + + // inline push + let mut builder = SmolStrBuilder::new(); + builder.push_str("a"); + builder.push_str("b"); + let s = builder.finish(); + assert!(!s.is_heap_allocated()); + assert_eq!("ab", s); + + // inline max push + let mut builder = SmolStrBuilder::new(); + builder.push_str(&"a".repeat(23)); + let s = builder.finish(); + assert!(!s.is_heap_allocated()); + assert_eq!("a".repeat(23), s); + + // heap push immediate + let mut builder = SmolStrBuilder::new(); + builder.push_str(&"a".repeat(24)); + let s = builder.finish(); + assert!(s.is_heap_allocated()); + assert_eq!("a".repeat(24), s); + 
+ // heap push succession + let mut builder = SmolStrBuilder::new(); + builder.push_str(&"a".repeat(23)); + builder.push_str(&"a".repeat(23)); + let s = builder.finish(); + assert!(s.is_heap_allocated()); + assert_eq!("a".repeat(46), s); + + // heap push on multibyte char + let mut builder = SmolStrBuilder::new(); + builder.push_str("ohnonononononononono!"); + builder.push('🤯'); + let s = builder.finish(); + assert!(s.is_heap_allocated()); + assert_eq!("ohnonononononononono!🤯", s); +} + +#[test] +fn test_builder_push() { + //empty + let builder = SmolStrBuilder::new(); + assert_eq!("", builder.finish()); + + // inline push + let mut builder = SmolStrBuilder::new(); + builder.push('a'); + builder.push('b'); + let s = builder.finish(); + assert!(!s.is_heap_allocated()); + assert_eq!("ab", s); + + // inline max push + let mut builder = SmolStrBuilder::new(); + for _ in 0..23 { + builder.push('a'); + } + let s = builder.finish(); + assert!(!s.is_heap_allocated()); + assert_eq!("a".repeat(23), s); + + // heap push + let mut builder = SmolStrBuilder::new(); + for _ in 0..24 { + builder.push('a'); + } + let s = builder.finish(); + assert!(s.is_heap_allocated()); + assert_eq!("a".repeat(24), s); +} + +#[cfg(test)] +mod test_str_ext { + use smol_str::StrExt; + + #[test] + fn large() { + let lowercase = "aaaaaaAAAAAaaaaaaaaaaaaaaaaaaaaaAAAAaaaaaaaaaaaaaa".to_lowercase_smolstr(); + assert_eq!( + lowercase, + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + ); + assert!(lowercase.is_heap_allocated()); + } + + #[test] + fn to_lowercase() { + let lowercase = "aßΔC".to_lowercase_smolstr(); + assert_eq!(lowercase, "aßδc"); + assert!(!lowercase.is_heap_allocated()); + } + + #[test] + fn to_uppercase() { + let uppercase = "aßΔC".to_uppercase_smolstr(); + assert_eq!(uppercase, "ASSΔC"); + assert!(!uppercase.is_heap_allocated()); + } + + #[test] + fn to_ascii_lowercase() { + let uppercase = "aßΔC".to_ascii_lowercase_smolstr(); + assert_eq!(uppercase, "aßΔc"); + 
assert!(!uppercase.is_heap_allocated()); + } + + #[test] + fn to_ascii_uppercase() { + let uppercase = "aßΔC".to_ascii_uppercase_smolstr(); + assert_eq!(uppercase, "AßΔC"); + assert!(!uppercase.is_heap_allocated()); + } + + #[test] + fn replace() { + let result = "foo_bar_baz".replace_smolstr("ba", "do"); + assert_eq!(result, "foo_dor_doz"); + assert!(!result.is_heap_allocated()); + } + + #[test] + fn replacen() { + let result = "foo_bar_baz".replacen_smolstr("ba", "do", 1); + assert_eq!(result, "foo_dor_baz"); + assert!(!result.is_heap_allocated()); + } + + #[test] + fn replacen_1_ascii() { + let result = "foo_bar_baz".replacen_smolstr("o", "u", 1); + assert_eq!(result, "fuo_bar_baz"); + assert!(!result.is_heap_allocated()); + } +} + +#[cfg(feature = "borsh")] +mod borsh_tests { + use borsh::BorshDeserialize; + use smol_str::{SmolStr, ToSmolStr}; + use std::io::Cursor; + + #[test] + fn borsh_serialize_stack() { + let smolstr_on_stack = "aßΔCaßδc".to_smolstr(); + let mut buffer = Vec::new(); + borsh::BorshSerialize::serialize(&smolstr_on_stack, &mut buffer).unwrap(); + let mut cursor = Cursor::new(buffer); + let decoded: SmolStr = borsh::BorshDeserialize::deserialize_reader(&mut cursor).unwrap(); + assert_eq!(smolstr_on_stack, decoded); + } + #[test] + fn borsh_serialize_heap() { + let smolstr_on_heap = "aßΔCaßδcaßΔCaßδcaßΔCaßδcaßΔCaßδcaßΔCaßδcaßΔCaßδcaßΔCaßδcaßΔCaßδcaßΔCaßδcaßΔCaßδcaßΔCaßδc".to_smolstr(); + let mut buffer = Vec::new(); + borsh::BorshSerialize::serialize(&smolstr_on_heap, &mut buffer).unwrap(); + let mut cursor = Cursor::new(buffer); + let decoded: SmolStr = borsh::BorshDeserialize::deserialize_reader(&mut cursor).unwrap(); + assert_eq!(smolstr_on_heap, decoded); + } + #[test] + fn borsh_non_utf8_stack() { + let invalid_utf8: Vec = vec![0xF0, 0x9F, 0x8F]; // Incomplete UTF-8 sequence + + let wrong_utf8 = SmolStr::from(unsafe { String::from_utf8_unchecked(invalid_utf8) }); + let mut buffer = Vec::new(); + 
borsh::BorshSerialize::serialize(&wrong_utf8, &mut buffer).unwrap(); + let mut cursor = Cursor::new(buffer); + let result = SmolStr::deserialize_reader(&mut cursor); + assert!(result.is_err()); + } + + #[test] + fn borsh_non_utf8_heap() { + let invalid_utf8: Vec = vec![ + 0xC1, 0x8A, 0x5F, 0xE2, 0x3A, 0x9E, 0x3B, 0xAA, 0x01, 0x08, 0x6F, 0x2F, 0xC0, 0x32, + 0xAB, 0xE1, 0x9A, 0x2F, 0x4A, 0x3F, 0x25, 0x0D, 0x8A, 0x2A, 0x19, 0x11, 0xF0, 0x7F, + 0x0E, 0x80, + ]; + let wrong_utf8 = SmolStr::from(unsafe { String::from_utf8_unchecked(invalid_utf8) }); + let mut buffer = Vec::new(); + borsh::BorshSerialize::serialize(&wrong_utf8, &mut buffer).unwrap(); + let mut cursor = Cursor::new(buffer); + let result = SmolStr::deserialize_reader(&mut cursor); + assert!(result.is_err()); + } +} diff --git a/lib/smol_str/tests/tidy.rs b/lib/smol_str/tests/tidy.rs new file mode 100644 index 0000000000..e2d809e40f --- /dev/null +++ b/lib/smol_str/tests/tidy.rs @@ -0,0 +1,47 @@ +#![cfg(not(miri))] +use std::{ + env, + path::{Path, PathBuf}, + process::{Command, Stdio}, +}; + +fn project_root() -> PathBuf { + PathBuf::from( + env::var("CARGO_MANIFEST_DIR").unwrap_or_else(|_| env!("CARGO_MANIFEST_DIR").to_owned()), + ) +} + +fn run(cmd: &str, dir: impl AsRef) -> Result<(), ()> { + let mut args: Vec<_> = cmd.split_whitespace().collect(); + let bin = args.remove(0); + println!("> {}", cmd); + let output = Command::new(bin) + .args(args) + .current_dir(dir) + .stdin(Stdio::null()) + .stdout(Stdio::piped()) + .stderr(Stdio::inherit()) + .output() + .map_err(drop)?; + if output.status.success() { + Ok(()) + } else { + let stdout = String::from_utf8(output.stdout).map_err(drop)?; + print!("{}", stdout); + Err(()) + } +} + +#[test] +fn check_code_formatting() { + let dir = project_root(); + if run("rustfmt +stable --version", &dir).is_err() { + panic!( + "failed to run rustfmt from toolchain 'stable'; \ + please run `rustup component add rustfmt --toolchain stable` to install it.", + ); + } + if 
run("cargo +stable fmt -- --check", &dir).is_err() { + panic!("code is not properly formatted; please format the code by running `cargo fmt`") + } +}