From 9588fb6dda56aacaba79099ac8b506a84917bcf3 Mon Sep 17 00:00:00 2001
From: Eric Huss
Date: Wed, 6 Sep 2023 21:10:59 -0700
Subject: [PATCH] Add a du utility function.

This adds a very primitive `du` function for determining the disk usage
in a directory. This should probably be improved or replaced at some
point in the future, this is just a start for supporting tracking sizes
of cache data.
---
 Cargo.lock                   |  1 +
 crates/cargo-util/Cargo.toml |  1 +
 crates/cargo-util/src/du.rs  | 77 ++++++++++++++++++++++++++++++++++++
 crates/cargo-util/src/lib.rs |  2 +
 4 files changed, 81 insertions(+)
 create mode 100644 crates/cargo-util/src/du.rs

diff --git a/Cargo.lock b/Cargo.lock
index 0d0c034ad..87b997456 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -400,6 +400,7 @@ dependencies = [
  "core-foundation",
  "filetime",
  "hex",
+ "ignore",
  "jobserver",
  "libc",
  "miow",
diff --git a/crates/cargo-util/Cargo.toml b/crates/cargo-util/Cargo.toml
index 616a79c5e..d4376d05b 100644
--- a/crates/cargo-util/Cargo.toml
+++ b/crates/cargo-util/Cargo.toml
@@ -12,6 +12,7 @@ description = "Miscellaneous support code used by Cargo."
 anyhow.workspace = true
 filetime.workspace = true
 hex.workspace = true
+ignore.workspace = true
 jobserver.workspace = true
 libc.workspace = true
 same-file.workspace = true
diff --git a/crates/cargo-util/src/du.rs b/crates/cargo-util/src/du.rs
new file mode 100644
index 000000000..a4f2cbe8c
--- /dev/null
+++ b/crates/cargo-util/src/du.rs
@@ -0,0 +1,77 @@
+//! A simple disk usage estimator.
+
+use anyhow::{Context, Result};
+use ignore::overrides::OverrideBuilder;
+use ignore::{WalkBuilder, WalkState};
+use std::path::Path;
+use std::sync::atomic::{AtomicU64, Ordering};
+use std::sync::{Arc, Mutex};
+
+/// Determines the disk usage of all files in the given directory.
+///
+/// The given patterns are gitignore style patterns relative to the given
+/// path. If there are patterns, it will only count things matching that
+/// pattern. `!` can be used to exclude things. See [`OverrideBuilder::add`]
+/// for more info.
+///
+/// This is a primitive implementation that doesn't handle hard links, and
+/// isn't particularly fast (for example, not using `getattrlistbulk` on
+/// macOS). It also only uses actual byte sizes instead of block counts (and
+/// thus vastly undercounts directories with lots of small files). It would be
+/// nice to improve this or replace it with something better.
+pub fn du(path: &Path, patterns: &[&str]) -> Result<u64> {
+    du_inner(path, patterns).with_context(|| format!("failed to walk `{}`", path.display()))
+}
+
+fn du_inner(path: &Path, patterns: &[&str]) -> Result<u64> {
+    let mut builder = OverrideBuilder::new(path);
+    for pattern in patterns {
+        builder.add(pattern)?;
+    }
+    let overrides = builder.build()?;
+
+    let mut builder = WalkBuilder::new(path);
+    builder
+        .overrides(overrides)
+        .hidden(false)
+        .parents(false)
+        .ignore(false)
+        .git_global(false)
+        .git_ignore(false)
+        .git_exclude(false);
+    let walker = builder.build_parallel();
+    let total = Arc::new(AtomicU64::new(0));
+    // A slot used to indicate there was an error while walking.
+    //
+    // It is possible that more than one error happens (such as in different
+    // threads). The error returned is arbitrary in that case.
+    let err = Arc::new(Mutex::new(None));
+    walker.run(|| {
+        Box::new(|entry| {
+            match entry {
+                Ok(entry) => match entry.metadata() {
+                    Ok(meta) => {
+                        if meta.is_file() {
+                            total.fetch_add(meta.len(), Ordering::SeqCst);
+                        }
+                    }
+                    Err(e) => {
+                        *err.lock().unwrap() = Some(e.into());
+                        return WalkState::Quit;
+                    }
+                },
+                Err(e) => {
+                    *err.lock().unwrap() = Some(e.into());
+                    return WalkState::Quit;
+                }
+            }
+            WalkState::Continue
+        })
+    });
+
+    if let Some(e) = err.lock().unwrap().take() {
+        return Err(e);
+    }
+
+    Ok(total.load(Ordering::SeqCst))
+}
diff --git a/crates/cargo-util/src/lib.rs b/crates/cargo-util/src/lib.rs
index 0cbc920ec..599d7d861 100644
--- a/crates/cargo-util/src/lib.rs
+++ b/crates/cargo-util/src/lib.rs
@@ -1,10 +1,12 @@
 //! Miscellaneous support code used by Cargo.
 
 pub use self::read2::read2;
+pub use du::du;
 pub use process_builder::ProcessBuilder;
 pub use process_error::{exit_status_to_string, is_simple_exit_code, ProcessError};
 pub use sha256::Sha256;
 
+mod du;
 pub mod paths;
 mod process_builder;
 mod process_error;