Add a du utility function.

This adds a very primitive `du` function for determining the disk usage
of a directory. It should probably be improved or replaced at some
point in the future; for now it is just a starting point for tracking
the sizes of cache data.
This commit is contained in:
Eric Huss 2023-09-06 21:10:59 -07:00
parent 094632bcea
commit 9588fb6dda
4 changed files with 81 additions and 0 deletions

1
Cargo.lock generated
View File

@ -400,6 +400,7 @@ dependencies = [
"core-foundation",
"filetime",
"hex",
"ignore",
"jobserver",
"libc",
"miow",

View File

@ -12,6 +12,7 @@ description = "Miscellaneous support code used by Cargo."
anyhow.workspace = true
filetime.workspace = true
hex.workspace = true
ignore.workspace = true
jobserver.workspace = true
libc.workspace = true
same-file.workspace = true

View File

@ -0,0 +1,77 @@
//! A simple disk usage estimator.
use anyhow::{Context, Result};
use ignore::overrides::OverrideBuilder;
use ignore::{WalkBuilder, WalkState};
use std::path::Path;
use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::{Arc, Mutex};
/// Computes the total size, in bytes, of the files found under `path`.
///
/// `patterns` are gitignore-style globs interpreted relative to `path`.
/// When any are supplied, only entries matching them are counted; a leading
/// `!` excludes matches instead. See [`OverrideBuilder::add`] for the exact
/// pattern rules.
///
/// This is intentionally a rough estimate:
///
/// * Hard links are not handled.
/// * It is not especially fast (for example, it does not use
///   `getattrlistbulk` on macOS).
/// * It sums actual byte lengths rather than block counts, so directories
///   holding many small files are vastly undercounted.
///
/// It would be nice to improve this or replace it with something better.
///
/// # Errors
///
/// Any failure reported by the underlying walk is returned with the added
/// context ``failed to walk `<path>` ``.
pub fn du(path: &Path, patterns: &[&str]) -> Result<u64> {
    // Built lazily: the message is only formatted if the walk fails.
    let describe_failure = || format!("failed to walk `{}`", path.display());
    du_inner(path, patterns).with_context(describe_failure)
}
/// Does the actual work for [`du`]: walks `path` in parallel and adds up the
/// byte length of every regular file that passes the `patterns` overrides.
///
/// Returns an error if a pattern cannot be added, the override set cannot be
/// built, or any entry (or its metadata) fails to be read during the walk.
fn du_inner(path: &Path, patterns: &[&str]) -> Result<u64> {
    // Translate the caller's globs into an override set rooted at `path`.
    let mut override_builder = OverrideBuilder::new(path);
    for &glob in patterns {
        override_builder.add(glob)?;
    }
    let overrides = override_builder.build()?;

    // Switch off the walker's built-in hidden-file and ignore-file filters so
    // that the explicit overrides above are the only thing restricting which
    // entries get visited.
    let mut walk_builder = WalkBuilder::new(path);
    walk_builder
        .overrides(overrides)
        .hidden(false)
        .parents(false)
        .ignore(false)
        .git_global(false)
        .git_ignore(false)
        .git_exclude(false);
    let walker = walk_builder.build_parallel();

    // Running byte total, shared by every walker thread.
    let byte_count = Arc::new(AtomicU64::new(0));
    // Holds an error encountered while walking, if any.
    //
    // Several threads may fail independently; each one overwrites this slot,
    // so which error ends up being reported is arbitrary.
    let failure = Arc::new(Mutex::new(None));

    walker.run(|| {
        Box::new(|entry| {
            // Bail out of the whole walk as soon as an entry cannot be read.
            let dirent = match entry {
                Ok(dirent) => dirent,
                Err(e) => {
                    *failure.lock().unwrap() = Some(e.into());
                    return WalkState::Quit;
                }
            };
            // Likewise if its metadata is unavailable.
            let meta = match dirent.metadata() {
                Ok(meta) => meta,
                Err(e) => {
                    *failure.lock().unwrap() = Some(e.into());
                    return WalkState::Quit;
                }
            };
            // Only regular files contribute to the total.
            if meta.is_file() {
                byte_count.fetch_add(meta.len(), Ordering::SeqCst);
            }
            WalkState::Continue
        })
    });

    // The walk has finished; report a recorded error in preference to a
    // (possibly partial) total.
    let recorded = failure.lock().unwrap().take();
    match recorded {
        Some(e) => Err(e),
        None => Ok(byte_count.load(Ordering::SeqCst)),
    }
}

View File

@ -1,10 +1,12 @@
//! Miscellaneous support code used by Cargo.
pub use self::read2::read2;
pub use du::du;
pub use process_builder::ProcessBuilder;
pub use process_error::{exit_status_to_string, is_simple_exit_code, ProcessError};
pub use sha256::Sha256;
mod du;
pub mod paths;
mod process_builder;
mod process_error;