Make cache tracking resilient to unexpected files (#15147)

This adds a check to fix an issue where the cache tracking
synchronization was breaking on unexpected files like macOS's
`.DS_Store` (see commit for details).

Fixes https://github.com/rust-lang/cargo/issues/15145
This commit is contained in:
Ed Page 2025-02-05 23:20:15 +00:00 committed by GitHub
commit 1687f7409c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 80 additions and 14 deletions

View File

@ -657,8 +657,19 @@ impl GlobalCacheTracker {
Ok(())
}
/// Returns a list of directory entries in the given path.
fn names_from(path: &Path) -> CargoResult<Vec<String>> {
/// Lists the names of the entries directly under `path`, keeping only
/// those entries that are directories themselves.
fn list_dir_names(path: &Path) -> CargoResult<Vec<String>> {
    // Predicate: keep an entry only when its file type could be determined
    // and is a directory. Entries whose type lookup fails are dropped, so
    // stray files (e.g. macOS's `.DS_Store`) never reach the caller.
    let is_dir = |entry: &std::fs::DirEntry| match entry.file_type() {
        Ok(file_type) => file_type.is_dir(),
        Err(_) => false,
    };
    Self::read_dir_with_filter(path, &is_dir)
}
/// Returns a list of names in a directory, filtered by the given callback.
fn read_dir_with_filter(
path: &Path,
filter: &dyn Fn(&std::fs::DirEntry) -> bool,
) -> CargoResult<Vec<String>> {
let entries = match path.read_dir() {
Ok(e) => e,
Err(e) => {
@ -672,7 +683,9 @@ impl GlobalCacheTracker {
}
};
let names = entries
.filter_map(|entry| entry.ok()?.file_name().into_string().ok())
.filter_map(|entry| entry.ok())
.filter(|entry| filter(entry))
.filter_map(|entry| entry.file_name().into_string().ok())
.collect();
Ok(names)
}
@ -803,7 +816,7 @@ impl GlobalCacheTracker {
base_path: &Path,
) -> CargoResult<()> {
trace!(target: "gc", "checking for untracked parent to add to {parent_table_name}");
let names = Self::names_from(base_path)?;
let names = Self::list_dir_names(base_path)?;
let mut stmt = conn.prepare_cached(&format!(
"INSERT INTO {parent_table_name} (name, timestamp)
@ -896,7 +909,7 @@ impl GlobalCacheTracker {
VALUES (?1, ?2, ?3, ?4)
ON CONFLICT DO NOTHING",
)?;
let index_names = Self::names_from(&base_path)?;
let index_names = Self::list_dir_names(&base_path)?;
for index_name in index_names {
let Some(id) = Self::id_from_name(conn, REGISTRY_INDEX_TABLE, &index_name)? else {
// The id is missing from the database. This should be resolved
@ -904,13 +917,18 @@ impl GlobalCacheTracker {
continue;
};
let index_path = base_path.join(index_name);
for crate_name in Self::names_from(&index_path)? {
if crate_name.ends_with(".crate") {
// Missing files should have already been taken care of by
// update_db_for_removed.
let size = paths::metadata(index_path.join(&crate_name))?.len();
insert_stmt.execute(params![id, crate_name, size, now])?;
}
let crates = Self::read_dir_with_filter(&index_path, &|entry| {
entry.file_type().map_or(false, |ty| ty.is_file())
&& entry
.file_name()
.to_str()
.map_or(false, |name| name.ends_with(".crate"))
})?;
for crate_name in crates {
// Missing files should have already been taken care of by
// update_db_for_removed.
let size = paths::metadata(index_path.join(&crate_name))?.len();
insert_stmt.execute(params![id, crate_name, size, now])?;
}
}
Ok(())
@ -931,7 +949,7 @@ impl GlobalCacheTracker {
) -> CargoResult<()> {
trace!(target: "gc", "populating untracked files for {table_name}");
// Gather names (and make sure they are in the database).
let id_names = Self::names_from(&base_path)?;
let id_names = Self::list_dir_names(&base_path)?;
// This SELECT is used to determine if the directory is already
// tracked. We don't want to do the expensive size computation unless
@ -954,7 +972,7 @@ impl GlobalCacheTracker {
continue;
};
let index_path = base_path.join(id_name);
let names = Self::names_from(&index_path)?;
let names = Self::list_dir_names(&index_path)?;
let max = names.len();
for (i, name) in names.iter().enumerate() {
if select_stmt.exists(params![id, name])? {

View File

@ -2103,3 +2103,51 @@ fn forward_compatible() {
assert_eq!(cos.len(), 1);
drop(lock);
}
#[cargo_test]
fn resilient_to_unexpected_files() {
// Tests that it doesn't choke on unexpected files.
//
// Regression test: the global cache tracker scans the cache directories
// under `$CARGO_HOME` and used to assume every entry was a directory.
// A stray plain file (such as macOS's `.DS_Store`) would break the
// tracking/gc synchronization. This test plants such files in each
// scanned location and verifies `cargo clean gc` still succeeds.
//
// Set up one registry dependency and one git dependency so that all five
// cache areas (registry index/cache/src and git db/checkouts) get populated.
Package::new("bar", "1.0.0").publish();
let git_project = git::new("from_git", |p| {
p.file("Cargo.toml", &basic_manifest("from_git", "1.0.0"))
.file("src/lib.rs", "")
});
let p = project()
.file(
"Cargo.toml",
&format!(
r#"
[package]
name = "foo"
[dependencies]
bar = "1.0.0"
from_git = {{ git = '{}' }}
"#,
git_project.url()
),
)
.file("src/lib.rs", "")
.build();
// Fetch with gc tracking enabled, backdating the recorded last-use time
// (4 months ago) so the cache entries are considered stale and eligible
// for collection below.
p.cargo("fetch -Zgc")
.masquerade_as_nightly_cargo(&["gc"])
.env("__CARGO_TEST_LAST_USE_NOW", months_ago_unix(4))
.run();
// Drop plain files named `foo` into every cache location where the
// tracker expects only directories — these are the "unexpected files"
// that must be ignored rather than cause an error.
let root = paths::home().join(".cargo");
std::fs::write(root.join("registry/index/foo"), "").unwrap();
std::fs::write(root.join("registry/cache/foo"), "").unwrap();
std::fs::write(root.join("registry/src/foo"), "").unwrap();
std::fs::write(root.join("git/db/foo"), "").unwrap();
std::fs::write(root.join("git/checkouts/foo"), "").unwrap();
// Gc must complete cleanly, removing the stale tracked entries and
// reporting only the removal summary (no errors about the stray files).
p.cargo("clean gc -Zgc")
.masquerade_as_nightly_cargo(&["gc"])
.with_stderr_data(str![[r#"
[REMOVED] [FILE_NUM] files, [FILE_SIZE]B total
"#]])
.run();
}