From da3ca05677fff6d82e1b1eddfd5c0c95f9ff0ed7 Mon Sep 17 00:00:00 2001 From: Eric Huss Date: Wed, 6 Sep 2023 21:34:05 -0700 Subject: [PATCH] Add a global cache garbage collector. This adds a garbage collector which will remove old files from cargo's global cache. A general overview of the changes here: - `cargo::core::global_cache_tracker` contains the `GlobalCacheTracker` which handles the interface to a sqlite database which stores timestamps of the last time a file was used. - `DeferredGlobalLastUse` is a type that implements an optimization for collecting last-use timestamps so that they can be flushed to disk all at once. - `cargo::core::gc` contains the `Gc` type which is the interface for performing garbage collection. It coordinates with the `GlobalCacheTracker` for determining what to delete. - Garbage collection can either be automatic or manual. The automatic garbage collection supports some config options for defining when it runs and how much it deletes. - Manual garbage collection can be performed via options to `cargo clean`. - `cargo clean` uses the new package cache locking system to coordinate access to the package cache to prevent interference with other cargo commands running concurrently. --- Cargo.lock | 20 +- Cargo.toml | 2 + crates/cargo-test-support/Cargo.toml | 1 + crates/cargo-test-support/src/paths.rs | 13 + src/bin/cargo/commands/clean.rs | 222 ++- src/cargo/core/gc.rs | 550 ++++++ src/cargo/core/global_cache_tracker.rs | 1754 +++++++++++++++++++ src/cargo/core/mod.rs | 2 + src/cargo/core/package.rs | 4 + src/cargo/ops/cargo_clean.rs | 97 +- src/cargo/ops/cargo_compile/mod.rs | 1 + src/cargo/ops/cargo_fetch.rs | 1 + src/cargo/ops/mod.rs | 2 +- src/cargo/ops/resolve.rs | 3 + src/cargo/sources/git/source.rs | 34 +- src/cargo/sources/registry/download.rs | 18 + src/cargo/sources/registry/http_remote.rs | 19 +- src/cargo/sources/registry/mod.rs | 20 + src/cargo/sources/registry/remote.rs | 18 +- src/cargo/util/config/mod.rs | 24 + tests/testsuite/clean.rs | 14 +- tests/testsuite/global_cache_tracker.rs | 1890 +++++++++++++++++++++ tests/testsuite/main.rs | 1 + 23 files changed, 4652 insertions(+), 58 deletions(-) create mode 100644 src/cargo/core/gc.rs create mode 100644 src/cargo/core/global_cache_tracker.rs create mode 100644 tests/testsuite/global_cache_tracker.rs diff --git a/Cargo.lock b/Cargo.lock index 9cf756194..0cd1bbbe6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -287,6 +287,7 @@ dependencies = [ "pathdiff", "pulldown-cmark", "rand", + "regex", "rusqlite", "rustfix", "same-file", @@ -407,6 +408,7 @@ dependencies = [ "time", "toml", "url", + "walkdir", "windows-sys", ] @@ -2669,7 +2671,7 @@ dependencies = [ "rand", "rand_chacha", "rand_xorshift", - "regex-syntax 0.7.2", + "regex-syntax 0.7.5", "rusty-fork", "tempfile", "unarray", @@ -2797,13 +2799,14 @@ dependencies = [ [[package]] name = "regex" -version = "1.8.4" +version = "1.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0ab3ca65655bb1e41f2a8c8cd662eb4fb035e67c3f78da1d61dffe89d07300f" +checksum = "697061221ea1b4a94a624f67d0ae2bfe4e22b8a17b6a192afb11046542cc8c47" dependencies = [ "aho-corasick", "memchr", - "regex-syntax 0.7.2", + "regex-automata 0.3.8", + "regex-syntax 0.7.5", ] [[package]] @@ -2820,6 +2823,11 @@ name = "regex-automata" version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2f401f4955220693b56f8ec66ee9c78abffd8d1c4f23dc41a23839eb88f0795" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax 0.7.5", 
+]

[[package]]
name = "regex-syntax"
@@ -2829,9 +2837,9 @@ checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1"

[[package]]
name = "regex-syntax"
-version = "0.7.2"
+version = "0.7.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "436b050e76ed2903236f032a59761c1eb99e1b0aead2c257922771dab1fc8c78"
+checksum = "dbb5fb1acd8a1a18b3dd5be62d25485eb770e05afb408a9627d14d451bae12da"

[[package]]
name = "resolver-tests"
diff --git a/Cargo.toml b/Cargo.toml
index 4c3a4bde6..60ffec21b 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -73,6 +73,7 @@ pretty_assertions = "1.4.0"
proptest = "1.3.1"
pulldown-cmark = { version = "0.9.3", default-features = false }
rand = "0.8.5"
+regex = "1.9.3"
rusqlite = { version = "0.29.0", features = ["bundled"] }
rustfix = "0.6.1"
same-file = "1.0.6"
@@ -163,6 +164,7 @@ pasetors.workspace = true
pathdiff.workspace = true
pulldown-cmark.workspace = true
rand.workspace = true
+regex.workspace = true
rusqlite.workspace = true
rustfix.workspace = true
semver.workspace = true
diff --git a/crates/cargo-test-support/Cargo.toml b/crates/cargo-test-support/Cargo.toml
index fc32e1c9c..42f8c2af9 100644
--- a/crates/cargo-test-support/Cargo.toml
+++ b/crates/cargo-test-support/Cargo.toml
@@ -29,6 +29,7 @@ tar.workspace = true
time.workspace = true
toml.workspace = true
url.workspace = true
+walkdir.workspace = true

[target.'cfg(windows)'.dependencies]
windows-sys = { workspace = true, features = ["Win32_Storage_FileSystem"] }
diff --git a/crates/cargo-test-support/src/paths.rs b/crates/cargo-test-support/src/paths.rs
index 50040e1d4..8e2909963 100644
--- a/crates/cargo-test-support/src/paths.rs
+++ b/crates/cargo-test-support/src/paths.rs
@@ -114,6 +114,10 @@ pub trait CargoPathExt {
    fn rm_rf(&self);
    fn mkdir_p(&self);

+    /// Returns a list of all files and directories underneath the given
+    /// directory, recursively, including the starting path.
+    fn ls_r(&self) -> Vec<PathBuf>;
+
    fn move_into_the_past(&self) {
        self.move_in_time(|sec, nsec| (sec - 3600, nsec))
    }
@@ -155,6 +159,15 @@ impl CargoPathExt for Path {
            .unwrap_or_else(|e| panic!("failed to mkdir_p {}: {}", self.display(), e))
    }

+    fn ls_r(&self) -> Vec<PathBuf> {
+        let mut file_list: Vec<_> = walkdir::WalkDir::new(self)
+            .into_iter()
+            .filter_map(|e| e.map(|e| e.path().to_owned()).ok())
+            .collect();
+        file_list.sort();
+        file_list
+    }
+
    fn move_in_time<F>(&self, travel_amount: F)
    where
        F: Fn(i64, u32) -> (i64, u32),
diff --git a/src/bin/cargo/commands/clean.rs b/src/bin/cargo/commands/clean.rs
index 8596561c9..c51de5650 100644
--- a/src/bin/cargo/commands/clean.rs
+++ b/src/bin/cargo/commands/clean.rs
@@ -1,7 +1,11 @@
use crate::command_prelude::*;
-
+use cargo::core::gc::{parse_human_size, parse_time_span};
+use cargo::core::gc::{AutoGcKind, GcOpts};
use cargo::ops::{self, CleanOptions};
use cargo::util::print_available_packages;
+use cargo::CargoResult;
+use clap::builder::{PossibleValuesParser, TypedValueParser};
+use std::time::Duration;

pub fn cli() -> Command {
    subcommand("clean")
@@ -15,18 +19,227 @@ pub fn cli() -> Command {
        .arg_target_dir()
        .arg_manifest_path()
        .arg_dry_run("Display what would be deleted without deleting anything")
+
+        // NOTE: Not all of these options may get stabilized. Some of them are
+        // very low-level details, and may not be something typical users need.
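+        //
+        // The options below fall into three groups: `--gc` selects which
+        // kinds of cache data to clean, the `--max-*-age` options delete
+        // entries that have not been used within a given duration (values
+        // like "3 days" or "1 month", parsed by `parse_time_span`), and the
+        // `--max-*-size` options delete entries until the cache is under a
+        // given size (values like "500MB", parsed by `parse_human_size`).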
+        .arg(
+            optional_opt(
+                "gc",
+                "Delete old and unused files (unstable) (comma separated): all, download, target, shared-target",
+            )
+            .hide(true)
+            .value_name("KINDS")
+            .value_parser(
+                PossibleValuesParser::new(["all", "download", "target", "shared-target"]).map(|x|
+                    match x.as_str() {
+                        "all" => AutoGcKind::All,
+                        "download" => AutoGcKind::Download,
+                        "target" => panic!("target is not yet implemented"),
+                        "shared-target" => panic!("shared-target is not yet implemented"),
+                        x => panic!("possible value out of sync with `{x}`"),
+                    }
+                ))
+            .require_equals(true),
+        )
+        .arg(
+            opt(
+                "max-src-age",
+                "Deletes source cache files that have not been used since the given age (unstable)",
+            )
+            .hide(true)
+            .value_name("DURATION"),
+        )
+        .arg(
+            opt(
+                "max-crate-age",
+                "Deletes crate cache files that have not been used since the given age (unstable)",
+            )
+            .hide(true)
+            .value_name("DURATION"),
+        )
+        .arg(
+            opt(
+                "max-index-age",
+                "Deletes registry indexes that have not been used since the given age (unstable)",
+            )
+            .hide(true)
+            .value_name("DURATION"),
+        )
+        .arg(
+            opt(
+                "max-git-co-age",
+                "Deletes git dependency checkouts that have not been used since the given age (unstable)",
+            )
+            .hide(true)
+            .value_name("DURATION"),
+        )
+        .arg(
+            opt(
+                "max-git-db-age",
+                "Deletes git dependency clones that have not been used since the given age (unstable)",
+            )
+            .hide(true)
+            .value_name("DURATION"),
+        )
+        .arg(
+            opt(
+                "max-download-age",
+                "Deletes any downloaded cache data that has not been used since the given age (unstable)",
+            )
+            .hide(true)
+            .value_name("DURATION"),
+        )
+
+        .arg(
+            opt(
+                "max-src-size",
+                "Deletes source cache files until the cache is under the given size (unstable)",
+            )
+            .hide(true)
+            .value_name("SIZE"),
+        )
+        .arg(
+            opt(
+                "max-crate-size",
+                "Deletes crate cache files until the cache is under the given size (unstable)",
+            )
+            .hide(true)
+            .value_name("SIZE"),
+        )
+        .arg(
+            opt("max-git-size",
+                "Deletes git dependency caches until the cache is under the given size (unstable)")
+            .hide(true)
+            .value_name("SIZE"))
+        .arg(
+            opt(
+                "max-download-size",
+                "Deletes downloaded cache data until the cache is under the given size (unstable)",
+            )
+            .hide(true)
+            .value_name("SIZE"),
+        )
+
+        // These are unimplemented. Leaving here as a guide for how this is
+        // intended to evolve. These will likely change; this is just a sketch
+        // of ideas.
+        .arg(
+            opt(
+                "max-target-age",
+                "Deletes any build artifact files that have not been used since the given age (unstable) (UNIMPLEMENTED)",
+            )
+            .hide(true)
+            .value_name("DURATION"),
+        )
+        .arg(
+            // TODO: come up with something less wordy?
+            opt(
+                "max-shared-target-age",
+                "Deletes any shared build artifact files that have not been used since the given age (unstable) (UNIMPLEMENTED)",
+            )
+            .hide(true)
+            .value_name("DURATION"),
+        )
+        .arg(
+            opt(
+                "max-target-size",
+                "Deletes build artifact files until the cache is under the given size (unstable) (UNIMPLEMENTED)",
+            )
+            .hide(true)
+            .value_name("SIZE"),
+        )
+        .arg(
+            // TODO: come up with something less wordy?
+            opt(
+                "max-shared-target-size",
+                "Deletes shared build artifact files until the cache is under the given size (unstable) (UNIMPLEMENTED)",
+            )
+            .hide(true)
+            .value_name("SIZE"),
+        )
+
        .after_help(color_print::cstr!(
            "Run `cargo help clean` for more detailed information.\n"
        ))
}

pub fn exec(config: &mut Config, args: &ArgMatches) -> CliResult {
-    let ws = args.workspace(config)?;
+    let ws = args.workspace(config);

    if args.is_present_with_zero_values("package") {
-        print_available_packages(&ws)?;
+        print_available_packages(&ws?)?;
+        return Ok(());
    }

+    let unstable_gc = |opt| {
+        // TODO: issue number
+        config
+            .cli_unstable()
+            .fail_if_stable_opt_custom_z(opt, 0, "gc", config.cli_unstable().gc)
+    };
+    let unstable_cache_opt = |opt| -> CargoResult<Option<&str>> {
+        let arg = args.get_one::<String>(opt).map(String::as_str);
+        if arg.is_some() {
+            unstable_gc(opt)?;
+        }
+        Ok(arg)
+    };
+    let unstable_size_opt = |opt| -> CargoResult<Option<u64>> {
+        unstable_cache_opt(opt)?
+            .map(|s| parse_human_size(s))
+            .transpose()
+    };
+    let unstable_duration_opt = |opt| -> CargoResult<Option<Duration>> {
+        unstable_cache_opt(opt)?
+            .map(|s| parse_time_span(s))
+            .transpose()
+    };
+    let unimplemented_opt = |opt| -> CargoResult<Option<&str>> {
+        let arg = args.get_one::<String>(opt).map(String::as_str);
+        if arg.is_some() {
+            anyhow::bail!("option --{opt} is not yet implemented");
+        }
+        Ok(None)
+    };
+    let unimplemented_size_opt = |opt| -> CargoResult<Option<u64>> {
+        unimplemented_opt(opt)?;
+        Ok(None)
+    };
+    let unimplemented_duration_opt = |opt| -> CargoResult<Option<Duration>> {
+        unimplemented_opt(opt)?;
+        Ok(None)
+    };
+
+    let mut gc: Vec<_> = args
+        .get_many::<AutoGcKind>("gc")
+        .unwrap_or_default()
+        .cloned()
+        .collect();
+    if gc.is_empty() && args.contains_id("gc") {
+        gc.push(AutoGcKind::All);
+    }
+    if !gc.is_empty() {
+        unstable_gc("gc")?;
+    }
+
+    let mut gc_opts = GcOpts {
+        max_src_age: unstable_duration_opt("max-src-age")?,
+        max_crate_age: unstable_duration_opt("max-crate-age")?,
+        max_index_age: unstable_duration_opt("max-index-age")?,
+        max_git_co_age: unstable_duration_opt("max-git-co-age")?,
+        max_git_db_age: unstable_duration_opt("max-git-db-age")?,
+        max_src_size: unstable_size_opt("max-src-size")?,
+        max_crate_size: unstable_size_opt("max-crate-size")?,
+        max_git_size: unstable_size_opt("max-git-size")?,
+        max_download_size: unstable_size_opt("max-download-size")?,
+        max_target_age: unimplemented_duration_opt("max-target-age")?,
+        max_shared_target_age: unimplemented_duration_opt("max-shared-target-age")?,
+        max_target_size: unimplemented_size_opt("max-target-size")?,
+        max_shared_target_size: unimplemented_size_opt("max-shared-target-size")?,
+    };
+    let max_download_age = unstable_duration_opt("max-download-age")?;
+    gc_opts.update_for_auto_gc(config, &gc, max_download_age)?;
+
    let opts = CleanOptions {
        config,
        spec: values(args, "package"),
@@ -35,7 +248,8 @@ pub fn exec(config: &mut Config, args: &ArgMatches) -> CliResult {
        profile_specified: args.contains_id("profile") || args.flag("release"),
        doc: args.flag("doc"),
        dry_run: args.dry_run(),
+        gc_opts,
    };
-    ops::clean(&ws, &opts)?;
+    ops::clean(ws, &opts)?;
    Ok(())
}
diff --git a/src/cargo/core/gc.rs b/src/cargo/core/gc.rs
new file mode 100644
index 000000000..f70aee584
--- /dev/null
+++ b/src/cargo/core/gc.rs
@@ -0,0 +1,550 @@
+//! Support for garbage collecting unused downloaded files and unused
+//! artifacts from the target directory.
+//!
+//! Garbage collection can be done "automatically" by cargo, which it does by
+//! default once a day when running any command that does a lot of work (like
+//! `cargo build`).
+//!
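+//! As a sketch of how this is configured (based on the `AutoConfig` struct
+//! below; the values shown mirror the built-in defaults applied in
+//! `update_for_auto_gc_config`), the `gc.auto` config table looks like:
+//!
+//! ```toml
+//! [gc.auto]
+//! frequency = "1 day"
+//! max-src-age = "1 month"
+//! max-crate-age = "3 months"
+//! max-index-age = "3 months"
+//! max-git-co-age = "1 month"
+//! max-git-db-age = "3 months"
+//! ```
+//!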
+//! Garbage collection can also be done manually via the `cargo clean` command
+//! by passing any option that requests deleting unused files.
+//!
+//! Garbage collection is guided by the last-use tracking implemented in the
+//! [`crate::core::global_cache_tracker`] module.

+use crate::core::global_cache_tracker::{self, GlobalCacheTracker};
+use crate::core::Verbosity;
+use crate::ops::CleanContext;
+use crate::util::cache_lock::{CacheLock, CacheLockMode};
+use crate::{CargoResult, Config};
+use anyhow::format_err;
+use anyhow::{bail, Context};
+use serde::Deserialize;
+use std::time::Duration;
+
+/// Garbage collector.
+pub struct Gc<'a, 'config> {
+    config: &'config Config,
+    global_cache_tracker: &'a mut GlobalCacheTracker,
+    /// A lock on the package cache.
+    ///
+    /// This must be held while the garbage collector exists, since we don't
+    /// want multiple cargos to be allowed to write to the cache at the same
+    /// time, or for others to read while we are modifying the cache.
+    #[allow(dead_code)] // Held for drop.
+    lock: CacheLock<'config>,
+}
+
+/// Automatic garbage collection settings from the `gc.auto` config table.
+///
+/// NOTE: Not all of these options may get stabilized. Some of them are very
+/// low-level details, and may not be something typical users need.
+///
+/// If any of these options are `None`, the built-in default is used.
+#[derive(Deserialize, Default)]
+#[serde(rename_all = "kebab-case")]
+struct AutoConfig {
+    /// The maximum frequency that automatic garbage collection happens.
+    frequency: Option<String>,
+    /// Anything older than this duration will be deleted in the source cache.
+    max_src_age: Option<String>,
+    /// Anything older than this duration will be deleted in the compressed crate cache.
+    max_crate_age: Option<String>,
+    /// Any index older than this duration will be deleted from the index cache.
+    max_index_age: Option<String>,
+    /// Any git checkout older than this duration will be deleted from the checkout cache.
+    max_git_co_age: Option<String>,
+    /// Any git clone older than this duration will be deleted from the git cache.
+    max_git_db_age: Option<String>,
+}
+
+/// Options to use for garbage collection.
+#[derive(Clone, Debug, Default)]
+pub struct GcOpts {
+    /// The `--max-src-age` CLI option.
+    pub max_src_age: Option<Duration>,
+    /// The `--max-crate-age` CLI option.
+    pub max_crate_age: Option<Duration>,
+    /// The `--max-index-age` CLI option.
+    pub max_index_age: Option<Duration>,
+    /// The `--max-git-co-age` CLI option.
+    pub max_git_co_age: Option<Duration>,
+    /// The `--max-git-db-age` CLI option.
+    pub max_git_db_age: Option<Duration>,
+    /// The `--max-src-size` CLI option.
+    pub max_src_size: Option<u64>,
+    /// The `--max-crate-size` CLI option.
+    pub max_crate_size: Option<u64>,
+    /// The `--max-git-size` CLI option.
+    pub max_git_size: Option<u64>,
+    /// The `--max-download-size` CLI option.
+    pub max_download_size: Option<u64>,
+
+    /// The `--max-target-age` CLI option (UNIMPLEMENTED).
+    pub max_target_age: Option<Duration>,
+    /// The `--max-shared-target-age` CLI option (UNIMPLEMENTED).
+    pub max_shared_target_age: Option<Duration>,
+    /// The `--max-target-size` CLI option (UNIMPLEMENTED).
+    pub max_target_size: Option<u64>,
+    /// The `--max-shared-target-size` CLI option (UNIMPLEMENTED).
+    pub max_shared_target_size: Option<u64>,
+}
+
+impl GcOpts {
+    /// Returns whether any download cache cleaning options are set.
+    pub fn is_download_cache_opt_set(&self) -> bool {
+        self.max_src_age.is_some()
+            || self.max_crate_age.is_some()
+            || self.max_index_age.is_some()
+            || self.max_git_co_age.is_some()
+            || self.max_git_db_age.is_some()
+            || self.max_src_size.is_some()
+            || self.max_crate_size.is_some()
+            || self.max_git_size.is_some()
+            || self.max_download_size.is_some()
+    }
+
+    /// Returns whether any download cache cleaning options based on size are set.
+    pub fn is_download_cache_size_set(&self) -> bool {
+        self.max_src_size.is_some()
+            || self.max_crate_size.is_some()
+            || self.max_git_size.is_some()
+            || self.max_download_size.is_some()
+    }
+
+    /// Returns whether any target directory cleaning options are set.
+    pub fn is_target_opt_set(&self) -> bool {
+        self.max_target_size.is_some()
+            || self.max_target_age.is_some()
+            || self.max_shared_target_age.is_some()
+            || self.max_shared_target_size.is_some()
+    }
+
+    /// Updates the configuration of this [`GcOpts`] to incorporate the
+    /// settings from config and the given CLI options.
+    ///
+    /// * `kinds` is a list of [`AutoGcKind`] specifying which kinds of
+    ///   cleaning are being requested. This corresponds to the `cargo clean
+    ///   --gc` flag. If empty, no config options are incorporated.
+    /// * `max_download_age` is the `--max-download-age` CLI option which
+    ///   requires special handling since it implicitly overlaps several of
+    ///   the other age options. For each of those, the newer (shorter) of
+    ///   this value or the explicit value is used.
+    ///
+    /// The `kinds` list is used in a few different ways:
+    ///
+    /// * If empty, uses only the options the user specified on the
+    ///   command-line, like `cargo clean --max-crate-size=…`.
+    /// * If the user specified a `cargo clean --gc` option, then the `kinds`
+    ///   list is filled in with whatever `--gc` option the user picked, and
+    ///   then this function *merges* the settings between the requested
+    ///   `--gc` option and any options that were explicitly specified.
+    /// * [`AutoGcKind::All`] is used in `cargo clean` when no options are
+    ///   specified.
+    pub fn update_for_auto_gc(
+        &mut self,
+        config: &Config,
+        kinds: &[AutoGcKind],
+        max_download_age: Option<Duration>,
+    ) -> CargoResult<()> {
+        let auto_config = config
+            .get::<Option<AutoConfig>>("gc.auto")?
+            .unwrap_or_default();
+        self.update_for_auto_gc_config(&auto_config, kinds, max_download_age)
+    }
+
+    fn update_for_auto_gc_config(
+        &mut self,
+        auto_config: &AutoConfig,
+        kinds: &[AutoGcKind],
+        max_download_age: Option<Duration>,
+    ) -> CargoResult<()> {
+        for kind in kinds {
+            if matches!(kind, AutoGcKind::All | AutoGcKind::Download) {
+                self.max_src_age = newer_time_span_for_config(
+                    self.max_src_age,
+                    "gc.auto.max-src-age",
+                    auto_config.max_src_age.as_deref().unwrap_or("1 month"),
+                )?;
+                self.max_crate_age = newer_time_span_for_config(
+                    self.max_crate_age,
+                    "gc.auto.max-crate-age",
+                    auto_config.max_crate_age.as_deref().unwrap_or("3 months"),
+                )?;
+                self.max_index_age = newer_time_span_for_config(
+                    self.max_index_age,
+                    "gc.auto.max-index-age",
+                    auto_config.max_index_age.as_deref().unwrap_or("3 months"),
+                )?;
+                self.max_git_co_age = newer_time_span_for_config(
+                    self.max_git_co_age,
+                    "gc.auto.max-git-co-age",
+                    auto_config.max_git_co_age.as_deref().unwrap_or("1 month"),
+                )?;
+                self.max_git_db_age = newer_time_span_for_config(
+                    self.max_git_db_age,
+                    "gc.auto.max-git-db-age",
+                    auto_config.max_git_db_age.as_deref().unwrap_or("3 months"),
+                )?;
+            }
+            if matches!(kind, AutoGcKind::Target | AutoGcKind::SharedTarget) {
+                bail!("target is unimplemented");
+            }
+        }
+        if let Some(max_download_age) = max_download_age {
+            self.max_src_age = Some(maybe_newer_span(max_download_age, self.max_src_age));
+            self.max_crate_age = Some(maybe_newer_span(max_download_age, self.max_crate_age));
+            self.max_index_age = Some(maybe_newer_span(max_download_age, self.max_index_age));
+            self.max_git_co_age = Some(maybe_newer_span(max_download_age, self.max_git_co_age));
+            self.max_git_db_age = Some(maybe_newer_span(max_download_age, self.max_git_db_age));
+        }
+        Ok(())
+    }
+}
+
+/// The kind of automatic garbage collection to perform.
+///
+/// "Automatic" is the kind of gc performed automatically by Cargo in any
+/// command that is already doing a bunch of work. See [`auto_gc`] for more.
+#[derive(Clone, Debug)]
+pub enum AutoGcKind {
+    /// Automatically clean up the downloaded files *and* the target directory.
+    ///
+    /// This is the mode used by default.
+    All,
+    /// Automatically clean only downloaded files.
+    ///
+    /// This corresponds to `cargo clean --gc=download`.
+    Download,
+    /// Automatically clean only the target directory.
+    ///
+    /// THIS IS NOT IMPLEMENTED.
+    ///
+    /// This corresponds to `cargo clean --gc=target`.
+    Target,
+    /// Automatically clean only the shared target directory.
+    ///
+    /// THIS IS NOT IMPLEMENTED.
+    ///
+    /// This corresponds to `cargo clean --gc=shared-target`.
+    SharedTarget,
+}
+
+impl<'a, 'config> Gc<'a, 'config> {
+    pub fn new(
+        config: &'config Config,
+        global_cache_tracker: &'a mut GlobalCacheTracker,
+    ) -> CargoResult<Gc<'a, 'config>> {
+        let lock = config.acquire_package_cache_lock(CacheLockMode::MutateExclusive)?;
+        Ok(Gc {
+            config,
+            global_cache_tracker,
+            lock,
+        })
+    }
+
+    /// Performs automatic garbage collection.
+    ///
+    /// This returns immediately without doing work if garbage collection has
+    /// been performed recently (since `gc.auto.frequency`).
+    fn auto(&mut self, clean_ctx: &mut CleanContext<'config>) -> CargoResult<()> {
+        if !self.config.cli_unstable().gc {
+            return Ok(());
+        }
+        let auto_config = self
+            .config
+            .get::<Option<AutoConfig>>("gc.auto")?
+            .unwrap_or_default();
+        let Some(freq) = parse_frequency(auto_config.frequency.as_deref().unwrap_or("1 day"))?
+        else {
+            tracing::trace!("auto gc disabled");
+            return Ok(());
+        };
+        if !self.global_cache_tracker.should_run_auto_gc(freq)? {
+            return Ok(());
+        }
+        let mut gc_opts = GcOpts::default();
+        gc_opts.update_for_auto_gc_config(&auto_config, &[AutoGcKind::All], None)?;
+        self.gc(clean_ctx, &gc_opts)?;
+        if !clean_ctx.dry_run {
+            self.global_cache_tracker.set_last_auto_gc()?;
+        }
+        Ok(())
+    }
+
+    /// Performs garbage collection based on the given options.
+    pub fn gc(
+        &mut self,
+        clean_ctx: &mut CleanContext<'config>,
+        gc_opts: &GcOpts,
+    ) -> CargoResult<()> {
+        self.global_cache_tracker.clean(clean_ctx, gc_opts)?;
+        // In the future, other gc operations go here, such as target cleaning.
+        Ok(())
+    }
+}
+
+/// Returns the shorter duration of `cur_span` versus `config_span`.
+///
+/// This is used because the user may specify multiple options which overlap,
+/// and this will pick whichever one is shorter.
+///
+/// * `cur_span` is the span we are comparing against (the value from the CLI
+///   option). If None, just returns the config duration.
+/// * `config_name` is the name of the config option the span is loaded from.
+/// * `config_span` is the span value loaded from config.
+fn newer_time_span_for_config(
+    cur_span: Option<Duration>,
+    config_name: &str,
+    config_span: &str,
+) -> CargoResult<Option<Duration>> {
+    let config_span = parse_time_span_for_config(config_name, config_span)?;
+    Ok(Some(maybe_newer_span(config_span, cur_span)))
+}
+
+/// Returns whichever [`Duration`] is shorter.
+fn maybe_newer_span(a: Duration, b: Option<Duration>) -> Duration {
+    match b {
+        Some(b) => {
+            if b < a {
+                b
+            } else {
+                a
+            }
+        }
+        None => a,
+    }
+}
+
+/// Parses a frequency string.
+///
+/// Returns `Ok(None)` if the frequency is "never".
+fn parse_frequency(frequency: &str) -> CargoResult<Option<Duration>> {
+    if frequency == "always" {
+        return Ok(Some(Duration::new(0, 0)));
+    } else if frequency == "never" {
+        return Ok(None);
+    }
+    let duration = maybe_parse_time_span(frequency).ok_or_else(|| {
+        format_err!(
+            "config option `gc.auto.frequency` expected a value of \"always\", \"never\", \
+             or \"N seconds/minutes/days/weeks/months\", got: {frequency:?}"
+        )
+    })?;
+    Ok(Some(duration))
+}
+
+/// Parses a time span value fetched from config.
+///
+/// This is here to provide better error messages specific to reading from
+/// config.
+fn parse_time_span_for_config(config_name: &str, span: &str) -> CargoResult<Duration> {
+    maybe_parse_time_span(span).ok_or_else(|| {
+        format_err!(
+            "config option `{config_name}` expected a value of the form \
+             \"N seconds/minutes/days/weeks/months\", got: {span:?}"
+        )
+    })
+}
+
+/// Parses a time span string.
+///
+/// Returns None if the value is not valid. See [`parse_time_span`] if you
+/// need a variant that generates an error message.
+fn maybe_parse_time_span(span: &str) -> Option<Duration> {
+    let Some(right_i) = span.find(|c: char| !c.is_ascii_digit()) else {
+        return None;
+    };
+    let left = &span[..right_i];
+    let mut right = &span[right_i..];
+    if right.starts_with(' ') {
+        right = &right[1..];
+    }
+    let count: u64 = left.parse().ok()?;
+    let factor = match right {
+        "second" | "seconds" => 1,
+        "minute" | "minutes" => 60,
+        "hour" | "hours" => 60 * 60,
+        "day" | "days" => 24 * 60 * 60,
+        "week" | "weeks" => 7 * 24 * 60 * 60,
+        "month" | "months" => 30 * 24 * 60 * 60,
+        _ => return None,
+    };
+    Some(Duration::from_secs(factor * count))
+}
+
+/// Parses a time span string.
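+///
+/// A sketch of the accepted format, mirroring the unit table in
+/// [`maybe_parse_time_span`]:
+///
+/// ```ignore
+/// use std::time::Duration;
+/// assert_eq!(parse_time_span("90 seconds").unwrap(), Duration::from_secs(90));
+/// assert_eq!(parse_time_span("2 weeks").unwrap(), Duration::from_secs(2 * 7 * 24 * 60 * 60));
+/// assert!(parse_time_span("2 fortnights").is_err()); // unknown unit
+/// ```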
+pub fn parse_time_span(span: &str) -> CargoResult<Duration> {
+    maybe_parse_time_span(span).ok_or_else(|| {
+        format_err!(
+            "expected a value of the form \
+             \"N seconds/minutes/days/weeks/months\", got: {span:?}"
+        )
+    })
+}
+
+/// Parses a file size using metric or IEC units.
+pub fn parse_human_size(input: &str) -> CargoResult<u64> {
+    let re = regex::Regex::new(r"(?i)^([0-9]+(\.[0-9])?) ?(b|kb|mb|gb|kib|mib|gib)?$").unwrap();
+    let cap = re.captures(input).ok_or_else(|| {
+        format_err!(
+            "invalid size `{input}`, \
+             expected a number with an optional B, kB, MB, GB, kiB, MiB, or GiB suffix"
+        )
+    })?;
+    let factor = match cap.get(3) {
+        Some(suffix) => match suffix.as_str().to_lowercase().as_str() {
+            "b" => 1.0,
+            "kb" => 1_000.0,
+            "mb" => 1_000_000.0,
+            "gb" => 1_000_000_000.0,
+            "kib" => 1024.0,
+            "mib" => 1024.0 * 1024.0,
+            "gib" => 1024.0 * 1024.0 * 1024.0,
+            s => panic!("suffix `{s}` out of sync with regex"),
+        },
+        None => {
+            return cap[1]
+                .parse()
+                .with_context(|| format!("expected an integer size, got `{}`", &cap[1]))
+        }
+    };
+    let num = cap[1]
+        .parse::<f64>()
+        .with_context(|| format!("expected an integer or float, found `{}`", &cap[1]))?;
+    Ok((num * factor) as u64)
+}
+
+/// Performs automatic garbage collection.
+///
+/// This is called in various places in Cargo where garbage collection should
+/// be performed automatically based on the config settings. The default
+/// behavior is to only clean once a day.
+///
+/// This should only be called in code paths for commands that are already
+/// doing a lot of work. It should only be called *after* crates are
+/// downloaded so that the last-use data is updated first.
+///
+/// It should be cheap to call this multiple times (subsequent calls are
+/// ignored), but try not to abuse that.
+pub fn auto_gc(config: &Config) {
+    if !config.cli_unstable().gc {
+        return;
+    }
+    if !config.network_allowed() {
+        // As a conservative choice, auto-gc is disabled when offline. If the
+        // user is indefinitely offline, we don't want to delete things they
+        // may later depend on.
+        return;
+    }
+
+    if let Err(e) = auto_gc_inner(config) {
+        if global_cache_tracker::is_silent_error(&e)
+            && config.shell().verbosity() != Verbosity::Verbose
+        {
+            tracing::warn!("failed to auto-clean cache data: {e:?}");
+        } else {
+            crate::display_warning_with_error(
+                "failed to auto-clean cache data",
+                &e,
+                &mut config.shell(),
+            );
+        }
+    }
+}
+
+fn auto_gc_inner(config: &Config) -> CargoResult<()> {
+    let _lock = match config.try_acquire_package_cache_lock(CacheLockMode::MutateExclusive)? {
+        Some(lock) => lock,
+        None => {
+            tracing::debug!("unable to acquire mutate lock, auto gc disabled");
+            return Ok(());
+        }
+    };
+    // This should not be called when there are pending deferred entries, so check that.
+    let deferred = config.deferred_global_last_use()?;
+    debug_assert!(deferred.is_empty());
+    let mut global_cache_tracker = config.global_cache_tracker()?;
+    let mut gc = Gc::new(config, &mut global_cache_tracker)?;
+    let mut clean_ctx = CleanContext::new(config);
+    gc.auto(&mut clean_ctx)?;
+    Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn time_spans() {
+        let d = |x| Some(Duration::from_secs(x));
+        assert_eq!(maybe_parse_time_span("0 seconds"), d(0));
+        assert_eq!(maybe_parse_time_span("1second"), d(1));
+        assert_eq!(maybe_parse_time_span("23 seconds"), d(23));
+        assert_eq!(maybe_parse_time_span("5 minutes"), d(60 * 5));
+        assert_eq!(maybe_parse_time_span("2 hours"), d(60 * 60 * 2));
+        assert_eq!(maybe_parse_time_span("1 day"), d(60 * 60 * 24));
+        assert_eq!(maybe_parse_time_span("2 weeks"), d(60 * 60 * 24 * 14));
+        assert_eq!(maybe_parse_time_span("6 months"), d(60 * 60 * 24 * 30 * 6));
+
+        assert_eq!(parse_frequency("5 seconds").unwrap(), d(5));
+        assert_eq!(parse_frequency("always").unwrap(), d(0));
+        assert_eq!(parse_frequency("never").unwrap(), None);
+    }
+
+    #[test]
+    fn time_span_errors() {
+        assert_eq!(maybe_parse_time_span(""), None);
+        assert_eq!(maybe_parse_time_span("1"), None);
+        assert_eq!(maybe_parse_time_span("second"), None);
+        assert_eq!(maybe_parse_time_span("+2 seconds"), None);
+        assert_eq!(maybe_parse_time_span("day"), None);
+        assert_eq!(maybe_parse_time_span("-1 days"), None);
+        assert_eq!(maybe_parse_time_span("1.5 days"), None);
+        assert_eq!(maybe_parse_time_span("1 dayz"), None);
+        assert_eq!(maybe_parse_time_span("always"), None);
+        assert_eq!(maybe_parse_time_span("never"), None);
+        assert_eq!(maybe_parse_time_span("1 day "), None);
+        assert_eq!(maybe_parse_time_span(" 1 day"), None);
+        assert_eq!(maybe_parse_time_span("1  second"), None);
+
+        let e = parse_time_span_for_config("gc.auto.max-src-age", "-1 days").unwrap_err();
+        assert_eq!(
+            e.to_string(),
+            "config option `gc.auto.max-src-age` \
+             expected a value of the form \"N seconds/minutes/days/weeks/months\", \
+             got: \"-1 days\""
+        );
+        let e = parse_frequency("abc").unwrap_err();
+        assert_eq!(
+            e.to_string(),
+            "config option `gc.auto.frequency` \
+             expected a value of \"always\", \"never\", or \"N seconds/minutes/days/weeks/months\", \
+             got: \"abc\""
+        );
+    }
+
+    #[test]
+    fn human_sizes() {
+        assert_eq!(parse_human_size("0").unwrap(), 0);
+        assert_eq!(parse_human_size("123").unwrap(), 123);
+        assert_eq!(parse_human_size("123b").unwrap(), 123);
+        assert_eq!(parse_human_size("123B").unwrap(), 123);
+        assert_eq!(parse_human_size("123 b").unwrap(), 123);
+        assert_eq!(parse_human_size("123 B").unwrap(), 123);
+        assert_eq!(parse_human_size("1kb").unwrap(), 1_000);
+        assert_eq!(parse_human_size("5kb").unwrap(), 5_000);
+        assert_eq!(parse_human_size("1mb").unwrap(), 1_000_000);
+        assert_eq!(parse_human_size("1gb").unwrap(), 1_000_000_000);
+        assert_eq!(parse_human_size("1kib").unwrap(), 1_024);
+        assert_eq!(parse_human_size("1mib").unwrap(), 1_048_576);
+        assert_eq!(parse_human_size("1gib").unwrap(), 1_073_741_824);
+        assert_eq!(parse_human_size("1.5kb").unwrap(), 1_500);
+        assert_eq!(parse_human_size("1.7b").unwrap(), 1);
+
+        assert!(parse_human_size("").is_err());
+        assert!(parse_human_size("x").is_err());
+        assert!(parse_human_size("1x").is_err());
+        assert!(parse_human_size("1 2").is_err());
+        assert!(parse_human_size("1.5").is_err());
+        assert!(parse_human_size("+1").is_err());
+        assert!(parse_human_size("123  b").is_err());
+    }
+}
diff --git a/src/cargo/core/global_cache_tracker.rs b/src/cargo/core/global_cache_tracker.rs
new file mode 100644
index 000000000..ee4024eda
--- /dev/null
+++ b/src/cargo/core/global_cache_tracker.rs
@@ -0,0 +1,1754 @@
+//! Support for tracking the last time files were used to assist with cleaning
+//! up those files if they haven't been used in a while.
+//!
+//! Tracking of cache files is stored in a sqlite database which contains a
+//! timestamp of the last time the file was used, as well as the size of the
+//! file.
+//!
+//! While cargo is running, when it detects a use of a cache file, it adds a
+//! timestamp to [`DeferredGlobalLastUse`]. This batches up a set of changes
+//! that are then flushed to the database all at once (via
+//! [`DeferredGlobalLastUse::save`]). Ideally saving would only be done once
+//! per run for performance reasons, but that is not really possible given
+//! the many different ways cargo can be used (`cargo generate-lockfile`,
+//! `cargo fetch`, and `cargo build` all exercise this code differently).
+//!
+//! All of the database interaction is done through the [`GlobalCacheTracker`]
+//! type.
+//!
+//! There is a single global [`GlobalCacheTracker`] and
+//! [`DeferredGlobalLastUse`] stored in [`Config`].
+//!
+//! ## Automatic gc
+//!
+//! Some commands (primarily the build commands) will trigger an automatic
+//! deletion of files that haven't been used in a while. The interface for
+//! this is in the [`crate::core::gc`] module. The database tracks the last
+//! time an automatic gc was performed so that it is only done once per day
+//! for performance reasons.
+//!
+//! ## Manual gc
+//!
+//! The user can perform a manual garbage collection with the `cargo clean`
+//! command. That command has a variety of options to specify what to delete.
+//! Manual gc supports deleting based on age or size or both.
+//!
+//! ## Locking
+//!
+//! Usage of the database requires that the package cache is locked to prevent
+//! concurrent access. Although sqlite has built-in locking support, we want
+//! to use cargo's locking so that the "Blocking" message gets displayed, and
+//! so that locks can block indefinitely for long-running build commands.
+//! [`rusqlite`] has a default timeout of 5 seconds, though that is
+//! configurable.
+//!
+//! When garbage collection is being performed, the package cache lock must be
+//! in [`CacheLockMode::MutateExclusive`] to ensure no other cargo process is
+//! running. See [`crate::util::cache_lock`] for more detail on locking.
+//!
+//! ## Compatibility
+//!
+//! The database must retain both forwards and backwards compatibility between
+//! different versions of cargo. For the most part, this shouldn't be too
+//! difficult to maintain. Generally sqlite doesn't change on-disk formats
+//! between versions (the introduction of WAL is one of the few examples where
+//! version 3 had a format change, but we wouldn't use it anyway since it has
+//! shared-memory requirements cargo can't depend on due to things like
+//! network mounts).
+//!
+//! Schema changes must be managed through [`migrations`] by adding new
+//! entries that make a change to the database. Changes must not break older
+//! versions of cargo. Generally, adding columns should be fine (either with a
+//! default value, or NULL). Adding tables should also be fine. Just don't do
+//! destructive things like removing a column, or changing the semantics of an
+//! existing column.
+//!
+//! ## Performance
+//!
+//! Much of the design of this system focuses on minimizing its performance
+//! impact. Every build command needs to save updates, and we try to keep
+//! that from having a noticeable impact on build times. Systems like
+//! Windows, particularly with a magnetic hard disk, can experience a fairly
+//! large impact from cargo's overhead. Cargo's benchsuite has some
+//! benchmarks to help compare different environments, or changes to the code
+//! here. Please try to keep performance in mind if making any major changes.
+//!
+//! Performance of `cargo clean` is not quite as important since it is not
+//! expected to be run often. However, it is still courteous to the user to
+//! try to not impact it too much. One part that has a performance concern is
+//! that the clean command will synchronize the database with whatever is on
+//! disk if needed (in case files were added by older versions of cargo that
+//! don't do cache tracking, or if the user manually deleted some files). This
+//! can potentially be very slow, especially if the two are very out of sync.
+//!
+//! ## Filesystems
+//!
+//! Everything here is sensitive to the kind of filesystem it is running on.
+//! People tend to run cargo in all sorts of strange environments that have
+//! limited capabilities, or on things like read-only mounts. The code here
+//! needs to gracefully handle as many situations as possible.
+//!
+//! The sections above about performance and locking are very relevant when
+//! considering different filesystems.
+//!
+//! There are checks for read-only filesystems, which are generally ignored.

+use crate::core::gc::GcOpts;
+use crate::core::Verbosity;
+use crate::ops::CleanContext;
+use crate::util::cache_lock::CacheLockMode;
+use crate::util::interning::InternedString;
+use crate::util::sqlite::{self, basic_migration, Migration};
+use crate::util::{Filesystem, Progress, ProgressStyle};
+use crate::{CargoResult, Config};
+use anyhow::{bail, Context};
+use cargo_util::paths;
+use rusqlite::{params, Connection, ErrorCode};
+use std::collections::{hash_map, HashMap};
+use std::path::{Path, PathBuf};
+use std::time::{Duration, SystemTime};
+use tracing::{debug, trace};
+
+/// The filename of the database.
+const GLOBAL_CACHE_FILENAME: &str = ".global-cache";
+
+const REGISTRY_INDEX_TABLE: &str = "registry_index";
+const REGISTRY_CRATE_TABLE: &str = "registry_crate";
+const REGISTRY_SRC_TABLE: &str = "registry_src";
+const GIT_DB_TABLE: &str = "git_db";
+const GIT_CO_TABLE: &str = "git_checkout";
+
+/// How often timestamps will be updated.
+///
+/// As an optimization timestamps are not updated unless they are older than
+/// the given number of seconds. This helps reduce the amount of disk I/O when
+/// running cargo multiple times within a short window.
+const UPDATE_RESOLUTION: u64 = 60 * 5;
+
+/// Type for timestamps as stored in the database.
+///
+/// These are seconds since the Unix epoch.
+type Timestamp = u64;
+
+/// Tracking for the global shared cache (registry files, etc.).
+#[derive(Debug)]
+pub struct GlobalCacheTracker {
+    /// Connection to the SQLite database.
+    conn: Connection,
+    auto_gc_checked_this_session: bool,
+}
+
+/// This is a cache of modifications that will be saved to disk all at once
+/// via the [`DeferredGlobalLastUse::save`] method.
+///
+/// This is here to improve performance.
+#[derive(Debug)]
+pub struct DeferredGlobalLastUse {
+    /// Cache of registry keys, used for faster fetching.
+    ///
+    /// The key is the registry name (which is its directory name) and the
+    /// value is the `id` in the `registry_index` table.
+    registry_keys: HashMap<InternedString, i64>,
+    /// Cache of git keys, used for faster fetching.
+    ///
+    /// The key is the git db name (which is its directory name) and the value
+    /// is the `id` in the `git_db` table.
+    git_keys: HashMap<InternedString, i64>,
+
+    /// New registry index entries to insert.
+    registry_index_timestamps: HashMap<RegistryIndex, Timestamp>,
+    /// New registry `.crate` entries to insert.
+    registry_crate_timestamps: HashMap<RegistryCrate, Timestamp>,
+    /// New registry src directory entries to insert.
+    registry_src_timestamps: HashMap<RegistrySrc, Timestamp>,
+    /// New git db entries to insert.
+    git_db_timestamps: HashMap<GitDb, Timestamp>,
+    /// New git checkout entries to insert.
+    git_checkout_timestamps: HashMap<GitCheckout, Timestamp>,
+    /// This is used so that a warning about failing to update the database is
+    /// only displayed once.
+    save_err_has_warned: bool,
+    /// The current time, used to improve performance to avoid accessing the
+    /// clock hundreds of times.
+    now: Timestamp,
+}
+
+/// The key for a registry index entry stored in the database.
+#[derive(Clone, Debug, Hash, Eq, PartialEq)]
+pub struct RegistryIndex {
+    pub encoded_registry_name: InternedString,
+}
+
+/// The key for a registry `.crate` entry stored in the database.
+#[derive(Clone, Debug, Hash, Eq, PartialEq)]
+pub struct RegistryCrate {
+    pub encoded_registry_name: InternedString,
+    pub crate_filename: InternedString,
+    pub size: u64,
+}
+
+/// The key for a registry src directory entry stored in the database.
+#[derive(Clone, Debug, Hash, Eq, PartialEq)]
+pub struct RegistrySrc {
+    pub encoded_registry_name: InternedString,
+    pub package_dir: InternedString,
+    /// Total size of the src directory in bytes.
+    ///
+    /// This can be None when the size is unknown. For example, when the src
+    /// directory already exists on disk, and we just want to update the
+    /// last-use timestamp. We don't want to take the expense of computing disk
+    /// usage unless necessary. `populate_untracked_src` will handle any actual
+    /// NULL values in the database, which can happen when the src directory is
+    /// created by an older version of cargo that did not track sizes.
+    pub size: Option<u64>,
+}
+
+/// The key for a git db entry stored in the database.
+#[derive(Clone, Debug, Hash, Eq, PartialEq)]
+pub struct GitDb {
+    pub encoded_git_name: InternedString,
+}
+
+/// The key for a git checkout entry stored in the database.
+#[derive(Clone, Debug, Hash, Eq, PartialEq)]
+pub struct GitCheckout {
+    pub encoded_git_name: InternedString,
+    pub short_name: InternedString,
+    /// Total size of the checkout directory.
+    ///
+    /// This can be None when the size is unknown. See [`RegistrySrc::size`]
+    /// for an explanation.
+    pub size: Option<u64>,
+}
+
+/// Paths in the global cache.
+///
+/// Accessing these assumes a lock has already been acquired.
+struct BasePaths {
+    /// Root path to the index caches.
+    index: PathBuf,
+    /// Root path to the git DBs.
+    git_db: PathBuf,
+    /// Root path to the git checkouts.
+    git_co: PathBuf,
+    /// Root path to the `.crate` files.
+    crate_dir: PathBuf,
+    /// Root path to the `src` directories.
+    src: PathBuf,
+}
+
+/// Migrations which initialize the database, and can be used to evolve it over time.
+///
+/// See [`Migration`] for more detail.
+///
+/// **Be sure to not change the order or entries here!**
+fn migrations() -> Vec<Migration> {
+    vec![
+        // registry_index tracks the overall usage of an index cache, and tracks a
+        // numeric ID to refer to that index that is used in other tables.
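+        //
+        // For orientation, the tables created below relate as follows
+        // (children reference their parent's numeric id, and are removed
+        // via ON DELETE CASCADE):
+        //
+        //     registry_index.id <- registry_crate.registry_id
+        //                       <- registry_src.registry_id
+        //     git_db.id         <- git_checkout.git_id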
+        basic_migration(
+            "CREATE TABLE registry_index (
+                id INTEGER PRIMARY KEY AUTOINCREMENT,
+                name TEXT UNIQUE NOT NULL,
+                timestamp INTEGER NOT NULL
+            )",
+        ),
+        // .crate files
+        basic_migration(
+            "CREATE TABLE registry_crate (
+                registry_id INTEGER NOT NULL,
+                name TEXT NOT NULL,
+                size INTEGER NOT NULL,
+                timestamp INTEGER NOT NULL,
+                PRIMARY KEY (registry_id, name),
+                FOREIGN KEY (registry_id) REFERENCES registry_index (id) ON DELETE CASCADE
+            )",
+        ),
+        // Extracted src directories
+        //
+        // Note that `size` can be NULL. This will happen when marking a src
+        // directory as used that was created by an older version of cargo
+        // that didn't do size tracking.
+        basic_migration(
+            "CREATE TABLE registry_src (
+                registry_id INTEGER NOT NULL,
+                name TEXT NOT NULL,
+                size INTEGER,
+                timestamp INTEGER NOT NULL,
+                PRIMARY KEY (registry_id, name),
+                FOREIGN KEY (registry_id) REFERENCES registry_index (id) ON DELETE CASCADE
+            )",
+        ),
+        // Git db directories
+        basic_migration(
+            "CREATE TABLE git_db (
+                id INTEGER PRIMARY KEY AUTOINCREMENT,
+                name TEXT UNIQUE NOT NULL,
+                timestamp INTEGER NOT NULL
+            )",
+        ),
+        // Git checkout directories
+        basic_migration(
+            "CREATE TABLE git_checkout (
+                git_id INTEGER NOT NULL,
+                name TEXT NOT NULL,
+                size INTEGER,
+                timestamp INTEGER NOT NULL,
+                PRIMARY KEY (git_id, name),
+                FOREIGN KEY (git_id) REFERENCES git_db (id) ON DELETE CASCADE
+            )",
+        ),
+        // This is a general-purpose single-row table that can store arbitrary
+        // data. Feel free to add columns (with ALTER TABLE) if necessary.
+        basic_migration(
+            "CREATE TABLE global_data (
+                last_auto_gc INTEGER NOT NULL
+            )",
+        ),
+        // last_auto_gc tracks the last time auto-gc was run (so that it only
+        // runs roughly once a day for performance reasons). Prime it with the
+        // current time to establish a baseline.
+        Box::new(|conn| {
+            conn.execute(
+                "INSERT INTO global_data (last_auto_gc) VALUES (?1)",
+                [now()],
+            )?;
+            Ok(())
+        }),
+    ]
+}
+
+impl GlobalCacheTracker {
+    /// Creates a new [`GlobalCacheTracker`].
+    ///
+    /// The caller is responsible for locking the package cache with
+    /// [`CacheLockMode::DownloadExclusive`] before calling this.
+    pub fn new(config: &Config) -> CargoResult<GlobalCacheTracker> {
+        let mut conn = if config.cli_unstable().gc {
+            let db_path = Self::db_path(config);
+            // A package cache lock is required to ensure only one cargo is
+            // accessing it at the same time. If there is concurrent access, we
+            // want to rely on cargo's own "Blocking" system (which can
+            // provide user feedback) rather than blocking inside sqlite
+            // (which by default has a short timeout).
+            let db_path =
+                config.assert_package_cache_locked(CacheLockMode::DownloadExclusive, &db_path);
+            Connection::open(db_path)?
+        } else {
+            // To simplify things (so there aren't checks everywhere for being
+            // enabled), just process everything in memory.
+            Connection::open_in_memory()?
+        };
+        conn.pragma_update(None, "foreign_keys", true)?;
+        sqlite::migrate(&mut conn, &migrations())?;
+        Ok(GlobalCacheTracker {
+            conn,
+            auto_gc_checked_this_session: false,
+        })
+    }
+
+    /// The path to the database.
+    pub fn db_path(config: &Config) -> Filesystem {
+        config.home().join(GLOBAL_CACHE_FILENAME)
+    }
+
+    /// Given an encoded registry name, returns its ID.
+    ///
+    /// Returns None if the given name isn't in the database.
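+    ///
+    /// A sketch of a lookup, using the encoded-name form shown in
+    /// [`Self::get_id_map`] (the directory name here is illustrative):
+    ///
+    /// ```ignore
+    /// let id = Self::id_from_name(conn, REGISTRY_INDEX_TABLE, "index.crates.io-6f17d22bba15001f")?;
+    /// assert!(id.is_some());
+    /// ```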
+    fn id_from_name(
+        conn: &Connection,
+        table_name: &str,
+        encoded_name: &str,
+    ) -> CargoResult<Option<i64>> {
+        let mut stmt =
+            conn.prepare_cached(&format!("SELECT id FROM {table_name} WHERE name = ?"))?;
+        match stmt.query_row([encoded_name], |row| row.get(0)) {
+            Ok(id) => Ok(Some(id)),
+            Err(rusqlite::Error::QueryReturnedNoRows) => Ok(None),
+            Err(e) => Err(e.into()),
+        }
+    }
+
+    /// Returns a map of ID to path for the given ids in the given table.
+    ///
+    /// For example, given `registry_index` IDs, it returns filenames of the
+    /// form "index.crates.io-6f17d22bba15001f".
+    fn get_id_map(
+        conn: &Connection,
+        table_name: &str,
+        ids: &[i64],
+    ) -> CargoResult<HashMap<i64, PathBuf>> {
+        let mut stmt =
+            conn.prepare_cached(&format!("SELECT name FROM {table_name} WHERE id = ?1"))?;
+        ids.iter()
+            .map(|id| {
+                let name = stmt.query_row(params![id], |row| {
+                    Ok(PathBuf::from(row.get::<_, String>(0)?))
+                })?;
+                Ok((*id, name))
+            })
+            .collect()
+    }
+
+    /// Returns all index cache timestamps.
+    pub fn registry_index_all(&self) -> CargoResult<Vec<(RegistryIndex, Timestamp)>> {
+        let mut stmt = self
+            .conn
+            .prepare_cached("SELECT name, timestamp FROM registry_index")?;
+        let rows = stmt
+            .query_map([], |row| {
+                let encoded_registry_name = row.get_unwrap(0);
+                let timestamp = row.get_unwrap(1);
+                let kind = RegistryIndex {
+                    encoded_registry_name,
+                };
+                Ok((kind, timestamp))
+            })?
+            .collect::<Result<Vec<_>, _>>()?;
+        Ok(rows)
+    }
+
+    /// Returns all registry crate cache timestamps.
+    pub fn registry_crate_all(&self) -> CargoResult<Vec<(RegistryCrate, Timestamp)>> {
+        let mut stmt = self.conn.prepare_cached(
+            "SELECT registry_index.name, registry_crate.name, registry_crate.size, registry_crate.timestamp
+             FROM registry_index, registry_crate
+             WHERE registry_crate.registry_id = registry_index.id",
+        )?;
+        let rows = stmt
+            .query_map([], |row| {
+                let encoded_registry_name = row.get_unwrap(0);
+                let crate_filename = row.get_unwrap(1);
+                let size = row.get_unwrap(2);
+                let timestamp = row.get_unwrap(3);
+                let kind = RegistryCrate {
+                    encoded_registry_name,
+                    crate_filename,
+                    size,
+                };
+                Ok((kind, timestamp))
+            })?
+            .collect::<Result<Vec<_>, _>>()?;
+        Ok(rows)
+    }
+
+    /// Returns all registry source cache timestamps.
+    pub fn registry_src_all(&self) -> CargoResult<Vec<(RegistrySrc, Timestamp)>> {
+        let mut stmt = self.conn.prepare_cached(
+            "SELECT registry_index.name, registry_src.name, registry_src.size, registry_src.timestamp
+             FROM registry_index, registry_src
+             WHERE registry_src.registry_id = registry_index.id",
+        )?;
+        let rows = stmt
+            .query_map([], |row| {
+                let encoded_registry_name = row.get_unwrap(0);
+                let package_dir = row.get_unwrap(1);
+                let size = row.get_unwrap(2);
+                let timestamp = row.get_unwrap(3);
+                let kind = RegistrySrc {
+                    encoded_registry_name,
+                    package_dir,
+                    size,
+                };
+                Ok((kind, timestamp))
+            })?
+            .collect::<Result<Vec<_>, _>>()?;
+        Ok(rows)
+    }
+
+    /// Returns all git db timestamps.
+    pub fn git_db_all(&self) -> CargoResult<Vec<(GitDb, Timestamp)>> {
+        let mut stmt = self
+            .conn
+            .prepare_cached("SELECT name, timestamp FROM git_db")?;
+        let rows = stmt
+            .query_map([], |row| {
+                let encoded_git_name = row.get_unwrap(0);
+                let timestamp = row.get_unwrap(1);
+                let kind = GitDb { encoded_git_name };
+                Ok((kind, timestamp))
+            })?
+            .collect::<Result<Vec<_>, _>>()?;
+        Ok(rows)
+    }
+
+    /// Returns all git checkout timestamps.
+    pub fn git_checkout_all(&self) -> CargoResult<Vec<(GitCheckout, Timestamp)>> {
+        let mut stmt = self.conn.prepare_cached(
+            "SELECT git_db.name, git_checkout.name, git_checkout.size, git_checkout.timestamp
+             FROM git_db, git_checkout
+             WHERE git_checkout.git_id = git_db.id",
+        )?;
+        let rows = stmt
+            .query_map([], |row| {
+                let encoded_git_name = row.get_unwrap(0);
+                let short_name = row.get_unwrap(1);
+                let size = row.get_unwrap(2);
+                let timestamp = row.get_unwrap(3);
+                let kind = GitCheckout {
+                    encoded_git_name,
+                    short_name,
+                    size,
+                };
+                Ok((kind, timestamp))
+            })?
+            .collect::<Result<Vec<_>, _>>()?;
+        Ok(rows)
+    }
+
+    /// Returns whether or not an automatic gc should be performed, based on
+    /// the last time it ran as recorded in the database.
+    pub fn should_run_auto_gc(&mut self, frequency: Duration) -> CargoResult<bool> {
+        trace!("should_run_auto_gc");
+        if self.auto_gc_checked_this_session {
+            return Ok(false);
+        }
+        let last_auto_gc: Timestamp =
+            self.conn
+                .query_row("SELECT last_auto_gc FROM global_data", [], |row| row.get(0))?;
+        let should_run = last_auto_gc + frequency.as_secs() < now();
+        trace!(
+            "last auto gc was {}, {}",
+            last_auto_gc,
+            if should_run { "running" } else { "skipping" }
+        );
+        self.auto_gc_checked_this_session = true;
+        Ok(should_run)
+    }
+
+    /// Writes to the database to indicate that an automatic GC has just been
+    /// completed.
+    pub fn set_last_auto_gc(&self) -> CargoResult<()> {
+        self.conn
+            .execute("UPDATE global_data SET last_auto_gc = ?1", [now()])?;
+        Ok(())
+    }
+
+    /// Deletes files from the global cache based on the given options.
+    pub fn clean(&mut self, clean_ctx: &mut CleanContext<'_>, gc_opts: &GcOpts) -> CargoResult<()> {
+        self.clean_inner(clean_ctx, gc_opts)
+            .with_context(|| "failed to clean entries from the global cache")
+    }
+
+    fn clean_inner(
+        &mut self,
+        clean_ctx: &mut CleanContext<'_>,
+        gc_opts: &GcOpts,
+    ) -> CargoResult<()> {
+        let _p = crate::util::profile::start("cleaning global cache files");
+        let config = clean_ctx.config;
+        let base_git_path = config.git_path().into_path_unlocked();
+        let base = BasePaths {
+            index: config.registry_index_path().into_path_unlocked(),
+            git_db: base_git_path.join("db"),
+            git_co: base_git_path.join("checkouts"),
+            crate_dir: config.registry_cache_path().into_path_unlocked(),
+            src: config.registry_source_path().into_path_unlocked(),
+        };
+        let now = now();
+        trace!("cleaning {gc_opts:?}");
+        let tx = self.conn.transaction()?;
+        let mut delete_paths = Vec::new();
+        // This can be an expensive operation, so only perform it if necessary.
+        if gc_opts.is_download_cache_opt_set() {
+            // TODO: Investigate how slow this might be.
+            Self::sync_db_with_files(
+                &tx,
+                config,
+                &base,
+                gc_opts.is_download_cache_size_set(),
+                &mut delete_paths,
+            )
+            .with_context(|| "failed to sync tracking database")?
+        }
+        if let Some(max_age) = gc_opts.max_index_age {
+            let max_age = now - max_age.as_secs();
+            Self::get_registry_index_to_clean(&tx, max_age, &base, &mut delete_paths)?;
+        }
+        if let Some(max_age) = gc_opts.max_src_age {
+            let max_age = now - max_age.as_secs();
+            Self::get_registry_items_to_clean_age(
+                &tx,
+                max_age,
+                REGISTRY_SRC_TABLE,
+                &base.src,
+                &mut delete_paths,
+            )?;
+        }
+        if let Some(max_age) = gc_opts.max_crate_age {
+            let max_age = now - max_age.as_secs();
+            Self::get_registry_items_to_clean_age(
+                &tx,
+                max_age,
+                REGISTRY_CRATE_TABLE,
+                &base.crate_dir,
+                &mut delete_paths,
+            )?;
+        }
+        if let Some(max_age) = gc_opts.max_git_db_age {
+            let max_age = now - max_age.as_secs();
+            Self::get_git_db_items_to_clean(&tx, max_age, &base, &mut delete_paths)?;
+        }
+        if let Some(max_age) = gc_opts.max_git_co_age {
+            let max_age = now - max_age.as_secs();
+            Self::get_git_co_items_to_clean(&tx, max_age, &base.git_co, &mut delete_paths)?;
+        }
+        // Size collection must happen after date collection so that dates
+        // have precedence, since size constraints are a more blunt
+        // instrument.
+        //
+        // These are also complicated by the `--max-download-size` option
+        // overlapping with `--max-crate-size` and `--max-src-size`, which
+        // requires some coordination between those options which isn't
+        // necessary with the age-based options. An item is either older than
+        // an age cutoff or it isn't, but contrast that with size, which is
+        // based on the sum of all tracked items. Also, `--max-download-size`
+        // is summed against both the crate and src tracking, which requires
+        // combining them to compute the size, and then separating them to
+        // calculate the correct paths.
+        if let Some(max_size) = gc_opts.max_crate_size {
+            Self::get_registry_items_to_clean_size(
+                &tx,
+                max_size,
+                REGISTRY_CRATE_TABLE,
+                &base.crate_dir,
+                &mut delete_paths,
+            )?;
+        }
+        if let Some(max_size) = gc_opts.max_src_size {
+            Self::get_registry_items_to_clean_size(
+                &tx,
+                max_size,
+                REGISTRY_SRC_TABLE,
+                &base.src,
+                &mut delete_paths,
+            )?;
+        }
+        if let Some(max_size) = gc_opts.max_git_size {
+            Self::get_git_items_to_clean_size(&tx, max_size, &base, &mut delete_paths)?;
+        }
+        if let Some(max_size) = gc_opts.max_download_size {
+            Self::get_registry_items_to_clean_size_both(&tx, max_size, &base, &mut delete_paths)?;
+        }
+
+        clean_ctx.remove_paths(&delete_paths)?;
+
+        if clean_ctx.dry_run {
+            tx.rollback()?;
+        } else {
+            tx.commit()?;
+        }
+        Ok(())
+    }
+
+    /// Returns a list of directory entries in the given path.
+    fn names_from(path: &Path) -> CargoResult<Vec<String>> {
+        let entries = match path.read_dir() {
+            Ok(e) => e,
+            Err(e) => {
+                if e.kind() == std::io::ErrorKind::NotFound {
+                    return Ok(Vec::new());
+                } else {
+                    return Err(
+                        anyhow::Error::new(e).context(format!("failed to read path `{path:?}`"))
+                    );
+                }
+            }
+        };
+        let names = entries
+            .filter_map(|entry| entry.ok()?.file_name().into_string().ok())
+            .collect();
+        Ok(names)
+    }
+
+    /// Synchronizes the database to match the files on disk.
+    ///
+    /// This performs the following cleanups:
+    ///
+    /// 1. Removes entries from the database that are missing on disk.
+    /// 2. Adds missing entries to the database that are on disk (such as when
+    ///    files are added by older versions of cargo).
+    /// 3. Fills in the `size` column where it is NULL (such as when something
+    ///    is added to disk by an older version of cargo, and one of the mark
+    ///    functions marked it without knowing the size).
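+    ///
+    /// (Steps 2 and 3 exist to handle files created by versions of cargo
+    /// that predate this tracking; see the module docs on compatibility.)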
+    ///
+    /// This is only called by `cargo clean` when needed since it is an
+    /// expensive operation. Size computations are only done if `sync_size` is
+    /// set since that adds an even larger expense.
+    ///
+    /// Adds paths to `delete_paths` that should be removed since they are
+    /// orphaned (for example, deleting `.crate` files if the corresponding
+    /// index doesn't exist).
+    fn sync_db_with_files(
+        conn: &Connection,
+        config: &Config,
+        base: &BasePaths,
+        sync_size: bool,
+        delete_paths: &mut Vec<PathBuf>,
+    ) -> CargoResult<()> {
+        let _p = crate::util::profile::start("global cache db sync");
+        debug!("starting db sync");
+        // For registry_index and git_db, add anything that is missing in the db.
+        Self::update_parent_for_missing_from_db(conn, REGISTRY_INDEX_TABLE, &base.index)?;
+        Self::update_parent_for_missing_from_db(conn, GIT_DB_TABLE, &base.git_db)?;
+
+        // For registry_crate, registry_src, and git_checkout, remove anything
+        // from the db that isn't on disk.
+        Self::update_db_for_removed(
+            conn,
+            REGISTRY_INDEX_TABLE,
+            "registry_id",
+            REGISTRY_CRATE_TABLE,
+            &base.crate_dir,
+        )?;
+        Self::update_db_for_removed(
+            conn,
+            REGISTRY_INDEX_TABLE,
+            "registry_id",
+            REGISTRY_SRC_TABLE,
+            &base.src,
+        )?;
+        Self::update_db_for_removed(conn, GIT_DB_TABLE, "git_id", GIT_CO_TABLE, &base.git_co)?;
+
+        // For registry_index and git_db, remove anything from the db that
+        // isn't on disk.
+        //
+        // This also collects paths for any child files that don't have their
+        // respective parent on disk.
+        Self::update_db_parent_for_removed_from_disk(
+            conn,
+            REGISTRY_INDEX_TABLE,
+            &base.index,
+            &[&base.crate_dir, &base.src],
+            delete_paths,
+        )?;
+        Self::update_db_parent_for_removed_from_disk(
+            conn,
+            GIT_DB_TABLE,
+            &base.git_db,
+            &[&base.git_co],
+            delete_paths,
+        )?;
+
+        // For registry_crate, registry_src, and git_checkout, add anything
+        // that is missing in the db.
+        Self::populate_untracked_crate(conn, &base.crate_dir)?;
+        Self::populate_untracked(
+            conn,
+            config,
+            REGISTRY_INDEX_TABLE,
+            "registry_id",
+            REGISTRY_SRC_TABLE,
+            &base.src,
+            sync_size,
+        )?;
+        Self::populate_untracked(
+            conn,
+            config,
+            GIT_DB_TABLE,
+            "git_id",
+            GIT_CO_TABLE,
+            &base.git_co,
+            sync_size,
+        )?;
+
+        // Update any NULL sizes if needed.
+        if sync_size {
+            Self::update_null_sizes(
+                conn,
+                config,
+                REGISTRY_INDEX_TABLE,
+                "registry_id",
+                REGISTRY_SRC_TABLE,
+                &base.src,
+            )?;
+            Self::update_null_sizes(
+                conn,
+                config,
+                GIT_DB_TABLE,
+                "git_id",
+                GIT_CO_TABLE,
+                &base.git_co,
+            )?;
+        }
+        Ok(())
+    }
+
+    /// For parent tables, add any entries that are on disk but aren't tracked in the db.
+    fn update_parent_for_missing_from_db(
+        conn: &Connection,
+        parent_table_name: &str,
+        base_path: &Path,
+    ) -> CargoResult<()> {
+        let _p = crate::util::profile::start(format!(
+            "update parent db for missing from db {parent_table_name}"
+        ));
+        trace!("checking for untracked parent to add to {parent_table_name}");
+        let names = Self::names_from(base_path)?;
+
+        let mut stmt = conn.prepare_cached(&format!(
+            "INSERT INTO {parent_table_name} (name, timestamp)
+             VALUES (?1, ?2)
+             ON CONFLICT DO NOTHING",
+        ))?;
+        let now = now();
+        for name in names {
+            stmt.execute(params![name, now])?;
+        }
+        Ok(())
+    }
+
+    /// Removes database entries for any files that are not on disk for the child tables.
+    ///
+    /// This could happen for example if the user manually deleted the file or
+    /// any such scenario where the filesystem and db are out of sync.
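+    ///
+    /// For example (matching the call sites in [`Self::sync_db_with_files`]),
+    /// stale `.crate` entries are removed with:
+    ///
+    /// ```ignore
+    /// Self::update_db_for_removed(
+    ///     conn,
+    ///     REGISTRY_INDEX_TABLE,
+    ///     "registry_id",
+    ///     REGISTRY_CRATE_TABLE,
+    ///     &base.crate_dir,
+    /// )?;
+    /// ```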
+ /// Removes database entries for any files that are not on disk for the child tables.
+ ///
+ /// This can happen, for example, if the user manually deleted the file,
+ /// or in any other scenario where the filesystem and db are out of sync.
+ fn update_db_for_removed(
+ conn: &Connection,
+ parent_table_name: &str,
+ id_column_name: &str,
+ table_name: &str,
+ base_path: &Path,
+ ) -> CargoResult<()> {
+ let _p = crate::util::profile::start(format!("update db for removed {table_name}"));
+ trace!("checking for db entries to remove from {table_name}");
+ let mut select_stmt = conn.prepare_cached(&format!(
+ "SELECT {table_name}.rowid, {parent_table_name}.name, {table_name}.name
+ FROM {parent_table_name}, {table_name}
+ WHERE {table_name}.{id_column_name} = {parent_table_name}.id",
+ ))?;
+ let mut delete_stmt =
+ conn.prepare_cached(&format!("DELETE FROM {table_name} WHERE rowid = ?1"))?;
+ let mut rows = select_stmt.query([])?;
+ while let Some(row) = rows.next()? {
+ let rowid: i64 = row.get_unwrap(0);
+ let id_name: String = row.get_unwrap(1);
+ let name: String = row.get_unwrap(2);
+ if !base_path.join(id_name).join(name).exists() {
+ delete_stmt.execute([rowid])?;
+ }
+ }
+ Ok(())
+ }
+
+ /// Removes database entries for any files that are not on disk for the parent tables.
+ fn update_db_parent_for_removed_from_disk(
+ conn: &Connection,
+ parent_table_name: &str,
+ base_path: &Path,
+ child_base_paths: &[&Path],
+ delete_paths: &mut Vec<PathBuf>,
+ ) -> CargoResult<()> {
+ let _p = crate::util::profile::start(format!(
+ "update db parent for removed from disk {parent_table_name}"
+ ));
+ trace!("checking for db entries to remove from {parent_table_name}");
+ let mut select_stmt =
+ conn.prepare_cached(&format!("SELECT rowid, name FROM {parent_table_name}"))?;
+ let mut delete_stmt =
+ conn.prepare_cached(&format!("DELETE FROM {parent_table_name} WHERE rowid = ?1"))?;
+ let mut rows = select_stmt.query([])?;
+ while let Some(row) = rows.next()? {
+ let rowid: i64 = row.get_unwrap(0);
+ let id_name: String = row.get_unwrap(1);
+ if !base_path.join(&id_name).exists() {
+ delete_stmt.execute([rowid])?;
+ // Make sure any child data is also cleaned up.
+ for child_base in child_base_paths {
+ let child_path = child_base.join(&id_name);
+ if child_path.exists() {
+ debug!("removing orphaned path {child_path:?}");
+ delete_paths.push(child_path);
+ }
+ }
+ }
+ }
+ Ok(())
+ }
+
+ /// Updates the database to add any `.crate` files that are currently
+ /// not tracked (such as when they are downloaded by an older version of
+ /// cargo).
+ fn populate_untracked_crate(conn: &Connection, base_path: &Path) -> CargoResult<()> {
+ let _p = crate::util::profile::start("populate untracked crate");
+ trace!("populating untracked crate files");
+ let mut insert_stmt = conn.prepare_cached(
+ "INSERT INTO registry_crate (registry_id, name, size, timestamp)
+ VALUES (?1, ?2, ?3, ?4)
+ ON CONFLICT DO NOTHING",
+ )?;
+ let now = now();
+ let index_names = Self::names_from(&base_path)?;
+ for index_name in index_names {
+ let Some(id) = Self::id_from_name(conn, REGISTRY_INDEX_TABLE, &index_name)? else {
+ // The id is missing from the database. This should be resolved
+ // via update_db_parent_for_removed_from_disk.
+ continue;
+ };
+ let index_path = base_path.join(index_name);
+ for crate_name in Self::names_from(&index_path)? {
+ if crate_name.ends_with(".crate") {
+ // Missing files should have already been taken care of by
+ // update_db_for_removed.
+ let size = paths::metadata(index_path.join(&crate_name))?.len();
+ insert_stmt.execute(params![id, crate_name, size, now])?;
+ }
+ }
+ }
+ Ok(())
+ }
+
+ /// Updates the database to add any files that are currently not tracked
+ /// (such as when they are downloaded by an older version of cargo).
+ fn populate_untracked( + conn: &Connection, + config: &Config, + id_table_name: &str, + id_column_name: &str, + table_name: &str, + base_path: &Path, + populate_size: bool, + ) -> CargoResult<()> { + let _p = crate::util::profile::start(format!("populate untracked {table_name}")); + trace!("populating untracked files for {table_name}"); + // Gather names (and make sure they are in the database). + let id_names = Self::names_from(&base_path)?; + + // This SELECT is used to determine if the directory is already + // tracked. We don't want to do the expensive size computation unless + // necessary. + let mut select_stmt = conn.prepare_cached(&format!( + "SELECT 1 FROM {table_name} + WHERE {id_column_name} = ?1 AND name = ?2", + ))?; + let mut insert_stmt = conn.prepare_cached(&format!( + "INSERT INTO {table_name} ({id_column_name}, name, size, timestamp) + VALUES (?1, ?2, ?3, ?4) + ON CONFLICT DO NOTHING", + ))?; + let mut progress = Progress::with_style("Scanning", ProgressStyle::Ratio, config); + let now = now(); + // Compute the size of any directory not in the database. + for id_name in id_names { + let Some(id) = Self::id_from_name(conn, id_table_name, &id_name)? else { + // The id is missing from the database. This should be resolved + // via update_db_parent_for_removed_from_disk. + continue; + }; + let index_path = base_path.join(id_name); + let names = Self::names_from(&index_path)?; + let max = names.len(); + for (i, name) in names.iter().enumerate() { + if select_stmt.exists(params![id, name])? { + continue; + } + let dir_path = index_path.join(name); + if !dir_path.is_dir() { + continue; + } + progress.tick(i, max, "")?; + let size = if populate_size { + Some(du(&dir_path, table_name)?) + } else { + None + }; + insert_stmt.execute(params![id, name, size, now])?; + } + } + Ok(()) + } + + /// Fills in the `size` column where it is NULL. + /// + /// This can happen when something is added to disk by an older version of + /// cargo, and one of the mark functions marked it without knowing the + /// size. + /// + /// `update_db_for_removed` should be called before this is called. + fn update_null_sizes( + conn: &Connection, + config: &Config, + parent_table_name: &str, + id_column_name: &str, + table_name: &str, + base_path: &Path, + ) -> CargoResult<()> { + let _p = crate::util::profile::start(format!("update NULL sizes {table_name}")); + trace!("updating NULL size information in {table_name}"); + let mut null_stmt = conn.prepare_cached(&format!( + "SELECT {table_name}.rowid, {table_name}.name, {parent_table_name}.name + FROM {table_name}, {parent_table_name} + WHERE {table_name}.size IS NULL AND {table_name}.{id_column_name} = {parent_table_name}.id", + ))?; + let mut update_stmt = conn.prepare_cached(&format!( + "UPDATE {table_name} SET size = ?1 WHERE rowid = ?2" + ))?; + let mut progress = Progress::with_style("Scanning", ProgressStyle::Ratio, config); + let rows: Vec<_> = null_stmt + .query_map([], |row| { + Ok((row.get_unwrap(0), row.get_unwrap(1), row.get_unwrap(2))) + })? + .collect(); + let max = rows.len(); + for (i, row) in rows.into_iter().enumerate() { + let (rowid, name, id_name): (i64, String, String) = row?; + let path = base_path.join(id_name).join(name); + progress.tick(i, max, "")?; + // Missing files should have already been taken care of by + // update_db_for_removed. 
+ let size = du(&path, table_name)?;
+ update_stmt.execute(params![size, rowid])?;
+ }
+ Ok(())
+ }
+
+ /// Adds paths to delete from either registry_crate or registry_src whose
+ /// last use is older than the given timestamp.
+ fn get_registry_items_to_clean_age(
+ conn: &Connection,
+ max_age: Timestamp,
+ table_name: &str,
+ base_path: &Path,
+ delete_paths: &mut Vec<PathBuf>,
+ ) -> CargoResult<()> {
+ debug!("cleaning {table_name} since {max_age:?}");
+ let mut stmt = conn.prepare_cached(&format!(
+ "DELETE FROM {table_name} WHERE timestamp < ?1
+ RETURNING registry_id, name"
+ ))?;
+ let rows = stmt
+ .query_map(params![max_age], |row| {
+ let registry_id = row.get_unwrap(0);
+ let name: String = row.get_unwrap(1);
+ Ok((registry_id, name))
+ })?
+ .collect::<Result<Vec<_>, _>>()?;
+ let ids: Vec<_> = rows.iter().map(|r| r.0).collect();
+ let id_map = Self::get_id_map(conn, REGISTRY_INDEX_TABLE, &ids)?;
+ for (id, name) in rows {
+ let encoded_registry_name = &id_map[&id];
+ delete_paths.push(base_path.join(encoded_registry_name).join(name));
+ }
+ Ok(())
+ }
+
+ /// Adds paths to delete from either `registry_crate` or `registry_src` in
+ /// order to keep the total size under the given max size.
+ fn get_registry_items_to_clean_size(
+ conn: &Connection,
+ max_size: u64,
+ table_name: &str,
+ base_path: &Path,
+ delete_paths: &mut Vec<PathBuf>,
+ ) -> CargoResult<()> {
+ debug!("cleaning {table_name} till under {max_size:?}");
+ let total_size: u64 = conn.query_row(
+ &format!("SELECT coalesce(SUM(size), 0) FROM {table_name}"),
+ [],
+ |row| row.get(0),
+ )?;
+ if total_size <= max_size {
+ return Ok(());
+ }
+ // This SQL statement selects all of the rows ordered by timestamp,
+ // and then uses a window function to keep a running total of the
+ // size. It selects all rows until the running total exceeds the
+ // threshold of the total number of bytes that we want to delete.
+ //
+ // The window function essentially computes an aggregate over all
+ // previous rows as it goes along. As long as the running size is
+ // below the total amount that we need to delete, it keeps picking
+ // more rows.
+ //
+ // The ORDER BY includes `name` mainly for test purposes so that
+ // entries with the same timestamp have deterministic behavior.
+ //
+ // The coalesce helps convert NULL to 0.
+ let mut stmt = conn.prepare(&format!(
+ "DELETE FROM {table_name} WHERE rowid IN \
+ (SELECT x.rowid FROM \
+ (SELECT rowid, size, SUM(size) OVER \
+ (ORDER BY timestamp, name ROWS UNBOUNDED PRECEDING) AS running_amount \
+ FROM {table_name}) x \
+ WHERE coalesce(x.running_amount, 0) - x.size < ?1) \
+ RETURNING registry_id, name;"
+ ))?;
+ let rows = stmt
+ .query_map(params![total_size - max_size], |row| {
+ let id = row.get_unwrap(0);
+ let name: String = row.get_unwrap(1);
+ Ok((id, name))
+ })?
+ .collect::<Result<Vec<_>, _>>()?;
+ // Convert registry_id to the encoded registry name, and join those.
+ let ids: Vec<_> = rows.iter().map(|r| r.0).collect();
+ let id_map = Self::get_id_map(conn, REGISTRY_INDEX_TABLE, &ids)?;
+ for (id, name) in rows {
+ let encoded_name = &id_map[&id];
+ delete_paths.push(base_path.join(encoded_name).join(name));
+ }
+ Ok(())
+ }
+
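The window-function query above can be hard to read in SQL; in plain Rust the selection it performs looks roughly like this (a sketch, not cargo's code): sort oldest-first and take rows until enough bytes have been freed.

    /// `entries` are (timestamp, size) pairs; `excess` is how many bytes
    /// must be freed to get back under the limit.
    fn victims(mut entries: Vec<(u64, u64)>, excess: u64) -> Vec<(u64, u64)> {
        entries.sort_by_key(|&(timestamp, _)| timestamp);
        let mut freed = 0u64;
        entries
            .into_iter()
            .take_while(|&(_, size)| {
                // Same test as `running_amount - size < ?1` in the SQL:
                // take this row if the rows before it weren't enough.
                let take = freed < excess;
                freed += size;
                take
            })
            .collect()
    }
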
+ /// Adds paths to delete from both `registry_crate` and `registry_src` in
+ /// order to keep the total size under the given max size.
+ fn get_registry_items_to_clean_size_both(
+ conn: &Connection,
+ max_size: u64,
+ base: &BasePaths,
+ delete_paths: &mut Vec<PathBuf>,
+ ) -> CargoResult<()> {
+ debug!("cleaning download till under {max_size:?}");
+
+ // This SQL statement selects from both registry_src and
+ // registry_crate so that sorting of timestamps incorporates both of
+ // them at the same time. It uses a const value of 1 or 2 as the first
+ // column so that the code below can determine which table the value
+ // came from.
+ let mut stmt = conn.prepare_cached(
+ "SELECT 1, registry_src.rowid, registry_src.name AS name, registry_index.name,
+ registry_src.size, registry_src.timestamp AS timestamp
+ FROM registry_src, registry_index
+ WHERE registry_src.registry_id = registry_index.id AND registry_src.size NOT NULL
+
+ UNION
+
+ SELECT 2, registry_crate.rowid, registry_crate.name AS name, registry_index.name,
+ registry_crate.size, registry_crate.timestamp AS timestamp
+ FROM registry_crate, registry_index
+ WHERE registry_crate.registry_id = registry_index.id
+
+ ORDER BY timestamp, name",
+ )?;
+ let mut delete_src_stmt =
+ conn.prepare_cached("DELETE FROM registry_src WHERE rowid = ?1")?;
+ let mut delete_crate_stmt =
+ conn.prepare_cached("DELETE FROM registry_crate WHERE rowid = ?1")?;
+ let rows = stmt
+ .query_map([], |row| {
+ Ok((
+ row.get_unwrap(0),
+ row.get_unwrap(1),
+ row.get_unwrap(2),
+ row.get_unwrap(3),
+ row.get_unwrap(4),
+ ))
+ })?
+ .collect::<Result<Vec<_>, _>>()?;
+ let mut total_size: u64 = rows.iter().map(|r| r.4).sum();
+ debug!("total download cache size appears to be {total_size}");
+ for (table, rowid, name, index_name, size) in rows {
+ if total_size <= max_size {
+ break;
+ }
+ if table == 1 {
+ delete_paths.push(base.src.join(index_name).join(name));
+ delete_src_stmt.execute([rowid])?;
+ } else {
+ delete_paths.push(base.crate_dir.join(index_name).join(name));
+ delete_crate_stmt.execute([rowid])?;
+ }
+ // TODO: If delete crate, ensure src is also deleted.
+ total_size -= size;
+ }
+ Ok(())
+ }
+
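The constant `1`/`2` column in the UNION above is a sentinel recording which table each row came from, since the two tables have to be interleaved into a single timestamp-ordered scan. A sketch of how such a tag might be decoded (plain Rust, illustrative names):

    enum DownloadRow {
        Src { rowid: i64 },   // extracted source directory
        Crate { rowid: i64 }, // downloaded .crate file
    }

    fn decode(tag: u32, rowid: i64) -> DownloadRow {
        match tag {
            1 => DownloadRow::Src { rowid },
            _ => DownloadRow::Crate { rowid },
        }
    }
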
+ /// Adds paths to delete from the git cache, keeping the total size under
+ /// the given value.
+ ///
+ /// Paths are relative to the `git` directory in the cache directory.
+ fn get_git_items_to_clean_size(
+ conn: &Connection,
+ max_size: u64,
+ base: &BasePaths,
+ delete_paths: &mut Vec<PathBuf>,
+ ) -> CargoResult<()> {
+ debug!("cleaning git till under {max_size:?}");
+
+ // Collect all the sizes from git_db and git_checkouts, and then sort them by timestamp.
+ let mut stmt = conn.prepare_cached("SELECT rowid, name, timestamp FROM git_db")?;
+ let mut git_info = stmt
+ .query_map([], |row| {
+ let rowid: i64 = row.get_unwrap(0);
+ let name: String = row.get_unwrap(1);
+ let timestamp: Timestamp = row.get_unwrap(2);
+ // Size is added below so that the error doesn't need to be
+ // converted to a rusqlite error.
+ Ok((timestamp, rowid, None, name, 0))
+ })?
+ .collect::<Result<Vec<_>, _>>()?;
+ for info in &mut git_info {
+ let size = cargo_util::du(&base.git_db.join(&info.3), &[])?;
+ info.4 = size;
+ }
+
+ let mut stmt = conn.prepare_cached(
+ "SELECT git_checkout.rowid, git_db.name, git_checkout.name,
+ git_checkout.size, git_checkout.timestamp
+ FROM git_checkout, git_db
+ WHERE git_checkout.git_id = git_db.id AND git_checkout.size NOT NULL",
+ )?;
+ let git_co_rows = stmt
+ .query_map([], |row| {
+ let rowid = row.get_unwrap(0);
+ let db_name: String = row.get_unwrap(1);
+ let name = row.get_unwrap(2);
+ let size = row.get_unwrap(3);
+ let timestamp = row.get_unwrap(4);
+ Ok((timestamp, rowid, Some(db_name), name, size))
+ })?
+ .collect::<Result<Vec<_>, _>>()?;
+ git_info.extend(git_co_rows);
+
+ // Sort by timestamp, and name. The name is included mostly for test
+ // purposes so that entries with the same timestamp have deterministic
+ // behavior.
+ git_info.sort_by(|a, b| (b.0, &b.3).cmp(&(a.0, &a.3)));
+
+ // Collect paths to delete.
+ let mut delete_db_stmt = conn.prepare_cached("DELETE FROM git_db WHERE rowid = ?1")?;
+ let mut delete_co_stmt =
+ conn.prepare_cached("DELETE FROM git_checkout WHERE rowid = ?1")?;
+ let mut total_size: u64 = git_info.iter().map(|r| r.4).sum();
+ debug!("total git cache size appears to be {total_size}");
+ while let Some((_timestamp, rowid, db_name, name, size)) = git_info.pop() {
+ if total_size <= max_size {
+ break;
+ }
+ if let Some(db_name) = db_name {
+ delete_paths.push(base.git_co.join(db_name).join(name));
+ delete_co_stmt.execute([rowid])?;
+ total_size -= size;
+ } else {
+ total_size -= size;
+ delete_paths.push(base.git_db.join(&name));
+ delete_db_stmt.execute([rowid])?;
+ // If the db is deleted, then all the checkouts must be deleted.
+ let mut i = 0;
+ while i < git_info.len() {
+ if git_info[i].2.as_deref() == Some(name.as_ref()) {
+ let (_, rowid, db_name, name, size) = git_info.remove(i);
+ delete_paths.push(base.git_co.join(db_name.unwrap()).join(name));
+ delete_co_stmt.execute([rowid])?;
+ total_size -= size;
+ } else {
+ i += 1;
+ }
+ }
+ }
+ }
+ Ok(())
+ }
+
+ /// Adds paths to delete from `registry_index` whose last use is older
+ /// than the given timestamp.
+ fn get_registry_index_to_clean(
+ conn: &Connection,
+ max_age: Timestamp,
+ base: &BasePaths,
+ delete_paths: &mut Vec<PathBuf>,
+ ) -> CargoResult<()> {
+ debug!("cleaning index since {max_age:?}");
+ let mut stmt = conn.prepare_cached(
+ "DELETE FROM registry_index WHERE timestamp < ?1
+ RETURNING name",
+ )?;
+ let mut rows = stmt.query([max_age])?;
+ while let Some(row) = rows.next()? {
+ let name: String = row.get_unwrap(0);
+ delete_paths.push(base.index.join(&name));
+ // Also delete .crate and src directories, since by definition
+ // they cannot be used without their index.
+ delete_paths.push(base.src.join(&name));
+ delete_paths.push(base.crate_dir.join(&name));
+ }
+ Ok(())
+ }
+
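Deleting an index implies deleting everything downloaded from it, which is why `get_registry_index_to_clean` pushes three paths per row. A sketch of that cascade (plain Rust, not cargo's code):

    use std::path::{Path, PathBuf};

    fn cascade(
        base_index: &Path,
        base_src: &Path,
        base_cache: &Path,
        name: &str,
        delete_paths: &mut Vec<PathBuf>,
    ) {
        delete_paths.push(base_index.join(name));
        // By definition these cannot be used without their index:
        delete_paths.push(base_src.join(name));   // extracted sources
        delete_paths.push(base_cache.join(name)); // downloaded .crate files
    }
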
+ /// Adds paths to delete from `git_checkout` whose last use is
+ /// older than the given timestamp.
+ fn get_git_co_items_to_clean(
+ conn: &Connection,
+ max_age: Timestamp,
+ base_path: &Path,
+ delete_paths: &mut Vec<PathBuf>,
+ ) -> CargoResult<()> {
+ debug!("cleaning git co since {max_age:?}");
+ let mut stmt = conn.prepare_cached(
+ "DELETE FROM git_checkout WHERE timestamp < ?1
+ RETURNING git_id, name",
+ )?;
+ let rows = stmt
+ .query_map(params![max_age], |row| {
+ let git_id = row.get_unwrap(0);
+ let name: String = row.get_unwrap(1);
+ Ok((git_id, name))
+ })?
+ .collect::<Result<Vec<_>, _>>()?;
+ let ids: Vec<_> = rows.iter().map(|r| r.0).collect();
+ let id_map = Self::get_id_map(conn, GIT_DB_TABLE, &ids)?;
+ for (id, name) in rows {
+ let encoded_git_name = &id_map[&id];
+ delete_paths.push(base_path.join(encoded_git_name).join(name));
+ }
+ Ok(())
+ }
+
+ /// Adds paths to delete from `git_db` whose last use is older than the
+ /// given timestamp.
+ fn get_git_db_items_to_clean(
+ conn: &Connection,
+ max_age: Timestamp,
+ base: &BasePaths,
+ delete_paths: &mut Vec<PathBuf>,
+ ) -> CargoResult<()> {
+ debug!("cleaning git db since {max_age:?}");
+ let mut stmt = conn.prepare_cached(
+ "DELETE FROM git_db WHERE timestamp < ?1
+ RETURNING name",
+ )?;
+ let mut rows = stmt.query([max_age])?;
+ while let Some(row) = rows.next()? {
+ let name: String = row.get_unwrap(0);
+ delete_paths.push(base.git_db.join(&name));
+ // Also delete checkout directories, since by definition they
+ // cannot be used without their db.
+ delete_paths.push(base.git_co.join(&name));
+ }
+ Ok(())
+ }
+}
+
+/// Helper to generate the upsert for the parent tables.
+///
+/// This handles checking if the row already exists, and only updates the
+/// timestamp if it hasn't been updated recently. This also handles keeping
+/// a cached map of the `id` value.
+///
+/// Unfortunately it is a bit tricky to share this code without a macro.
+macro_rules! insert_or_update_parent {
+ ($self:expr, $conn:expr, $table_name:expr, $timestamps_field:ident, $keys_field:ident, $encoded_name:ident) => {
+ let mut select_stmt = $conn.prepare_cached(concat!(
+ "SELECT id, timestamp FROM ",
+ $table_name,
+ " WHERE name = ?1"
+ ))?;
+ let mut insert_stmt = $conn.prepare_cached(concat!(
+ "INSERT INTO ",
+ $table_name,
+ " (name, timestamp)
+ VALUES (?1, ?2)
+ ON CONFLICT DO UPDATE SET timestamp=excluded.timestamp
+ RETURNING id",
+ ))?;
+ let mut update_stmt = $conn.prepare_cached(concat!(
+ "UPDATE ",
+ $table_name,
+ " SET timestamp = ?1 WHERE id = ?2"
+ ))?;
+ for (parent, new_timestamp) in std::mem::take(&mut $self.$timestamps_field) {
+ trace!(
+ concat!("insert ", $table_name, " {:?} {}"),
+ parent,
+ new_timestamp
+ );
+ let mut rows = select_stmt.query([parent.$encoded_name])?;
+ let id = if let Some(row) = rows.next()? {
+ let id: i64 = row.get_unwrap(0);
+ let timestamp: Timestamp = row.get_unwrap(1);
+ if timestamp < new_timestamp - UPDATE_RESOLUTION {
+ update_stmt.execute(params![new_timestamp, id])?;
+ }
+ id
+ } else {
+ insert_stmt.query_row(params![parent.$encoded_name, new_timestamp], |row| {
+ row.get(0)
+ })?
+ }; + match $self.$keys_field.entry(parent.$encoded_name) { + hash_map::Entry::Occupied(o) => { + assert_eq!(*o.get(), id); + } + hash_map::Entry::Vacant(v) => { + v.insert(id); + } + } + } + return Ok(()); + }; +} + +impl DeferredGlobalLastUse { + pub fn new() -> DeferredGlobalLastUse { + DeferredGlobalLastUse { + registry_keys: HashMap::new(), + git_keys: HashMap::new(), + registry_index_timestamps: HashMap::new(), + registry_crate_timestamps: HashMap::new(), + registry_src_timestamps: HashMap::new(), + git_db_timestamps: HashMap::new(), + git_checkout_timestamps: HashMap::new(), + save_err_has_warned: false, + now: now(), + } + } + + pub fn is_empty(&self) -> bool { + self.registry_index_timestamps.is_empty() + && self.registry_crate_timestamps.is_empty() + && self.registry_src_timestamps.is_empty() + && self.git_db_timestamps.is_empty() + && self.git_checkout_timestamps.is_empty() + } + + fn clear(&mut self) { + self.registry_index_timestamps.clear(); + self.registry_crate_timestamps.clear(); + self.registry_src_timestamps.clear(); + self.git_db_timestamps.clear(); + self.git_checkout_timestamps.clear(); + } + + /// Indicates the given [`RegistryIndex`] has been used right now. + pub fn mark_registry_index_used(&mut self, registry_index: RegistryIndex) { + self.mark_registry_index_used_stamp(registry_index, None); + } + + /// Indicates the given [`RegistryCrate`] has been used right now. + /// + /// Also implicitly marks the index used, too. + pub fn mark_registry_crate_used(&mut self, registry_crate: RegistryCrate) { + self.mark_registry_crate_used_stamp(registry_crate, None); + } + + /// Indicates the given [`RegistrySrc`] has been used right now. + /// + /// Also implicitly marks the index used, too. + pub fn mark_registry_src_used(&mut self, registry_src: RegistrySrc) { + self.mark_registry_src_used_stamp(registry_src, None); + } + + /// Indicates the given [`GitCheckout`] has been used right now. + /// + /// Also implicitly marks the git db used, too. + pub fn mark_git_checkout_used(&mut self, git_checkout: GitCheckout) { + self.mark_git_checkout_used_stamp(git_checkout, None); + } + + /// Indicates the given [`RegistryIndex`] has been used with the given + /// time (or "now" if `None`). + pub fn mark_registry_index_used_stamp( + &mut self, + registry_index: RegistryIndex, + timestamp: Option<&SystemTime>, + ) { + let timestamp = timestamp.map_or(self.now, to_timestamp); + self.registry_index_timestamps + .insert(registry_index, timestamp); + } + + /// Indicates the given [`RegistryCrate`] has been used with the given + /// time (or "now" if `None`). + /// + /// Also implicitly marks the index used, too. + pub fn mark_registry_crate_used_stamp( + &mut self, + registry_crate: RegistryCrate, + timestamp: Option<&SystemTime>, + ) { + let timestamp = timestamp.map_or(self.now, to_timestamp); + let index = RegistryIndex { + encoded_registry_name: registry_crate.encoded_registry_name, + }; + self.registry_index_timestamps.insert(index, timestamp); + self.registry_crate_timestamps + .insert(registry_crate, timestamp); + } + + /// Indicates the given [`RegistrySrc`] has been used with the given + /// time (or "now" if `None`). + /// + /// Also implicitly marks the index used, too. 
+ pub fn mark_registry_src_used_stamp(
+ &mut self,
+ registry_src: RegistrySrc,
+ timestamp: Option<&SystemTime>,
+ ) {
+ let timestamp = timestamp.map_or(self.now, to_timestamp);
+ let index = RegistryIndex {
+ encoded_registry_name: registry_src.encoded_registry_name,
+ };
+ self.registry_index_timestamps.insert(index, timestamp);
+ self.registry_src_timestamps.insert(registry_src, timestamp);
+ }
+
+ /// Indicates the given [`GitCheckout`] has been used with the given
+ /// time (or "now" if `None`).
+ ///
+ /// Also implicitly marks the git db used, too.
+ pub fn mark_git_checkout_used_stamp(
+ &mut self,
+ git_checkout: GitCheckout,
+ timestamp: Option<&SystemTime>,
+ ) {
+ let timestamp = timestamp.map_or(self.now, to_timestamp);
+ let db = GitDb {
+ encoded_git_name: git_checkout.encoded_git_name,
+ };
+ self.git_db_timestamps.insert(db, timestamp);
+ self.git_checkout_timestamps.insert(git_checkout, timestamp);
+ }
+
+ /// Saves all of the deferred information to the database.
+ ///
+ /// This will also clear the state of `self`.
+ pub fn save(&mut self, tracker: &mut GlobalCacheTracker) -> CargoResult<()> {
+ let _p = crate::util::profile::start("saving last-use data");
+ trace!("saving last-use data");
+ if self.is_empty() {
+ return Ok(());
+ }
+ let tx = tracker.conn.transaction()?;
+ // These must run before the ones that refer to their IDs.
+ self.insert_registry_index_from_cache(&tx)?;
+ self.insert_git_db_from_cache(&tx)?;
+ self.insert_registry_crate_from_cache(&tx)?;
+ self.insert_registry_src_from_cache(&tx)?;
+ self.insert_git_checkout_from_cache(&tx)?;
+ tx.commit()?;
+ trace!("last-use save complete");
+ Ok(())
+ }
+
+ /// Variant of [`DeferredGlobalLastUse::save`] that does not return an
+ /// error.
+ ///
+ /// This will log or display a warning to the user.
+ pub fn save_no_error(&mut self, config: &Config) {
+ if let Err(e) = self.save_with_config(config) {
+ // Because there is an assertion in auto-gc that checks if this is
+ // empty, be sure to clear it so that assertion doesn't fail.
+ self.clear();
+ if !self.save_err_has_warned {
+ if is_silent_error(&e) && config.shell().verbosity() != Verbosity::Verbose {
+ tracing::warn!("failed to save last-use data: {e:?}");
+ } else {
+ crate::display_warning_with_error(
+ "failed to save last-use data\n\
+ This may prevent cargo from accurately tracking what is being \
+ used in its global cache. This information is used for \
+ automatically removing unused data in the cache.",
+ &e,
+ &mut config.shell(),
+ );
+ self.save_err_has_warned = true;
+ }
+ }
+ }
+ }
+
+ fn save_with_config(&mut self, config: &Config) -> CargoResult<()> {
+ let mut tracker = config.global_cache_tracker()?;
+ self.save(&mut tracker)
+ }
+
+ /// Flushes all of the `registry_index_timestamps` to the database,
+ /// clearing `registry_index_timestamps`.
+ fn insert_registry_index_from_cache(&mut self, conn: &Connection) -> CargoResult<()> {
+ insert_or_update_parent!(
+ self,
+ conn,
+ "registry_index",
+ registry_index_timestamps,
+ registry_keys,
+ encoded_registry_name
+ );
+ }
+
+ /// Flushes all of the `git_db_timestamps` to the database,
+ /// clearing `git_db_timestamps`.
+ fn insert_git_db_from_cache(&mut self, conn: &Connection) -> CargoResult<()> {
+ insert_or_update_parent!(
+ self,
+ conn,
+ "git_db",
+ git_db_timestamps,
+ git_keys,
+ encoded_git_name
+ );
+ }
+
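The upsert macro used by the two functions above throttles timestamp writes: a row is only updated when the stored value is older than `UPDATE_RESOLUTION`, so repeated builds don't rewrite the same row over and over. A sketch of the check (assumes `rusqlite`; the function name and table are illustrative):

    use rusqlite::{params, Connection};

    fn touch(
        conn: &Connection,
        id: i64,
        stored: u64,
        new: u64,
        resolution: u64,
    ) -> rusqlite::Result<()> {
        // Skip the write entirely when the row is already fresh enough.
        if stored < new.saturating_sub(resolution) {
            conn.execute(
                "UPDATE registry_index SET timestamp = ?1 WHERE id = ?2",
                params![new, id],
            )?;
        }
        Ok(())
    }
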
+ /// Flushes all of the `registry_crate_timestamps` to the database,
+ /// clearing `registry_crate_timestamps`.
+ fn insert_registry_crate_from_cache(&mut self, conn: &Connection) -> CargoResult<()> {
+ let registry_crate_timestamps = std::mem::take(&mut self.registry_crate_timestamps);
+ for (registry_crate, timestamp) in registry_crate_timestamps {
+ trace!("insert registry crate {registry_crate:?} {timestamp}");
+ let registry_id = self.registry_id(conn, registry_crate.encoded_registry_name)?;
+ let mut stmt = conn.prepare_cached(
+ "INSERT INTO registry_crate (registry_id, name, size, timestamp)
+ VALUES (?1, ?2, ?3, ?4)
+ ON CONFLICT DO UPDATE SET timestamp=excluded.timestamp
+ WHERE timestamp < ?5
+ ",
+ )?;
+ stmt.execute(params![
+ registry_id,
+ registry_crate.crate_filename,
+ registry_crate.size,
+ timestamp,
+ timestamp - UPDATE_RESOLUTION
+ ])?;
+ }
+ Ok(())
+ }
+
+ /// Flushes all of the `registry_src_timestamps` to the database,
+ /// clearing `registry_src_timestamps`.
+ fn insert_registry_src_from_cache(&mut self, conn: &Connection) -> CargoResult<()> {
+ let registry_src_timestamps = std::mem::take(&mut self.registry_src_timestamps);
+ for (registry_src, timestamp) in registry_src_timestamps {
+ trace!("insert registry src {registry_src:?} {timestamp}");
+ let registry_id = self.registry_id(conn, registry_src.encoded_registry_name)?;
+ let mut stmt = conn.prepare_cached(
+ "INSERT INTO registry_src (registry_id, name, size, timestamp)
+ VALUES (?1, ?2, ?3, ?4)
+ ON CONFLICT DO UPDATE SET timestamp=excluded.timestamp
+ WHERE timestamp < ?5
+ ",
+ )?;
+ stmt.execute(params![
+ registry_id,
+ registry_src.package_dir,
+ registry_src.size,
+ timestamp,
+ timestamp - UPDATE_RESOLUTION
+ ])?;
+ }
+
+ Ok(())
+ }
+
+ /// Flushes all of the `git_checkout_timestamps` to the database,
+ /// clearing `git_checkout_timestamps`.
+ fn insert_git_checkout_from_cache(&mut self, conn: &Connection) -> CargoResult<()> {
+ let git_checkout_timestamps = std::mem::take(&mut self.git_checkout_timestamps);
+ for (git_checkout, timestamp) in git_checkout_timestamps {
+ let git_id = self.git_id(conn, git_checkout.encoded_git_name)?;
+ let mut stmt = conn.prepare_cached(
+ "INSERT INTO git_checkout (git_id, name, size, timestamp)
+ VALUES (?1, ?2, ?3, ?4)
+ ON CONFLICT DO UPDATE SET timestamp=excluded.timestamp
+ WHERE timestamp < ?5",
+ )?;
+ stmt.execute(params![
+ git_id,
+ git_checkout.short_name,
+ git_checkout.size,
+ timestamp,
+ timestamp - UPDATE_RESOLUTION
+ ])?;
+ }
+
+ Ok(())
+ }
+
+ /// Returns the numeric ID of the registry, either fetching from the local
+ /// cache, or getting it from the database.
+ ///
+ /// It is an error if the registry does not exist.
+ fn registry_id(
+ &mut self,
+ conn: &Connection,
+ encoded_registry_name: InternedString,
+ ) -> CargoResult<i64> {
+ match self.registry_keys.get(&encoded_registry_name) {
+ Some(i) => Ok(*i),
+ None => {
+ let Some(id) = GlobalCacheTracker::id_from_name(
+ conn,
+ REGISTRY_INDEX_TABLE,
+ &encoded_registry_name,
+ )?
+ else {
+ bail!("expected registry_index {encoded_registry_name} to exist, but wasn't found");
+ };
+ self.registry_keys.insert(encoded_registry_name, id);
+ Ok(id)
+ }
+ }
+ }
+
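`registry_id` (and `git_id` below) memoize the name-to-id lookup in a local map so that flushing many rows only queries the parent table once per name. The shape of that pattern, reduced to a sketch (plain Rust, not cargo's code):

    use std::collections::HashMap;

    fn cached_id(
        cache: &mut HashMap<String, i64>,
        name: &str,
        lookup: impl FnOnce(&str) -> Option<i64>,
    ) -> Option<i64> {
        if let Some(id) = cache.get(name) {
            return Some(*id);
        }
        let id = lookup(name)?; // hit the database only on a cache miss
        cache.insert(name.to_string(), id);
        Some(id)
    }
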
+ /// Returns the numeric ID of the git db, either fetching from the local
+ /// cache, or getting it from the database.
+ ///
+ /// It is an error if the git db does not exist.
+ fn git_id(&mut self, conn: &Connection, encoded_git_name: InternedString) -> CargoResult<i64> {
+ match self.git_keys.get(&encoded_git_name) {
+ Some(i) => Ok(*i),
+ None => {
+ let Some(id) =
+ GlobalCacheTracker::id_from_name(conn, GIT_DB_TABLE, &encoded_git_name)?
+ else {
+ bail!("expected git_db {encoded_git_name} to exist, but wasn't found")
+ };
+ self.git_keys.insert(encoded_git_name, id);
+ Ok(id)
+ }
+ }
+ }
+}
+
+/// Converts a [`SystemTime`] to a [`Timestamp`] which can be stored in the database.
+fn to_timestamp(t: &SystemTime) -> Timestamp {
+ t.duration_since(SystemTime::UNIX_EPOCH)
+ .expect("invalid clock")
+ .as_secs()
+}
+
+/// Returns the current time.
+///
+/// This supports pretending that the time is different for testing using an
+/// environment variable.
+///
+/// If possible, try to avoid calling this too often since accessing clocks
+/// can be a little slow on some systems.
+#[allow(clippy::disallowed_methods)]
+fn now() -> Timestamp {
+ match std::env::var("__CARGO_TEST_LAST_USE_NOW") {
+ Ok(now) => now.parse().unwrap(),
+ Err(_) => to_timestamp(&SystemTime::now()),
+ }
+}
+
+/// Returns whether or not the given error should cause a warning to be
+/// displayed to the user.
+///
+/// In some situations, like a read-only global cache, we don't want to spam
+/// the user with a warning. Once cargo has controllable lints, we should
+/// consider changing this to always warn, but give the user an option to
+/// silence the warning.
+pub fn is_silent_error(e: &anyhow::Error) -> bool {
+ if let Some(e) = e.downcast_ref::<rusqlite::Error>() {
+ if matches!(
+ e.sqlite_error_code(),
+ Some(ErrorCode::CannotOpen | ErrorCode::ReadOnly)
+ ) {
+ return true;
+ }
+ }
+ false
+}
+
+fn du(path: &Path, table_name: &str) -> CargoResult<u64> {
+ // !.git is used because clones typically use hardlinks for the git
+ // contents. TODO: Verify behavior on Windows.
+ // TODO: Or even better, switch to worktrees, and remove this.
+ let patterns = if table_name == GIT_CO_TABLE {
+ &["!.git"][..]
+ } else {
+ &[][..]
+ };
+ cargo_util::du(&path, patterns)
+}
diff --git a/src/cargo/core/mod.rs b/src/cargo/core/mod.rs
index 2add52d5c..808091061 100644
--- a/src/cargo/core/mod.rs
+++ b/src/cargo/core/mod.rs
@@ -19,6 +19,8 @@ pub use crate::util::toml::schema::InheritableFields;
 pub mod compiler;
 pub mod dependency;
 pub mod features;
+pub mod gc;
+pub mod global_cache_tracker;
 pub mod manifest;
 pub mod package;
 pub mod package_id;
diff --git a/src/cargo/core/package.rs b/src/cargo/core/package.rs
index 274798474..d87f81036 100644
--- a/src/cargo/core/package.rs
+++ b/src/cargo/core/package.rs
@@ -491,6 +491,10 @@ impl<'cfg> PackageSet<'cfg> {
 pkgs.push(downloads.wait()?);
 }
 downloads.success = true;
+ drop(downloads);
+
+ let mut deferred = self.config.deferred_global_last_use()?;
+ deferred.save_no_error(self.config);
 Ok(pkgs)
 }
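The `PackageSet` hunk above shows the intended usage pattern for `DeferredGlobalLastUse`: mark entries cheaply as they are touched, then flush the whole batch when downloads complete. Roughly (a sketch, not cargo's code):

    struct Deferred {
        marks: Vec<(String, u64)>, // (cache entry name, last-use timestamp)
    }

    impl Deferred {
        /// Cheap in-memory record; no database I/O here.
        fn mark(&mut self, name: &str, now: u64) {
            self.marks.push((name.to_string(), now));
        }

        /// One transaction for the whole batch, then reset.
        fn save(&mut self) {
            // ... write all marks inside a single sqlite transaction ...
            self.marks.clear();
        }
    }
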
diff --git a/src/cargo/ops/cargo_clean.rs b/src/cargo/ops/cargo_clean.rs
index 6f58b8bdc..35c7063f4 100644
--- a/src/cargo/ops/cargo_clean.rs
+++ b/src/cargo/ops/cargo_clean.rs
@@ -1,7 +1,10 @@
 use crate::core::compiler::{CompileKind, CompileMode, Layout, RustcTargetData};
+use crate::core::gc::{AutoGcKind, Gc, GcOpts};
+use crate::core::global_cache_tracker::GlobalCacheTracker;
 use crate::core::profiles::Profiles;
 use crate::core::{PackageIdSpec, TargetKind, Workspace};
 use crate::ops;
+use crate::util::cache_lock::CacheLockMode;
 use crate::util::edit_distance;
 use crate::util::errors::CargoResult;
 use crate::util::interning::InternedString;
@@ -25,6 +28,7 @@ pub struct CleanOptions<'cfg> {
 pub doc: bool,
 /// If set, doesn't delete anything.
 pub dry_run: bool,
+ pub gc_opts: GcOpts,
 }
 
 pub struct CleanContext<'cfg> {
@@ -37,45 +41,76 @@
 }
 
 /// Cleans various caches.
-pub fn clean(ws: &Workspace<'_>, opts: &CleanOptions<'_>) -> CargoResult<()> {
- let mut target_dir = ws.target_dir();
+pub fn clean(ws: CargoResult<Workspace<'_>>, opts: &CleanOptions<'_>) -> CargoResult<()> {
 let config = opts.config;
 let mut ctx = CleanContext::new(config);
 ctx.dry_run = opts.dry_run;
- if opts.doc {
- if !opts.spec.is_empty() {
- // FIXME: https://github.com/rust-lang/cargo/issues/8790
- // This should support the ability to clean specific packages
- // within the doc directory. It's a little tricky since it
- // needs to find all documentable targets, but also consider
- // the fact that target names might overlap with dependency
- // names and such.
- bail!("--doc cannot be used with -p");
- }
- // If the doc option is set, we just want to delete the doc directory.
- target_dir = target_dir.join("doc");
- ctx.remove_paths(&[target_dir.into_path_unlocked()])?;
- } else {
- let profiles = Profiles::new(&ws, opts.requested_profile)?;
+ let any_download_cache_opts = opts.gc_opts.is_download_cache_opt_set();
 
- if opts.profile_specified {
- // After parsing profiles we know the dir-name of the profile, if a profile
- // was passed from the command line. If so, delete only the directory of
- // that profile.
- let dir_name = profiles.get_dir_name();
- target_dir = target_dir.join(dir_name);
- }
+ // The following options need a workspace.
+ let any_ws_opts = !opts.spec.is_empty()
+ || !opts.targets.is_empty()
+ || opts.profile_specified
+ || opts.doc
+ || opts.gc_opts.is_target_opt_set();
 
- // If we have a spec, then we need to delete some packages, otherwise, just
- // remove the whole target directory and be done with it!
- //
- // Note that we don't bother grabbing a lock here as we're just going to
- // blow it all away anyway.
- if opts.spec.is_empty() {
+ // When no options are specified, do the default action.
+ let no_opts_specified = !any_download_cache_opts && !any_ws_opts;
+
+ if any_ws_opts || no_opts_specified {
+ let ws = ws?;
+ let mut target_dir = ws.target_dir();
+
+ if opts.doc {
+ if !opts.spec.is_empty() {
+ // FIXME: https://github.com/rust-lang/cargo/issues/8790
+ // This should support the ability to clean specific packages
+ // within the doc directory. It's a little tricky since it
+ // needs to find all documentable targets, but also consider
+ // the fact that target names might overlap with dependency
+ // names and such.
+ bail!("--doc cannot be used with -p");
+ }
+ // If the doc option is set, we just want to delete the doc directory.
+ target_dir = target_dir.join("doc");
 ctx.remove_paths(&[target_dir.into_path_unlocked()])?;
 } else {
- clean_specs(&mut ctx, &ws, &profiles, &opts.targets, &opts.spec)?;
+ let profiles = Profiles::new(&ws, opts.requested_profile)?;
+
+ if opts.profile_specified {
+ // After parsing profiles we know the dir-name of the profile, if a profile
+ // was passed from the command line. If so, delete only the directory of
+ // that profile.
+ let dir_name = profiles.get_dir_name();
+ target_dir = target_dir.join(dir_name);
+ }
+
+ // If we have a spec, then we need to delete some packages, otherwise, just
+ // remove the whole target directory and be done with it!
+ //
+ // Note that we don't bother grabbing a lock here as we're just going to
+ // blow it all away anyway.
+ if opts.spec.is_empty() {
+ ctx.remove_paths(&[target_dir.into_path_unlocked()])?;
+ } else {
+ clean_specs(&mut ctx, &ws, &profiles, &opts.targets, &opts.spec)?;
+ }
+ }
+ }
+
+ if config.cli_unstable().gc {
+ let _lock = config.acquire_package_cache_lock(CacheLockMode::MutateExclusive)?;
+ let mut cache_track = GlobalCacheTracker::new(&config)?;
+ let mut gc = Gc::new(config, &mut cache_track)?;
+ if no_opts_specified {
+ // This is the behavior for `cargo clean` without *any* options.
+ // It uses the defaults from config to determine what is cleaned.
+ let mut gc_opts = opts.gc_opts.clone();
+ gc_opts.update_for_auto_gc(config, &[AutoGcKind::All], None)?;
+ gc.gc(&mut ctx, &gc_opts)?;
+ } else {
+ gc.gc(&mut ctx, &opts.gc_opts)?;
+ }
+ }
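The signature change to `clean` (taking `CargoResult<Workspace<'_>>` rather than `&Workspace<'_>`) defers the workspace error until an option actually needs a workspace, so cache-only cleaning works outside of any Cargo project. The idea in miniature (plain Rust, not cargo's code):

    fn clean(ws: Result<String, String>, needs_workspace: bool) -> Result<(), String> {
        if needs_workspace {
            // Only now does a missing workspace become an error.
            let ws = ws?;
            let _ = ws;
        }
        // Cache-only options proceed without a workspace.
        Ok(())
    }
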
diff --git a/src/cargo/ops/cargo_compile/mod.rs b/src/cargo/ops/cargo_compile/mod.rs
index 94c6cf9de..3522ef9d3 100644
--- a/src/cargo/ops/cargo_compile/mod.rs
+++ b/src/cargo/ops/cargo_compile/mod.rs
@@ -153,6 +153,7 @@ pub fn compile_ws<'a>(
 unit_graph::emit_serialized_unit_graph(&bcx.roots, &bcx.unit_graph, ws.config())?;
 return Compilation::new(&bcx);
 }
+ crate::core::gc::auto_gc(bcx.config);
 let _p = profile::start("compiling");
 let cx = Context::new(&bcx)?;
 cx.compile(exec)
diff --git a/src/cargo/ops/cargo_fetch.rs b/src/cargo/ops/cargo_fetch.rs
index 6acdbddef..ac2b60aab 100644
--- a/src/cargo/ops/cargo_fetch.rs
+++ b/src/cargo/ops/cargo_fetch.rs
@@ -76,6 +76,7 @@ pub fn fetch<'a>(
 }
 
 packages.get_many(to_download)?;
+ crate::core::gc::auto_gc(config);
 Ok((resolve, packages))
 }
diff --git a/src/cargo/ops/mod.rs b/src/cargo/ops/mod.rs
index 13613eaf6..76fa91d25 100644
--- a/src/cargo/ops/mod.rs
+++ b/src/cargo/ops/mod.rs
@@ -1,6 +1,6 @@
 use crate::sources::CRATES_IO_DOMAIN;
-pub use self::cargo_clean::{clean, CleanOptions};
+pub use self::cargo_clean::{clean, CleanContext, CleanOptions};
 pub use self::cargo_compile::{
 compile, compile_with_exec, compile_ws, create_bcx, print, resolve_all_features, CompileOptions,
 };
diff --git a/src/cargo/ops/resolve.rs b/src/cargo/ops/resolve.rs
index 8ca72f77c..00d3b1144 100644
--- a/src/cargo/ops/resolve.rs
+++ b/src/cargo/ops/resolve.rs
@@ -530,6 +530,9 @@ pub fn resolve_with_previous<'cfg>(
 if let Some(previous) = previous {
 resolved.merge_from(previous)?;
 }
+ let config = ws.config();
+ let mut deferred = config.deferred_global_last_use()?;
+ deferred.save_no_error(config);
 Ok(resolved)
 }
diff --git a/src/cargo/sources/git/source.rs b/src/cargo/sources/git/source.rs
index a75c1ec6d..9fc874b29 100644
--- a/src/cargo/sources/git/source.rs
+++ b/src/cargo/sources/git/source.rs
@@ -1,5 +1,6 @@
 //! See [GitSource].
 
+use crate::core::global_cache_tracker;
 use crate::core::GitReference;
 use crate::core::SourceId;
 use crate::core::{Dependency, Package, PackageId, Summary};
@@ -11,6 +12,7 @@ use crate::sources::PathSource;
 use crate::util::cache_lock::CacheLockMode;
 use crate::util::errors::CargoResult;
 use crate::util::hex::short_hash;
+use crate::util::interning::InternedString;
 use crate::util::Config;
 use anyhow::Context;
 use cargo_util::paths::exclude_from_backups_and_indexing;
@@ -74,9 +76,10 @@ pub struct GitSource<'cfg> {
 source_id: SourceId,
 /// The underlying path source to discover packages inside the Git repository.
 path_source: Option<PathSource<'cfg>>,
+ short_id: Option<InternedString>,
 /// The identifier of this source for Cargo's Git cache directory.
 /// See [`ident`] for more.
- ident: String,
+ ident: InternedString,
 config: &'cfg Config,
 /// Disables status messages.
quiet: bool, @@ -104,7 +107,8 @@ impl<'cfg> GitSource<'cfg> { locked_rev, source_id, path_source: None, - ident, + short_id: None, + ident: ident.into(), config, quiet: false, }; @@ -127,6 +131,17 @@ impl<'cfg> GitSource<'cfg> { } self.path_source.as_mut().unwrap().read_packages() } + + fn mark_used(&self, size: Option) -> CargoResult<()> { + self.config + .deferred_global_last_use()? + .mark_git_checkout_used(global_cache_tracker::GitCheckout { + encoded_git_name: self.ident, + short_name: self.short_id.expect("update before download"), + size, + }); + Ok(()) + } } /// Create an identifier from a URL, @@ -200,6 +215,7 @@ impl<'cfg> Source for GitSource<'cfg> { fn block_until_ready(&mut self) -> CargoResult<()> { if self.path_source.is_some() { + self.mark_used(None)?; return Ok(()); } @@ -290,8 +306,19 @@ impl<'cfg> Source for GitSource<'cfg> { let path_source = PathSource::new_recursive(&checkout_path, source_id, self.config); self.path_source = Some(path_source); + self.short_id = Some(short_id.as_str().into()); self.locked_rev = Some(actual_rev); - self.path_source.as_mut().unwrap().update() + self.path_source.as_mut().unwrap().update()?; + + // Hopefully this shouldn't incur too much of a performance hit since + // most of this should already be in cache since it was just + // extracted. + // + // !.git is used because clones typically use hardlinks for the git + // contents. TODO: Verify behavior on Windows. + let size = cargo_util::du(&checkout_path, &["!.git"])?; + self.mark_used(Some(size))?; + Ok(()) } fn download(&mut self, id: PackageId) -> CargoResult { @@ -300,6 +327,7 @@ impl<'cfg> Source for GitSource<'cfg> { id, self.remote ); + self.mark_used(None)?; self.path_source .as_mut() .expect("BUG: `update()` must be called before `get()`") diff --git a/src/cargo/sources/registry/download.rs b/src/cargo/sources/registry/download.rs index 786432835..daf1d0537 100644 --- a/src/cargo/sources/registry/download.rs +++ b/src/cargo/sources/registry/download.rs @@ -3,11 +3,13 @@ //! [`HttpRegistry`]: super::http_remote::HttpRegistry //! 
[`RemoteRegistry`]: super::remote::RemoteRegistry +use crate::util::interning::InternedString; use anyhow::Context; use cargo_credential::Operation; use cargo_util::registry::make_dep_path; use cargo_util::Sha256; +use crate::core::global_cache_tracker; use crate::core::PackageId; use crate::sources::registry::MaybeLock; use crate::sources::registry::RegistryConfig; @@ -34,6 +36,7 @@ const CHECKSUM_TEMPLATE: &str = "{sha256-checksum}"; pub(super) fn download( cache_path: &Filesystem, config: &Config, + encoded_registry_name: InternedString, pkg: PackageId, checksum: &str, registry_config: RegistryConfig, @@ -50,6 +53,13 @@ pub(super) fn download( if let Ok(dst) = File::open(path) { let meta = dst.metadata()?; if meta.len() > 0 { + config.deferred_global_last_use()?.mark_registry_crate_used( + global_cache_tracker::RegistryCrate { + encoded_registry_name, + crate_filename: pkg.tarball_name().into(), + size: meta.len(), + }, + ); return Ok(MaybeLock::Ready(dst)); } } @@ -106,6 +116,7 @@ pub(super) fn download( pub(super) fn finish_download( cache_path: &Filesystem, config: &Config, + encoded_registry_name: InternedString, pkg: PackageId, checksum: &str, data: &[u8], @@ -115,6 +126,13 @@ pub(super) fn finish_download( if actual != checksum { anyhow::bail!("failed to verify the checksum of `{}`", pkg) } + config.deferred_global_last_use()?.mark_registry_crate_used( + global_cache_tracker::RegistryCrate { + encoded_registry_name, + crate_filename: pkg.tarball_name().into(), + size: data.len() as u64, + }, + ); cache_path.create_dir()?; let path = cache_path.join(&pkg.tarball_name()); diff --git a/src/cargo/sources/registry/http_remote.rs b/src/cargo/sources/registry/http_remote.rs index 3d31110c3..1dfae4ad8 100644 --- a/src/cargo/sources/registry/http_remote.rs +++ b/src/cargo/sources/registry/http_remote.rs @@ -1,11 +1,13 @@ //! Access to a HTTP-based crate registry. See [`HttpRegistry`] for details. +use crate::core::global_cache_tracker; use crate::core::{PackageId, SourceId}; use crate::sources::registry::download; use crate::sources::registry::MaybeLock; use crate::sources::registry::{LoadResponse, RegistryConfig, RegistryData}; use crate::util::cache_lock::CacheLockMode; use crate::util::errors::{CargoResult, HttpNotSuccessful}; +use crate::util::interning::InternedString; use crate::util::network::http::http_handle; use crate::util::network::retry::{Retry, RetryResult}; use crate::util::network::sleep::SleepTracker; @@ -52,6 +54,7 @@ const UNKNOWN: &'static str = "Unknown"; /// /// [RFC 2789]: https://github.com/rust-lang/rfcs/pull/2789 pub struct HttpRegistry<'cfg> { + name: InternedString, /// Path to the registry index (`$CARGO_HOME/registry/index/$REG-HASH`). /// /// To be fair, `HttpRegistry` doesn't store the registry index it @@ -199,6 +202,7 @@ impl<'cfg> HttpRegistry<'cfg> { .expect("a url with the sparse+ stripped should still be valid"); Ok(HttpRegistry { + name: name.into(), index_path: config.registry_index_path().join(name), cache_path: config.registry_cache_path().join(name), source_id, @@ -454,6 +458,11 @@ impl<'cfg> HttpRegistry<'cfg> { impl<'cfg> RegistryData for HttpRegistry<'cfg> { fn prepare(&self) -> CargoResult<()> { + self.config + .deferred_global_last_use()? 
+ .mark_registry_index_used(global_cache_tracker::RegistryIndex { + encoded_registry_name: self.name, + }); Ok(()) } @@ -750,6 +759,7 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { download::download( &self.cache_path, &self.config, + self.name.clone(), pkg, checksum, registry_config, @@ -762,7 +772,14 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> { checksum: &str, data: &[u8], ) -> CargoResult { - download::finish_download(&self.cache_path, &self.config, pkg, checksum, data) + download::finish_download( + &self.cache_path, + &self.config, + self.name.clone(), + pkg, + checksum, + data, + ) } fn is_crate_downloaded(&self, pkg: PackageId) -> bool { diff --git a/src/cargo/sources/registry/mod.rs b/src/cargo/sources/registry/mod.rs index 7ee461edd..f884eec30 100644 --- a/src/cargo/sources/registry/mod.rs +++ b/src/cargo/sources/registry/mod.rs @@ -201,6 +201,7 @@ use tar::Archive; use tracing::debug; use crate::core::dependency::Dependency; +use crate::core::global_cache_tracker; use crate::core::{Package, PackageId, SourceId, Summary}; use crate::sources::source::MaybePackage; use crate::sources::source::QueryKind; @@ -239,6 +240,7 @@ struct LockMetadata { /// /// For general concepts of registries, see the [module-level documentation](crate::sources::registry). pub struct RegistrySource<'cfg> { + name: InternedString, /// The unique identifier of this source. source_id: SourceId, /// The path where crate files are extracted (`$CARGO_HOME/registry/src/$REG-HASH`). @@ -514,6 +516,7 @@ impl<'cfg> RegistrySource<'cfg> { yanked_whitelist: &HashSet, ) -> RegistrySource<'cfg> { RegistrySource { + name: name.into(), src_path: config.registry_source_path().join(name), config, source_id, @@ -589,6 +592,13 @@ impl<'cfg> RegistrySource<'cfg> { match fs::read_to_string(path) { Ok(ok) => match serde_json::from_str::(&ok) { Ok(lock_meta) if lock_meta.v == 1 => { + self.config + .deferred_global_last_use()? + .mark_registry_src_used(global_cache_tracker::RegistrySrc { + encoded_registry_name: self.name, + package_dir: package_dir.into(), + size: None, + }); return Ok(unpack_dir.to_path_buf()); } _ => { @@ -613,6 +623,7 @@ impl<'cfg> RegistrySource<'cfg> { set_mask(&mut tar); tar }; + let mut bytes_written = 0; let prefix = unpack_dir.file_name().unwrap(); let parent = unpack_dir.parent().unwrap(); for entry in tar.entries()? { @@ -644,6 +655,7 @@ impl<'cfg> RegistrySource<'cfg> { continue; } // Unpacking failed + bytes_written += entry.size(); let mut result = entry.unpack_in(parent).map_err(anyhow::Error::from); if cfg!(windows) && restricted_names::is_windows_reserved_path(&entry_path) { result = result.with_context(|| { @@ -670,6 +682,14 @@ impl<'cfg> RegistrySource<'cfg> { let lock_meta = LockMetadata { v: 1 }; write!(ok, "{}", serde_json::to_string(&lock_meta).unwrap())?; + self.config + .deferred_global_last_use()? + .mark_registry_src_used(global_cache_tracker::RegistrySrc { + encoded_registry_name: self.name, + package_dir: package_dir.into(), + size: Some(bytes_written), + }); + Ok(unpack_dir.to_path_buf()) } diff --git a/src/cargo/sources/registry/remote.rs b/src/cargo/sources/registry/remote.rs index ba171eac3..4e7dd5f6c 100644 --- a/src/cargo/sources/registry/remote.rs +++ b/src/cargo/sources/registry/remote.rs @@ -1,5 +1,6 @@ //! Access to a Git index based registry. See [`RemoteRegistry`] for details. 
+use crate::core::global_cache_tracker; use crate::core::{GitReference, PackageId, SourceId}; use crate::sources::git; use crate::sources::git::fetch::RemoteKind; @@ -47,6 +48,7 @@ use tracing::{debug, trace}; /// /// [`HttpRegistry`]: super::http_remote::HttpRegistry pub struct RemoteRegistry<'cfg> { + name: InternedString, /// Path to the registry index (`$CARGO_HOME/registry/index/$REG-HASH`). index_path: Filesystem, /// Path to the cache of `.crate` files (`$CARGO_HOME/registry/cache/$REG-HASH`). @@ -87,6 +89,7 @@ impl<'cfg> RemoteRegistry<'cfg> { /// registry index are stored. Expect to be unique. pub fn new(source_id: SourceId, config: &'cfg Config, name: &str) -> RemoteRegistry<'cfg> { RemoteRegistry { + name: name.into(), index_path: config.registry_index_path().join(name), cache_path: config.registry_cache_path().join(name), source_id, @@ -211,6 +214,11 @@ impl<'cfg> RemoteRegistry<'cfg> { impl<'cfg> RegistryData for RemoteRegistry<'cfg> { fn prepare(&self) -> CargoResult<()> { self.repo()?; + self.config + .deferred_global_last_use()? + .mark_registry_index_used(global_cache_tracker::RegistryIndex { + encoded_registry_name: self.name, + }); Ok(()) } @@ -403,6 +411,7 @@ impl<'cfg> RegistryData for RemoteRegistry<'cfg> { download::download( &self.cache_path, &self.config, + self.name, pkg, checksum, registry_config, @@ -415,7 +424,14 @@ impl<'cfg> RegistryData for RemoteRegistry<'cfg> { checksum: &str, data: &[u8], ) -> CargoResult { - download::finish_download(&self.cache_path, &self.config, pkg, checksum, data) + download::finish_download( + &self.cache_path, + &self.config, + self.name.clone(), + pkg, + checksum, + data, + ) } fn is_crate_downloaded(&self, pkg: PackageId) -> bool { diff --git a/src/cargo/util/config/mod.rs b/src/cargo/util/config/mod.rs index 50153466b..b054541d0 100644 --- a/src/cargo/util/config/mod.rs +++ b/src/cargo/util/config/mod.rs @@ -68,6 +68,7 @@ use std::time::Instant; use self::ConfigValue as CV; use crate::core::compiler::rustdoc::RustdocExternMap; +use crate::core::global_cache_tracker::{DeferredGlobalLastUse, GlobalCacheTracker}; use crate::core::shell::Verbosity; use crate::core::{features, CliUnstable, Shell, SourceId, Workspace, WorkspaceRootConfig}; use crate::ops::RegistryCredentialConfig; @@ -244,6 +245,8 @@ pub struct Config { pub nightly_features_allowed: bool, /// WorkspaceRootConfigs that have been found pub ws_roots: RefCell>, + global_cache_tracker: LazyCell>, + deferred_global_last_use: LazyCell>, } impl Config { @@ -317,6 +320,8 @@ impl Config { env_config: LazyCell::new(), nightly_features_allowed: matches!(&*features::channel(), "nightly" | "dev"), ws_roots: RefCell::new(HashMap::new()), + global_cache_tracker: LazyCell::new(), + deferred_global_last_use: LazyCell::new(), } } @@ -1919,6 +1924,25 @@ impl Config { ) -> CargoResult>> { self.package_cache_lock.try_lock(self, mode) } + + /// Returns a reference to the shared [`GlobalCacheTracker`]. + /// + /// The package cache lock must be held to call this function (and to use + /// it in general). + pub fn global_cache_tracker(&self) -> CargoResult> { + let tracker = self.global_cache_tracker.try_borrow_with(|| { + Ok::<_, anyhow::Error>(RefCell::new(GlobalCacheTracker::new(self)?)) + })?; + Ok(tracker.borrow_mut()) + } + + /// Returns a reference to the shared [`DeferredGlobalLastUse`]. 
+ pub fn deferred_global_last_use(&self) -> CargoResult> { + let deferred = self.deferred_global_last_use.try_borrow_with(|| { + Ok::<_, anyhow::Error>(RefCell::new(DeferredGlobalLastUse::new())) + })?; + Ok(deferred.borrow_mut()) + } } /// Internal error for serde errors. diff --git a/tests/testsuite/clean.rs b/tests/testsuite/clean.rs index fbb4d3e5b..fef351e9d 100644 --- a/tests/testsuite/clean.rs +++ b/tests/testsuite/clean.rs @@ -1,5 +1,6 @@ //! Tests for the `cargo clean` command. +use cargo_test_support::paths::CargoPathExt; use cargo_test_support::registry::Package; use cargo_test_support::{ basic_bin_manifest, basic_manifest, git, main_file, project, project_in, rustc_host, @@ -805,15 +806,6 @@ fn clean_dry_run() { .file("src/lib.rs", "") .build(); - let ls_r = || -> Vec<_> { - let mut file_list: Vec<_> = walkdir::WalkDir::new(p.build_dir()) - .into_iter() - .filter_map(|e| e.map(|e| e.path().to_owned()).ok()) - .collect(); - file_list.sort(); - file_list - }; - // Start with no files. p.cargo("clean --dry-run") .with_stdout("") @@ -823,7 +815,7 @@ fn clean_dry_run() { ) .run(); p.cargo("check").run(); - let before = ls_r(); + let before = p.build_dir().ls_r(); p.cargo("clean --dry-run") .with_stderr( "[SUMMARY] [..] files, [..] total\n\ @@ -831,7 +823,7 @@ fn clean_dry_run() { ) .run(); // Verify it didn't delete anything. - let after = ls_r(); + let after = p.build_dir().ls_r(); assert_eq!(before, after); let expected = cargo::util::iter_join(before.iter().map(|p| p.to_str().unwrap()), "\n"); eprintln!("{expected}"); diff --git a/tests/testsuite/global_cache_tracker.rs b/tests/testsuite/global_cache_tracker.rs new file mode 100644 index 000000000..7c658f7c0 --- /dev/null +++ b/tests/testsuite/global_cache_tracker.rs @@ -0,0 +1,1890 @@ +//! Tests for last-use tracking and auto-gc. +//! +//! Cargo supports an environment variable called `__CARGO_TEST_LAST_USE_NOW` +//! to have cargo pretend that the current time is the given time (in seconds +//! since the unix epoch). This is used throughout these tests to simulate +//! what happens when time passes. The [`days_ago_unix`] and +//! [`months_ago_unix`] functions help with setting this value. + +use super::config::ConfigBuilder; +use cargo::core::global_cache_tracker::{self, DeferredGlobalLastUse, GlobalCacheTracker}; +use cargo::util::cache_lock::CacheLockMode; +use cargo::util::interning::InternedString; +use cargo::Config; +use cargo_test_support::paths::{self, CargoPathExt}; +use cargo_test_support::registry::{Package, RegistryBuilder}; +use cargo_test_support::{ + basic_manifest, cargo_process, execs, git, project, retry, sleep_ms, thread_wait_timeout, + Project, +}; +use itertools::Itertools; +use std::fmt::Write; +use std::path::PathBuf; +use std::process::Stdio; +use std::time::{Duration, SystemTime}; + +/// Helper to create a simple `foo` project which depends on a registry +/// dependency called `bar`. +fn basic_foo_bar_project() -> Project { + Package::new("bar", "1.0.0").publish(); + project() + .file( + "Cargo.toml", + r#" + [package] + name = "foo" + version = "0.1.0" + + [dependencies] + bar = "1.0" + "#, + ) + .file("src/lib.rs", "") + .build() +} + +/// Helper to get the names of files in a directory as strings. 
+fn get_names(glob: &str) -> Vec<String> {
+ let mut names: Vec<_> = glob::glob(paths::home().join(glob).to_str().unwrap())
+ .unwrap()
+ .map(|p| p.unwrap().file_name().unwrap().to_str().unwrap().to_owned())
+ .collect();
+ names.sort();
+ names
+}
+
+fn get_registry_names(which: &str) -> Vec<String> {
+ get_names(&format!(".cargo/registry/{which}/*/*"))
+}
+
+fn get_index_names() -> Vec<String> {
+ get_names(&format!(".cargo/registry/index/*"))
+}
+
+fn get_git_db_names() -> Vec<String> {
+ get_names(&format!(".cargo/git/db/*"))
+}
+
+fn get_git_checkout_names(db_name: &str) -> Vec<String> {
+ get_names(&format!(".cargo/git/checkouts/{db_name}/*"))
+}
+
+fn days_ago(n: u64) -> SystemTime {
+ SystemTime::now() - Duration::from_secs(60 * 60 * 24 * n)
+}
+
+/// Helper for simulating running cargo in the past. Use with the
+/// __CARGO_TEST_LAST_USE_NOW environment variable.
+fn days_ago_unix(n: u64) -> String {
+ days_ago(n)
+ .duration_since(SystemTime::UNIX_EPOCH)
+ .unwrap()
+ .as_secs()
+ .to_string()
+}
+
+/// Helper for simulating running cargo in the past. Use with the
+/// __CARGO_TEST_LAST_USE_NOW environment variable.
+fn months_ago_unix(n: u64) -> String {
+ days_ago_unix(n * 30)
+}
+
+/// Populates last-use database and the cache files.
+///
+/// This makes it easier to more accurately specify exact sizes. Creating
+/// specific sizes with `Package` is too difficult.
+fn populate_cache(config: &Config, test_crates: &[(&str, u64, u64, u64)]) -> (PathBuf, PathBuf) {
+ let cache_dir = paths::home().join(".cargo/registry/cache/example.com-a6c4a5adcb232b9a");
+ let src_dir = paths::home().join(".cargo/registry/src/example.com-a6c4a5adcb232b9a");
+
+ GlobalCacheTracker::db_path(&config)
+ .into_path_unlocked()
+ .rm_rf();
+
+ let _lock = config
+ .acquire_package_cache_lock(CacheLockMode::MutateExclusive)
+ .unwrap();
+ let mut tracker = GlobalCacheTracker::new(&config).unwrap();
+ let mut deferred = DeferredGlobalLastUse::new();
+
+ cache_dir.rm_rf();
+ cache_dir.mkdir_p();
+ src_dir.rm_rf();
+ src_dir.mkdir_p();
+ paths::home()
+ .join(".cargo/registry/index/example.com-a6c4a5adcb232b9a")
+ .mkdir_p();
+ let mut create = |name: &str, age, crate_size: u64, src_size: u64| {
+ let crate_filename = InternedString::new(&format!("{name}.crate"));
+ deferred.mark_registry_crate_used_stamp(
+ global_cache_tracker::RegistryCrate {
+ encoded_registry_name: "example.com-a6c4a5adcb232b9a".into(),
+ crate_filename,
+ size: crate_size,
+ },
+ Some(&days_ago(age)),
+ );
+ deferred.mark_registry_src_used_stamp(
+ global_cache_tracker::RegistrySrc {
+ encoded_registry_name: "example.com-a6c4a5adcb232b9a".into(),
+ package_dir: name.into(),
+ size: Some(src_size),
+ },
+ Some(&days_ago(age)),
+ );
+ std::fs::write(
+ cache_dir.join(crate_filename),
+ "x".repeat(crate_size as usize),
+ )
+ .unwrap();
+ let path = src_dir.join(name);
+ path.mkdir_p();
+ std::fs::write(path.join("data"), "x".repeat(src_size as usize)).unwrap()
+ };
+
+ for (name, age, crate_size, src_size) in test_crates {
+ create(name, *age, *crate_size, *src_size);
+ }
+ deferred.save(&mut tracker).unwrap();
+
+ (cache_dir, src_dir)
+}
+
+#[cargo_test]
+fn auto_gc_gated() {
+ // Requires -Zgc to both track last-use data and to run auto-gc.
+ let p = basic_foo_bar_project();
+ p.cargo("check")
+ .env("__CARGO_TEST_LAST_USE_NOW", months_ago_unix(4))
+ .run();
+ // Check that it did not create a database or delete anything.
+ let config = ConfigBuilder::new().build(); + assert!(!GlobalCacheTracker::db_path(&config) + .into_path_unlocked() + .exists()); + assert_eq!(get_index_names().len(), 1); + + // Again in the future, shouldn't auto-gc. + p.cargo("check").run(); + assert!(!GlobalCacheTracker::db_path(&config) + .into_path_unlocked() + .exists()); + assert_eq!(get_index_names().len(), 1); +} + +#[cargo_test] +fn cache_clean_options_gated() { + // Checks that all cache clean options require -Zgc. + let p = project().build(); + for opt in [ + "--gc", + "--max-src-age=0 day", + "--max-index-age=0 day", + "--max-git-co-age=0 day", + "--max-git-db-age=0 day", + "--max-download-age=0 day", + "--max-src-size=0", + "--max-crate-size=0", + "--max-download-size=0", + ] { + let trimmed_opt = opt.trim_start_matches('-').split('=').next().unwrap(); + p.cargo("clean") + .arg(opt) + .with_status(101) + .with_stderr(&format!( + "\ +error: the `{trimmed_opt}` flag is unstable, [..] +See [..] +See [..] for more information about the `{trimmed_opt}` flag. +" + )) + .run(); + } + + for opt in [ + "--max-target-age=0 day", + "--max-shared-target-age=0 day", + "--max-target-size=0", + "--max-shared-target-size=0", + ] { + let trimmed_opt = opt.split('=').next().unwrap(); + p.cargo("clean") + .arg(opt) + .with_status(101) + .with_stderr(&format!( + "error: option {trimmed_opt} is not yet implemented" + )) + .run(); + } +} + +#[cargo_test] +fn implies_source() { + // Checks that when a src, crate, or checkout is marked as used, the + // corresponding index or git db also gets marked as used. + let config = ConfigBuilder::new().unstable_flag("gc").build(); + let _lock = config + .acquire_package_cache_lock(CacheLockMode::MutateExclusive) + .unwrap(); + let mut deferred = DeferredGlobalLastUse::new(); + let mut tracker = GlobalCacheTracker::new(&config).unwrap(); + + deferred.mark_registry_crate_used(global_cache_tracker::RegistryCrate { + encoded_registry_name: "example.com-a6c4a5adcb232b9a".into(), + crate_filename: "regex-1.8.4.crate".into(), + size: 123, + }); + deferred.mark_registry_src_used(global_cache_tracker::RegistrySrc { + encoded_registry_name: "index.crates.io-6f17d22bba15001f".into(), + package_dir: "rand-0.8.5".into(), + size: None, + }); + deferred.mark_git_checkout_used(global_cache_tracker::GitCheckout { + encoded_git_name: "cargo-e7ff1db891893a9e".into(), + short_name: "f0a4ee0".into(), + size: None, + }); + deferred.save(&mut tracker).unwrap(); + + let mut indexes = tracker.registry_index_all().unwrap(); + assert_eq!(indexes.len(), 2); + indexes.sort_by(|a, b| a.0.encoded_registry_name.cmp(&b.0.encoded_registry_name)); + assert_eq!( + indexes[0].0.encoded_registry_name, + "example.com-a6c4a5adcb232b9a" + ); + assert_eq!( + indexes[1].0.encoded_registry_name, + "index.crates.io-6f17d22bba15001f" + ); + + let dbs = tracker.git_db_all().unwrap(); + assert_eq!(dbs.len(), 1); + assert_eq!(dbs[0].0.encoded_git_name, "cargo-e7ff1db891893a9e"); +} + +#[cargo_test] +fn auto_gc_defaults() { + // Checks that the auto-gc deletes old entries, and leaves new ones intact. + Package::new("old", "1.0.0").publish(); + Package::new("new", "1.0.0").publish(); + let p = project() + .file( + "Cargo.toml", + r#" + [package] + name = "foo" + version = "0.1.0" + + [dependencies] + old = "1.0" + new = "1.0" + "#, + ) + .file("src/lib.rs", "") + .build(); + // Populate the last-use data. 
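+ // (Both packages are recorded as last used 4 months ago, which is past
+ // the default expiration thresholds exercised below.)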
+ p.cargo("check -Zgc") + .masquerade_as_nightly_cargo(&["gc"]) + .env("__CARGO_TEST_LAST_USE_NOW", months_ago_unix(4)) + .run(); + assert_eq!(get_registry_names("src"), ["new-1.0.0", "old-1.0.0"]); + assert_eq!( + get_registry_names("cache"), + ["new-1.0.0.crate", "old-1.0.0.crate"] + ); + + // Run again with just one package. Make sure the old src gets deleted, + // but .crate does not. + p.change_file( + "Cargo.toml", + r#" + [package] + name = "foo" + version = "0.1.0" + + [dependencies] + new = "1.0" + "#, + ); + p.cargo("check -Zgc") + .masquerade_as_nightly_cargo(&["gc"]) + .env("__CARGO_TEST_LAST_USE_NOW", months_ago_unix(2)) + .run(); + assert_eq!(get_registry_names("src"), ["new-1.0.0"]); + assert_eq!( + get_registry_names("cache"), + ["new-1.0.0.crate", "old-1.0.0.crate"] + ); + + // Run again after the .crate should have aged out. + p.cargo("check -Zgc") + .masquerade_as_nightly_cargo(&["gc"]) + .run(); + assert_eq!(get_registry_names("src"), ["new-1.0.0"]); + assert_eq!(get_registry_names("cache"), ["new-1.0.0.crate"]); +} + +#[cargo_test] +fn auto_gc_config() { + // Can configure auto gc settings. + Package::new("old", "1.0.0").publish(); + Package::new("new", "1.0.0").publish(); + let p = project() + .file( + ".cargo/config.toml", + r#" + [gc.auto] + frequency = "always" + max-src-age = "1 day" + max-crate-age = "3 days" + max-index-age = "3 days" + max-git-co-age = "1 day" + max-git-db-age = "3 days" + "#, + ) + .file( + "Cargo.toml", + r#" + [package] + name = "foo" + version = "0.1.0" + + [dependencies] + old = "1.0" + new = "1.0" + "#, + ) + .file("src/lib.rs", "") + .build(); + // Populate the last-use data. + p.cargo("check -Zgc") + .masquerade_as_nightly_cargo(&["gc"]) + .env("__CARGO_TEST_LAST_USE_NOW", days_ago_unix(4)) + .run(); + assert_eq!(get_registry_names("src"), ["new-1.0.0", "old-1.0.0"]); + assert_eq!( + get_registry_names("cache"), + ["new-1.0.0.crate", "old-1.0.0.crate"] + ); + + // Run again with just one package. Make sure the old src gets deleted, + // but .crate does not. + p.change_file( + "Cargo.toml", + r#" + [package] + name = "foo" + version = "0.1.0" + + [dependencies] + new = "1.0" + "#, + ); + p.cargo("check -Zgc") + .masquerade_as_nightly_cargo(&["gc"]) + .env("__CARGO_TEST_LAST_USE_NOW", days_ago_unix(2)) + .run(); + assert_eq!(get_registry_names("src"), ["new-1.0.0"]); + assert_eq!( + get_registry_names("cache"), + ["new-1.0.0.crate", "old-1.0.0.crate"] + ); + + // Run again after the .crate should have aged out. + p.cargo("check -Zgc") + .masquerade_as_nightly_cargo(&["gc"]) + .run(); + assert_eq!(get_registry_names("src"), ["new-1.0.0"]); + assert_eq!(get_registry_names("cache"), ["new-1.0.0.crate"]); +} + +#[cargo_test] +fn frequency() { + // gc.auto.frequency settings + let p = basic_foo_bar_project(); + p.change_file( + ".cargo/config.toml", + r#" + [gc.auto] + frequency = "never" + "#, + ); + // Populate data in the past. + p.cargo("check -Zgc") + .masquerade_as_nightly_cargo(&["gc"]) + .env("__CARGO_TEST_LAST_USE_NOW", months_ago_unix(4)) + .run(); + assert_eq!(get_index_names().len(), 1); + assert_eq!(get_registry_names("src"), ["bar-1.0.0"]); + assert_eq!(get_registry_names("cache"), ["bar-1.0.0.crate"]); + + p.change_file("Cargo.toml", &basic_manifest("foo", "0.2.0")); + + // Try after the default expiration time, with "never" it shouldn't gc. 
+ p.cargo("check -Zgc") + .masquerade_as_nightly_cargo(&["gc"]) + .run(); + assert_eq!(get_index_names().len(), 1); + assert_eq!(get_registry_names("src"), ["bar-1.0.0"]); + assert_eq!(get_registry_names("cache"), ["bar-1.0.0.crate"]); + + // Try again with a setting that allows it to run. + p.cargo("check -Zgc") + .env("CARGO_GC_AUTO_FREQUENCY", "1 day") + .masquerade_as_nightly_cargo(&["gc"]) + .run(); + assert_eq!(get_index_names().len(), 0); + assert_eq!(get_registry_names("src").len(), 0); + assert_eq!(get_registry_names("cache").len(), 0); +} + +#[cargo_test] +fn auto_gc_index() { + // Deletes the index if it hasn't been used in a while. + let p = basic_foo_bar_project(); + p.cargo("check -Zgc") + .masquerade_as_nightly_cargo(&["gc"]) + .env("__CARGO_TEST_LAST_USE_NOW", months_ago_unix(4)) + .run(); + assert_eq!(get_index_names().len(), 1); + + // Make sure it stays within the time frame. + p.change_file( + "Cargo.toml", + r#" + [package] + name = "foo" + version = "0.1.0" + "#, + ); + p.cargo("check -Zgc") + .masquerade_as_nightly_cargo(&["gc"]) + .env("__CARGO_TEST_LAST_USE_NOW", months_ago_unix(2)) + .run(); + assert_eq!(get_index_names().len(), 1); + + // After it expires, it should be deleted. + p.cargo("check -Zgc") + .masquerade_as_nightly_cargo(&["gc"]) + .run(); + assert_eq!(get_index_names().len(), 0); +} + +#[cargo_test] +fn auto_gc_git() { + // auto-gc should delete git checkouts and dbs. + + // Returns the short git name of a a checkout. + let short_id = |repo: &git2::Repository| -> String { + let head = repo.revparse_single("HEAD").unwrap(); + let short_id = head.short_id().unwrap(); + short_id.as_str().unwrap().to_owned() + }; + + // Set up a git dependency and fetch it and populate the database, + // 6 months in the past. + let (git_project, git_repo) = git::new_repo("bar", |p| { + p.file("Cargo.toml", &basic_manifest("bar", "1.0.0")) + .file("src/lib.rs", "") + }); + let p = project() + .file( + "Cargo.toml", + &format!( + r#" + [package] + name = "foo" + version = "0.1.0" + + [dependencies] + bar = {{ git = '{}' }} + "#, + git_project.url() + ), + ) + .file("src/lib.rs", "") + .build(); + p.cargo("check -Zgc") + .masquerade_as_nightly_cargo(&["gc"]) + .env("__CARGO_TEST_LAST_USE_NOW", months_ago_unix(6)) + .run(); + let db_names = get_git_db_names(); + assert_eq!(db_names.len(), 1); + let first_short_oid = short_id(&git_repo); + assert_eq!( + get_git_checkout_names(&db_names[0]), + [first_short_oid.clone()] + ); + + // Use a new git checkout, should keep both. + git_project.change_file("src/lib.rs", "// modified"); + git::add(&git_repo); + git::commit(&git_repo); + p.cargo("update -Zgc") + .masquerade_as_nightly_cargo(&["gc"]) + .env("__CARGO_TEST_LAST_USE_NOW", months_ago_unix(6)) + .run(); + assert_eq!(get_git_db_names().len(), 1); + let second_short_oid = short_id(&git_repo); + let mut both = vec![first_short_oid, second_short_oid.clone()]; + both.sort(); + assert_eq!(get_git_checkout_names(&db_names[0]), both); + + // In the future, using the second checkout should delete the first. + p.cargo("check -Zgc") + .masquerade_as_nightly_cargo(&["gc"]) + .env("__CARGO_TEST_LAST_USE_NOW", months_ago_unix(4)) + .run(); + assert_eq!(get_git_db_names().len(), 1); + assert_eq!( + get_git_checkout_names(&db_names[0]), + [second_short_oid.clone()] + ); + + // After three months, the db should get deleted. 
+ p.change_file("Cargo.toml", &basic_manifest("foo", "0.2.0")); + p.cargo("check -Zgc") + .masquerade_as_nightly_cargo(&["gc"]) + .run(); + assert_eq!(get_git_db_names().len(), 0); + assert_eq!(get_git_checkout_names(&db_names[0]).len(), 0); +} + +#[cargo_test] +fn auto_gc_various_commands() { + // Checks that auto gc works with a variety of commands. + // + // Auto-gc is only run on a subset of commands. Generally it is run on + // commands that are already doing a lot of work, or heavily involve the + // use of the registry. + Package::new("bar", "1.0.0").publish(); + let cmds = ["check", "fetch"]; + for cmd in cmds { + eprintln!("checking command {cmd}"); + let p = project() + .file( + "Cargo.toml", + r#" + [package] + name = "foo" + version = "0.1.0" + + [dependencies] + bar = "1.0" + "#, + ) + .file("src/lib.rs", "") + .build(); + // Populate the last-use data. + p.cargo(cmd) + .arg("-Zgc") + .masquerade_as_nightly_cargo(&["gc"]) + .env("__CARGO_TEST_LAST_USE_NOW", months_ago_unix(4)) + .run(); + let config = ConfigBuilder::new().unstable_flag("gc").build(); + let lock = config + .acquire_package_cache_lock(CacheLockMode::MutateExclusive) + .unwrap(); + let tracker = GlobalCacheTracker::new(&config).unwrap(); + let indexes = tracker.registry_index_all().unwrap(); + assert_eq!(indexes.len(), 1); + let crates = tracker.registry_crate_all().unwrap(); + assert_eq!(crates.len(), 1); + let srcs = tracker.registry_src_all().unwrap(); + assert_eq!(srcs.len(), 1); + drop(lock); + + // After everything is aged out, it should all be deleted. + p.change_file("Cargo.toml", &basic_manifest("foo", "0.2.0")); + p.cargo(cmd) + .arg("-Zgc") + .masquerade_as_nightly_cargo(&["gc"]) + .run(); + let lock = config + .acquire_package_cache_lock(CacheLockMode::MutateExclusive) + .unwrap(); + let indexes = tracker.registry_index_all().unwrap(); + assert_eq!(indexes.len(), 0); + let crates = tracker.registry_crate_all().unwrap(); + assert_eq!(crates.len(), 0); + let srcs = tracker.registry_src_all().unwrap(); + assert_eq!(srcs.len(), 0); + drop(tracker); + drop(lock); + paths::home().join(".cargo/registry").rm_rf(); + GlobalCacheTracker::db_path(&config) + .into_path_unlocked() + .rm_rf(); + } +} + +#[cargo_test] +fn updates_last_use_various_commands() { + // Checks that last-use tracking is updated by various commands. + // + // Not *all* commands update the index tracking, even though they + // technically involve reading the index. There isn't a convenient place + // to ensure it gets saved while avoiding saving too often in other + // commands. For the most part, this should be fine, since these commands + // usually aren't run without running one of the commands that does save + // the tracking. Some of the commands are: + // + // - login, owner, yank, search + // - report future-incompatibilities + // - package --no-verify + // - fetch --locked + Package::new("bar", "1.0.0").publish(); + let cmds = [ + // name, expected_crates (0=doesn't download) + ("check", 1), + ("fetch", 1), + ("tree", 1), + ("generate-lockfile", 0), + ("update", 0), + ("metadata", 1), + ("vendor --respect-source-config", 1), + ]; + for (cmd, expected_crates) in cmds { + eprintln!("checking command {cmd}"); + let p = project() + .file( + "Cargo.toml", + r#" + [package] + name = "foo" + version = "0.1.0" + + [dependencies] + bar = "1.0" + "#, + ) + .file("src/lib.rs", "") + .build(); + // Populate the last-use data. 
+ p.cargo(cmd) + .arg("-Zgc") + .masquerade_as_nightly_cargo(&["gc"]) + .run(); + let config = ConfigBuilder::new().unstable_flag("gc").build(); + let lock = config + .acquire_package_cache_lock(CacheLockMode::MutateExclusive) + .unwrap(); + let tracker = GlobalCacheTracker::new(&config).unwrap(); + let indexes = tracker.registry_index_all().unwrap(); + assert_eq!(indexes.len(), 1); + let crates = tracker.registry_crate_all().unwrap(); + assert_eq!(crates.len(), expected_crates); + let srcs = tracker.registry_src_all().unwrap(); + assert_eq!(srcs.len(), expected_crates); + drop(tracker); + drop(lock); + paths::home().join(".cargo/registry").rm_rf(); + GlobalCacheTracker::db_path(&config) + .into_path_unlocked() + .rm_rf(); + } +} + +#[cargo_test] +fn both_git_and_http_index_cleans() { + // Checks that either the git or http index cache gets cleaned. + let _crates_io = RegistryBuilder::new().build(); + let _alternative = RegistryBuilder::new().alternative().http_index().build(); + Package::new("from_git", "1.0.0").publish(); + Package::new("from_http", "1.0.0") + .alternative(true) + .publish(); + let p = project() + .file( + "Cargo.toml", + r#" + [package] + name = "foo" + version = "0.1.0" + + [dependencies] + from_git = "1.0" + from_http = { version = "1.0", registry = "alternative" } + "#, + ) + .file("src/lib.rs", "") + .build(); + + p.cargo("update -Zgc") + .masquerade_as_nightly_cargo(&["gc"]) + .env("__CARGO_TEST_LAST_USE_NOW", months_ago_unix(4)) + .run(); + let config = ConfigBuilder::new().unstable_flag("gc").build(); + let lock = config + .acquire_package_cache_lock(CacheLockMode::MutateExclusive) + .unwrap(); + let tracker = GlobalCacheTracker::new(&config).unwrap(); + let indexes = tracker.registry_index_all().unwrap(); + assert_eq!(indexes.len(), 2); + assert_eq!(get_index_names().len(), 2); + drop(lock); + + // Running in the future without these indexes should delete them. + p.change_file("Cargo.toml", &basic_manifest("foo", "0.2.0")); + p.cargo("clean --gc -Zgc") + .masquerade_as_nightly_cargo(&["gc"]) + .run(); + let lock = config + .acquire_package_cache_lock(CacheLockMode::MutateExclusive) + .unwrap(); + let indexes = tracker.registry_index_all().unwrap(); + assert_eq!(indexes.len(), 0); + assert_eq!(get_index_names().len(), 0); + drop(lock); +} + +#[cargo_test] +fn clean_gc_dry_run() { + // Basic `clean --gc --dry-run` test. + let p = basic_foo_bar_project(); + // Populate the last-use data. + p.cargo("fetch -Zgc") + .masquerade_as_nightly_cargo(&["gc"]) + .env("__CARGO_TEST_LAST_USE_NOW", months_ago_unix(4)) + .run(); + + let registry_root = paths::home().join(".cargo/registry"); + let glob_registry = |name| -> PathBuf { + let mut paths: Vec<_> = glob::glob(registry_root.join(name).join("*").to_str().unwrap()) + .unwrap() + .map(|p| p.unwrap()) + .collect(); + assert_eq!(paths.len(), 1); + paths.pop().unwrap() + }; + let index = glob_registry("index").ls_r(); + let src = glob_registry("src").ls_r(); + let cache = glob_registry("cache").ls_r(); + let expected_files = index + .iter() + .chain(src.iter()) + .chain(cache.iter()) + .map(|p| p.to_str().unwrap()) + .join("\n"); + + p.cargo("clean --gc --dry-run -v -Zgc") + .masquerade_as_nightly_cargo(&["gc"]) + .with_stdout_unordered(&expected_files) + .with_stderr( + "[SUMMARY] [..] files, [..] total\n\ + [WARNING] no files deleted due to --dry-run", + ) + .run(); + + // Again, make sure the information is still tracked. 
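+ // (A dry-run should not delete the last-use entries themselves, so a
+ // second dry-run reports the same set of files.)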
+ p.cargo("clean --gc --dry-run -v -Zgc") + .masquerade_as_nightly_cargo(&["gc"]) + .with_stdout_unordered(&expected_files) + .with_stderr( + "[SUMMARY] [..] files, [..] total\n\ + [WARNING] no files deleted due to --dry-run", + ) + .run(); +} + +#[cargo_test] +fn clean_default_gc() { + // `clean` without options should also gc + let p = basic_foo_bar_project(); + // Populate the last-use data. + p.cargo("fetch -Zgc") + .masquerade_as_nightly_cargo(&["gc"]) + .env("__CARGO_TEST_LAST_USE_NOW", months_ago_unix(4)) + .run(); + p.cargo("clean -v -Zgc") + .masquerade_as_nightly_cargo(&["gc"]) + .with_stderr_unordered( + "\ +[REMOVING] [ROOT]/home/.cargo/registry/index/[..] +[REMOVING] [ROOT]/home/.cargo/registry/src/[..] +[REMOVING] [ROOT]/home/.cargo/registry/cache/[..] +[REMOVED] [..] files, [..] total +", + ) + .run(); +} + +#[cargo_test] +fn tracks_sizes() { + // Checks that sizes are properly tracked in the db. + Package::new("dep1", "1.0.0") + .file("src/lib.rs", "") + .publish(); + Package::new("dep2", "1.0.0") + .file("src/lib.rs", "") + .file("data", &"abcdefghijklmnopqrstuvwxyz".repeat(1000)) + .publish(); + let p = project() + .file( + "Cargo.toml", + r#" + [package] + name = "foo" + version = "0.1.0" + + [dependencies] + dep1 = "1.0" + dep2 = "1.0" + "#, + ) + .file("src/lib.rs", "") + .build(); + p.cargo("fetch -Zgc") + .masquerade_as_nightly_cargo(&["gc"]) + .run(); + + // Check that the crate sizes are the same as on disk. + let config = ConfigBuilder::new().unstable_flag("gc").build(); + let _lock = config + .acquire_package_cache_lock(CacheLockMode::MutateExclusive) + .unwrap(); + let tracker = GlobalCacheTracker::new(&config).unwrap(); + let mut crates = tracker.registry_crate_all().unwrap(); + crates.sort_by(|a, b| a.0.crate_filename.cmp(&b.0.crate_filename)); + let db_sizes: Vec<_> = crates.iter().map(|c| c.0.size).collect(); + + let mut actual: Vec<_> = p + .glob(paths::home().join(".cargo/registry/cache/*/*")) + .map(|p| p.unwrap()) + .collect(); + actual.sort(); + let actual_sizes: Vec<_> = actual + .iter() + .map(|path| std::fs::metadata(path).unwrap().len()) + .collect(); + assert_eq!(db_sizes, actual_sizes); + + // Also check the src sizes are computed. + let mut srcs = tracker.registry_src_all().unwrap(); + srcs.sort_by(|a, b| a.0.package_dir.cmp(&b.0.package_dir)); + let db_sizes: Vec<_> = srcs.iter().map(|c| c.0.size.unwrap()).collect(); + let mut actual: Vec<_> = p + .glob(paths::home().join(".cargo/registry/src/*/*")) + .map(|p| p.unwrap()) + .collect(); + actual.sort(); + // .cargo-ok is not tracked in the size. + actual.iter().for_each(|p| p.join(".cargo-ok").rm_rf()); + let actual_sizes: Vec<_> = actual + .iter() + .map(|path| cargo_util::du(path, &[]).unwrap()) + .collect(); + assert_eq!(db_sizes, actual_sizes); + assert!(db_sizes[1] > 26000); +} + +#[cargo_test] +fn max_size() { + // Checks --max-crate-size and --max-src-size with various cleaning thresholds. + let config = ConfigBuilder::new().unstable_flag("gc").build(); + + let test_crates = [ + // name, age, crate_size, src_size + ("a-1.0.0", 5, 1, 1), + ("b-1.0.0", 6, 2, 2), + ("c-1.0.0", 3, 3, 3), + ("d-1.0.0", 2, 4, 4), + ("e-1.0.0", 2, 5, 5), + ("f-1.0.0", 9, 6, 6), + ("g-1.0.0", 1, 1, 1), + ]; + + // Determine the order things get deleted so they can be verified. 
+ let mut names_by_timestamp: Vec<_> = test_crates
+ .iter()
+ .map(|(name, age, _, _)| (days_ago_unix(*age), name))
+ .collect();
+ names_by_timestamp.sort();
+ let names_by_timestamp: Vec<_> = names_by_timestamp
+ .into_iter()
+ .map(|(_, name)| name)
+ .collect();
+
+ // This exercises the different boundary conditions.
+ for (clean_size, files, bytes) in [
+ (22, 0, 0),
+ (21, 1, 6),
+ (16, 1, 6),
+ (15, 2, 8),
+ (14, 2, 8),
+ (13, 3, 9),
+ (12, 4, 12),
+ (10, 4, 12),
+ (9, 5, 16),
+ (6, 5, 16),
+ (5, 6, 21),
+ (1, 6, 21),
+ (0, 7, 22),
+ ] {
+ let (removed, kept) = names_by_timestamp.split_at(files);
+ // --max-crate-size
+ let (cache_dir, src_dir) = populate_cache(&config, &test_crates);
+ let mut stderr = String::new();
+ for name in removed {
+ writeln!(stderr, "[REMOVING] [..]{name}.crate").unwrap();
+ }
+ let total_display = if removed.is_empty() {
+ String::new()
+ } else {
+ format!(", {bytes}B total")
+ };
+ let files_display = if files == 1 {
+ format!("1 file")
+ } else {
+ format!("{files} files")
+ };
+ write!(stderr, "[REMOVED] {files_display}{total_display}").unwrap();
+ cargo_process(&format!("clean -Zgc -v --max-crate-size={clean_size}"))
+ .masquerade_as_nightly_cargo(&["gc"])
+ .with_stderr_unordered(&stderr)
+ .run();
+ for name in kept {
+ assert!(cache_dir.join(format!("{name}.crate")).exists());
+ }
+ for name in removed {
+ assert!(!cache_dir.join(format!("{name}.crate")).exists());
+ }
+
+ // --max-src-size
+ populate_cache(&config, &test_crates);
+ let mut stderr = String::new();
+ for name in removed {
+ writeln!(stderr, "[REMOVING] [..]{name}").unwrap();
+ }
+ let total_display = if files == 0 {
+ String::new()
+ } else {
+ format!(", {bytes}B total")
+ };
+ write!(stderr, "[REMOVED] {files_display}{total_display}").unwrap();
+ cargo_process(&format!("clean -Zgc -v --max-src-size={clean_size}"))
+ .masquerade_as_nightly_cargo(&["gc"])
+ .with_stderr_unordered(&stderr)
+ .run();
+ for name in kept {
+ assert!(src_dir.join(name).exists());
+ }
+ for name in removed {
+ assert!(!src_dir.join(name).exists());
+ }
+ }
+}
+
+#[cargo_test]
+fn max_size_untracked_crate() {
+ // When a .crate file exists from an older version of cargo that did not
+ // track sizes, `clean --max-crate-size` should populate the db with the
+ // sizes.
+ let config = ConfigBuilder::new().unstable_flag("gc").build();
+ let cache = paths::home().join(".cargo/registry/cache/example.com-a6c4a5adcb232b9a");
+ cache.mkdir_p();
+ paths::home()
+ .join(".cargo/registry/index/example.com-a6c4a5adcb232b9a")
+ .mkdir_p();
+ // Create the `.crate` files.
+ let test_crates = [
+ // name, size
+ ("a-1.0.0.crate", 1234),
+ ("b-1.0.0.crate", 42),
+ ("c-1.0.0.crate", 0),
+ ];
+ for (name, size) in test_crates {
+ std::fs::write(cache.join(name), "x".repeat(size as usize)).unwrap()
+ }
+ // This should scan the directory and populate the db with the size information.
+ cargo_process("clean -Zgc -v --max-crate-size=100000")
+ .masquerade_as_nightly_cargo(&["gc"])
+ .with_stderr("[REMOVED] 0 files")
+ .run();
+ // Check that it stored the size data.
+ let _lock = config
+ .acquire_package_cache_lock(CacheLockMode::MutateExclusive)
+ .unwrap();
+ let tracker = GlobalCacheTracker::new(&config).unwrap();
+ let crates = tracker.registry_crate_all().unwrap();
+ let mut actual: Vec<_> = crates
+ .iter()
+ .map(|(rc, _time)| (rc.crate_filename.as_str(), rc.size))
+ .collect();
+ actual.sort();
+ assert_eq!(test_crates, actual.as_slice());
+}
+
+/// Helper to prepare the max-size test.
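+///
+/// Returns a `Config` and a project with a fetched dependency, with the
+/// last-use database removed to simulate a cache populated by an older
+/// cargo that did not track last-use data.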
+fn max_size_untracked_prepare() -> (Config, Project) { + // First, publish and download a dependency. + let p = basic_foo_bar_project(); + p.cargo("fetch").run(); + // Pretend it was an older version that did not track last-use. + let config = ConfigBuilder::new().unstable_flag("gc").build(); + GlobalCacheTracker::db_path(&config) + .into_path_unlocked() + .rm_rf(); + (config, p) +} + +/// Helper to verify the max-size test. +fn max_size_untracked_verify(config: &Config) { + let actual: Vec<_> = glob::glob( + paths::home() + .join(".cargo/registry/src/*/*") + .to_str() + .unwrap(), + ) + .unwrap() + .map(|p| p.unwrap()) + .collect(); + assert_eq!(actual.len(), 1); + let actual_size = cargo_util::du(&actual[0], &[]).unwrap(); + let lock = config + .acquire_package_cache_lock(CacheLockMode::MutateExclusive) + .unwrap(); + let tracker = GlobalCacheTracker::new(&config).unwrap(); + let srcs = tracker.registry_src_all().unwrap(); + assert_eq!(srcs.len(), 1); + assert_eq!(srcs[0].0.size, Some(actual_size)); + drop(lock); +} + +#[cargo_test] +fn max_size_untracked_src_from_use() { + // When a src directory exists from an older version of cargo that did not + // track sizes, doing a build should populate the db with an entry with an + // unknown size. `clean --max-src-size` should then fix the size. + let (config, p) = max_size_untracked_prepare(); + + // Run a command that will update the db with an unknown src size. + p.cargo("tree -Zgc") + .masquerade_as_nightly_cargo(&["gc"]) + .run(); + // Check that it is None. + let lock = config + .acquire_package_cache_lock(CacheLockMode::MutateExclusive) + .unwrap(); + let tracker = GlobalCacheTracker::new(&config).unwrap(); + let srcs = tracker.registry_src_all().unwrap(); + assert_eq!(srcs.len(), 1); + assert_eq!(srcs[0].0.size, None); + drop(lock); + + // Fix the size. + p.cargo("clean -v --max-src-size=10000 -Zgc") + .masquerade_as_nightly_cargo(&["gc"]) + .with_stderr("[REMOVED] 0 files") + .run(); + max_size_untracked_verify(&config); +} + +#[cargo_test] +fn max_size_untracked_src_from_clean() { + // When a src directory exists from an older version of cargo that did not + // track sizes, `clean --max-src-size` should populate the db with the + // sizes. + let (config, p) = max_size_untracked_prepare(); + + // Clean should scan the src and update the db. + p.cargo("clean -v --max-src-size=10000 -Zgc") + .masquerade_as_nightly_cargo(&["gc"]) + .with_stderr("[REMOVED] 0 files") + .run(); + max_size_untracked_verify(&config); +} + +#[cargo_test] +fn max_download_size() { + // --max-download-size + // + // This creates some sample crates of specific sizes, and then tries + // deleting at various specific size thresholds that exercise different + // edge conditions. + let config = ConfigBuilder::new().unstable_flag("gc").build(); + + let test_crates = [ + // name, age, crate_size, src_size + ("d-1.0.0", 4, 4, 5), + ("c-1.0.0", 3, 3, 3), + ("a-1.0.0", 1, 2, 5), + ("b-1.0.0", 1, 1, 7), + ]; + + for (max_size, num_deleted, files_deleted, bytes) in [ + (30, 0, 0, 0), + (29, 1, 1, 5), + (24, 2, 2, 9), + (20, 3, 3, 12), + (1, 7, 7, 29), + (0, 8, 8, 30), + ] { + populate_cache(&config, &test_crates); + // Determine the order things will be deleted. 
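+ // (In this test's data the entries are listed oldest-first, and for each
+ // package the extracted src directory is deleted before its .crate file,
+ // so the expected removals are a prefix of this list.)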
+ let delete_order: Vec<String> = test_crates
+ .iter()
+ .flat_map(|(name, _, _, _)| [name.to_string(), format!("{name}.crate")])
+ .collect();
+ let (removed, _kept) = delete_order.split_at(num_deleted);
+ let mut stderr = String::new();
+ for name in removed {
+ writeln!(stderr, "[REMOVING] [..]{name}").unwrap();
+ }
+ let files_display = if files_deleted == 1 {
+ format!("1 file")
+ } else {
+ format!("{files_deleted} files")
+ };
+ let total_display = if removed.is_empty() {
+ String::new()
+ } else {
+ format!(", {bytes}B total")
+ };
+ write!(stderr, "[REMOVED] {files_display}{total_display}").unwrap();
+ cargo_process(&format!("clean -Zgc -v --max-download-size={max_size}"))
+ .masquerade_as_nightly_cargo(&["gc"])
+ .with_stderr_unordered(&stderr)
+ .run();
+ }
+}
+
+#[cargo_test]
+fn package_cache_lock_during_build() {
+ // Verifies that a shared lock is held during a build. Resolution and
+ // downloads should be OK while that is held, but mutation should block.
+ //
+ // This works by launching a build with a build script that will pause.
+ // Then it performs other cargo commands and verifies their behavior.
+ Package::new("bar", "1.0.0").publish();
+ let p_foo = project()
+ .file(
+ "Cargo.toml",
+ r#"
+ [package]
+ name = "foo"
+ version = "0.1.0"
+
+ [dependencies]
+ bar = "1.0"
+ "#,
+ )
+ .file("src/lib.rs", "")
+ .file(
+ "build.rs",
+ r#"
+ fn main() {
+ std::fs::write("blocking", "").unwrap();
+ let path = std::path::Path::new("ready");
+ loop {
+ if path.exists() {
+ break;
+ } else {
+ std::thread::sleep(std::time::Duration::from_millis(100))
+ }
+ }
+ }
+ "#,
+ )
+ .build();
+ let p_foo2 = project()
+ .at("foo2")
+ .file(
+ "Cargo.toml",
+ r#"
+ [package]
+ name = "foo2"
+ version = "0.1.0"
+
+ [dependencies]
+ bar = "1.0"
+ "#,
+ )
+ .file("src/lib.rs", "")
+ .build();
+
+ // Start a build that will pause once the build starts.
+ let mut foo_child = p_foo
+ .cargo("check -Zgc")
+ .masquerade_as_nightly_cargo(&["gc"])
+ .build_command()
+ .stdout(Stdio::piped())
+ .stderr(Stdio::piped())
+ .spawn()
+ .unwrap();
+
+ // Wait for it to enter build script.
+ retry(100, || p_foo.root().join("blocking").exists().then_some(()));
+
+ // Start a build with a different target directory. It should not block,
+ // even though it gets a download lock, and then a shared lock.
+ //
+ // Also verify that auto-gc gets disabled.
+ p_foo2
+ .cargo("check -Zgc")
+ .masquerade_as_nightly_cargo(&["gc"])
+ .env("CARGO_GC_AUTO_FREQUENCY", "always")
+ .env("CARGO_LOG", "cargo::core::gc=debug")
+ .with_stderr_contains("[UPDATING] `dummy-registry` index")
+ .with_stderr_contains("[CHECKING] bar v1.0.0")
+ .with_stderr_contains("[CHECKING] foo2 v0.1.0 [..]")
+ .with_stderr_contains("[FINISHED] [..]")
+ .with_stderr_contains("[..]unable to acquire mutate lock, auto gc disabled")
+ .run();
+
+ // Ensure that the first build really blocked.
+ assert!(matches!(foo_child.try_wait(), Ok(None)));
+
+ // Cleaning while a command is running should block.
+ let mut clean_cmd = p_foo2
+ .cargo("clean --max-download-size=0 -Zgc")
+ .masquerade_as_nightly_cargo(&["gc"])
+ .build_command();
+ clean_cmd.stderr(Stdio::piped());
+ let mut clean_child = clean_cmd.spawn().unwrap();
+
+ // Give the clean command a chance to finish (it shouldn't).
+ sleep_ms(500);
+ // They should both still be running.
+ assert!(matches!(foo_child.try_wait(), Ok(None)));
+ assert!(matches!(clean_child.try_wait(), Ok(None)));
+
+ // Let the original build finish.
+ p_foo.change_file("ready", "");
+
+ // Wait for clean to finish.
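+ // (wait_with_output() would hang if clean were still blocked, so run it
+ // on a helper thread bounded by a timeout.)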
+ let thread = std::thread::spawn(|| clean_child.wait_with_output().unwrap());
+ let output = thread_wait_timeout(100, thread);
+ assert!(output.status.success());
+ // Validate the output of the clean.
+ execs()
+ .with_stderr(
+ "\
+[BLOCKING] waiting for file lock on package cache mutation
+[REMOVED] [..]
+",
+ )
+ .run_output(&output);
+}
+
+#[cargo_test]
+fn read_only_locking_auto_gc() {
+ // Tests the behavior for auto-gc on a read-only directory.
+ let p = basic_foo_bar_project();
+ // Populate cache.
+ p.cargo("fetch -Zgc")
+ .masquerade_as_nightly_cargo(&["gc"])
+ .run();
+ let cargo_home = paths::home().join(".cargo");
+ let mut perms = std::fs::metadata(&cargo_home).unwrap().permissions();
+ // Test when it can't update auto-gc db.
+ perms.set_readonly(true);
+ std::fs::set_permissions(&cargo_home, perms.clone()).unwrap();
+ p.cargo("check -Zgc")
+ .masquerade_as_nightly_cargo(&["gc"])
+ .with_stderr(
+ "\
+[CHECKING] bar v1.0.0
+[CHECKING] foo v0.1.0 [..]
+[FINISHED] [..]
+",
+ )
+ .run();
+ // Try again without the last-use existing (such as if the cache was
+ // populated by an older version of cargo).
+ perms.set_readonly(false);
+ std::fs::set_permissions(&cargo_home, perms.clone()).unwrap();
+ let config = ConfigBuilder::new().build();
+ GlobalCacheTracker::db_path(&config)
+ .into_path_unlocked()
+ .rm_rf();
+ perms.set_readonly(true);
+ std::fs::set_permissions(&cargo_home, perms.clone()).unwrap();
+ p.cargo("check -Zgc")
+ .masquerade_as_nightly_cargo(&["gc"])
+ .with_stderr("[FINISHED] [..]")
+ .run();
+ perms.set_readonly(false);
+ std::fs::set_permissions(&cargo_home, perms).unwrap();
+}
+
+#[cargo_test]
+fn delete_index_also_deletes_crates() {
+ // Checks that when an index is deleted, the src and cache directories
+ // also get deleted.
+ let p = basic_foo_bar_project();
+ p.cargo("fetch -Zgc")
+ .masquerade_as_nightly_cargo(&["gc"])
+ .env("__CARGO_TEST_LAST_USE_NOW", months_ago_unix(4))
+ .run();
+
+ assert_eq!(get_registry_names("src"), ["bar-1.0.0"]);
+ assert_eq!(get_registry_names("cache"), ["bar-1.0.0.crate"]);
+
+ p.cargo("clean")
+ .arg("--max-index-age=0 days")
+ .arg("-Zgc")
+ .masquerade_as_nightly_cargo(&["gc"])
+ .with_stderr("[REMOVED] [..]")
+ .run();
+
+ assert_eq!(get_registry_names("src").len(), 0);
+ assert_eq!(get_registry_names("cache").len(), 0);
+}
+
+#[cargo_test]
+fn clean_syncs_missing_files() {
+ // When files go missing in the cache, clean operations that need to track
+ // the size should also remove them from the database.
+ Package::new("bar", "1.0.0").publish();
+ Package::new("baz", "1.0.0").publish();
+ let p = project()
+ .file(
+ "Cargo.toml",
+ r#"
+ [package]
+ name = "foo"
+ version = "0.1.0"
+
+ [dependencies]
+ bar = "1.0"
+ baz = "1.0"
+ "#,
+ )
+ .file("src/lib.rs", "")
+ .build();
+ p.cargo("fetch -Zgc")
+ .masquerade_as_nightly_cargo(&["gc"])
+ .run();
+
+ // Verify things are tracked.
+ let config = ConfigBuilder::new().unstable_flag("gc").build();
+ let lock = config
+ .acquire_package_cache_lock(CacheLockMode::MutateExclusive)
+ .unwrap();
+ let tracker = GlobalCacheTracker::new(&config).unwrap();
+ let crates = tracker.registry_crate_all().unwrap();
+ assert_eq!(crates.len(), 2);
+ let srcs = tracker.registry_src_all().unwrap();
+ assert_eq!(srcs.len(), 2);
+ drop(lock);
+
+ // Remove the files.
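+ // (Delete bar's .crate file and src directory behind cargo's back so the
+ // database is out of sync with what is actually on disk.)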
+ for pattern in [ + ".cargo/registry/cache/*/bar-1.0.0.crate", + ".cargo/registry/src/*/bar-1.0.0", + ] { + p.glob(paths::home().join(pattern)) + .map(|p| p.unwrap()) + .next() + .unwrap() + .rm_rf(); + } + + // Clean should update the db. + p.cargo("clean -v --max-download-size=1GB -Zgc") + .masquerade_as_nightly_cargo(&["gc"]) + .with_stderr("[REMOVED] 0 files") + .run(); + + // Verify + let crates = tracker.registry_crate_all().unwrap(); + assert_eq!(crates.len(), 1); + let srcs = tracker.registry_src_all().unwrap(); + assert_eq!(srcs.len(), 1); +} + +#[cargo_test] +fn offline_doesnt_auto_gc() { + // When running offline, auto-gc shouldn't run. + let p = basic_foo_bar_project(); + p.cargo("fetch -Zgc") + .masquerade_as_nightly_cargo(&["gc"]) + .env("__CARGO_TEST_LAST_USE_NOW", months_ago_unix(4)) + .run(); + // Remove the dependency. + p.change_file("Cargo.toml", &basic_manifest("foo", "0.1.0")); + // Run offline, make sure it doesn't delete anything + p.cargo("check --offline -Zgc") + .masquerade_as_nightly_cargo(&["gc"]) + .with_stderr("[CHECKING] foo v0.1.0[..]\n[FINISHED][..]") + .run(); + assert_eq!(get_registry_names("src"), ["bar-1.0.0"]); + assert_eq!(get_registry_names("cache"), ["bar-1.0.0.crate"]); + // Run online, make sure auto-gc runs. + p.cargo("check -Zgc") + .masquerade_as_nightly_cargo(&["gc"]) + .with_stderr("[FINISHED][..]") + .run(); + assert_eq!(get_registry_names("src"), &[] as &[String]); + assert_eq!(get_registry_names("cache"), &[] as &[String]); +} + +#[cargo_test] +fn can_handle_future_schema() -> anyhow::Result<()> { + // It should work when a future version of cargo has made schema changes + // to the database. + let p = basic_foo_bar_project(); + p.cargo("fetch -Zgc") + .masquerade_as_nightly_cargo(&["gc"]) + .env("__CARGO_TEST_LAST_USE_NOW", months_ago_unix(4)) + .run(); + // Modify the schema to pretend this is done by a future version of cargo. + let config = ConfigBuilder::new().build(); + let db_path = GlobalCacheTracker::db_path(&config).into_path_unlocked(); + let conn = rusqlite::Connection::open(&db_path)?; + let user_version: u32 = + conn.query_row("SELECT user_version FROM pragma_user_version", [], |row| { + row.get(0) + })?; + conn.execute("ALTER TABLE global_data ADD COLUMN foo DEFAULT 123", [])?; + conn.pragma_update(None, "user_version", &(user_version + 1))?; + drop(conn); + // Verify it doesn't blow up. + p.cargo("clean --max-download-size=0 -Zgc") + .masquerade_as_nightly_cargo(&["gc"]) + .with_stderr("[REMOVED] 4 files, [..] total") + .run(); + Ok(()) +} + +#[cargo_test] +fn clean_max_git_age() { + // --max-git-*-age flags + let (git_a, git_a_repo) = git::new_repo("git_a", |p| { + p.file("Cargo.toml", &basic_manifest("git_a", "1.0.0")) + .file("src/lib.rs", "") + }); + let p = project() + .file( + "Cargo.toml", + &format!( + r#" + [package] + name = "foo" + version = "0.1.0" + + [dependencies] + git_a = {{ git = '{}' }} + "#, + git_a.url() + ), + ) + .file("src/lib.rs", "") + .build(); + // Populate last-use tracking. + p.cargo("fetch -Zgc") + .masquerade_as_nightly_cargo(&["gc"]) + .env("__CARGO_TEST_LAST_USE_NOW", days_ago_unix(4)) + .run(); + // Update git_a to create a separate checkout. + git_a.change_file("src/lib.rs", "// test"); + git::add(&git_a_repo); + git::commit(&git_a_repo); + // Update last-use tracking, where the first git checkout will stay "old". 
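+ // (Only the new checkout gets the 2-days-ago timestamp; the first one
+ // keeps its 4-days-ago timestamp and will age out first.)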
+ p.cargo("update -p git_a -Zgc") + .masquerade_as_nightly_cargo(&["gc"]) + .env("__CARGO_TEST_LAST_USE_NOW", days_ago_unix(2)) + .with_stderr( + "\ +[UPDATING] git repository [..] +[UPDATING] git_a v1.0.0 [..] +", + ) + .run(); + + let db_names = get_git_db_names(); + assert_eq!(db_names.len(), 1); + let db_name = &db_names[0]; + let co_names = get_git_checkout_names(&db_name); + assert_eq!(co_names.len(), 2); + + // Delete the first checkout + p.cargo("clean -v -Zgc") + .arg("--max-git-co-age=3 days") + .masquerade_as_nightly_cargo(&["gc"]) + .with_stderr( + "\ +[REMOVING] [ROOT]/home/.cargo/git/checkouts/git_a-[..]/[..] +[REMOVED] [..] +", + ) + .run(); + + let db_names = get_git_db_names(); + assert_eq!(db_names.len(), 1); + let co_names = get_git_checkout_names(&db_name); + assert_eq!(co_names.len(), 1); + + // delete the second checkout + p.cargo("clean -v -Zgc") + .arg("--max-git-co-age=0 days") + .masquerade_as_nightly_cargo(&["gc"]) + .with_stderr( + "\ +[REMOVING] [ROOT]/home/.cargo/git/checkouts/git_a-[..]/[..] +[REMOVED] [..] +", + ) + .run(); + + let db_names = get_git_db_names(); + assert_eq!(db_names.len(), 1); + let co_names = get_git_checkout_names(&db_name); + assert_eq!(co_names.len(), 0); + + // delete the db + p.cargo("clean -v -Zgc") + .arg("--max-git-db-age=1 days") + .masquerade_as_nightly_cargo(&["gc"]) + .with_stderr( + "\ +[REMOVING] [ROOT]/home/.cargo/git/db/git_a-[..] +[REMOVING] [ROOT]/home/.cargo/git/checkouts/git_a-[..] +[REMOVED] [..] +", + ) + .run(); + + let db_names = get_git_db_names(); + assert_eq!(db_names.len(), 0); + let co_names = get_git_checkout_names(&db_name); + assert_eq!(co_names.len(), 0); +} + +#[cargo_test] +fn clean_max_src_crate_age() { + // --max-src-age and --max-crate-age flags + let p = basic_foo_bar_project(); + // Populate last-use tracking. + p.cargo("fetch -Zgc") + .masquerade_as_nightly_cargo(&["gc"]) + .env("__CARGO_TEST_LAST_USE_NOW", days_ago_unix(4)) + .run(); + // Update bar to create a separate copy with a different timestamp. + Package::new("bar", "1.0.1").publish(); + p.cargo("update -p bar -Zgc") + .masquerade_as_nightly_cargo(&["gc"]) + .env("__CARGO_TEST_LAST_USE_NOW", days_ago_unix(2)) + .with_stderr( + "\ +[UPDATING] `dummy-registry` index +[UPDATING] bar v1.0.0 -> v1.0.1 +", + ) + .run(); + p.cargo("fetch -Zgc") + .masquerade_as_nightly_cargo(&["gc"]) + .env("__CARGO_TEST_LAST_USE_NOW", days_ago_unix(2)) + .with_stderr( + "\ +[DOWNLOADING] crates ... +[DOWNLOADED] bar v1.0.1 [..] +", + ) + .run(); + + assert_eq!(get_registry_names("src"), ["bar-1.0.0", "bar-1.0.1"]); + assert_eq!( + get_registry_names("cache"), + ["bar-1.0.0.crate", "bar-1.0.1.crate"] + ); + + // Delete the old src. + p.cargo("clean -v -Zgc") + .arg("--max-src-age=3 days") + .masquerade_as_nightly_cargo(&["gc"]) + .with_stderr( + "\ +[REMOVING] [..]/bar-1.0.0 +[REMOVED] [..] +", + ) + .run(); + + // delete the second src + p.cargo("clean -v -Zgc") + .arg("--max-src-age=0 days") + .masquerade_as_nightly_cargo(&["gc"]) + .with_stderr( + "\ +[REMOVING] [..]/bar-1.0.1 +[REMOVED] [..] +", + ) + .run(); + + // delete the old crate + p.cargo("clean -v -Zgc") + .arg("--max-crate-age=3 days") + .masquerade_as_nightly_cargo(&["gc"]) + .with_stderr( + "\ +[REMOVING] [..]/bar-1.0.0.crate +[REMOVED] [..] +", + ) + .run(); + + // delete the seecond crate + p.cargo("clean -v -Zgc") + .arg("--max-crate-age=0 days") + .masquerade_as_nightly_cargo(&["gc"]) + .with_stderr( + "\ +[REMOVING] [..]/bar-1.0.1.crate +[REMOVED] [..] 
+", + ) + .run(); +} + +#[cargo_test] +fn clean_doc_with_cache() { + // clean --doc with other cache flags should do both. + let p = basic_foo_bar_project(); + p.cargo("doc -Zgc") + .masquerade_as_nightly_cargo(&["gc"]) + .env("__CARGO_TEST_LAST_USE_NOW", months_ago_unix(4)) + .run(); + assert_eq!(get_registry_names("src"), ["bar-1.0.0"]); + assert_eq!(get_registry_names("cache"), ["bar-1.0.0.crate"]); + assert!(p.build_dir().join("doc").exists()); + p.cargo("clean --doc --max-download-size=0 -v -Zgc") + .masquerade_as_nightly_cargo(&["gc"]) + .with_stderr_unordered( + "\ +[REMOVING] [ROOT]/foo/target/doc +[REMOVING] [ROOT]/home/.cargo/registry/src/[..]/bar-1.0.0 +[REMOVING] [ROOT]/home/.cargo/registry/cache/[..]/bar-1.0.0.crate +[REMOVED] [..] +", + ) + .run(); +} + +#[cargo_test] +fn clean_max_git_size() { + // clean --max-git-size + // + // Creates two checkouts. The sets a size threshold to delete one. And + // then with 0 max size to delete everything. + let (git_project, git_repo) = git::new_repo("bar", |p| { + p.file("Cargo.toml", &basic_manifest("bar", "1.0.0")) + .file("src/lib.rs", "") + }); + let p = project() + .file( + "Cargo.toml", + &format!( + r#" + [package] + name = "foo" + version = "0.1.0" + + [dependencies] + bar = {{ git = '{}' }} + "#, + git_project.url() + ), + ) + .file("src/lib.rs", "") + .build(); + // Fetch and populate db. + p.cargo("fetch -Zgc") + .masquerade_as_nightly_cargo(&["gc"]) + .env("__CARGO_TEST_LAST_USE_NOW", days_ago_unix(3)) + .run(); + + // Figure out the name of the first checkout. + let git_root = paths::home().join(".cargo/git"); + let db_names = get_git_db_names(); + assert_eq!(db_names.len(), 1); + let db_name = &db_names[0]; + let co_names = get_git_checkout_names(&db_name); + assert_eq!(co_names.len(), 1); + let first_co_name = &co_names[0]; + + // Make an update and create a new checkout. + git_project.change_file("src/lib.rs", "// modified"); + git::add(&git_repo); + git::commit(&git_repo); + p.cargo("update -Zgc") + .masquerade_as_nightly_cargo(&["gc"]) + // Use a different time so that the first checkout timestamp is less + // than the second. + .env("__CARGO_TEST_LAST_USE_NOW", days_ago_unix(2)) + .run(); + + // Figure out the threshold to use. + let mut co_names = get_git_checkout_names(&db_name); + assert_eq!(co_names.len(), 2); + co_names.retain(|name| name != first_co_name); + assert_eq!(co_names.len(), 1); + let second_co_name = &co_names[0]; + let second_co_path = git_root + .join("checkouts") + .join(db_name) + .join(second_co_name); + let second_co_size = cargo_util::du(&second_co_path, &["!.git"]).unwrap(); + + let db_size = cargo_util::du(&git_root.join("db").join(db_name), &[]).unwrap(); + + let threshold = db_size + second_co_size; + + p.cargo(&format!("clean --max-git-size={threshold} -Zgc -v")) + .masquerade_as_nightly_cargo(&["gc"]) + .with_stderr(&format!( + "\ +[REMOVING] [ROOT]/home/.cargo/git/checkouts/{db_name}/{first_co_name} +[REMOVED] [..] +" + )) + .run(); + + // And then try cleaning everything. + p.cargo("clean --max-git-size=0 -Zgc -v") + .masquerade_as_nightly_cargo(&["gc"]) + .with_stderr_unordered(&format!( + "\ +[REMOVING] [ROOT]/home/.cargo/git/checkouts/{db_name}/{second_co_name} +[REMOVING] [ROOT]/home/.cargo/git/db/{db_name} +[REMOVED] [..] +" + )) + .run(); +} + +// Helper for setting up fake git sizes for git size cleaning. 
+fn setup_fake_git_sizes(db_name: &str, db_size: usize, co_sizes: &[usize]) {
+ let base_git = paths::home().join(".cargo/git");
+ let db_path = base_git.join("db").join(db_name);
+ db_path.mkdir_p();
+ std::fs::write(db_path.join("test"), "x".repeat(db_size)).unwrap();
+ let base_co = base_git.join("checkouts").join(db_name);
+ for (i, size) in co_sizes.iter().enumerate() {
+ let co_name = format!("co{i}");
+ let co_path = base_co.join(co_name);
+ co_path.mkdir_p();
+ std::fs::write(co_path.join("test"), "x".repeat(*size)).unwrap();
+ }
+}
+
+#[cargo_test]
+fn clean_max_git_size_untracked() {
+ // If there are git directories that aren't tracked in the database,
+ // `--max-git-size` should pick them up.
+ //
+ // The db_name of "example" depends on the sorting order of the names ("e"
+ // should be after "c"), so that the db comes after the checkouts.
+ setup_fake_git_sizes("example", 5000, &[1000, 2000]);
+ cargo_process(&format!("clean -Zgc -v --max-git-size=7000"))
+ .masquerade_as_nightly_cargo(&["gc"])
+ .with_stderr(
+ "\
+[REMOVING] [ROOT]/home/.cargo/git/checkouts/example/co0
+[REMOVED] [..]
+",
+ )
+ .run();
+ cargo_process(&format!("clean -Zgc -v --max-git-size=5000"))
+ .masquerade_as_nightly_cargo(&["gc"])
+ .with_stderr(
+ "\
+[REMOVING] [ROOT]/home/.cargo/git/checkouts/example/co1
+[REMOVED] [..]
+",
+ )
+ .run();
+ cargo_process(&format!("clean -Zgc -v --max-git-size=0"))
+ .masquerade_as_nightly_cargo(&["gc"])
+ .with_stderr(
+ "\
+[REMOVING] [ROOT]/home/.cargo/git/db/example
+[REMOVED] [..]
+",
+ )
+ .run();
+}
+
+#[cargo_test]
+fn clean_max_git_size_deletes_co_from_db() {
+ // In the scenario where it thinks it needs to delete the db, it should
+ // also delete all the checkouts.
+ //
+ // The db_name of "abc" depends on the sorting order of the names ("a"
+ // should be before "c"), so that the db comes before the checkouts.
+ setup_fake_git_sizes("abc", 5000, &[1000, 2000]);
+ // This deletes everything because it tries to delete the db, which then
+ // deletes all checkouts.
+ cargo_process(&format!("clean -Zgc -v --max-git-size=3000"))
+ .masquerade_as_nightly_cargo(&["gc"])
+ .with_stderr(
+ "\
+[REMOVING] [ROOT]/home/.cargo/git/db/abc
+[REMOVING] [ROOT]/home/.cargo/git/checkouts/abc/co1
+[REMOVING] [ROOT]/home/.cargo/git/checkouts/abc/co0
+[REMOVED] [..]
+",
+ )
+ .run();
+}
+
+#[cargo_test]
+fn handles_missing_index() {
+ // Checks behavior when index is missing.
+ let p = basic_foo_bar_project();
+ p.cargo("fetch -Zgc")
+ .masquerade_as_nightly_cargo(&["gc"])
+ .run();
+ paths::home().join(".cargo/registry/index").rm_rf();
+ cargo_process("clean -v --max-download-size=0 -Zgc")
+ .masquerade_as_nightly_cargo(&["gc"])
+ .with_stderr_unordered(
+ "\
+[REMOVING] [ROOT]/home/.cargo/registry/cache/[..]
+[REMOVING] [ROOT]/home/.cargo/registry/src/[..]
+[REMOVED] [..]
+",
+ )
+ .run();
+}
+
+#[cargo_test]
+fn handles_missing_git_db() {
+ // Checks behavior when git db is missing.
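+ // (The checkouts remain on disk, so clean should still remove them even
+ // though the db they were created from is gone.)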
+ let git_project = git::new("bar", |p| { + p.file("Cargo.toml", &basic_manifest("bar", "1.0.0")) + .file("src/lib.rs", "") + }); + let p = project() + .file( + "Cargo.toml", + &format!( + r#" + [package] + name = "foo" + version = "0.1.0" + + [dependencies] + bar = {{ git = '{}' }} + "#, + git_project.url() + ), + ) + .file("src/lib.rs", "") + .build(); + p.cargo("fetch -Zgc") + .masquerade_as_nightly_cargo(&["gc"]) + .run(); + paths::home().join(".cargo/git/db").rm_rf(); + cargo_process("clean -v --max-git-size=0 -Zgc") + .masquerade_as_nightly_cargo(&["gc"]) + .with_stderr( + "\ +[REMOVING] [ROOT]/home/.cargo/git/checkouts/[..] +[REMOVED] [..] +", + ) + .run(); +} diff --git a/tests/testsuite/main.rs b/tests/testsuite/main.rs index 07f749e34..e2e46c400 100644 --- a/tests/testsuite/main.rs +++ b/tests/testsuite/main.rs @@ -98,6 +98,7 @@ mod git_auth; mod git_gc; mod git_shallow; mod glob_targets; +mod global_cache_tracker; mod help; mod https; mod inheritable_workspace_fields;