fix(vendor): dont remove non-cached source (#15260)

### What does this PR try to resolve?

Fixes #15244

With this fix,
`cargo vendor` will not delete the original sources
when you vendor from one directory source to another.

#### Background

cargo-vendor has a workaround to mitigate #5956:
it removes all cached sources in order to trigger a re-unpack.
It was meant for dealing with registry sources only,
but was accidentally applied to the directory source kind as well.

While directory source kind was invented for vendoring,
and vendoring from one vendored directory to the other seems unusual,
Cargo IMO should not delete any real sources.

This does not mean that registry sources are okay to delete.
In the long term, we should explore a way to unpack `.crate` files
directly, without any removal. See
https://github.com/rust-lang/cargo/pull/12509#issuecomment-1732415990

### How should we test and review this PR?

The added test should suffice.

Also, although this is for fixing #15244,
`cargo vendor` still doesn't support vendoring from and to the same
location.
Unless we figure out an `rsync`-like solution to update vendor sources,
this is not going to be supported in the short term.
(And I also doubt the real world use case of it)

### Additional information
This commit is contained in:
Ed Page 2025-03-04 02:25:41 +00:00 committed by GitHub
commit 29f8d039bb
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 92 additions and 6 deletions

View File

@ -60,7 +60,8 @@ pub fn exec(gctx: &mut GlobalContext, args: &ArgMatches) -> CliResult {
// to respect any of the `source` configuration in Cargo itself. That's
// intended for other consumers of Cargo, but we want to go straight to the
// source, e.g. crates.io, to fetch crates.
if !args.flag("respect-source-config") {
let respect_source_config = args.flag("respect-source-config");
if !respect_source_config {
gctx.values_mut()?.remove("source");
}
@ -80,6 +81,7 @@ pub fn exec(gctx: &mut GlobalContext, args: &ArgMatches) -> CliResult {
.unwrap_or_default()
.cloned()
.collect(),
respect_source_config,
},
)?;
Ok(())

View File

@ -1,8 +1,10 @@
use crate::core::shell::Verbosity;
use crate::core::SourceId;
use crate::core::{GitReference, Package, Workspace};
use crate::ops;
use crate::sources::path::PathSource;
use crate::sources::PathEntry;
use crate::sources::SourceConfigMap;
use crate::sources::CRATES_IO_REGISTRY;
use crate::util::cache_lock::CacheLockMode;
use crate::util::{try_canonicalize, CargoResult, GlobalContext};
@ -21,6 +23,7 @@ pub struct VendorOptions<'a> {
pub versioned_dirs: bool,
pub destination: &'a Path,
pub extra: Vec<PathBuf>,
pub respect_source_config: bool,
}
pub fn vendor(ws: &Workspace<'_>, opts: &VendorOptions<'_>) -> CargoResult<()> {
@ -76,6 +79,32 @@ enum VendorSource {
},
}
/// Cache for mapping replaced sources to replacements.
///
/// Lookups go through `get`, which consults `cache` first and only falls
/// back to `map` on a miss.
struct SourceReplacementCache<'gctx> {
/// Source configuration used to resolve a source ID on a cache miss;
/// constructed from the global context in `new`.
map: SourceConfigMap<'gctx>,
/// Memoized lookups: queried source ID -> the source ID it resolved to.
cache: HashMap<SourceId, SourceId>,
}
impl SourceReplacementCache<'_> {
fn new(gctx: &GlobalContext) -> CargoResult<SourceReplacementCache<'_>> {
Ok(SourceReplacementCache {
map: SourceConfigMap::new(gctx)?,
cache: Default::default(),
})
}
fn get(&mut self, id: SourceId) -> CargoResult<SourceId> {
use std::collections::hash_map::Entry;
match self.cache.entry(id) {
Entry::Occupied(e) => Ok(e.get().clone()),
Entry::Vacant(e) => {
let replaced = self.map.load(id, &HashSet::new())?.replaced_source_id();
Ok(e.insert(replaced).clone())
}
}
}
}
fn sync(
gctx: &GlobalContext,
workspaces: &[&Workspace<'_>],
@ -101,6 +130,8 @@ fn sync(
}
}
let mut source_replacement_cache = SourceReplacementCache::new(gctx)?;
// First up attempt to work around rust-lang/cargo#5956. Apparently build
// artifacts sprout up in Cargo's global cache for whatever reason, although
// it's unsure what tool is causing these issues at this time. For now we
@ -121,21 +152,32 @@ fn sync(
.context("failed to download packages")?;
for pkg in resolve.iter() {
let sid = if opts.respect_source_config {
source_replacement_cache.get(pkg.source_id())?
} else {
pkg.source_id()
};
// Don't delete actual source code!
if pkg.source_id().is_path() {
if let Ok(path) = pkg.source_id().url().to_file_path() {
if sid.is_path() {
if let Ok(path) = sid.url().to_file_path() {
if let Ok(path) = try_canonicalize(path) {
to_remove.remove(&path);
}
}
continue;
}
if pkg.source_id().is_git() {
if sid.is_git() {
continue;
}
// Only delete sources that are safe to delete, i.e. they are caches.
if sid.is_registry() {
if let Ok(pkg) = packages.get_one(pkg) {
drop(fs::remove_dir_all(pkg.root()));
}
continue;
}
}
}

View File

@ -1939,3 +1939,45 @@ fn vendor_crate_with_ws_inherit() {
"#]])
.run();
}
// Exercises `cargo vendor --respect-source-config` twice: first into the
// default `vendor` directory, then (after `add_crates_io_vendor_config`)
// into `new-vendor-dir`. The second run is expected to report `log` as
// coming from `[ROOT]/foo/vendor/log`, i.e. a non-registry source.
#[cargo_test]
fn dont_delete_non_registry_sources_with_respect_source_config() {
// A package `foo` with a single dependency on `log` 0.3.5.
let p = project()
.file(
"Cargo.toml",
r#"
[package]
name = "foo"
version = "0.1.0"
[dependencies]
log = "0.3.5"
"#,
)
.file("src/lib.rs", "")
.build();
// Make `log` 0.3.5 available for the first vendor run.
// NOTE(review): presumably this publishes to the test-support registry; confirm.
Package::new("log", "0.3.5").publish();
// First run: vendor into the default `vendor` directory.
p.cargo("vendor --respect-source-config").run();
// Despite the name `lock`, this reads the vendored crate's manifest and
// checks that the expected version was vendored.
let lock = p.read_file("vendor/log/Cargo.toml");
assert!(lock.contains("version = \"0.3.5\""));
// NOTE(review): presumably configures crates-io to be replaced by the
// `vendor` directory created above; confirm against the helper's definition.
add_crates_io_vendor_config(&p);
// Second run: vendor into a different directory. The expected stderr shows
// `log` being taken from the previously vendored copy, and stdout shows the
// suggested configuration pointing at `new-vendor-dir`.
p.cargo("vendor --respect-source-config new-vendor-dir")
.with_stderr_data(str![[r#"
Vendoring log v0.3.5 ([ROOT]/foo/vendor/log) to new-vendor-dir/log
To use vendored sources, add this to your .cargo/config.toml for this project:
"#]])
.with_stdout_data(str![[r#"
[source.crates-io]
replace-with = "vendored-sources"
[source.vendored-sources]
directory = "new-vendor-dir"
"#]])
.run();
}