mirror of
https://github.com/rust-lang/cargo.git
synced 2025-09-28 11:20:36 +00:00
Auto merge of #7368 - alexcrichton:canonical-urls-omg, r=ehuss
Work with canonical URLs in `[patch]` This commit addresses an issue with how the resolver processes `[patch]` annotations in manifests and lock files. Previously the resolver would use the raw `Url` coming out of a manifest, but the rest of resolution, when comparing `SourceId`, uses a canonical form of a `Url` rather than the actual raw `Url`. This ended up causing discrepancies like those found in #7282. To fix the issue all `patch` intermediate storage in the resolver uses a newly-added `CanonicalUrl` type instead of a `Url`. This `CanonicalUrl` is then also used throughout the codebase, and all lookups in the resolver as switched to using `CanonicalUrl` instead of `Url`, which... Closes #7282
This commit is contained in:
commit
35c55a9320
@ -9,7 +9,7 @@ use crate::core::PackageSet;
|
||||
use crate::core::{Dependency, PackageId, Source, SourceId, SourceMap, Summary};
|
||||
use crate::sources::config::SourceConfigMap;
|
||||
use crate::util::errors::{CargoResult, CargoResultExt};
|
||||
use crate::util::{profile, Config};
|
||||
use crate::util::{profile, CanonicalUrl, Config};
|
||||
|
||||
/// Source of information about a group of packages.
|
||||
///
|
||||
@ -75,9 +75,9 @@ pub struct PackageRegistry<'cfg> {
|
||||
yanked_whitelist: HashSet<PackageId>,
|
||||
source_config: SourceConfigMap<'cfg>,
|
||||
|
||||
patches: HashMap<Url, Vec<Summary>>,
|
||||
patches: HashMap<CanonicalUrl, Vec<Summary>>,
|
||||
patches_locked: bool,
|
||||
patches_available: HashMap<Url, Vec<PackageId>>,
|
||||
patches_available: HashMap<CanonicalUrl, Vec<PackageId>>,
|
||||
}
|
||||
|
||||
/// A map of all "locked packages" which is filled in when parsing a lock file
|
||||
@ -230,6 +230,8 @@ impl<'cfg> PackageRegistry<'cfg> {
|
||||
/// `query` until `lock_patches` is called below, which should be called
|
||||
/// once all patches have been added.
|
||||
pub fn patch(&mut self, url: &Url, deps: &[Dependency]) -> CargoResult<()> {
|
||||
let canonical = CanonicalUrl::new(url)?;
|
||||
|
||||
// First up we need to actually resolve each `deps` specification to
|
||||
// precisely one summary. We're not using the `query` method below as it
|
||||
// internally uses maps we're building up as part of this method
|
||||
@ -284,7 +286,7 @@ impl<'cfg> PackageRegistry<'cfg> {
|
||||
url
|
||||
)
|
||||
}
|
||||
if summary.package_id().source_id().url() == url {
|
||||
if *summary.package_id().source_id().canonical_url() == canonical {
|
||||
failure::bail!(
|
||||
"patch for `{}` in `{}` points to the same source, but \
|
||||
patches must point to different sources",
|
||||
@ -317,8 +319,8 @@ impl<'cfg> PackageRegistry<'cfg> {
|
||||
// `lock` method) and otherwise store the unlocked summaries in
|
||||
// `patches` to get locked in a future call to `lock_patches`.
|
||||
let ids = unlocked_summaries.iter().map(|s| s.package_id()).collect();
|
||||
self.patches_available.insert(url.clone(), ids);
|
||||
self.patches.insert(url.clone(), unlocked_summaries);
|
||||
self.patches_available.insert(canonical.clone(), ids);
|
||||
self.patches.insert(canonical, unlocked_summaries);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@ -340,8 +342,11 @@ impl<'cfg> PackageRegistry<'cfg> {
|
||||
self.patches_locked = true;
|
||||
}
|
||||
|
||||
pub fn patches(&self) -> &HashMap<Url, Vec<Summary>> {
|
||||
&self.patches
|
||||
pub fn patches(&self) -> Vec<Summary> {
|
||||
self.patches
|
||||
.values()
|
||||
.flat_map(|v| v.iter().cloned())
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn load(&mut self, source_id: SourceId, kind: Kind) -> CargoResult<()> {
|
||||
@ -472,7 +477,7 @@ impl<'cfg> Registry for PackageRegistry<'cfg> {
|
||||
// This means that `dep.matches(..)` will always return false, when
|
||||
// what we really care about is the name/version match.
|
||||
let mut patches = Vec::<Summary>::new();
|
||||
if let Some(extra) = self.patches.get(dep.source_id().url()) {
|
||||
if let Some(extra) = self.patches.get(dep.source_id().canonical_url()) {
|
||||
patches.extend(
|
||||
extra
|
||||
.iter()
|
||||
@ -605,7 +610,11 @@ impl<'cfg> Registry for PackageRegistry<'cfg> {
|
||||
}
|
||||
}
|
||||
|
||||
fn lock(locked: &LockedMap, patches: &HashMap<Url, Vec<PackageId>>, summary: Summary) -> Summary {
|
||||
fn lock(
|
||||
locked: &LockedMap,
|
||||
patches: &HashMap<CanonicalUrl, Vec<PackageId>>,
|
||||
summary: Summary,
|
||||
) -> Summary {
|
||||
let pair = locked
|
||||
.get(&summary.source_id())
|
||||
.and_then(|map| map.get(&*summary.name()))
|
||||
@ -669,7 +678,7 @@ fn lock(locked: &LockedMap, patches: &HashMap<Url, Vec<PackageId>>, summary: Sum
|
||||
// map, and we see if `id` is contained in the list of patches
|
||||
// for that url. If it is then this lock is still valid,
|
||||
// otherwise the lock is no longer valid.
|
||||
match patches.get(dep.source_id().url()) {
|
||||
match patches.get(dep.source_id().canonical_url()) {
|
||||
Some(list) => list.contains(&id),
|
||||
None => false,
|
||||
}
|
||||
|
@ -3,8 +3,6 @@ use std::collections::{HashMap, HashSet};
|
||||
use std::fmt;
|
||||
use std::iter::FromIterator;
|
||||
|
||||
use url::Url;
|
||||
|
||||
use crate::core::dependency::Kind;
|
||||
use crate::core::{Dependency, PackageId, PackageIdSpec, Summary, Target};
|
||||
use crate::util::errors::CargoResult;
|
||||
@ -114,8 +112,8 @@ impl Resolve {
|
||||
self.graph.path_to_top(pkg)
|
||||
}
|
||||
|
||||
pub fn register_used_patches(&mut self, patches: &HashMap<Url, Vec<Summary>>) {
|
||||
for summary in patches.values().flat_map(|v| v) {
|
||||
pub fn register_used_patches(&mut self, patches: &[Summary]) {
|
||||
for summary in patches {
|
||||
if self.iter().any(|id| id == summary.package_id()) {
|
||||
continue;
|
||||
}
|
||||
|
@ -15,10 +15,9 @@ use url::Url;
|
||||
|
||||
use crate::core::PackageId;
|
||||
use crate::ops;
|
||||
use crate::sources::git;
|
||||
use crate::sources::DirectorySource;
|
||||
use crate::sources::{GitSource, PathSource, RegistrySource, CRATES_IO_INDEX};
|
||||
use crate::util::{CargoResult, Config, IntoUrl};
|
||||
use crate::util::{CanonicalUrl, CargoResult, Config, IntoUrl};
|
||||
|
||||
lazy_static::lazy_static! {
|
||||
static ref SOURCE_ID_CACHE: Mutex<HashSet<&'static SourceIdInner>> = Mutex::new(HashSet::new());
|
||||
@ -34,8 +33,8 @@ pub struct SourceId {
|
||||
struct SourceIdInner {
|
||||
/// The source URL.
|
||||
url: Url,
|
||||
/// The result of `git::canonicalize_url()` on `url` field.
|
||||
canonical_url: Url,
|
||||
/// The canonical version of the above url
|
||||
canonical_url: CanonicalUrl,
|
||||
/// The source kind.
|
||||
kind: Kind,
|
||||
/// For example, the exact Git revision of the specified branch for a Git Source.
|
||||
@ -80,7 +79,7 @@ impl SourceId {
|
||||
fn new(kind: Kind, url: Url) -> CargoResult<SourceId> {
|
||||
let source_id = SourceId::wrap(SourceIdInner {
|
||||
kind,
|
||||
canonical_url: git::canonicalize_url(&url)?,
|
||||
canonical_url: CanonicalUrl::new(&url)?,
|
||||
url,
|
||||
precise: None,
|
||||
name: None,
|
||||
@ -216,7 +215,7 @@ impl SourceId {
|
||||
let url = config.get_registry_index(key)?;
|
||||
Ok(SourceId::wrap(SourceIdInner {
|
||||
kind: Kind::Registry,
|
||||
canonical_url: git::canonicalize_url(&url)?,
|
||||
canonical_url: CanonicalUrl::new(&url)?,
|
||||
url,
|
||||
precise: None,
|
||||
name: Some(key.to_string()),
|
||||
@ -228,6 +227,12 @@ impl SourceId {
|
||||
&self.inner.url
|
||||
}
|
||||
|
||||
/// Gets the canonical URL of this source, used for internal comparison
|
||||
/// purposes.
|
||||
pub fn canonical_url(&self) -> &CanonicalUrl {
|
||||
&self.inner.canonical_url
|
||||
}
|
||||
|
||||
pub fn display_index(self) -> String {
|
||||
if self.is_default_registry() {
|
||||
"crates.io index".to_string()
|
||||
@ -508,7 +513,7 @@ impl Hash for SourceId {
|
||||
fn hash<S: hash::Hasher>(&self, into: &mut S) {
|
||||
self.inner.kind.hash(into);
|
||||
match self.inner.kind {
|
||||
Kind::Git(_) => self.inner.canonical_url.as_str().hash(into),
|
||||
Kind::Git(_) => self.inner.canonical_url.hash(into),
|
||||
_ => self.inner.url.as_str().hash(into),
|
||||
}
|
||||
}
|
||||
|
@ -343,7 +343,7 @@ pub fn resolve_with_previous<'cfg>(
|
||||
Some(ws.config()),
|
||||
ws.features().require(Feature::public_dependency()).is_ok(),
|
||||
)?;
|
||||
resolved.register_used_patches(registry.patches());
|
||||
resolved.register_used_patches(®istry.patches());
|
||||
if register_patches {
|
||||
// It would be good if this warning was more targeted and helpful
|
||||
// (such as showing close candidates that failed to match). However,
|
||||
|
@ -1,4 +1,4 @@
|
||||
pub use self::source::{canonicalize_url, GitSource};
|
||||
pub use self::source::GitSource;
|
||||
pub use self::utils::{fetch, GitCheckout, GitDatabase, GitRemote, GitRevision};
|
||||
mod source;
|
||||
mod utils;
|
||||
|
@ -27,7 +27,7 @@ impl<'cfg> GitSource<'cfg> {
|
||||
assert!(source_id.is_git(), "id is not git, id={}", source_id);
|
||||
|
||||
let remote = GitRemote::new(source_id.url());
|
||||
let ident = ident(source_id.url())?;
|
||||
let ident = ident(&source_id);
|
||||
|
||||
let reference = match source_id.precise() {
|
||||
Some(s) => GitReference::Rev(s.to_string()),
|
||||
@ -59,58 +59,17 @@ impl<'cfg> GitSource<'cfg> {
|
||||
}
|
||||
}
|
||||
|
||||
fn ident(url: &Url) -> CargoResult<String> {
|
||||
let url = canonicalize_url(url)?;
|
||||
let ident = url
|
||||
fn ident(id: &SourceId) -> String {
|
||||
let ident = id
|
||||
.canonical_url()
|
||||
.raw_canonicalized_url()
|
||||
.path_segments()
|
||||
.and_then(|mut s| s.next_back())
|
||||
.and_then(|s| s.rev().next())
|
||||
.unwrap_or("");
|
||||
|
||||
let ident = if ident == "" { "_empty" } else { ident };
|
||||
|
||||
Ok(format!("{}-{}", ident, short_hash(&url)))
|
||||
}
|
||||
|
||||
// Some hacks and heuristics for making equivalent URLs hash the same.
|
||||
pub fn canonicalize_url(url: &Url) -> CargoResult<Url> {
|
||||
let mut url = url.clone();
|
||||
|
||||
// cannot-be-a-base-urls (e.g., `github.com:rust-lang-nursery/rustfmt.git`)
|
||||
// are not supported.
|
||||
if url.cannot_be_a_base() {
|
||||
failure::bail!(
|
||||
"invalid url `{}`: cannot-be-a-base-URLs are not supported",
|
||||
url
|
||||
)
|
||||
}
|
||||
|
||||
// Strip a trailing slash.
|
||||
if url.path().ends_with('/') {
|
||||
url.path_segments_mut().unwrap().pop_if_empty();
|
||||
}
|
||||
|
||||
// HACK: for GitHub URLs specifically, just lower-case
|
||||
// everything. GitHub treats both the same, but they hash
|
||||
// differently, and we're gonna be hashing them. This wants a more
|
||||
// general solution, and also we're almost certainly not using the
|
||||
// same case conversion rules that GitHub does. (See issue #84.)
|
||||
if url.host_str() == Some("github.com") {
|
||||
url.set_scheme("https").unwrap();
|
||||
let path = url.path().to_lowercase();
|
||||
url.set_path(&path);
|
||||
}
|
||||
|
||||
// Repos can generally be accessed with or without `.git` extension.
|
||||
let needs_chopping = url.path().ends_with(".git");
|
||||
if needs_chopping {
|
||||
let last = {
|
||||
let last = url.path_segments().unwrap().next_back().unwrap();
|
||||
last[..last.len() - 4].to_owned()
|
||||
};
|
||||
url.path_segments_mut().unwrap().pop().push(&last);
|
||||
}
|
||||
|
||||
Ok(url)
|
||||
format!("{}-{}", ident, short_hash(id.canonical_url()))
|
||||
}
|
||||
|
||||
impl<'cfg> Debug for GitSource<'cfg> {
|
||||
@ -241,56 +200,54 @@ impl<'cfg> Source for GitSource<'cfg> {
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::ident;
|
||||
use crate::core::{GitReference, SourceId};
|
||||
use crate::util::IntoUrl;
|
||||
use url::Url;
|
||||
|
||||
#[test]
|
||||
pub fn test_url_to_path_ident_with_path() {
|
||||
let ident = ident(&url("https://github.com/carlhuda/cargo")).unwrap();
|
||||
let ident = ident(&src("https://github.com/carlhuda/cargo"));
|
||||
assert!(ident.starts_with("cargo-"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
pub fn test_url_to_path_ident_without_path() {
|
||||
let ident = ident(&url("https://github.com")).unwrap();
|
||||
let ident = ident(&src("https://github.com"));
|
||||
assert!(ident.starts_with("_empty-"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_canonicalize_idents_by_stripping_trailing_url_slash() {
|
||||
let ident1 = ident(&url("https://github.com/PistonDevelopers/piston/")).unwrap();
|
||||
let ident2 = ident(&url("https://github.com/PistonDevelopers/piston")).unwrap();
|
||||
let ident1 = ident(&src("https://github.com/PistonDevelopers/piston/"));
|
||||
let ident2 = ident(&src("https://github.com/PistonDevelopers/piston"));
|
||||
assert_eq!(ident1, ident2);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_canonicalize_idents_by_lowercasing_github_urls() {
|
||||
let ident1 = ident(&url("https://github.com/PistonDevelopers/piston")).unwrap();
|
||||
let ident2 = ident(&url("https://github.com/pistondevelopers/piston")).unwrap();
|
||||
let ident1 = ident(&src("https://github.com/PistonDevelopers/piston"));
|
||||
let ident2 = ident(&src("https://github.com/pistondevelopers/piston"));
|
||||
assert_eq!(ident1, ident2);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_canonicalize_idents_by_stripping_dot_git() {
|
||||
let ident1 = ident(&url("https://github.com/PistonDevelopers/piston")).unwrap();
|
||||
let ident2 = ident(&url("https://github.com/PistonDevelopers/piston.git")).unwrap();
|
||||
let ident1 = ident(&src("https://github.com/PistonDevelopers/piston"));
|
||||
let ident2 = ident(&src("https://github.com/PistonDevelopers/piston.git"));
|
||||
assert_eq!(ident1, ident2);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_canonicalize_idents_different_protocols() {
|
||||
let ident1 = ident(&url("https://github.com/PistonDevelopers/piston")).unwrap();
|
||||
let ident2 = ident(&url("git://github.com/PistonDevelopers/piston")).unwrap();
|
||||
let ident1 = ident(&src("https://github.com/PistonDevelopers/piston"));
|
||||
let ident2 = ident(&src("git://github.com/PistonDevelopers/piston"));
|
||||
assert_eq!(ident1, ident2);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_canonicalize_cannot_be_a_base_urls() {
|
||||
assert!(ident(&url("github.com:PistonDevelopers/piston")).is_err());
|
||||
assert!(ident(&url("google.com:PistonDevelopers/piston")).is_err());
|
||||
}
|
||||
|
||||
fn url(s: &str) -> Url {
|
||||
s.into_url().unwrap()
|
||||
fn src(s: &str) -> SourceId {
|
||||
SourceId::for_git(
|
||||
&s.into_url().unwrap(),
|
||||
GitReference::Branch("master".to_string()),
|
||||
)
|
||||
.unwrap()
|
||||
}
|
||||
}
|
||||
|
73
src/cargo/util/canonical_url.rs
Normal file
73
src/cargo/util/canonical_url.rs
Normal file
@ -0,0 +1,73 @@
|
||||
use crate::util::errors::CargoResult;
|
||||
use std::hash::{self, Hash};
|
||||
use url::Url;
|
||||
|
||||
/// A newtype wrapper around `Url` which represents a "canonical" version of an
|
||||
/// original URL.
|
||||
///
|
||||
/// A "canonical" url is only intended for internal comparison purposes in
|
||||
/// Cargo. It's to help paper over mistakes such as depending on
|
||||
/// `github.com/foo/bar` vs `github.com/foo/bar.git`. This is **only** for
|
||||
/// internal purposes within Cargo and provides no means to actually read the
|
||||
/// underlying string value of the `Url` it contains. This is intentional,
|
||||
/// because all fetching should still happen within the context of the original
|
||||
/// URL.
|
||||
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)]
|
||||
pub struct CanonicalUrl(Url);
|
||||
|
||||
impl CanonicalUrl {
|
||||
pub fn new(url: &Url) -> CargoResult<CanonicalUrl> {
|
||||
let mut url = url.clone();
|
||||
|
||||
// cannot-be-a-base-urls (e.g., `github.com:rust-lang-nursery/rustfmt.git`)
|
||||
// are not supported.
|
||||
if url.cannot_be_a_base() {
|
||||
failure::bail!(
|
||||
"invalid url `{}`: cannot-be-a-base-URLs are not supported",
|
||||
url
|
||||
)
|
||||
}
|
||||
|
||||
// Strip a trailing slash.
|
||||
if url.path().ends_with('/') {
|
||||
url.path_segments_mut().unwrap().pop_if_empty();
|
||||
}
|
||||
|
||||
// For GitHub URLs specifically, just lower-case everything. GitHub
|
||||
// treats both the same, but they hash differently, and we're gonna be
|
||||
// hashing them. This wants a more general solution, and also we're
|
||||
// almost certainly not using the same case conversion rules that GitHub
|
||||
// does. (See issue #84)
|
||||
if url.host_str() == Some("github.com") {
|
||||
url.set_scheme("https").unwrap();
|
||||
let path = url.path().to_lowercase();
|
||||
url.set_path(&path);
|
||||
}
|
||||
|
||||
// Repos can generally be accessed with or without `.git` extension.
|
||||
let needs_chopping = url.path().ends_with(".git");
|
||||
if needs_chopping {
|
||||
let last = {
|
||||
let last = url.path_segments().unwrap().next_back().unwrap();
|
||||
last[..last.len() - 4].to_owned()
|
||||
};
|
||||
url.path_segments_mut().unwrap().pop().push(&last);
|
||||
}
|
||||
|
||||
Ok(CanonicalUrl(url))
|
||||
}
|
||||
|
||||
/// Returns the raw canonicalized URL, although beware that this should
|
||||
/// never be used/displayed/etc, it should only be used for internal data
|
||||
/// structures and hashes and such.
|
||||
pub fn raw_canonicalized_url(&self) -> &Url {
|
||||
&self.0
|
||||
}
|
||||
}
|
||||
|
||||
// See comment in `source_id.rs` for why we explicitly use `as_str()` here.
|
||||
impl Hash for CanonicalUrl {
|
||||
fn hash<S: hash::Hasher>(&self, into: &mut S) {
|
||||
self.0.as_str().hash(into);
|
||||
}
|
||||
}
|
@ -1,5 +1,6 @@
|
||||
use std::time::Duration;
|
||||
|
||||
pub use self::canonical_url::CanonicalUrl;
|
||||
pub use self::cfg::{Cfg, CfgExpr};
|
||||
pub use self::config::{homedir, Config, ConfigValue};
|
||||
pub use self::dependency_queue::DependencyQueue;
|
||||
@ -28,6 +29,7 @@ pub use self::workspace::{
|
||||
print_available_tests,
|
||||
};
|
||||
|
||||
mod canonical_url;
|
||||
mod cfg;
|
||||
pub mod command_prelude;
|
||||
pub mod config;
|
||||
|
@ -1338,3 +1338,68 @@ version. [..]
|
||||
)
|
||||
.run();
|
||||
}
|
||||
|
||||
#[cargo_test]
|
||||
fn canonicalize_a_bunch() {
|
||||
let base = git::repo(&paths::root().join("base"))
|
||||
.file("Cargo.toml", &basic_manifest("base", "0.1.0"))
|
||||
.file("src/lib.rs", "")
|
||||
.build();
|
||||
|
||||
let intermediate = git::repo(&paths::root().join("intermediate"))
|
||||
.file(
|
||||
"Cargo.toml",
|
||||
&format!(
|
||||
r#"
|
||||
[package]
|
||||
name = "intermediate"
|
||||
version = "0.1.0"
|
||||
|
||||
[dependencies]
|
||||
# Note the lack of trailing slash
|
||||
base = {{ git = '{}' }}
|
||||
"#,
|
||||
base.url(),
|
||||
),
|
||||
)
|
||||
.file("src/lib.rs", "pub fn f() { base::f() }")
|
||||
.build();
|
||||
|
||||
let newbase = git::repo(&paths::root().join("newbase"))
|
||||
.file("Cargo.toml", &basic_manifest("base", "0.1.0"))
|
||||
.file("src/lib.rs", "pub fn f() {}")
|
||||
.build();
|
||||
|
||||
let p = project()
|
||||
.file(
|
||||
"Cargo.toml",
|
||||
&format!(
|
||||
r#"
|
||||
[package]
|
||||
name = "foo"
|
||||
version = "0.0.1"
|
||||
|
||||
[dependencies]
|
||||
# Note the trailing slashes
|
||||
base = {{ git = '{base}/' }}
|
||||
intermediate = {{ git = '{intermediate}/' }}
|
||||
|
||||
[patch.'{base}'] # Note the lack of trailing slash
|
||||
base = {{ git = '{newbase}' }}
|
||||
"#,
|
||||
base = base.url(),
|
||||
intermediate = intermediate.url(),
|
||||
newbase = newbase.url(),
|
||||
),
|
||||
)
|
||||
.file("src/lib.rs", "pub fn a() { base::f(); intermediate::f() }")
|
||||
.build();
|
||||
|
||||
// Once to make sure it actually works
|
||||
p.cargo("build").run();
|
||||
|
||||
// Then a few more times for good measure to ensure no weird warnings about
|
||||
// `[patch]` are printed.
|
||||
p.cargo("build").with_stderr("[FINISHED] [..]").run();
|
||||
p.cargo("build").with_stderr("[FINISHED] [..]").run();
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user