Auto merge of #5691 - Eh2406:hyphen-underscore, r=alexcrichton

Make index lookup robust to _ vs -, but don't let the user get it wrong.

This does a brute force search thru combinations of hyphen and underscores to allow queries of crates to pass the wrong one.

This is a small first step of fixing #2775

Where is best to add test?
This commit is contained in:
bors 2018-07-16 19:35:38 +00:00
commit fefbb68f89
15 changed files with 369 additions and 75 deletions

View File

@ -113,12 +113,11 @@ fn find_closest(config: &Config, cmd: &str) -> Option<String> {
let cmds = list_commands(config); let cmds = list_commands(config);
// Only consider candidates with a lev_distance of 3 or less so we don't // Only consider candidates with a lev_distance of 3 or less so we don't
// suggest out-of-the-blue options. // suggest out-of-the-blue options.
let mut filtered = cmds.iter() cmds.into_iter()
.map(|&(ref c, _)| (lev_distance(c, cmd), c)) .map(|(c, _)| (lev_distance(&c, cmd), c))
.filter(|&(d, _)| d < 4) .filter(|&(d, _)| d < 4)
.collect::<Vec<_>>(); .min_by_key(|a| a.0)
filtered.sort_by(|a, b| a.0.cmp(&b.0)); .map(|slot| slot.1)
filtered.get(0).map(|slot| slot.1.clone())
} }
fn execute_external_subcommand(config: &Config, cmd: &str, args: &[&str]) -> CliResult { fn execute_external_subcommand(config: &Config, cmd: &str, args: &[&str]) -> CliResult {

View File

@ -14,11 +14,11 @@ use sources::config::SourceConfigMap;
/// See also `core::Source`. /// See also `core::Source`.
pub trait Registry { pub trait Registry {
/// Attempt to find the packages that match a dependency request. /// Attempt to find the packages that match a dependency request.
fn query(&mut self, dep: &Dependency, f: &mut FnMut(Summary)) -> CargoResult<()>; fn query(&mut self, dep: &Dependency, f: &mut FnMut(Summary), fuzzy: bool) -> CargoResult<()>;
fn query_vec(&mut self, dep: &Dependency) -> CargoResult<Vec<Summary>> { fn query_vec(&mut self, dep: &Dependency, fuzzy: bool) -> CargoResult<Vec<Summary>> {
let mut ret = Vec::new(); let mut ret = Vec::new();
self.query(dep, &mut |s| ret.push(s))?; self.query(dep, &mut |s| ret.push(s), fuzzy)?;
Ok(ret) Ok(ret)
} }
} }
@ -395,7 +395,7 @@ http://doc.crates.io/specifying-dependencies.html#overriding-dependencies
} }
impl<'cfg> Registry for PackageRegistry<'cfg> { impl<'cfg> Registry for PackageRegistry<'cfg> {
fn query(&mut self, dep: &Dependency, f: &mut FnMut(Summary)) -> CargoResult<()> { fn query(&mut self, dep: &Dependency, f: &mut FnMut(Summary), fuzzy: bool) -> CargoResult<()> {
assert!(self.patches_locked); assert!(self.patches_locked);
let (override_summary, n, to_warn) = { let (override_summary, n, to_warn) = {
// Look for an override and get ready to query the real source. // Look for an override and get ready to query the real source.
@ -476,7 +476,7 @@ impl<'cfg> Registry for PackageRegistry<'cfg> {
// already selected, then we skip this `summary`. // already selected, then we skip this `summary`.
let locked = &self.locked; let locked = &self.locked;
let all_patches = &self.patches_available; let all_patches = &self.patches_available;
return source.query(dep, &mut |summary| { let callback = &mut |summary: Summary| {
for patch in patches.iter() { for patch in patches.iter() {
let patch = patch.package_id().version(); let patch = patch.package_id().version();
if summary.package_id().version() == patch { if summary.package_id().version() == patch {
@ -484,7 +484,12 @@ impl<'cfg> Registry for PackageRegistry<'cfg> {
} }
} }
f(lock(locked, all_patches, summary)) f(lock(locked, all_patches, summary))
}); };
return if fuzzy {
source.fuzzy_query(dep, callback)
} else {
source.query(dep, callback)
};
} }
// If we have an override summary then we query the source // If we have an override summary then we query the source
@ -496,10 +501,17 @@ impl<'cfg> Registry for PackageRegistry<'cfg> {
} }
let mut n = 0; let mut n = 0;
let mut to_warn = None; let mut to_warn = None;
source.query(dep, &mut |summary| { {
let callback = &mut |summary| {
n += 1; n += 1;
to_warn = Some(summary); to_warn = Some(summary);
})?; };
if fuzzy {
source.fuzzy_query(dep, callback)?;
} else {
source.query(dep, callback)?;
}
}
(override_summary, n, to_warn) (override_summary, n, to_warn)
} }
} }

View File

@ -59,6 +59,7 @@ use core::PackageIdSpec;
use core::{Dependency, PackageId, Registry, Summary}; use core::{Dependency, PackageId, Registry, Summary};
use util::config::Config; use util::config::Config;
use util::errors::{CargoError, CargoResult}; use util::errors::{CargoError, CargoResult};
use util::lev_distance::lev_distance;
use util::profile; use util::profile;
use self::context::{Activations, Context}; use self::context::{Activations, Context};
@ -230,7 +231,9 @@ fn activate_deps_loop(
// to amortize the cost of the current time lookup. // to amortize the cost of the current time lookup.
ticks += 1; ticks += 1;
if let Some(config) = config { if let Some(config) = config {
if config.shell().is_err_tty() && !printed && ticks % 1000 == 0 if config.shell().is_err_tty()
&& !printed
&& ticks % 1000 == 0
&& start.elapsed() - deps_time > time_to_print && start.elapsed() - deps_time > time_to_print
{ {
printed = true; printed = true;
@ -857,12 +860,14 @@ fn activation_error(
msg.push_str("\nversions that meet the requirements `"); msg.push_str("\nversions that meet the requirements `");
msg.push_str(&dep.version_req().to_string()); msg.push_str(&dep.version_req().to_string());
msg.push_str("` are: "); msg.push_str("` are: ");
msg.push_str(&candidates msg.push_str(
&candidates
.iter() .iter()
.map(|v| v.summary.version()) .map(|v| v.summary.version())
.map(|v| v.to_string()) .map(|v| v.to_string())
.collect::<Vec<_>>() .collect::<Vec<_>>()
.join(", ")); .join(", "),
);
let mut conflicting_activations: Vec<_> = conflicting_activations.iter().collect(); let mut conflicting_activations: Vec<_> = conflicting_activations.iter().collect();
conflicting_activations.sort_unstable(); conflicting_activations.sort_unstable();
@ -922,17 +927,16 @@ fn activation_error(
return format_err!("{}", msg); return format_err!("{}", msg);
} }
// Once we're all the way down here, we're definitely lost in the // We didn't actually find any candidates, so we need to
// weeds! We didn't actually find any candidates, so we need to
// give an error message that nothing was found. // give an error message that nothing was found.
// //
// Note that we re-query the registry with a new dependency that // Maybe the user mistyped the ver_req? Like `dep="2"` when `dep="0.2"`
// allows any version so we can give some nicer error reporting // was meant. So we re-query the registry with `deb="*"` so we can
// which indicates a few versions that were actually found. // list a few versions that were actually found.
let all_req = semver::VersionReq::parse("*").unwrap(); let all_req = semver::VersionReq::parse("*").unwrap();
let mut new_dep = dep.clone(); let mut new_dep = dep.clone();
new_dep.set_version_req(all_req); new_dep.set_version_req(all_req);
let mut candidates = match registry.query_vec(&new_dep) { let mut candidates = match registry.query_vec(&new_dep, false) {
Ok(candidates) => candidates, Ok(candidates) => candidates,
Err(e) => return e, Err(e) => return e,
}; };
@ -977,12 +981,41 @@ fn activation_error(
msg msg
} else { } else {
// Maybe the user mistyped the name? Like `dep-thing` when `Dep_Thing`
// was meant. So we try asking the registry for a `fuzzy` search for suggestions.
let mut candidates = Vec::new();
if let Err(e) = registry.query(&new_dep, &mut |s| candidates.push(s.name()), true) {
return e;
};
candidates.sort_unstable();
candidates.dedup();
let mut candidates: Vec<_> = candidates
.iter()
.map(|n| (lev_distance(&*new_dep.name(), &*n), n))
.filter(|&(d, _)| d < 4)
.collect();
candidates.sort_by_key(|o| o.0);
let mut msg = format!( let mut msg = format!(
"no matching package named `{}` found\n\ "no matching package named `{}` found\n\
location searched: {}\n", location searched: {}\n",
dep.name(), dep.name(),
dep.source_id() dep.source_id()
); );
if !candidates.is_empty() {
let mut names = candidates
.iter()
.take(3)
.map(|c| c.1.as_str())
.collect::<Vec<_>>();
if candidates.len() > 3 {
names.push("...");
}
msg.push_str("did you mean: ");
msg.push_str(&names.join(", "));
msg.push_str("\n");
}
msg.push_str("required by "); msg.push_str("required by ");
msg.push_str(&describe_path(&graph.path_to_top(parent.package_id()))); msg.push_str(&describe_path(&graph.path_to_top(parent.package_id())));

View File

@ -51,7 +51,7 @@ impl<'a> RegistryQueryer<'a> {
summary: s, summary: s,
replace: None, replace: None,
}); });
})?; }, false)?;
for candidate in ret.iter_mut() { for candidate in ret.iter_mut() {
let summary = &candidate.summary; let summary = &candidate.summary;
@ -65,7 +65,7 @@ impl<'a> RegistryQueryer<'a> {
}; };
debug!("found an override for {} {}", dep.name(), dep.version_req()); debug!("found an override for {} {}", dep.name(), dep.version_req());
let mut summaries = self.registry.query_vec(dep)?.into_iter(); let mut summaries = self.registry.query_vec(dep, false)?.into_iter();
let s = summaries.next().ok_or_else(|| { let s = summaries.next().ok_or_else(|| {
format_err!( format_err!(
"no matching package for override `{}` found\n\ "no matching package for override `{}` found\n\

View File

@ -25,6 +25,12 @@ pub trait Source {
/// Attempt to find the packages that match a dependency request. /// Attempt to find the packages that match a dependency request.
fn query(&mut self, dep: &Dependency, f: &mut FnMut(Summary)) -> CargoResult<()>; fn query(&mut self, dep: &Dependency, f: &mut FnMut(Summary)) -> CargoResult<()>;
/// Attempt to find the packages that are close to a dependency request.
/// Each source gets to define what `close` means for it.
/// path/git sources may return all dependencies that are at that uri.
/// where as an Index source may return dependencies that have the same canonicalization.
fn fuzzy_query(&mut self, dep: &Dependency, f: &mut FnMut(Summary)) -> CargoResult<()>;
fn query_vec(&mut self, dep: &Dependency) -> CargoResult<Vec<Summary>> { fn query_vec(&mut self, dep: &Dependency) -> CargoResult<Vec<Summary>> {
let mut ret = Vec::new(); let mut ret = Vec::new();
self.query(dep, &mut |s| ret.push(s))?; self.query(dep, &mut |s| ret.push(s))?;
@ -79,6 +85,11 @@ impl<'a, T: Source + ?Sized + 'a> Source for Box<T> {
(**self).query(dep, f) (**self).query(dep, f)
} }
/// Forwards to `Source::query`
fn fuzzy_query(&mut self, dep: &Dependency, f: &mut FnMut(Summary)) -> CargoResult<()> {
(**self).fuzzy_query(dep, f)
}
/// Forwards to `Source::source_id` /// Forwards to `Source::source_id`
fn source_id(&self) -> &SourceId { fn source_id(&self) -> &SourceId {
(**self).source_id() (**self).source_id()

View File

@ -54,6 +54,14 @@ impl<'cfg> Source for DirectorySource<'cfg> {
Ok(()) Ok(())
} }
fn fuzzy_query(&mut self, _dep: &Dependency, f: &mut FnMut(Summary)) -> CargoResult<()> {
let packages = self.packages.values().map(|p| &p.0);
for summary in packages.map(|pkg| pkg.summary().clone()) {
f(summary);
}
Ok(())
}
fn supports_checksums(&self) -> bool { fn supports_checksums(&self) -> bool {
true true
} }

View File

@ -130,6 +130,13 @@ impl<'cfg> Source for GitSource<'cfg> {
src.query(dep, f) src.query(dep, f)
} }
fn fuzzy_query(&mut self, dep: &Dependency, f: &mut FnMut(Summary)) -> CargoResult<()> {
let src = self.path_source
.as_mut()
.expect("BUG: update() must be called before query()");
src.fuzzy_query(dep, f)
}
fn supports_checksums(&self) -> bool { fn supports_checksums(&self) -> bool {
false false
} }

View File

@ -508,6 +508,13 @@ impl<'cfg> Source for PathSource<'cfg> {
Ok(()) Ok(())
} }
fn fuzzy_query(&mut self, _dep: &Dependency, f: &mut FnMut(Summary)) -> CargoResult<()> {
for s in self.packages.iter().map(|p| p.summary()) {
f(s.clone())
}
Ok(())
}
fn supports_checksums(&self) -> bool { fn supports_checksums(&self) -> bool {
false false
} }

View File

@ -11,6 +11,85 @@ use sources::registry::RegistryData;
use sources::registry::{RegistryPackage, INDEX_LOCK}; use sources::registry::{RegistryPackage, INDEX_LOCK};
use util::{internal, CargoResult, Config, Filesystem}; use util::{internal, CargoResult, Config, Filesystem};
/// Crates.io treats hyphen and underscores as interchangeable
/// but, the index and old cargo do not. So the index must store uncanonicalized version
/// of the name so old cargos can find it.
/// This loop tries all possible combinations of switching
/// hyphen and underscores to find the uncanonicalized one.
/// As all stored inputs have the correct spelling, we start with the spelling as provided.
struct UncanonicalizedIter<'s> {
input: &'s str,
num_hyphen_underscore: u32,
hyphen_combination_num: u16,
}
impl<'s> UncanonicalizedIter<'s> {
fn new(input: &'s str) -> Self {
let num_hyphen_underscore = input.chars().filter(|&c| c == '_' || c == '-').count() as u32;
UncanonicalizedIter {
input,
num_hyphen_underscore,
hyphen_combination_num: 0,
}
}
}
impl<'s> Iterator for UncanonicalizedIter<'s> {
type Item = String;
fn next(&mut self) -> Option<Self::Item> {
if self.hyphen_combination_num > 0 && self.hyphen_combination_num.trailing_zeros() >= self.num_hyphen_underscore {
return None;
}
let ret = Some(self.input
.chars()
.scan(0u16, |s, c| {
// the check against 15 here's to prevent
// shift overflow on inputs with more then 15 hyphens
if (c == '_' || c == '-') && *s <= 15 {
let switch = (self.hyphen_combination_num & (1u16 << *s)) > 0;
let out = if (c == '_') ^ switch {
'_'
} else {
'-'
};
*s += 1;
Some(out)
} else {
Some(c)
}
})
.collect());
self.hyphen_combination_num += 1;
ret
}
}
#[test]
fn no_hyphen() {
assert_eq!(
UncanonicalizedIter::new("test").collect::<Vec<_>>(),
vec!["test".to_string()]
)
}
#[test]
fn two_hyphen() {
assert_eq!(
UncanonicalizedIter::new("te-_st").collect::<Vec<_>>(),
vec!["te-_st".to_string(), "te__st".to_string(), "te--st".to_string(), "te_-st".to_string()]
)
}
#[test]
fn overflow_hyphen() {
assert_eq!(
UncanonicalizedIter::new("te-_-_-_-_-_-_-_-_-st").take(100).count(),
100
)
}
pub struct RegistryIndex<'cfg> { pub struct RegistryIndex<'cfg> {
source_id: SourceId, source_id: SourceId,
path: Filesystem, path: Filesystem,
@ -99,13 +178,14 @@ impl<'cfg> RegistryIndex<'cfg> {
.collect::<String>(); .collect::<String>();
// see module comment for why this is structured the way it is // see module comment for why this is structured the way it is
let path = match fs_name.len() { let raw_path = match fs_name.len() {
1 => format!("1/{}", fs_name), 1 => format!("1/{}", fs_name),
2 => format!("2/{}", fs_name), 2 => format!("2/{}", fs_name),
3 => format!("3/{}/{}", &fs_name[..1], fs_name), 3 => format!("3/{}/{}", &fs_name[..1], fs_name),
_ => format!("{}/{}/{}", &fs_name[0..2], &fs_name[2..4], fs_name), _ => format!("{}/{}/{}", &fs_name[0..2], &fs_name[2..4], fs_name),
}; };
let mut ret = Vec::new(); let mut ret = Vec::new();
for path in UncanonicalizedIter::new(&raw_path).take(1024) {
let mut hit_closure = false; let mut hit_closure = false;
let err = load.load(&root, Path::new(&path), &mut |contents| { let err = load.load(&root, Path::new(&path), &mut |contents| {
hit_closure = true; hit_closure = true;
@ -144,6 +224,10 @@ impl<'cfg> RegistryIndex<'cfg> {
// closure though then we care about those errors. // closure though then we care about those errors.
if hit_closure { if hit_closure {
err?; err?;
// Crates.io ensures that there is only one hyphen and underscore equivalent
// result in the index so return when we find it.
return Ok(ret);
}
} }
Ok(ret) Ok(ret)
@ -178,7 +262,7 @@ impl<'cfg> RegistryIndex<'cfg> {
Ok((summary, yanked.unwrap_or(false))) Ok((summary, yanked.unwrap_or(false)))
} }
pub fn query( pub fn query_inner(
&mut self, &mut self,
dep: &Dependency, dep: &Dependency,
load: &mut RegistryData, load: &mut RegistryData,
@ -212,10 +296,8 @@ impl<'cfg> RegistryIndex<'cfg> {
}); });
for summary in summaries { for summary in summaries {
if dep.matches(&summary) {
f(summary); f(summary);
} }
}
Ok(()) Ok(())
} }
} }

View File

@ -463,9 +463,11 @@ impl<'cfg> Source for RegistrySource<'cfg> {
if dep.source_id().precise().is_some() && !self.updated { if dep.source_id().precise().is_some() && !self.updated {
debug!("attempting query without update"); debug!("attempting query without update");
let mut called = false; let mut called = false;
self.index.query(dep, &mut *self.ops, &mut |s| { self.index.query_inner(dep, &mut *self.ops, &mut |s| {
if dep.matches(&s) {
called = true; called = true;
f(s); f(s);
}
})?; })?;
if called { if called {
return Ok(()); return Ok(());
@ -475,7 +477,15 @@ impl<'cfg> Source for RegistrySource<'cfg> {
} }
} }
self.index.query(dep, &mut *self.ops, f) self.index.query_inner(dep, &mut *self.ops, &mut |s| {
if dep.matches(&s) {
f(s);
}
})
}
fn fuzzy_query(&mut self, dep: &Dependency, f: &mut FnMut(Summary)) -> CargoResult<()> {
self.index.query_inner(dep, &mut *self.ops, f)
} }
fn supports_checksums(&self) -> bool { fn supports_checksums(&self) -> bool {

View File

@ -35,6 +35,19 @@ impl<'cfg> Source for ReplacedSource<'cfg> {
Ok(()) Ok(())
} }
fn fuzzy_query(&mut self, dep: &Dependency, f: &mut FnMut(Summary)) -> CargoResult<()> {
let (replace_with, to_replace) = (&self.replace_with, &self.to_replace);
let dep = dep.clone().map_source(to_replace, replace_with);
self.inner
.fuzzy_query(
&dep,
&mut |summary| f(summary.map_source(replace_with, to_replace)),
)
.chain_err(|| format!("failed to query replaced source {}", self.to_replace))?;
Ok(())
}
fn supports_checksums(&self) -> bool { fn supports_checksums(&self) -> bool {
self.inner.supports_checksums() self.inner.supports_checksums()
} }

View File

@ -230,6 +230,7 @@ error: failed to compile `bar v0.1.0`, intermediate artifacts can be found at `[
Caused by: Caused by:
no matching package named `baz` found no matching package named `baz` found
location searched: registry `https://github.com/rust-lang/crates.io-index` location searched: registry `https://github.com/rust-lang/crates.io-index`
did you mean: bar, foo
required by package `bar v0.1.0` required by package `bar v0.1.0`
", ",
), ),

View File

@ -1239,6 +1239,7 @@ fn invalid_path_dep_in_workspace_with_lockfile() {
"\ "\
error: no matching package named `bar` found error: no matching package named `bar` found
location searched: [..] location searched: [..]
did you mean: foo
required by package `foo v0.5.0 ([..])` required by package `foo v0.5.0 ([..])`
", ",
), ),

View File

@ -141,6 +141,76 @@ required by package `foo v0.0.1 ([..])`
); );
} }
#[test]
fn wrong_case() {
Package::new("init", "0.0.1").publish();
let p = project("foo")
.file(
"Cargo.toml",
r#"
[project]
name = "foo"
version = "0.0.1"
authors = []
[dependencies]
Init = ">= 0.0.0"
"#,
)
.file("src/main.rs", "fn main() {}")
.build();
// #5678 to make this work
assert_that(
p.cargo("build"),
execs().with_status(101).with_stderr(
"\
[UPDATING] registry [..]
error: no matching package named `Init` found
location searched: registry [..]
did you mean: init
required by package `foo v0.0.1 ([..])`
",
),
);
}
#[test]
fn mis_hyphenated() {
Package::new("mis-hyphenated", "0.0.1").publish();
let p = project("foo")
.file(
"Cargo.toml",
r#"
[project]
name = "foo"
version = "0.0.1"
authors = []
[dependencies]
mis_hyphenated = ">= 0.0.0"
"#,
)
.file("src/main.rs", "fn main() {}")
.build();
// #2775 to make this work
assert_that(
p.cargo("build"),
execs().with_status(101).with_stderr(
"\
[UPDATING] registry [..]
error: no matching package named `mis_hyphenated` found
location searched: registry [..]
did you mean: mis-hyphenated
required by package `foo v0.0.1 ([..])`
",
),
);
}
#[test] #[test]
fn wrong_version() { fn wrong_version() {
let p = project("foo") let p = project("foo")

View File

@ -28,9 +28,9 @@ fn resolve_with_config(
) -> CargoResult<Vec<PackageId>> { ) -> CargoResult<Vec<PackageId>> {
struct MyRegistry<'a>(&'a [Summary]); struct MyRegistry<'a>(&'a [Summary]);
impl<'a> Registry for MyRegistry<'a> { impl<'a> Registry for MyRegistry<'a> {
fn query(&mut self, dep: &Dependency, f: &mut FnMut(Summary)) -> CargoResult<()> { fn query(&mut self, dep: &Dependency, f: &mut FnMut(Summary), fuzzy: bool) -> CargoResult<()> {
for summary in self.0.iter() { for summary in self.0.iter() {
if dep.matches(summary) { if fuzzy || dep.matches(summary) {
f(summary.clone()); f(summary.clone());
} }
} }
@ -440,6 +440,46 @@ fn resolving_incompat_versions() {
); );
} }
#[test]
fn resolving_wrong_case_from_registry() {
// In the future we may #5678 allow this to happen.
// For back compatibility reasons, we probably won't.
// But we may want to future prove ourselves by understanding it.
// This test documents the current behavior.
let reg = registry(vec![
pkg!(("foo", "1.0.0")),
pkg!("bar" => ["Foo"]),
]);
assert!(
resolve(
&pkg_id("root"),
vec![dep("bar")],
&reg
).is_err()
);
}
#[test]
fn resolving_mis_hyphenated_from_registry() {
// In the future we may #2775 allow this to happen.
// For back compatibility reasons, we probably won't.
// But we may want to future prove ourselves by understanding it.
// This test documents the current behavior.
let reg = registry(vec![
pkg!(("fo-o", "1.0.0")),
pkg!("bar" => ["fo_o"]),
]);
assert!(
resolve(
&pkg_id("root"),
vec![dep("bar")],
&reg
).is_err()
);
}
#[test] #[test]
fn resolving_backtrack() { fn resolving_backtrack() {
let reg = registry(vec![ let reg = registry(vec![