Plan for a V3 lockfile format

This commit lays the groundwork for an eventual V3 of the lock file
format. The changes in this format are:

* A `version` indicator will be at the top of the file so we don't have
  to guess what format the lock is in, we know for sure. Additionally
  Cargo now reading a super-from-the-future lock file format will give a
  better error.

* Git dependencies with `Branch("master")` will be encoded with
  `?branch=master` instead of with nothing.

The motivation for this change is to eventually switch Cargo's
interpretation of default git branches.
This commit is contained in:
Alex Crichton 2020-07-20 10:11:25 -07:00
parent 32f52fd2e1
commit 4fb8e115c2
6 changed files with 171 additions and 43 deletions

View File

@ -42,6 +42,13 @@
//! Listed from most recent to oldest, these are some of the changes we've made
//! to `Cargo.lock`'s serialization format:
//!
//! * A `version` marker is now at the top of the lock file which is a way for
//! super-old Cargos (at least since this was implemented) to give a formal
//! error if they see a lock file from a super-future Cargo. Additionally as
//! part of this change the encoding of `git` dependencies in lock files
//! changed where `branch = "master"` is now encoded with `branch=master`
//! instead of with nothing at all.
//!
//! * The entries in `dependencies` arrays have been shortened and the
//! `checksum` field now shows up directly in `[[package]]` instead of always
//! at the end of the file. The goal of this change was to ideally reduce
@ -89,25 +96,24 @@
//! special fashion to make sure we have strict control over the on-disk
//! format.
use std::collections::{BTreeMap, HashMap, HashSet};
use std::fmt;
use std::str::FromStr;
use super::{Resolve, ResolveVersion};
use crate::core::{Dependency, GitReference, Package, PackageId, SourceId, Workspace};
use crate::util::errors::{CargoResult, CargoResultExt};
use crate::util::interning::InternedString;
use crate::util::{internal, Graph};
use anyhow::bail;
use log::debug;
use serde::de;
use serde::ser;
use serde::{Deserialize, Serialize};
use crate::core::{Dependency, Package, PackageId, SourceId, Workspace};
use crate::util::errors::{CargoResult, CargoResultExt};
use crate::util::interning::InternedString;
use crate::util::{internal, Graph};
use super::{Resolve, ResolveVersion};
use std::collections::{BTreeMap, HashMap, HashSet};
use std::fmt;
use std::str::FromStr;
/// The `Cargo.lock` structure.
#[derive(Serialize, Deserialize, Debug)]
pub struct EncodableResolve {
version: Option<u32>,
package: Option<Vec<EncodableDependency>>,
/// `root` is optional to allow backward compatibility.
root: Option<EncodableDependency>,
@ -136,8 +142,19 @@ impl EncodableResolve {
let path_deps = build_path_deps(ws);
let mut checksums = HashMap::new();
// We assume an older format is being parsed until we see so otherwise.
let mut version = ResolveVersion::V1;
let mut version = match self.version {
Some(1) => ResolveVersion::V3,
Some(n) => bail!(
"lock file version `{}` was found, but this version of Cargo \
does not understand this lock file, perhaps Cargo needs \
to be updated?",
n,
),
// Historically Cargo did not have a version indicator in lock
// files, so this could either be the V1 or V2 encoding. We assume
// an older format is being parsed until we see so otherwise.
None => ResolveVersion::V1,
};
let packages = {
let mut packages = self.package.unwrap_or_default();
@ -176,7 +193,7 @@ impl EncodableResolve {
// that here, and we also bump our version up to 2 since V1
// didn't ever encode this field.
if let Some(cksum) = &pkg.checksum {
version = ResolveVersion::V2;
version = version.max(ResolveVersion::V2);
checksums.insert(id, Some(cksum.clone()));
}
@ -213,7 +230,7 @@ impl EncodableResolve {
let by_source = match &enc_id.version {
Some(version) => by_version.get(version)?,
None => {
version = ResolveVersion::V2;
version = version.max(ResolveVersion::V2);
if by_version.len() == 1 {
by_version.values().next().unwrap()
} else {
@ -245,7 +262,7 @@ impl EncodableResolve {
// the lock file
} else if by_source.len() == 1 {
let id = by_source.values().next().unwrap();
version = ResolveVersion::V2;
version = version.max(ResolveVersion::V2);
Some(*id)
// ... and failing that we probably had a bad git merge of
@ -317,7 +334,7 @@ impl EncodableResolve {
// If `checksum` was listed in `[metadata]` but we were previously
// listed as `V2` then assume some sort of bad git merge happened, so
// discard all checksums and let's regenerate them later.
if !to_remove.is_empty() && version == ResolveVersion::V2 {
if !to_remove.is_empty() && version >= ResolveVersion::V2 {
checksums.drain();
}
for k in to_remove {
@ -539,13 +556,13 @@ impl<'a> ser::Serialize for Resolve {
let mut metadata = self.metadata().clone();
if *self.version() == ResolveVersion::V1 {
if self.version() == ResolveVersion::V1 {
for &id in ids.iter().filter(|id| !id.source_id().is_path()) {
let checksum = match self.checksums()[&id] {
Some(ref s) => &s[..],
None => "<none>",
};
let id = encodable_package_id(id, &state);
let id = encodable_package_id(id, &state, self.version());
metadata.insert(format!("checksum {}", id.to_string()), checksum.to_string());
}
}
@ -566,9 +583,10 @@ impl<'a> ser::Serialize for Resolve {
source: encode_source(id.source_id()),
dependencies: None,
replace: None,
checksum: match self.version() {
ResolveVersion::V2 => self.checksums().get(id).and_then(|x| x.clone()),
ResolveVersion::V1 => None,
checksum: if self.version() >= ResolveVersion::V2 {
self.checksums().get(id).and_then(|x| x.clone())
} else {
None
},
})
.collect(),
@ -578,6 +596,10 @@ impl<'a> ser::Serialize for Resolve {
root: None,
metadata,
patch,
version: match self.version() {
ResolveVersion::V3 => Some(1),
ResolveVersion::V2 | ResolveVersion::V1 => None,
},
}
.serialize(s)
}
@ -589,7 +611,7 @@ pub struct EncodeState<'a> {
impl<'a> EncodeState<'a> {
pub fn new(resolve: &'a Resolve) -> EncodeState<'a> {
let counts = if *resolve.version() == ResolveVersion::V2 {
let counts = if resolve.version() >= ResolveVersion::V2 {
let mut map = HashMap::new();
for id in resolve.iter() {
let slot = map
@ -613,11 +635,14 @@ fn encodable_resolve_node(
state: &EncodeState<'_>,
) -> EncodableDependency {
let (replace, deps) = match resolve.replacement(id) {
Some(id) => (Some(encodable_package_id(id, state)), None),
Some(id) => (
Some(encodable_package_id(id, state, resolve.version())),
None,
),
None => {
let mut deps = resolve
.deps_not_replaced(id)
.map(|(id, _)| encodable_package_id(id, state))
.map(|(id, _)| encodable_package_id(id, state, resolve.version()))
.collect::<Vec<_>>();
deps.sort();
(None, Some(deps))
@ -630,16 +655,30 @@ fn encodable_resolve_node(
source: encode_source(id.source_id()),
dependencies: deps,
replace,
checksum: match resolve.version() {
ResolveVersion::V2 => resolve.checksums().get(&id).and_then(|s| s.clone()),
ResolveVersion::V1 => None,
checksum: if resolve.version() >= ResolveVersion::V2 {
resolve.checksums().get(&id).and_then(|s| s.clone())
} else {
None
},
}
}
pub fn encodable_package_id(id: PackageId, state: &EncodeState<'_>) -> EncodablePackageId {
pub fn encodable_package_id(
id: PackageId,
state: &EncodeState<'_>,
resolve_version: ResolveVersion,
) -> EncodablePackageId {
let mut version = Some(id.version().to_string());
let mut source = encode_source(id.source_id()).map(|s| s.with_precise(None));
let mut id_to_encode = id.source_id();
if resolve_version <= ResolveVersion::V2 {
if let Some(GitReference::Branch(b)) = id_to_encode.git_reference() {
if b == "master" {
id_to_encode =
SourceId::for_git(id_to_encode.url(), GitReference::DefaultBranch).unwrap();
}
}
}
let mut source = encode_source(id_to_encode).map(|s| s.with_precise(None));
if let Some(counts) = &state.counts {
let version_counts = &counts[&id.name()];
if version_counts[&id.version()] == 1 {

View File

@ -1071,7 +1071,7 @@ fn check_duplicate_pkgs_in_lockfile(resolve: &Resolve) -> CargoResult<()> {
let mut unique_pkg_ids = HashMap::new();
let state = encode::EncodeState::new(resolve);
for pkg_id in resolve.iter() {
let encodable_pkd_id = encode::encodable_package_id(pkg_id, &state);
let encodable_pkd_id = encode::encodable_package_id(pkg_id, &state, resolve.version());
if let Some(prev_pkg_id) = unique_pkg_ids.insert(encodable_pkd_id, pkg_id) {
anyhow::bail!(
"package collision in the lockfile: packages {} and {} are different, \

View File

@ -66,6 +66,11 @@ pub enum ResolveVersion {
/// listed inline. Introduced in 2019 in version 1.38. New lockfiles use
/// V2 by default starting in 1.41.
V2,
/// A format that explicitly lists a `version` at the top of the file as
/// well as changing how git dependencies are encoded. Dependencies with
/// `branch = "master"` are no longer encoded the same way as those without
/// branch specifiers.
V3,
}
impl Resolve {
@ -374,8 +379,8 @@ unable to verify that `{0}` is the same as when the lockfile was generated
/// Returns the version of the encoding that's being used for this lock
/// file.
pub fn version(&self) -> &ResolveVersion {
&self.version
pub fn version(&self) -> ResolveVersion {
self.version
}
pub fn summary(&self, pkg_id: PackageId) -> &Summary {

View File

@ -555,9 +555,6 @@ impl GitReference {
pub fn pretty_ref(&self) -> Option<PrettyRef<'_>> {
match self {
GitReference::DefaultBranch => None,
// See module comments in src/cargo/sources/git/utils.rs for why
// `DefaultBranch` is treated specially here.
GitReference::Branch(m) if m == "master" => None,
_ => Some(PrettyRef { inner: self }),
}
}

View File

@ -120,6 +120,10 @@ fn resolve_to_string_orig(
}
}
if let Some(version) = toml.get("version") {
out.push_str(&format!("version = {}\n\n", version));
}
let deps = toml["package"].as_array().unwrap();
for dep in deps {
let dep = dep.as_table().unwrap();
@ -147,12 +151,9 @@ fn resolve_to_string_orig(
// encodings going forward, though, we want to be sure that our encoded lock
// file doesn't contain any trailing newlines so trim out the extra if
// necessary.
match resolve.version() {
ResolveVersion::V1 => {}
_ => {
while out.ends_with("\n\n") {
out.pop();
}
if resolve.version() >= ResolveVersion::V2 {
while out.ends_with("\n\n") {
out.pop();
}
}

View File

@ -2,7 +2,7 @@
use cargo_test_support::git;
use cargo_test_support::registry::Package;
use cargo_test_support::{basic_manifest, lines_match, project};
use cargo_test_support::{basic_lib_manifest, basic_manifest, lines_match, project};
#[cargo_test]
fn oldest_lockfile_still_works() {
@ -636,3 +636,89 @@ dependencies = [
let lock = p.read_lockfile();
assert_lockfiles_eq(&lockfile, &lock);
}
#[cargo_test]
fn v3_and_git() {
let (git_project, repo) = git::new_repo("dep1", |project| {
project
.file("Cargo.toml", &basic_lib_manifest("dep1"))
.file("src/lib.rs", "")
});
let head_id = repo.head().unwrap().target().unwrap();
let lockfile = format!(
r#"# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 1
[[package]]
name = "dep1"
version = "0.5.0"
source = "git+{}?branch=master#{}"
[[package]]
name = "foo"
version = "0.0.1"
dependencies = [
"dep1",
]
"#,
git_project.url(),
head_id,
);
let p = project()
.file(
"Cargo.toml",
&format!(
r#"
[project]
name = "foo"
version = "0.0.1"
authors = []
[dependencies]
dep1 = {{ git = '{}', branch = 'master' }}
"#,
git_project.url(),
),
)
.file("src/lib.rs", "")
.file("Cargo.lock", "version = 1")
.build();
p.cargo("fetch").run();
let lock = p.read_lockfile();
assert_lockfiles_eq(&lockfile, &lock);
}
#[cargo_test]
fn lock_from_the_future() {
let p = project()
.file(
"Cargo.toml",
r#"
[project]
name = "foo"
version = "0.0.1"
authors = []
"#,
)
.file("src/lib.rs", "")
.file("Cargo.lock", "version = 10000000")
.build();
p.cargo("fetch")
.with_stderr(
"\
error: failed to parse lock file at: [..]
Caused by:
lock file version `10000000` was found, but this version of Cargo does not \
understand this lock file, perhaps Cargo needs to be updated?
",
)
.with_status(101)
.run();
}