implement checksum freshness fingerprints for cargo

This commit is contained in:
Jacob Kiesel 2024-06-19 17:13:00 -06:00 committed by Weihang Lo
parent 2e309bd754
commit f4ca739073
No known key found for this signature in database
GPG Key ID: D7DBF189825E82E7
12 changed files with 615 additions and 108 deletions

33
Cargo.lock generated
View File

@ -134,6 +134,18 @@ version = "1.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "69f7f8c3906b62b754cd5326047894316021dcfe5a194c8ea52bdd94934a3457"
[[package]]
name = "arrayref"
version = "0.3.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6b4930d2cb77ce62f89ee5d5289b4ac049559b1c45539271f5ed4fdc7db34545"
[[package]]
name = "arrayvec"
version = "0.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711"
[[package]]
name = "autocfg"
version = "1.3.0"
@ -207,6 +219,19 @@ dependencies = [
"typenum",
]
[[package]]
name = "blake3"
version = "1.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3d08263faac5cde2a4d52b513dadb80846023aade56fcd8fc99ba73ba8050e92"
dependencies = [
"arrayref",
"arrayvec",
"cc",
"cfg-if",
"constant_time_eq",
]
[[package]]
name = "block-buffer"
version = "0.10.4"
@ -273,6 +298,7 @@ dependencies = [
"anstyle",
"anyhow",
"base64",
"blake3",
"bytesize",
"cargo-credential",
"cargo-credential-libsecret",
@ -330,6 +356,7 @@ dependencies = [
"supports-unicode",
"tar",
"tempfile",
"thiserror",
"time",
"toml",
"toml_edit",
@ -618,6 +645,12 @@ version = "0.9.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8"
[[package]]
name = "constant_time_eq"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f7144d30dcf0fafbce74250a3963025d8d52177934239851c917d29f1df280c2"
[[package]]
name = "content_inspector"
version = "0.2.4"

View File

@ -23,6 +23,7 @@ anstream = "0.6.15"
anstyle = "1.0.8"
anyhow = "1.0.86"
base64 = "0.22.1"
blake3 = "1.5.2"
bytesize = "1.3"
cargo = { path = "" }
cargo-credential = { version = "0.4.2", path = "credential/cargo-credential" }
@ -148,6 +149,7 @@ anstream.workspace = true
anstyle.workspace = true
anyhow.workspace = true
base64.workspace = true
blake3.workspace = true
bytesize.workspace = true
cargo-credential.workspace = true
cargo-platform.workspace = true
@ -197,6 +199,7 @@ shell-escape.workspace = true
supports-hyperlinks.workspace = true
tar.workspace = true
tempfile.workspace = true
thiserror.workspace = true
time.workspace = true
toml.workspace = true
toml_edit.workspace = true

View File

@ -16,7 +16,7 @@ use jobserver::Client;
use super::build_plan::BuildPlan;
use super::custom_build::{self, BuildDeps, BuildScriptOutputs, BuildScripts};
use super::fingerprint::Fingerprint;
use super::fingerprint::{Checksum, Fingerprint};
use super::job_queue::JobQueue;
use super::layout::Layout;
use super::lto::Lto;
@ -50,6 +50,8 @@ pub struct BuildRunner<'a, 'gctx> {
pub fingerprints: HashMap<Unit, Arc<Fingerprint>>,
/// Cache of file mtimes to reduce filesystem hits.
pub mtime_cache: HashMap<PathBuf, FileTime>,
/// Cache of file checksums to reduce filesystem reads.
pub checksum_cache: HashMap<PathBuf, Checksum>,
/// A set used to track which units have been compiled.
/// A unit may appear in the job graph multiple times as a dependency of
/// multiple packages, but it only needs to run once.
@ -113,6 +115,7 @@ impl<'a, 'gctx> BuildRunner<'a, 'gctx> {
build_script_outputs: Arc::new(Mutex::new(BuildScriptOutputs::default())),
fingerprints: HashMap::new(),
mtime_cache: HashMap::new(),
checksum_cache: HashMap::new(),
compiled: HashSet::new(),
build_scripts: HashMap::new(),
build_explicit_deps: HashMap::new(),

View File

@ -34,6 +34,9 @@ pub enum DirtyReason {
old: String,
new: String,
},
ChecksumUseChanged {
old: bool,
},
DepInfoOutputChanged {
old: PathBuf,
new: PathBuf,
@ -183,6 +186,16 @@ impl DirtyReason {
DirtyReason::PrecalculatedComponentsChanged { .. } => {
s.dirty_because(unit, "the precalculated components changed")
}
DirtyReason::ChecksumUseChanged { old } => {
if *old {
s.dirty_because(
unit,
"the prior compilation used checksum freshness and this one does not",
)
} else {
s.dirty_because(unit, "checksum freshness requested, prior compilation did not use checksum freshness")
}
}
DirtyReason::DepInfoOutputChanged { .. } => {
s.dirty_because(unit, "the dependency info output changed")
}
@ -222,6 +235,20 @@ impl DirtyReason {
format_args!("the file `{}` is missing", file.display()),
)
}
StaleItem::UnableToReadFile(file) => {
let file = file.strip_prefix(root).unwrap_or(&file);
s.dirty_because(
unit,
format_args!("the file `{}` could not be read", file.display()),
)
}
StaleItem::FailedToReadMetadata(file) => {
let file = file.strip_prefix(root).unwrap_or(&file);
s.dirty_because(
unit,
format_args!("couldn't read metadata for file `{}`", file.display()),
)
}
StaleItem::ChangedFile {
stale,
stale_mtime,
@ -235,6 +262,41 @@ impl DirtyReason {
format_args!("the file `{}` has changed ({after})", file.display()),
)
}
StaleItem::ChangedChecksum {
source,
stored_checksum,
new_checksum,
} => {
let file = source.strip_prefix(root).unwrap_or(&source);
s.dirty_because(
unit,
format_args!(
"the file `{}` has changed (checksum didn't match, {stored_checksum} != {new_checksum})",
file.display(),
),
)
}
StaleItem::FileSizeChanged {
path,
old_size,
new_size,
} => {
let file = path.strip_prefix(root).unwrap_or(&path);
s.dirty_because(
unit,
format_args!(
"file size changed ({old_size} != {new_size}) for `{}`",
file.display()
),
)
}
StaleItem::MissingChecksum(path) => {
let file = path.strip_prefix(root).unwrap_or(&path);
s.dirty_because(
unit,
format_args!("the checksum for file `{}` is missing", file.display()),
)
}
StaleItem::ChangedEnv { var, .. } => s.dirty_because(
unit,
format_args!("the environment variable {var} changed"),

View File

@ -33,6 +33,12 @@
//! details. If any input files are missing, or are newer than the
//! dep-info, then the unit is dirty.
//!
//! - Alternatively, if you're using the unstable feature `checksum-freshness`,
//! mtimes are ignored entirely in favor of comparing first the file size, and
//! then the checksum, against known prior values emitted by rustc. Only nightly
//! rustc will emit the needed metadata at the time of writing. This is dependent
//! on the unstable feature `-Z checksum-hash-algorithm`.
//!
//! Note: Fingerprinting is not a perfect solution. Filesystem mtime tracking
//! is notoriously imprecise and problematic. Only a small part of the
//! environment is captured. This is a balance of performance, simplicity, and
@ -358,15 +364,17 @@ mod dirty_reason;
use std::collections::hash_map::{Entry, HashMap};
use std::env;
use std::fmt::{self, Display};
use std::fs::{self, File};
use std::hash::{self, Hash, Hasher};
use std::io;
use std::io::{self, Read};
use std::path::{Path, PathBuf};
use std::str;
use std::str::{self, from_utf8, FromStr};
use std::sync::{Arc, Mutex};
use std::time::SystemTime;
use anyhow::{bail, format_err, Context as _};
use cargo_util::{paths, ProcessBuilder};
use cargo_util::{paths, ProcessBuilder, Sha256};
use filetime::FileTime;
use serde::de;
use serde::ser;
@ -725,7 +733,10 @@ enum LocalFingerprint {
/// The `dep_info` file, when present, also lists a number of other files
/// for us to look at. If any of those files are newer than this file then
/// we need to recompile.
CheckDepInfo { dep_info: PathBuf },
///
/// If the `checksum` bool is true then the dep_info file is expected to
/// contain file checksums instead of file mtimes.
CheckDepInfo { dep_info: PathBuf, checksum: bool },
/// This represents a nonempty set of `rerun-if-changed` annotations printed
/// out by a build script. The `output` file is a relative file anchored at
@ -752,12 +763,25 @@ enum LocalFingerprint {
#[derive(Clone, Debug)]
pub enum StaleItem {
MissingFile(PathBuf),
UnableToReadFile(PathBuf),
FailedToReadMetadata(PathBuf),
FileSizeChanged {
path: PathBuf,
old_size: u64,
new_size: u64,
},
ChangedFile {
reference: PathBuf,
reference_mtime: FileTime,
stale: PathBuf,
stale_mtime: FileTime,
},
ChangedChecksum {
source: PathBuf,
stored_checksum: Checksum,
new_checksum: Checksum,
},
MissingChecksum(PathBuf),
ChangedEnv {
var: String,
previous: Option<String>,
@ -793,6 +817,7 @@ impl LocalFingerprint {
fn find_stale_item(
&self,
mtime_cache: &mut HashMap<PathBuf, FileTime>,
checksum_cache: &mut HashMap<PathBuf, Checksum>,
pkg_root: &Path,
target_root: &Path,
cargo_exe: &Path,
@ -805,7 +830,7 @@ impl LocalFingerprint {
// matches, and for each file we see if any of them are newer than
// the `dep_info` file itself whose mtime represents the start of
// rustc.
LocalFingerprint::CheckDepInfo { dep_info } => {
LocalFingerprint::CheckDepInfo { dep_info, checksum } => {
let dep_info = target_root.join(dep_info);
let Some(info) = parse_dep_info(pkg_root, target_root, &dep_info)? else {
return Ok(Some(StaleItem::MissingFile(dep_info)));
@ -835,15 +860,33 @@ impl LocalFingerprint {
current,
}));
}
Ok(find_stale_file(mtime_cache, &dep_info, info.files.iter()))
if *checksum {
Ok(find_stale_file(
mtime_cache,
checksum_cache,
&dep_info,
info.files.iter().map(|(file, checksum)| (file, *checksum)),
*checksum,
))
} else {
Ok(find_stale_file(
mtime_cache,
checksum_cache,
&dep_info,
info.files.into_keys().map(|p| (p, None)),
*checksum,
))
}
}
// We need to verify that no paths listed in `paths` are newer than
// the `output` path itself, or the last time the build script ran.
LocalFingerprint::RerunIfChanged { output, paths } => Ok(find_stale_file(
mtime_cache,
checksum_cache,
&target_root.join(output),
paths.iter().map(|p| pkg_root.join(p)),
paths.iter().map(|p| (pkg_root.join(p), None)),
false,
)),
// These have no dependencies on the filesystem, and their values
@ -965,8 +1008,14 @@ impl Fingerprint {
}
}
(
LocalFingerprint::CheckDepInfo { dep_info: adep },
LocalFingerprint::CheckDepInfo { dep_info: bdep },
LocalFingerprint::CheckDepInfo {
dep_info: adep,
checksum: checksum_a,
},
LocalFingerprint::CheckDepInfo {
dep_info: bdep,
checksum: checksum_b,
},
) => {
if adep != bdep {
return DirtyReason::DepInfoOutputChanged {
@ -974,6 +1023,9 @@ impl Fingerprint {
new: adep.clone(),
};
}
if checksum_a != checksum_b {
return DirtyReason::ChecksumUseChanged { old: *checksum_b };
}
}
(
LocalFingerprint::RerunIfChanged {
@ -1077,6 +1129,7 @@ impl Fingerprint {
fn check_filesystem(
&mut self,
mtime_cache: &mut HashMap<PathBuf, FileTime>,
checksum_cache: &mut HashMap<PathBuf, Checksum>,
pkg_root: &Path,
target_root: &Path,
cargo_exe: &Path,
@ -1181,9 +1234,14 @@ impl Fingerprint {
// files for this package itself. If we do find something log a helpful
// message and bail out so we stay stale.
for local in self.local.get_mut().unwrap().iter() {
if let Some(item) =
local.find_stale_item(mtime_cache, pkg_root, target_root, cargo_exe, gctx)?
{
if let Some(item) = local.find_stale_item(
mtime_cache,
checksum_cache,
pkg_root,
target_root,
cargo_exe,
gctx,
)? {
item.log();
self.fs_status = FsStatus::StaleItem(item);
return Ok(());
@ -1293,6 +1351,12 @@ impl StaleItem {
StaleItem::MissingFile(path) => {
info!("stale: missing {:?}", path);
}
StaleItem::UnableToReadFile(path) => {
info!("stale: unable to read {:?}", path);
}
StaleItem::FailedToReadMetadata(path) => {
info!("stale: couldn't read metadata {:?}", path);
}
StaleItem::ChangedFile {
reference,
reference_mtime,
@ -1303,6 +1367,27 @@ impl StaleItem {
info!(" (vs) {:?}", reference);
info!(" {:?} < {:?}", reference_mtime, stale_mtime);
}
StaleItem::FileSizeChanged {
path,
new_size,
old_size,
} => {
info!("stale: changed {:?}", path);
info!("prior file size {old_size}");
info!(" new file size {new_size}");
}
StaleItem::ChangedChecksum {
source,
stored_checksum,
new_checksum,
} => {
info!("stale: changed {:?}", source);
info!("prior checksum {stored_checksum}");
info!(" new checksum {new_checksum}");
}
StaleItem::MissingChecksum(path) => {
info!("stale: no prior checksum {:?}", path);
}
StaleItem::ChangedEnv {
var,
previous,
@ -1347,6 +1432,7 @@ fn calculate(build_runner: &mut BuildRunner<'_, '_>, unit: &Unit) -> CargoResult
let cargo_exe = build_runner.bcx.gctx.cargo_exe()?;
fingerprint.check_filesystem(
&mut build_runner.mtime_cache,
&mut build_runner.checksum_cache,
unit.pkg.root(),
&target_root,
cargo_exe,
@ -1399,7 +1485,10 @@ fn calculate_normal(
} else {
let dep_info = dep_info_loc(build_runner, unit);
let dep_info = dep_info.strip_prefix(&target_root).unwrap().to_path_buf();
vec![LocalFingerprint::CheckDepInfo { dep_info }]
vec![LocalFingerprint::CheckDepInfo {
dep_info,
checksum: build_runner.bcx.gctx.cli_unstable().checksum_freshness,
}]
};
// Figure out what the outputs of our unit is, and we'll be storing them
@ -1843,14 +1932,29 @@ pub fn parse_dep_info(
};
let mut ret = RustcDepInfo::default();
ret.env = info.env;
ret.files.extend(info.files.into_iter().map(|(ty, path)| {
ret.files
.extend(info.files.into_iter().map(|(ty, path, checksum_info)| {
(
make_absolute_path(ty, pkg_root, target_root, path),
checksum_info.and_then(|(file_len, checksum)| {
Checksum::from_str(&checksum).ok().map(|c| (file_len, c))
}),
)
}));
Ok(Some(ret))
}
fn make_absolute_path(
ty: DepInfoPathType,
pkg_root: &Path,
target_root: &Path,
path: PathBuf,
) -> PathBuf {
match ty {
DepInfoPathType::PackageRootRelative => pkg_root.join(path),
// N.B. path might be absolute here in which case the join will have no effect
DepInfoPathType::TargetRootRelative => target_root.join(path),
}
}));
Ok(Some(ret))
}
/// Calculates the fingerprint of a unit that contains no dep-info files.
@ -1865,14 +1969,16 @@ fn pkg_fingerprint(bcx: &BuildContext<'_, '_>, pkg: &Package) -> CargoResult<Str
}
/// The `reference` file is considered "stale" if any file from `paths` has a newer mtime
/// or, when `use_checksums` is set, a file size or checksum that differs from its recorded value.
fn find_stale_file<I>(
fn find_stale_file<I, P>(
mtime_cache: &mut HashMap<PathBuf, FileTime>,
checksum_cache: &mut HashMap<PathBuf, Checksum>,
reference: &Path,
paths: I,
use_checksums: bool,
) -> Option<StaleItem>
where
I: IntoIterator,
I::Item: AsRef<Path>,
I: IntoIterator<Item = (P, Option<(u64, Checksum)>)>,
P: AsRef<Path>,
{
let Ok(reference_mtime) = paths::mtime(reference) else {
return Some(StaleItem::MissingFile(reference.to_path_buf()));
@ -1887,8 +1993,7 @@ where
} else {
None
};
for path in paths {
for (path, prior_checksum) in paths {
let path = path.as_ref();
// Assuming anything in cargo_home/{git, registry} is immutable
@ -1900,6 +2005,43 @@ where
continue;
}
}
if use_checksums {
let Some((file_len, prior_checksum)) = prior_checksum else {
return Some(StaleItem::MissingChecksum(path.to_path_buf()));
};
let path_buf = path.to_path_buf();
let path_checksum = match checksum_cache.entry(path_buf) {
Entry::Occupied(o) => *o.get(),
Entry::Vacant(v) => {
let Ok(current_file_len) = fs::metadata(&path).map(|m| m.len()) else {
return Some(StaleItem::FailedToReadMetadata(path.to_path_buf()));
};
let Ok(file) = File::open(path) else {
return Some(StaleItem::MissingFile(path.to_path_buf()));
};
if current_file_len != file_len {
return Some(StaleItem::FileSizeChanged {
path: path.to_path_buf(),
new_size: current_file_len,
old_size: file_len,
});
}
let Ok(checksum) = Checksum::compute(prior_checksum.algo, file) else {
return Some(StaleItem::UnableToReadFile(path.to_path_buf()));
};
*v.insert(checksum)
}
};
if path_checksum == prior_checksum {
continue;
}
return Some(StaleItem::ChangedChecksum {
source: path.to_path_buf(),
stored_checksum: prior_checksum,
new_checksum: path_checksum,
});
} else {
let path_mtime = match mtime_cache.entry(path.to_path_buf()) {
Entry::Occupied(o) => *o.get(),
Entry::Vacant(v) => {
@ -1939,6 +2081,7 @@ where
stale_mtime: path_mtime,
});
}
}
debug!(
"all paths up-to-date relative to {:?} mtime={}",
@ -1949,6 +2092,7 @@ where
/// Tells the associated path in [`EncodedDepInfo::files`] is relative to package root,
/// target root, or absolute.
#[derive(Debug, Eq, PartialEq, Hash, Copy, Clone)]
enum DepInfoPathType {
/// src/, e.g. src/lib.rs
PackageRootRelative,
@ -2028,7 +2172,7 @@ pub fn translate_dep_info(
.env
.retain(|(key, _)| !rustc_cmd.get_envs().contains_key(key) || key == CARGO_ENV);
for file in depinfo.files {
let serialize_path = |file| {
// The path may be absolute or relative, canonical or not. Make sure
// it is canonicalized so we are comparing the same kinds of paths.
let abs_file = rustc_cwd.join(file);
@ -2041,7 +2185,7 @@ pub fn translate_dep_info(
(DepInfoPathType::TargetRootRelative, stripped)
} else if let Ok(stripped) = canon_file.strip_prefix(&pkg_root) {
if !allow_package {
continue;
return None;
}
(DepInfoPathType::PackageRootRelative, stripped)
} else {
@ -2050,7 +2194,18 @@ pub fn translate_dep_info(
// effect.
(DepInfoPathType::TargetRootRelative, &*abs_file)
};
on_disk_info.files.push((ty, path.to_owned()));
Some((ty, path.to_owned()))
};
for (file, checksum_info) in depinfo.files {
let Some((path_type, path)) = serialize_path(file) else {
continue;
};
on_disk_info.files.push((
path_type,
path,
checksum_info.map(|(len, checksum)| (len, checksum.to_string())),
));
}
paths::write(cargo_dep_info, on_disk_info.serialize()?)?;
Ok(())
@ -2060,7 +2215,7 @@ pub fn translate_dep_info(
#[derive(Default)]
pub struct RustcDepInfo {
/// The list of files that the main target in the dep-info file depends on.
pub files: Vec<PathBuf>,
pub files: HashMap<PathBuf, Option<(u64, Checksum)>>,
/// The list of environment variables we found that the rustc compilation
/// depends on.
///
@ -2078,7 +2233,7 @@ pub struct RustcDepInfo {
/// Cargo will read it for crates on all future compilations.
#[derive(Default)]
struct EncodedDepInfo {
files: Vec<(DepInfoPathType, PathBuf)>,
files: Vec<(DepInfoPathType, PathBuf, Option<(u64, String)>)>,
env: Vec<(String, Option<String>)>,
}
@ -2086,19 +2241,30 @@ impl EncodedDepInfo {
fn parse(mut bytes: &[u8]) -> Option<EncodedDepInfo> {
let bytes = &mut bytes;
let nfiles = read_usize(bytes)?;
let mut files = Vec::with_capacity(nfiles as usize);
let mut files = Vec::with_capacity(nfiles);
for _ in 0..nfiles {
let ty = match read_u8(bytes)? {
0 => DepInfoPathType::PackageRootRelative,
1 => DepInfoPathType::TargetRootRelative,
_ => return None,
};
let bytes = read_bytes(bytes)?;
files.push((ty, paths::bytes2path(bytes).ok()?));
let path_bytes = read_bytes(bytes)?;
let path = paths::bytes2path(path_bytes).ok()?;
let has_checksum = read_bool(bytes)?;
let checksum_info = has_checksum
.then(|| {
let file_len = read_u64(bytes);
let checksum_string = read_bytes(bytes)
.map(Vec::from)
.and_then(|v| String::from_utf8(v).ok());
file_len.zip(checksum_string)
})
.flatten();
files.push((ty, path, checksum_info));
}
let nenv = read_usize(bytes)?;
let mut env = Vec::with_capacity(nenv as usize);
let mut env = Vec::with_capacity(nenv);
for _ in 0..nenv {
let key = str::from_utf8(read_bytes(bytes)?).ok()?.to_string();
let val = match read_u8(bytes)? {
@ -2116,6 +2282,16 @@ impl EncodedDepInfo {
Some(u32::from_le_bytes(ret.try_into().unwrap()) as usize)
}
/// Pops a little-endian `u64` off the front of `bytes`.
///
/// Returns `None`, leaving `bytes` untouched, when fewer than eight bytes remain.
fn read_u64(bytes: &mut &[u8]) -> Option<u64> {
    let data: &[u8] = *bytes;
    if data.len() < 8 {
        return None;
    }
    let (head, tail) = data.split_at(8);
    let mut raw = [0u8; 8];
    raw.copy_from_slice(head);
    // Advance the cursor past the eight bytes just consumed.
    *bytes = tail;
    Some(u64::from_le_bytes(raw))
}
/// Pops one byte off the front of `bytes` and interprets it as a flag.
///
/// Any non-zero byte counts as `true`; `None` is returned when `read_u8` yields nothing.
fn read_bool(bytes: &mut &[u8]) -> Option<bool> {
    let flag = read_u8(bytes)?;
    Some(flag != 0)
}
fn read_u8(bytes: &mut &[u8]) -> Option<u8> {
let ret = *bytes.get(0)?;
*bytes = &bytes[1..];
@ -2134,12 +2310,17 @@ impl EncodedDepInfo {
let mut ret = Vec::new();
let dst = &mut ret;
write_usize(dst, self.files.len());
for (ty, file) in self.files.iter() {
for (ty, file, checksum_info) in self.files.iter() {
match ty {
DepInfoPathType::PackageRootRelative => dst.push(0),
DepInfoPathType::TargetRootRelative => dst.push(1),
}
write_bytes(dst, paths::path2bytes(file)?);
write_bool(dst, checksum_info.is_some());
if let Some((len, checksum)) = checksum_info {
write_u64(dst, *len);
write_bytes(dst, checksum);
}
}
write_usize(dst, self.env.len());
@ -2164,6 +2345,14 @@ impl EncodedDepInfo {
/// Appends `val` to `dst` as four little-endian bytes.
///
/// The value is narrowed with `as u32`, so only its low 32 bits are written.
fn write_usize(dst: &mut Vec<u8>, val: usize) {
    let narrowed = val as u32;
    dst.extend_from_slice(&narrowed.to_le_bytes());
}
/// Appends `val` to `dst` as eight little-endian bytes.
fn write_u64(dst: &mut Vec<u8>, val: u64) {
    let encoded = val.to_le_bytes();
    dst.extend_from_slice(&encoded);
}
/// Appends `val` to `dst` as a single byte: `1` for `true`, `0` for `false`.
fn write_bool(dst: &mut Vec<u8>, val: bool) {
    let byte: u8 = if val { 1 } else { 0 };
    dst.push(byte);
}
}
}
@ -2200,8 +2389,24 @@ pub fn parse_rustc_dep_info(rustc_dep_info: &Path) -> CargoResult<RustcDepInfo>
internal("malformed dep-info format, trailing \\".to_string())
})?);
}
ret.files.push(file.into());
ret.files.entry(file.into()).or_default();
}
} else if let Some(rest) = line.strip_prefix("# checksum:") {
let mut parts = rest.splitn(3, ' ');
let Some(checksum) = parts.next().map(Checksum::from_str).transpose()? else {
continue;
};
let Some(Ok(file_len)) = parts
.next()
.and_then(|s| s.strip_prefix("file_len:").map(|s| s.parse::<u64>()))
else {
continue;
};
let Some(path) = parts.next().map(PathBuf::from) else {
continue;
};
ret.files.insert(path, Some((file_len, checksum)));
}
}
return Ok(ret);
@ -2228,3 +2433,164 @@ pub fn parse_rustc_dep_info(rustc_dep_info: &Path) -> CargoResult<RustcDepInfo>
Ok(ret)
}
}
/// Identifies the hash function a checksum was produced with.
///
/// Some algorithms are here to ensure compatibility with possible rustc outputs.
/// The presence of an algorithm here is not a suggestion that it's fit for use.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum ChecksumAlgo {
/// SHA-256; spelled `sha256` in textual checksums.
Sha256,
/// BLAKE3; spelled `blake3` in textual checksums.
Blake3,
}
impl ChecksumAlgo {
    /// Length in bytes of a digest produced by this algorithm.
    fn hash_len(&self) -> usize {
        match *self {
            ChecksumAlgo::Sha256 => 32,
            ChecksumAlgo::Blake3 => 32,
        }
    }
}
impl FromStr for ChecksumAlgo {
type Err = InvalidChecksum;
fn from_str(s: &str) -> Result<Self, Self::Err> {
match s {
"sha256" => Ok(Self::Sha256),
"blake3" => Ok(Self::Blake3),
_ => Err(InvalidChecksum::InvalidChecksumAlgo),
}
}
}
impl Display for ChecksumAlgo {
    /// Writes the algorithm's lowercase name, the same spelling `from_str` accepts.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = match self {
            ChecksumAlgo::Sha256 => "sha256",
            ChecksumAlgo::Blake3 => "blake3",
        };
        f.write_str(name)
    }
}
/// A digest together with the algorithm that produced it.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub struct Checksum {
/// Algorithm that `value` was computed with.
algo: ChecksumAlgo,
/// If the algorithm uses fewer than 32 bytes, then the remaining bytes will be zero.
value: [u8; 32],
}
impl Checksum {
    /// Pairs an already-computed digest `value` with the algorithm `algo`.
    ///
    /// `value` is stored exactly as given.
    pub fn new(algo: ChecksumAlgo, value: [u8; 32]) -> Self {
        Self { algo, value }
    }

    /// Hashes everything readable from `contents` using `algo`.
    ///
    /// The input is consumed in fixed-size chunks until a read returns zero bytes.
    ///
    /// # Errors
    ///
    /// Returns the first I/O error reported by `contents`, except for
    /// [`io::ErrorKind::Interrupted`], which is retried.
    pub fn compute(algo: ChecksumAlgo, contents: impl Read) -> Result<Self, io::Error> {
        // Buffer size is the recommended amount to fully leverage SIMD instructions on AVX-512 as per
        // blake3 documentation.
        let mut buf = vec![0; 16 * 1024];
        let mut ret = Self {
            algo,
            value: [0; 32],
        };
        let len = algo.hash_len();
        // Only the leading `len` bytes are written; the rest stay zero.
        let value = &mut ret.value[..len];

        /// Feeds `contents` chunk by chunk into `hasher`, then writes the digest into `value`.
        fn digest<T>(
            mut hasher: T,
            mut update: impl FnMut(&mut T, &[u8]),
            finish: impl FnOnce(T, &mut [u8]),
            mut contents: impl Read,
            buf: &mut [u8],
            value: &mut [u8],
        ) -> Result<(), io::Error> {
            loop {
                let bytes_read = match contents.read(buf) {
                    Ok(0) => break,
                    Ok(n) => n,
                    // An interrupted read is not a failure of the file itself, so
                    // retry rather than reporting the file as unreadable.
                    Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
                    Err(e) => return Err(e),
                };
                update(&mut hasher, &buf[..bytes_read]);
            }
            finish(hasher, value);
            Ok(())
        }

        match algo {
            ChecksumAlgo::Sha256 => {
                digest(
                    Sha256::new(),
                    |h, b| {
                        h.update(b);
                    },
                    |mut h, out| out.copy_from_slice(&h.finish()),
                    contents,
                    &mut buf,
                    value,
                )?;
            }
            ChecksumAlgo::Blake3 => {
                digest(
                    blake3::Hasher::new(),
                    |h, b| {
                        h.update(b);
                    },
                    |h, out| out.copy_from_slice(h.finalize().as_bytes()),
                    contents,
                    &mut buf,
                    value,
                )?;
            }
        }
        Ok(ret)
    }

    /// The algorithm this checksum was computed with.
    pub fn algo(&self) -> ChecksumAlgo {
        self.algo
    }

    /// The raw digest bytes, including any trailing zero padding.
    pub fn value(&self) -> &[u8; 32] {
        &self.value
    }
}
impl FromStr for Checksum {
    type Err = InvalidChecksum;

    /// Parses text of the form `algorithm=hex_checksum`.
    ///
    /// # Errors
    ///
    /// - [`InvalidChecksum::InvalidChecksumAlgo`] if the part before `=` is not a known algorithm.
    /// - [`InvalidChecksum::InvalidFormat`] if there is no `=` separator.
    /// - [`InvalidChecksum::InvalidChecksum`] if the part after `=` does not hex-decode to
    ///   exactly the algorithm's digest length.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        // Split on the first `=` only, so any further `=...` text stays part of the
        // digest portion and is rejected by hex decoding instead of being ignored.
        let mut parts = s.splitn(2, '=');
        let Some(algo) = parts.next().map(ChecksumAlgo::from_str).transpose()? else {
            return Err(InvalidChecksum::InvalidFormat);
        };
        let Some(checksum) = parts.next() else {
            return Err(InvalidChecksum::InvalidFormat);
        };
        let mut value = [0; 32];
        if hex::decode_to_slice(checksum, &mut value[0..algo.hash_len()]).is_err() {
            return Err(InvalidChecksum::InvalidChecksum(algo));
        }
        Ok(Self { algo, value })
    }
}
impl Display for Checksum {
    /// Writes the checksum as `algorithm=hex_digest`, hex-encoding only the
    /// bytes that belong to the algorithm's digest length.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let digest_len = self.algo.hash_len();
        let hex_len = digest_len * 2;
        // Scratch space for the hex text; 64 bytes covers a full 32-byte digest.
        let mut hex_buf = [0; 64];
        if hex::encode_to_slice(&self.value[0..digest_len], &mut hex_buf[0..hex_len]).is_err() {
            return Err(fmt::Error);
        }
        let hex_text = from_utf8(&hex_buf[0..hex_len]).unwrap_or_default();
        write!(f, "{}={}", self.algo, hex_text)
    }
}
/// Reasons a textual checksum or algorithm name could not be parsed.
#[derive(Debug, thiserror::Error)]
pub enum InvalidChecksum {
/// The algorithm name is not one of the recognized spellings.
#[error("algorithm portion incorrect, expected `sha256`, or `blake3`")]
InvalidChecksumAlgo,
/// The checksum portion could not be hex-decoded into a digest of the
/// carried algorithm's length.
#[error("expected {} hexadecimal digits in checksum portion", .0.hash_len() * 2)]
InvalidChecksum(ChecksumAlgo),
/// The input has no checksum portion following the algorithm name.
#[error("expected a string with format \"algorithm=hex_checksum\"")]
InvalidFormat,
}

View File

@ -704,6 +704,9 @@ fn prepare_rustc(build_runner: &BuildRunner<'_, '_>, unit: &Unit) -> CargoResult
if build_runner.bcx.gctx.cli_unstable().binary_dep_depinfo {
base.arg("-Z").arg("binary-dep-depinfo");
}
if build_runner.bcx.gctx.cli_unstable().checksum_freshness {
base.arg("-Z").arg("checksum-hash-algorithm=blake3");
}
if is_primary {
base.env("CARGO_PRIMARY_PACKAGE", "1");

View File

@ -61,7 +61,7 @@ fn add_deps_for_unit(
build_runner.files().host_root(),
&dep_info_loc,
)? {
for path in paths.files {
for path in paths.files.into_keys() {
deps.insert(path);
}
} else {
@ -154,7 +154,12 @@ pub fn output_depinfo(build_runner: &mut BuildRunner<'_, '_>, unit: &Unit) -> Ca
// If nothing changed don't recreate the file which could alter
// its mtime
if let Ok(previous) = fingerprint::parse_rustc_dep_info(&output_path) {
if previous.files.iter().eq(deps.iter().map(Path::new)) {
if previous
.files
.iter()
.map(|(path, _checksum)| path)
.eq(deps.iter().map(Path::new))
{
continue;
}
}

View File

@ -760,6 +760,7 @@ unstable_cli_options!(
build_std: Option<Vec<String>> = ("Enable Cargo to compile the standard library itself as part of a crate graph compilation"),
build_std_features: Option<Vec<String>> = ("Configure features enabled for the standard library itself when building the standard library"),
cargo_lints: bool = ("Enable the `[lints.cargo]` table"),
checksum_freshness: bool = ("Use a checksum to determine if output is fresh rather than filesystem mtime"),
codegen_backend: bool = ("Enable the `codegen-backend` option in profiles in .cargo/config.toml file"),
config_include: bool = ("Enable the `include` key in config files"),
direct_minimal_versions: bool = ("Resolve minimal dependency versions instead of maximum (direct dependencies only)"),
@ -1289,6 +1290,7 @@ impl CliUnstable {
"rustdoc-map" => self.rustdoc_map = parse_empty(k, v)?,
"rustdoc-scrape-examples" => self.rustdoc_scrape_examples = parse_empty(k, v)?,
"separate-nightlies" => self.separate_nightlies = parse_empty(k, v)?,
"checksum-freshness" => self.checksum_freshness = parse_empty(k, v)?,
"skip-rustdoc-fingerprint" => self.skip_rustdoc_fingerprint = parse_empty(k, v)?,
"script" => self.script = parse_empty(k, v)?,
"target-applies-to-host" => self.target_applies_to_host = parse_empty(k, v)?,

View File

@ -529,6 +529,17 @@ the crate will be rebuilt). The primary use case is for building the compiler
itself, which has implicit dependencies on the standard library that would
otherwise be untracked for change-detection.
## checksum-freshness
* Tracking issue: [#14136](https://github.com/rust-lang/cargo/issues/14136)
The `-Z checksum-freshness` flag will replace the use of file mtimes in cargo's
fingerprints with a file checksum value. This is most useful on systems with a poor
mtime implementation, or in CI/CD. The checksum algorithm can change without notice
between cargo versions. Fingerprints are used by cargo to determine when a crate needs to be rebuilt.
For the time being, files ingested by build scripts will continue to use mtimes, even when `checksum-freshness`
is enabled. This is not intended as a long-term solution.
## panic-abort-tests
* Tracking Issue: [#67650](https://github.com/rust-lang/rust/issues/67650)
* Original Pull Request: [#7460](https://github.com/rust-lang/cargo/pull/7460)

View File

@ -1,4 +1,4 @@
<svg width="1230px" height="722px" xmlns="http://www.w3.org/2000/svg">
<svg width="1230px" height="740px" xmlns="http://www.w3.org/2000/svg">
<style>
.fg { fill: #AAAAAA }
.bg { background: #000000 }
@ -36,63 +36,65 @@
</tspan>
<tspan x="10px" y="190px"><tspan> -Z cargo-lints Enable the `[lints.cargo]` table</tspan>
</tspan>
<tspan x="10px" y="208px"><tspan> -Z codegen-backend Enable the `codegen-backend` option in profiles in .cargo/config.toml file</tspan>
<tspan x="10px" y="208px"><tspan> -Z checksum-freshness Use a checksum to determine if output is fresh rather than filesystem mtime</tspan>
</tspan>
<tspan x="10px" y="226px"><tspan> -Z config-include Enable the `include` key in config files</tspan>
<tspan x="10px" y="226px"><tspan> -Z codegen-backend Enable the `codegen-backend` option in profiles in .cargo/config.toml file</tspan>
</tspan>
<tspan x="10px" y="244px"><tspan> -Z direct-minimal-versions Resolve minimal dependency versions instead of maximum (direct dependencies only)</tspan>
<tspan x="10px" y="244px"><tspan> -Z config-include Enable the `include` key in config files</tspan>
</tspan>
<tspan x="10px" y="262px"><tspan> -Z doctest-xcompile Compile and run doctests for non-host target using runner config</tspan>
<tspan x="10px" y="262px"><tspan> -Z direct-minimal-versions Resolve minimal dependency versions instead of maximum (direct dependencies only)</tspan>
</tspan>
<tspan x="10px" y="280px"><tspan> -Z dual-proc-macros Build proc-macros for both the host and the target</tspan>
<tspan x="10px" y="280px"><tspan> -Z doctest-xcompile Compile and run doctests for non-host target using runner config</tspan>
</tspan>
<tspan x="10px" y="298px"><tspan> -Z gc Track cache usage and "garbage collect" unused files</tspan>
<tspan x="10px" y="298px"><tspan> -Z dual-proc-macros Build proc-macros for both the host and the target</tspan>
</tspan>
<tspan x="10px" y="316px"><tspan> -Z git Enable support for shallow git fetch operations</tspan>
<tspan x="10px" y="316px"><tspan> -Z gc Track cache usage and "garbage collect" unused files</tspan>
</tspan>
<tspan x="10px" y="334px"><tspan> -Z gitoxide Use gitoxide for the given git interactions, or all of them if no argument is given</tspan>
<tspan x="10px" y="334px"><tspan> -Z git Enable support for shallow git fetch operations</tspan>
</tspan>
<tspan x="10px" y="352px"><tspan> -Z host-config Enable the `[host]` section in the .cargo/config.toml file</tspan>
<tspan x="10px" y="352px"><tspan> -Z gitoxide Use gitoxide for the given git interactions, or all of them if no argument is given</tspan>
</tspan>
<tspan x="10px" y="370px"><tspan> -Z minimal-versions Resolve minimal dependency versions instead of maximum</tspan>
<tspan x="10px" y="370px"><tspan> -Z host-config Enable the `[host]` section in the .cargo/config.toml file</tspan>
</tspan>
<tspan x="10px" y="388px"><tspan> -Z msrv-policy Enable rust-version aware policy within cargo</tspan>
<tspan x="10px" y="388px"><tspan> -Z minimal-versions Resolve minimal dependency versions instead of maximum</tspan>
</tspan>
<tspan x="10px" y="406px"><tspan> -Z mtime-on-use Configure Cargo to update the mtime of used files</tspan>
<tspan x="10px" y="406px"><tspan> -Z msrv-policy Enable rust-version aware policy within cargo</tspan>
</tspan>
<tspan x="10px" y="424px"><tspan> -Z no-index-update Do not update the registry index even if the cache is outdated</tspan>
<tspan x="10px" y="424px"><tspan> -Z mtime-on-use Configure Cargo to update the mtime of used files</tspan>
</tspan>
<tspan x="10px" y="442px"><tspan> -Z package-workspace Handle intra-workspace dependencies when packaging</tspan>
<tspan x="10px" y="442px"><tspan> -Z no-index-update Do not update the registry index even if the cache is outdated</tspan>
</tspan>
<tspan x="10px" y="460px"><tspan> -Z panic-abort-tests Enable support to run tests with -Cpanic=abort</tspan>
<tspan x="10px" y="460px"><tspan> -Z package-workspace Handle intra-workspace dependencies when packaging</tspan>
</tspan>
<tspan x="10px" y="478px"><tspan> -Z profile-rustflags Enable the `rustflags` option in profiles in .cargo/config.toml file</tspan>
<tspan x="10px" y="478px"><tspan> -Z panic-abort-tests Enable support to run tests with -Cpanic=abort</tspan>
</tspan>
<tspan x="10px" y="496px"><tspan> -Z public-dependency Respect a dependency's `public` field in Cargo.toml to control public/private dependencies</tspan>
<tspan x="10px" y="496px"><tspan> -Z profile-rustflags Enable the `rustflags` option in profiles in .cargo/config.toml file</tspan>
</tspan>
<tspan x="10px" y="514px"><tspan> -Z publish-timeout Enable the `publish.timeout` key in .cargo/config.toml file</tspan>
<tspan x="10px" y="514px"><tspan> -Z public-dependency Respect a dependency's `public` field in Cargo.toml to control public/private dependencies</tspan>
</tspan>
<tspan x="10px" y="532px"><tspan> -Z rustdoc-map Allow passing external documentation mappings to rustdoc</tspan>
<tspan x="10px" y="532px"><tspan> -Z publish-timeout Enable the `publish.timeout` key in .cargo/config.toml file</tspan>
</tspan>
<tspan x="10px" y="550px"><tspan> -Z rustdoc-scrape-examples Allows Rustdoc to scrape code examples from reverse-dependencies</tspan>
<tspan x="10px" y="550px"><tspan> -Z rustdoc-map Allow passing external documentation mappings to rustdoc</tspan>
</tspan>
<tspan x="10px" y="568px"><tspan> -Z script Enable support for single-file, `.rs` packages</tspan>
<tspan x="10px" y="568px"><tspan> -Z rustdoc-scrape-examples Allows Rustdoc to scrape code examples from reverse-dependencies</tspan>
</tspan>
<tspan x="10px" y="586px"><tspan> -Z target-applies-to-host Enable the `target-applies-to-host` key in the .cargo/config.toml file</tspan>
<tspan x="10px" y="586px"><tspan> -Z script Enable support for single-file, `.rs` packages</tspan>
</tspan>
<tspan x="10px" y="604px"><tspan> -Z trim-paths Enable the `trim-paths` option in profiles</tspan>
<tspan x="10px" y="604px"><tspan> -Z target-applies-to-host Enable the `target-applies-to-host` key in the .cargo/config.toml file</tspan>
</tspan>
<tspan x="10px" y="622px"><tspan> -Z unstable-options Allow the usage of unstable options</tspan>
<tspan x="10px" y="622px"><tspan> -Z trim-paths Enable the `trim-paths` option in profiles</tspan>
</tspan>
<tspan x="10px" y="640px">
<tspan x="10px" y="640px"><tspan> -Z unstable-options Allow the usage of unstable options</tspan>
</tspan>
<tspan x="10px" y="658px"><tspan>Run with `cargo -Z [FLAG] [COMMAND]`</tspan>
<tspan x="10px" y="658px">
</tspan>
<tspan x="10px" y="676px">
<tspan x="10px" y="676px"><tspan>Run with `cargo -Z [FLAG] [COMMAND]`</tspan>
</tspan>
<tspan x="10px" y="694px"><tspan>See https://doc.rust-lang.org/nightly/cargo/reference/unstable.html for more information about these flags.</tspan>
<tspan x="10px" y="694px">
</tspan>
<tspan x="10px" y="712px">
<tspan x="10px" y="712px"><tspan>See https://doc.rust-lang.org/nightly/cargo/reference/unstable.html for more information about these flags.</tspan>
</tspan>
<tspan x="10px" y="730px">
</tspan>
</text>

Before

Width:  |  Height:  |  Size: 5.5 KiB

After

Width:  |  Height:  |  Size: 5.7 KiB

View File

@ -31,10 +31,15 @@ fn assert_deps(project: &Project, fingerprint: &str, test_cb: impl Fn(&Path, &[(
let dep_info = &mut &dep_info[..];
let deps = (0..read_usize(dep_info))
.map(|_| {
(
read_u8(dep_info),
str::from_utf8(read_bytes(dep_info)).unwrap(),
)
let ty = read_u8(dep_info);
let path = str::from_utf8(read_bytes(dep_info)).unwrap();
let checksum_present = read_bool(dep_info);
if checksum_present {
// Read out the checksum info without using it
let _file_len = read_u64(dep_info);
let _checksum = read_bytes(dep_info);
}
(ty, path)
})
.collect::<Vec<_>>();
test_cb(&info_path, &deps);
@ -52,6 +57,17 @@ fn assert_deps(project: &Project, fingerprint: &str, test_cb: impl Fn(&Path, &[(
ret
}
/// Reads one `u8` from `bytes` via `read_u8` and interprets it as a flag:
/// `0` is `false`, every other value is `true`.
fn read_bool(bytes: &mut &[u8]) -> bool {
    let flag = read_u8(bytes);
    !matches!(flag, 0)
}
/// Decodes a little-endian `u64` from the first eight bytes of `bytes` and
/// advances `bytes` past them.
///
/// # Panics
///
/// Panics if fewer than eight bytes remain.
fn read_u64(bytes: &mut &[u8]) -> u64 {
    // Split off the fixed-width prefix; the remainder becomes the new cursor.
    let (head, tail) = bytes.split_at(8);
    *bytes = tail;

    let mut raw = [0u8; 8];
    raw.copy_from_slice(head);
    u64::from_le_bytes(raw)
}
fn read_bytes<'a>(bytes: &mut &'a [u8]) -> &'a [u8] {
let n = read_usize(bytes);
let ret = &bytes[..n];

View File

@ -92,6 +92,7 @@ mod fetch;
mod fix;
mod fix_n_times;
mod freshness;
mod freshness_checksum;
mod future_incompat_report;
mod generate_lockfile;
mod git;