Skip to content

Commit

Permalink
Auto merge of rust-lang#14137 - Xaeroxe:checksum-freshness, r=weihanglo
Browse files Browse the repository at this point in the history
initial version of checksum based freshness

Implementation for rust-lang#14136 and resolves rust-lang#6529

This PR implements the use of checksums in cargo fingerprints as an alternative to using mtimes. This is most useful on systems with poor mtime implementations.

This has a dependency on rust-lang/rust#126930. It's expected this will increase the time it takes to declare a build to be fresh. Still, this loss in performance may be preferable to the issues the ecosystem has had with the use of mtimes for determining freshness.
  • Loading branch information
bors committed Oct 8, 2024
2 parents ac39e69 + cf893c1 commit 15fbd2f
Show file tree
Hide file tree
Showing 15 changed files with 3,668 additions and 172 deletions.
33 changes: 33 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ anstream = "0.6.15"
anstyle = "1.0.8"
anyhow = "1.0.86"
base64 = "0.22.1"
blake3 = "1.5.2"
bytesize = "1.3"
cargo = { path = "" }
cargo-credential = { version = "0.4.2", path = "credential/cargo-credential" }
Expand Down Expand Up @@ -148,6 +149,7 @@ anstream.workspace = true
anstyle.workspace = true
anyhow.workspace = true
base64.workspace = true
blake3.workspace = true
bytesize.workspace = true
cargo-credential.workspace = true
cargo-platform.workspace = true
Expand Down Expand Up @@ -197,6 +199,7 @@ shell-escape.workspace = true
supports-hyperlinks.workspace = true
tar.workspace = true
tempfile.workspace = true
thiserror.workspace = true
time.workspace = true
toml.workspace = true
toml_edit.workspace = true
Expand Down
79 changes: 79 additions & 0 deletions crates/cargo-test-support/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1582,3 +1582,82 @@ where
let thread = std::thread::spawn(|| f());
thread_wait_timeout(n, thread)
}

/// Helper for testing dep-info files in the fingerprint dir.
///
/// Globs `fingerprint` within `project`, discards matches that have a file
/// extension (such as the `.json` entries), and requires exactly one file to
/// remain. That file is decoded and handed to `test_cb` together with its
/// path, as a list of `(kind, path)` pairs.
///
/// The bytes are decoded as follows:
///
/// * a little-endian `u32` entry count, then for each entry:
///   * a `u8` kind,
///   * a path: little-endian `u32` length followed by that many UTF-8 bytes,
///   * a `u8` flag (non-zero means checksum data follows),
///   * if the flag is set: a little-endian `u64` file length and a
///     length-prefixed checksum. Both are read and discarded.
///
/// # Panics
///
/// Panics if zero or more than one file matches, if a glob entry is an error,
/// if the file cannot be read, if a path is not valid UTF-8, or if the data
/// ends before a field has been fully read.
#[track_caller]
pub fn assert_deps(project: &Project, fingerprint: &str, test_cb: impl Fn(&Path, &[(u8, &str)])) {
    let mut candidates = project
        .glob(fingerprint)
        .map(|entry| entry.expect("unwrap glob result"))
        // Filter out `.json` entries.
        .filter(|path| path.extension().is_none());

    // Panic from the function body rather than from a closure: closures do not
    // inherit `#[track_caller]`, so a panic raised inside one would be
    // attributed to this helper instead of the test that called it.
    let Some(info_path) = candidates.next() else {
        panic!("expected 1 dep-info file at {}, found 0", fingerprint);
    };
    assert!(candidates.next().is_none(), "expected only 1 dep-info file");

    let raw = fs::read(&info_path).unwrap();
    // `cursor` is advanced past each field as it is decoded; the decoded paths
    // borrow from `raw`, not from the cursor itself.
    let mut cursor: &[u8] = &raw;
    let entry_count = read_usize(&mut cursor);
    let mut deps = Vec::with_capacity(entry_count);
    for _ in 0..entry_count {
        let kind = read_u8(&mut cursor);
        let path = std::str::from_utf8(read_bytes(&mut cursor)).unwrap();
        if read_bool(&mut cursor) {
            // Read out the checksum info without using it
            let _file_len = read_u64(&mut cursor);
            let _checksum = read_bytes(&mut cursor);
        }
        deps.push((kind, path));
    }
    test_cb(&info_path, &deps);

    /// Splits the first `n` bytes off the front of `bytes`, advancing it past them.
    fn take<'a>(bytes: &mut &'a [u8], n: usize) -> &'a [u8] {
        let data: &'a [u8] = *bytes;
        let head = &data[..n];
        *bytes = &data[n..];
        head
    }

    /// Reads a little-endian `u32` and widens it to `usize`.
    fn read_usize(bytes: &mut &[u8]) -> usize {
        u32::from_le_bytes(take(bytes, 4).try_into().unwrap()) as usize
    }

    /// Reads a single byte.
    fn read_u8(bytes: &mut &[u8]) -> u8 {
        let data = *bytes;
        let first = data[0];
        *bytes = &data[1..];
        first
    }

    /// Reads a single byte and treats any non-zero value as `true`.
    fn read_bool(bytes: &mut &[u8]) -> bool {
        read_u8(bytes) != 0
    }

    /// Reads a little-endian `u64`.
    fn read_u64(bytes: &mut &[u8]) -> u64 {
        u64::from_le_bytes(take(bytes, 8).try_into().unwrap())
    }

    /// Reads a length-prefixed byte string (length encoded as in `read_usize`).
    fn read_bytes<'a>(bytes: &mut &'a [u8]) -> &'a [u8] {
        let len = read_usize(bytes);
        take(bytes, len)
    }
}

/// Asserts that the dep-info file matched by `fingerprint` contains exactly
/// one entry for each item in `expected`.
///
/// Each expected item is a `(kind, path_pattern)` pair. The path is compiled
/// as a glob pattern, and an entry counts as a match when its kind is equal
/// and its path matches that pattern.
///
/// # Panics
///
/// Panics if an expected path is not a valid glob pattern, or if the number of
/// matching entries for any expected item is not exactly 1. Panics raised by
/// [`assert_deps`] while locating and reading the file also propagate.
// `#[track_caller]` mirrors `assert_deps`, so that the caller location it
// tracks is the test's call site rather than this wrapper.
#[track_caller]
pub fn assert_deps_contains(project: &Project, fingerprint: &str, expected: &[(u8, &str)]) {
    assert_deps(project, fingerprint, |info_path, entries| {
        for (e_kind, e_path) in expected {
            let pattern = glob::Pattern::new(e_path).unwrap();
            let count = entries
                .iter()
                .filter(|(kind, path)| kind == e_kind && pattern.matches(path))
                .count();
            if count != 1 {
                panic!(
                    "Expected 1 match of {} {} in {:?}, got {}:\n{:#?}",
                    e_kind, e_path, info_path, count, entries
                );
            }
        }
    })
}
1 change: 1 addition & 0 deletions deny.toml
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ allow = [
"MIT",
"MIT-0",
"Apache-2.0",
"BSD-2-Clause",
"BSD-3-Clause",
"MPL-2.0",
"Unicode-DFS-2016",
Expand Down
5 changes: 4 additions & 1 deletion src/cargo/core/compiler/build_runner/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ use jobserver::Client;

use super::build_plan::BuildPlan;
use super::custom_build::{self, BuildDeps, BuildScriptOutputs, BuildScripts};
use super::fingerprint::Fingerprint;
use super::fingerprint::{Checksum, Fingerprint};
use super::job_queue::JobQueue;
use super::layout::Layout;
use super::lto::Lto;
Expand Down Expand Up @@ -50,6 +50,8 @@ pub struct BuildRunner<'a, 'gctx> {
pub fingerprints: HashMap<Unit, Arc<Fingerprint>>,
/// Cache of file mtimes to reduce filesystem hits.
pub mtime_cache: HashMap<PathBuf, FileTime>,
/// Cache of file checksums to reduce filesystem reads.
pub checksum_cache: HashMap<PathBuf, Checksum>,
/// A set used to track which units have been compiled.
/// A unit may appear in the job graph multiple times as a dependency of
/// multiple packages, but it only needs to run once.
Expand Down Expand Up @@ -113,6 +115,7 @@ impl<'a, 'gctx> BuildRunner<'a, 'gctx> {
build_script_outputs: Arc::new(Mutex::new(BuildScriptOutputs::default())),
fingerprints: HashMap::new(),
mtime_cache: HashMap::new(),
checksum_cache: HashMap::new(),
compiled: HashSet::new(),
build_scripts: HashMap::new(),
build_explicit_deps: HashMap::new(),
Expand Down
62 changes: 62 additions & 0 deletions src/cargo/core/compiler/fingerprint/dirty_reason.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@ pub enum DirtyReason {
old: String,
new: String,
},
ChecksumUseChanged {
old: bool,
},
DepInfoOutputChanged {
old: PathBuf,
new: PathBuf,
Expand Down Expand Up @@ -183,6 +186,16 @@ impl DirtyReason {
DirtyReason::PrecalculatedComponentsChanged { .. } => {
s.dirty_because(unit, "the precalculated components changed")
}
DirtyReason::ChecksumUseChanged { old } => {
if *old {
s.dirty_because(
unit,
"the prior compilation used checksum freshness and this one does not",
)
} else {
s.dirty_because(unit, "checksum freshness requested, prior compilation did not use checksum freshness")
}
}
DirtyReason::DepInfoOutputChanged { .. } => {
s.dirty_because(unit, "the dependency info output changed")
}
Expand Down Expand Up @@ -222,6 +235,20 @@ impl DirtyReason {
format_args!("the file `{}` is missing", file.display()),
)
}
StaleItem::UnableToReadFile(file) => {
let file = file.strip_prefix(root).unwrap_or(&file);
s.dirty_because(
unit,
format_args!("the file `{}` could not be read", file.display()),
)
}
StaleItem::FailedToReadMetadata(file) => {
let file = file.strip_prefix(root).unwrap_or(&file);
s.dirty_because(
unit,
format_args!("couldn't read metadata for file `{}`", file.display()),
)
}
StaleItem::ChangedFile {
stale,
stale_mtime,
Expand All @@ -235,6 +262,41 @@ impl DirtyReason {
format_args!("the file `{}` has changed ({after})", file.display()),
)
}
StaleItem::ChangedChecksum {
source,
stored_checksum,
new_checksum,
} => {
let file = source.strip_prefix(root).unwrap_or(&source);
s.dirty_because(
unit,
format_args!(
"the file `{}` has changed (checksum didn't match, {stored_checksum} != {new_checksum})",
file.display(),
),
)
}
StaleItem::FileSizeChanged {
path,
old_size,
new_size,
} => {
let file = path.strip_prefix(root).unwrap_or(&path);
s.dirty_because(
unit,
format_args!(
"file size changed ({old_size} != {new_size}) for `{}`",
file.display()
),
)
}
StaleItem::MissingChecksum(path) => {
let file = path.strip_prefix(root).unwrap_or(&path);
s.dirty_because(
unit,
format_args!("the checksum for file `{}` is missing", file.display()),
)
}
StaleItem::ChangedEnv { var, .. } => s.dirty_because(
unit,
format_args!("the environment variable {var} changed"),
Expand Down
Loading

0 comments on commit 15fbd2f

Please sign in to comment.