Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve resolver speed again #14665

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ libgit2-sys = "0.17.0"
libloading = "0.8.5"
memchr = "2.7.4"
miow = "0.6.0"
nohash-hasher = "0.2.0"
opener = "0.7.1"
openssl = "=0.10.57" # See rust-lang/cargo#13546 and openssl/openssl#23376 for pinning
openssl-sys = "=0.9.92" # See rust-lang/cargo#13546 and openssl/openssl#23376 for pinning
Expand Down Expand Up @@ -122,6 +123,7 @@ private_intra_doc_links = "allow"
[workspace.lints.clippy]
all = { level = "allow", priority = -1 }
dbg_macro = "warn"
derive_ord_xor_partial_ord = "warn"
disallowed_methods = "warn"
print_stderr = "warn"
print_stdout = "warn"
Expand Down Expand Up @@ -181,6 +183,7 @@ jobserver.workspace = true
lazycell.workspace = true
libgit2-sys.workspace = true
memchr.workspace = true
nohash-hasher.workspace = true
opener.workspace = true
os_info.workspace = true
pasetors.workspace = true
Expand Down
2 changes: 1 addition & 1 deletion crates/resolver-tests/src/sat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -395,7 +395,7 @@ impl SatResolver {
&mut solver,
var_for_is_packages_used
.iter()
.map(|(p, &v)| (p.as_activations_key(), v)),
.map(|(p, &v)| (p.activation_key(), v)),
);

for pkg in by_name.values().flatten() {
Expand Down
76 changes: 76 additions & 0 deletions src/cargo/core/activation_key.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
use std::collections::HashSet;
use std::hash::{Hash, Hasher};
use std::num::NonZeroU64;
use std::sync::{Mutex, OnceLock};

use crate::core::SourceId;
use crate::util::interning::InternedString;

static ACTIVATION_KEY_CACHE: OnceLock<Mutex<HashSet<&'static ActivationKeyInner>>> =
OnceLock::new();

type ActivationKeyInner = (InternedString, SourceId, SemverCompatibility);

/// The activated version of a crate is based on the name, source, and semver compatibility.
#[derive(Clone, Copy, Eq)]
pub struct ActivationKey {
inner: &'static ActivationKeyInner,
}

impl From<ActivationKeyInner> for ActivationKey {
fn from(inner: ActivationKeyInner) -> Self {
let mut cache = ACTIVATION_KEY_CACHE
.get_or_init(|| Default::default())
.lock()
.unwrap();
let inner = cache.get(&inner).cloned().unwrap_or_else(|| {
let inner = Box::leak(Box::new(inner));
cache.insert(inner);
inner
});
Self { inner }
}
}

impl ActivationKey {
/// This function is used for the `Eq` and `Hash` impls to implement a "no hash" hashable value.
/// This is possible since all `ActivationKey` are already interned in a `HashSet`.
fn key(&self) -> u64 {
std::ptr::from_ref(self.inner) as u64
}
}

impl PartialEq for ActivationKey {
fn eq(&self, other: &Self) -> bool {
self.key() == other.key()
}
}

impl nohash_hasher::IsEnabled for ActivationKey {}

impl Hash for ActivationKey {
fn hash<H: Hasher>(&self, state: &mut H) {
state.write_u64(self.key());
}
}

/// A type that represents when cargo treats two versions as compatible.
/// Versions `a` and `b` are compatible if their left-most nonzero digit is the same.
#[derive(Clone, Copy, Eq, PartialEq, Hash, Debug, PartialOrd, Ord)]
pub enum SemverCompatibility {
Major(NonZeroU64),
Minor(NonZeroU64),
Patch(u64),
}

impl From<&semver::Version> for SemverCompatibility {
fn from(ver: &semver::Version) -> Self {
if let Some(m) = NonZeroU64::new(ver.major) {
return SemverCompatibility::Major(m);
}
if let Some(m) = NonZeroU64::new(ver.minor) {
return SemverCompatibility::Minor(m);
}
SemverCompatibility::Patch(ver.patch)
}
}
2 changes: 2 additions & 0 deletions src/cargo/core/mod.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
pub use self::activation_key::ActivationKey;
pub use self::dependency::{Dependency, SerializedDependency};
pub use self::features::{CliUnstable, Edition, Feature, Features};
pub use self::manifest::{EitherManifest, VirtualManifest};
Expand All @@ -16,6 +17,7 @@ pub use self::workspace::{
};
pub use cargo_util_schemas::core::{GitReference, PackageIdSpec, SourceKind};

pub mod activation_key;
pub mod compiler;
pub mod dependency;
pub mod features;
Expand Down
26 changes: 25 additions & 1 deletion src/cargo/core/package_id.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use std::cmp::Ordering;
use std::collections::HashSet;
use std::fmt::{self, Formatter};
use std::hash;
Expand All @@ -10,6 +11,7 @@ use std::sync::OnceLock;
use serde::de;
use serde::ser;

use crate::core::ActivationKey;
use crate::core::PackageIdSpec;
use crate::core::SourceId;
use crate::util::interning::InternedString;
Expand All @@ -23,13 +25,31 @@ pub struct PackageId {
inner: &'static PackageIdInner,
}

#[derive(PartialOrd, Eq, Ord)]
struct PackageIdInner {
name: InternedString,
version: semver::Version,
source_id: SourceId,
// This field is used as a cache to improve the resolver speed,
// and is not included in the `Eq`, `Hash` and `Ord` impls.
activation_key: ActivationKey,
}

impl Ord for PackageIdInner {
fn cmp(&self, other: &Self) -> Ordering {
let self_key = (self.name, &self.version, self.source_id);
let other_key = (other.name, &other.version, other.source_id);
self_key.cmp(&other_key)
}
}

impl PartialOrd for PackageIdInner {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
Some(self.cmp(&other))
}
}

impl Eq for PackageIdInner {}

// Custom equality that uses full equality of SourceId, rather than its custom equality.
//
// The `build` part of the version is usually ignored (like a "comment").
Expand Down Expand Up @@ -135,6 +155,7 @@ impl PackageId {

pub fn new(name: InternedString, version: semver::Version, source_id: SourceId) -> PackageId {
let inner = PackageIdInner {
activation_key: (name, source_id, (&version).into()).into(),
name,
version,
source_id,
Expand All @@ -160,6 +181,9 @@ impl PackageId {
pub fn source_id(self) -> SourceId {
self.inner.source_id
}
pub fn activation_key(self) -> ActivationKey {
self.inner.activation_key
}

pub fn with_source_id(self, source: SourceId) -> PackageId {
PackageId::new(self.inner.name, self.inner.version.clone(), source)
Expand Down
67 changes: 20 additions & 47 deletions src/cargo/core/resolver/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,11 @@ use super::dep_cache::RegistryQueryer;
use super::errors::ActivateResult;
use super::types::{ConflictMap, ConflictReason, FeaturesSet, ResolveOpts};
use super::RequestedFeatures;
use crate::core::{Dependency, PackageId, SourceId, Summary};
use crate::core::{ActivationKey, Dependency, PackageId, Summary};
use crate::util::interning::InternedString;
use crate::util::Graph;
use anyhow::format_err;
use std::collections::HashMap;
use std::num::NonZeroU64;
use tracing::debug;

// A `Context` is basically a bunch of local resolution information which is
Expand All @@ -22,56 +21,26 @@ pub struct ResolverContext {
pub resolve_features: im_rc::HashMap<PackageId, FeaturesSet, rustc_hash::FxBuildHasher>,
/// get the package that will be linking to a native library by its links attribute
pub links: im_rc::HashMap<InternedString, PackageId, rustc_hash::FxBuildHasher>,

/// a way to look up for a package in activations what packages required it
/// and all of the exact deps that it fulfilled.
pub parents: Graph<PackageId, im_rc::HashSet<Dependency, rustc_hash::FxBuildHasher>>,
}

/// By storing activation keys in a `HashMap` we ensure that there is only one
/// semver compatible version of each crate.
type Activations = im_rc::HashMap<
ActivationKey,
(Summary, ContextAge),
nohash_hasher::BuildNoHashHasher<ActivationKey>,
Copy link
Contributor

@Eh2406 Eh2406 Oct 11, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note to self: This PR does a number of different things, and I'm trying to understand which ones are critical to the performance win. I tried switching this one nohash_hasher::BuildNoHashHasher back to rustc_hash::FxBuildHasher, and the Solana only benchmark I was running yesterday runs in 162.53min (or 164.63min there seems to be a surprising amount of run to run variability).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So it seems that rustc-hash is already very optimized when hashing a single u64:
https://github.com/rust-lang/rustc-hash/blob/master/src/lib.rs#L99.

In this case the performance increase may only be due to fact that we don't hash the full ActivationKeyInner but only its address.

>;

/// When backtracking it can be useful to know how far back to go.
/// The `ContextAge` of a `Context` is a monotonically increasing counter of the number
/// of decisions made to get to this state.
/// Several structures store the `ContextAge` when it was added,
/// to be used in `find_candidate` for backtracking.
pub type ContextAge = usize;

/// Find the activated version of a crate based on the name, source, and semver compatibility.
/// By storing this in a hash map we ensure that there is only one
/// semver compatible version of each crate.
/// This all so stores the `ContextAge`.
pub type ActivationsKey = (InternedString, SourceId, SemverCompatibility);

pub type Activations =
im_rc::HashMap<ActivationsKey, (Summary, ContextAge), rustc_hash::FxBuildHasher>;

/// A type that represents when cargo treats two Versions as compatible.
/// Versions `a` and `b` are compatible if their left-most nonzero digit is the
/// same.
#[derive(Clone, Copy, Eq, PartialEq, Hash, Debug, PartialOrd, Ord)]
pub enum SemverCompatibility {
Major(NonZeroU64),
Minor(NonZeroU64),
Patch(u64),
}

impl From<&semver::Version> for SemverCompatibility {
fn from(ver: &semver::Version) -> Self {
if let Some(m) = NonZeroU64::new(ver.major) {
return SemverCompatibility::Major(m);
}
if let Some(m) = NonZeroU64::new(ver.minor) {
return SemverCompatibility::Minor(m);
}
SemverCompatibility::Patch(ver.patch)
}
}

impl PackageId {
pub fn as_activations_key(self) -> ActivationsKey {
(self.name(), self.source_id(), self.version().into())
}
}

impl ResolverContext {
pub fn new() -> ResolverContext {
ResolverContext {
Expand All @@ -98,7 +67,7 @@ impl ResolverContext {
) -> ActivateResult<bool> {
let id = summary.package_id();
let age: ContextAge = self.age;
match self.activations.entry(id.as_activations_key()) {
match self.activations.entry(id.activation_key()) {
im_rc::hashmap::Entry::Occupied(o) => {
debug_assert_eq!(
&o.get().0,
Expand Down Expand Up @@ -137,7 +106,7 @@ impl ResolverContext {
// versions came from a `[patch]` source.
if let Some((_, dep)) = parent {
if dep.source_id() != id.source_id() {
let key = (id.name(), dep.source_id(), id.version().into());
let key = (id.name(), dep.source_id(), id.version().into()).into();
let prev = self.activations.insert(key, (summary.clone(), age));
if let Some((previous_summary, _)) = prev {
return Err(
Expand Down Expand Up @@ -181,9 +150,13 @@ impl ResolverContext {

/// If the package is active returns the `ContextAge` when it was added
pub fn is_active(&self, id: PackageId) -> Option<ContextAge> {
self.activations
.get(&id.as_activations_key())
.and_then(|(s, l)| if s.package_id() == id { Some(*l) } else { None })
let (summary, age) = self.activations.get(&id.activation_key())?;

if summary.package_id() == id {
Some(*age)
} else {
None
}
}

/// Checks whether all of `parent` and the keys of `conflicting activations`
Expand All @@ -199,8 +172,8 @@ impl ResolverContext {
max = std::cmp::max(max, self.is_active(parent)?);
}

for id in conflicting_activations.keys() {
max = std::cmp::max(max, self.is_active(*id)?);
for &id in conflicting_activations.keys() {
max = std::cmp::max(max, self.is_active(id)?);
}
Some(max)
}
Expand Down
5 changes: 2 additions & 3 deletions src/cargo/core/resolver/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -198,8 +198,7 @@ fn activate_deps_loop(
let mut backtrack_stack = Vec::new();
let mut remaining_deps = RemainingDeps::new();

// `past_conflicting_activations` is a cache of the reasons for each time we
// backtrack.
// `past_conflicting_activations` is a cache of the reasons for each time we backtrack.
let mut past_conflicting_activations = conflict_cache::ConflictCache::new();

// Activate all the initial summaries to kick off some work.
Expand Down Expand Up @@ -775,7 +774,7 @@ impl RemainingCandidates {
//
// Here we throw out our candidate if it's *compatible*, yet not
// equal, to all previously activated versions.
if let Some((a, _)) = cx.activations.get(&b_id.as_activations_key()) {
if let Some((a, _)) = cx.activations.get(&b_id.activation_key()) {
if *a != b {
conflicting_prev_active
.entry(a.package_id())
Expand Down