Skip to content

Commit

Permalink
Prune unreachable packages from lockfile (astral-sh#6959)
Browse files Browse the repository at this point in the history
In transformers, we have:

* `tensorflow-text`: `tensorflow-macos; python_full_version >= '3.13'
and platform_machine == 'arm64' and platform_system == 'Darwin'`
* `tensorflow-macos`: `tensorflow-cpu-aws; (python_full_version < '3.10'
and platform_machine == 'aarch64' and platform_system == 'Linux') or
(python_full_version >= '3.13' and platform_machine == 'aarch64' and
platform_system == 'Linux') or (python_full_version >= '3.13' and
platform_machine == 'arm64' and platform_system == 'Linux')`
* `tensorflow-macos`: `tensorflow-intel; python_full_version >= '3.13'
and platform_system == 'Windows'`

This means that `tensorflow-cpu-aws` and `tensorflow-intel` can never be
installed, and we can drop them from the lockfile.
  • Loading branch information
konstin authored Sep 4, 2024
1 parent 3d75df6 commit b9f2bd1
Show file tree
Hide file tree
Showing 3 changed files with 118 additions and 79 deletions.
20 changes: 20 additions & 0 deletions crates/uv-resolver/src/lock/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ use itertools::Itertools;
use petgraph::visit::EdgeRef;
use rustc_hash::{FxHashMap, FxHashSet};
use toml_edit::{value, Array, ArrayOfTables, InlineTable, Item, Table, Value};
use tracing::debug;
use url::Url;

use cache_key::RepositoryUrl;
Expand Down Expand Up @@ -94,6 +95,10 @@ impl Lock {
if !dist.is_base() {
continue;
}
if graph.reachability[&node_index].is_false() {
debug!("Removing unreachable package: `{}`", dist.package_id());
continue;
}
let fork_markers = graph
.fork_markers(dist.name(), &dist.version, dist.dist.version_or_url().url())
.cloned()
Expand All @@ -108,6 +113,10 @@ impl Lock {
else {
continue;
};
// Prune edges leading to unreachable nodes.
if graph.reachability[&edge.target()].is_false() {
continue;
}
let marker = edge.weight().clone();
package.add_dependency(&requires_python, dependency_dist, marker, root)?;
}
Expand All @@ -126,6 +135,9 @@ impl Lock {
let ResolutionGraphNode::Dist(dist) = &graph.petgraph[node_index] else {
continue;
};
if graph.reachability[&node_index].is_false() {
continue;
}
if let Some(extra) = dist.extra.as_ref() {
let id = PackageId::from_annotated_dist(dist, root)?;
let Some(package) = packages.get_mut(&id) else {
Expand All @@ -140,6 +152,10 @@ impl Lock {
else {
continue;
};
// Prune edges leading to unreachable nodes.
if graph.reachability[&edge.target()].is_false() {
continue;
}
let marker = edge.weight().clone();
package.add_optional_dependency(
&requires_python,
Expand All @@ -164,6 +180,10 @@ impl Lock {
else {
continue;
};
// Prune edges leading to unreachable nodes.
if graph.reachability[&edge.target()].is_false() {
continue;
}
let marker = edge.weight().clone();
package.add_dev_dependency(
&requires_python,
Expand Down
105 changes: 98 additions & 7 deletions crates/uv-resolver/src/resolution/graph.rs
Original file line number Diff line number Diff line change
@@ -1,17 +1,20 @@
use indexmap::IndexSet;
use petgraph::{
graph::{Graph, NodeIndex},
Directed, Direction,
};
use rustc_hash::{FxBuildHasher, FxHashMap, FxHashSet};

use distribution_types::{
Dist, DistributionMetadata, Name, ResolutionDiagnostic, ResolvedDist, VersionId,
VersionOrUrlRef,
};
use indexmap::IndexSet;
use pep440_rs::{Version, VersionSpecifier};
use pep508_rs::{MarkerEnvironment, MarkerTree, MarkerTreeKind, VerbatimUrl};
use petgraph::prelude::EdgeRef;
use petgraph::{
graph::{Graph, NodeIndex},
Directed, Direction,
};
use pypi_types::{HashDigest, ParsedUrlError, Requirement, VerbatimParsedUrl, Yanked};
use rustc_hash::{FxBuildHasher, FxHashMap, FxHashSet};
use std::collections::hash_map::Entry;
use std::collections::HashMap;
use std::fmt::{Display, Formatter};
use uv_configuration::{Constraints, Overrides};
use uv_distribution::Metadata;
use uv_git::GitResolver;
Expand Down Expand Up @@ -54,6 +57,8 @@ pub struct ResolutionGraph {
/// If there are multiple options for a package, track which fork they belong to so we
/// can write that to the lockfile and later get the correct preference per fork back.
pub(crate) package_markers: FxHashMap<PackageName, MarkersForDistribution>,
/// The markers under which a package is reachable in the dependency tree.
pub(crate) reachability: FxHashMap<NodeIndex, MarkerTree>,
}

#[derive(Debug)]
Expand All @@ -62,6 +67,15 @@ pub(crate) enum ResolutionGraphNode {
Dist(AnnotatedDist),
}

impl Display for ResolutionGraphNode {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
match self {
ResolutionGraphNode::Root => f.write_str("root"),
ResolutionGraphNode::Dist(dist) => Display::fmt(dist, f),
}
}
}

#[derive(Eq, PartialEq, Hash)]
struct PackageRef<'a> {
package_name: &'a PackageName,
Expand Down Expand Up @@ -197,6 +211,8 @@ impl ResolutionGraph {
.collect()
};

let reachability = Self::reachability(&petgraph, &fork_markers);

if matches!(resolution_strategy, ResolutionStrategy::Lowest) {
report_missing_lower_bounds(&petgraph, &mut diagnostics);
}
Expand All @@ -211,9 +227,84 @@ impl ResolutionGraph {
overrides: overrides.clone(),
options,
fork_markers,
reachability,
})
}

/// Determine the markers under which a package is reachable in the dependency tree.
///
/// The algorithm is a variant of Dijkstra's algorithm for not totally ordered distances:
/// Whenever we find a shorter distance to a node (a marker that is not a subset of the existing
/// marker), we re-queue the node and update all its children. This implicitly handles cycles,
/// whenever we re-reach a node through a cycle the marker we have is a more
/// specific marker/longer path, so we don't update the node and don't re-queue it.
fn reachability(
petgraph: &Graph<ResolutionGraphNode, MarkerTree>,
fork_markers: &[MarkerTree],
) -> HashMap<NodeIndex, MarkerTree, FxBuildHasher> {
// Note that we build including the virtual packages due to how we propagate markers through
// the graph, even though we then only read the markers for base packages.
let mut reachability = FxHashMap::default();

// Collect the root nodes.
//
// Besides the actual virtual root node, virtual dev dependencies packages are also root
// nodes since the edges don't cover dev dependencies.
let mut queue: Vec<_> = petgraph
.node_indices()
.filter(|node_index| {
petgraph
.edges_directed(*node_index, Direction::Incoming)
.next()
.is_none()
})
.collect();

// The root nodes are always applicable, unless the user has restricted resolver
// environments with `tool.uv.environments`.
let root_markers: MarkerTree = if fork_markers.is_empty() {
MarkerTree::TRUE
} else {
fork_markers
.iter()
.fold(MarkerTree::FALSE, |mut acc, marker| {
acc.or(marker.clone());
acc
})
};
for root_index in &queue {
reachability.insert(*root_index, root_markers.clone());
}

// Propagate all markers through the graph, so that the eventual marker for each node is the
// union of the markers of each path we can reach the node by.
while let Some(parent_index) = queue.pop() {
let marker = reachability[&parent_index].clone();
for child_edge in petgraph.edges_directed(parent_index, Direction::Outgoing) {
// The marker for all paths to the child through the parent.
let mut child_marker = child_edge.weight().clone();
child_marker.and(marker.clone());
match reachability.entry(child_edge.target()) {
Entry::Occupied(mut existing) => {
// If the marker is a subset of the existing marker (A ⊆ B exactly if
// A ∪ B = A), updating the child wouldn't change child's marker.
child_marker.or(existing.get().clone());
if &child_marker != existing.get() {
existing.insert(child_marker);
queue.push(child_edge.target());
}
}
Entry::Vacant(vacant) => {
vacant.insert(child_marker.clone());
queue.push(child_edge.target());
}
}
}
}

reachability
}

fn add_edge(
petgraph: &mut Graph<ResolutionGraphNode, MarkerTree>,
inverse: &mut FxHashMap<PackageRef<'_>, NodeIndex>,
Expand Down
Loading

0 comments on commit b9f2bd1

Please sign in to comment.