From b9f2bd155aed2cf0688b3ccb5c5c07dd07dabae5 Mon Sep 17 00:00:00 2001 From: konsti Date: Wed, 4 Sep 2024 10:57:42 +0200 Subject: [PATCH] Prune unreachable packages from lockfile (#6959) In transformers, we have: * `tensorflow-text`: `tensorflow-macos; python_full_version >= '3.13' and platform_machine == 'arm64' and platform_system == 'Darwin'` * `tensorflow-macos`: `tensorflow-cpu-aws; (python_full_version < '3.10' and platform_machine == 'aarch64' and platform_system == 'Linux') or (python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_system == 'Linux') or (python_full_version >= '3.13' and platform_machine == 'arm64' and platform_system == 'Linux')` * `tensorflow-macos`: `tensorflow-intel; python_full_version >= '3.13' and platform_system == 'Windows'` This means that `tensorflow-cpu-aws` and `tensorflow-intel` can never be installed, and we can drop them from the lockfile. --- crates/uv-resolver/src/lock/mod.rs | 20 ++++ crates/uv-resolver/src/resolution/graph.rs | 105 ++++++++++++++++-- .../ecosystem__transformers-lock-file.snap | 72 ------------ 3 files changed, 118 insertions(+), 79 deletions(-) diff --git a/crates/uv-resolver/src/lock/mod.rs b/crates/uv-resolver/src/lock/mod.rs index dab7eb54c026..5da3864856e4 100644 --- a/crates/uv-resolver/src/lock/mod.rs +++ b/crates/uv-resolver/src/lock/mod.rs @@ -11,6 +11,7 @@ use itertools::Itertools; use petgraph::visit::EdgeRef; use rustc_hash::{FxHashMap, FxHashSet}; use toml_edit::{value, Array, ArrayOfTables, InlineTable, Item, Table, Value}; +use tracing::debug; use url::Url; use cache_key::RepositoryUrl; @@ -94,6 +95,10 @@ impl Lock { if !dist.is_base() { continue; } + if graph.reachability[&node_index].is_false() { + debug!("Removing unreachable package: `{}`", dist.package_id()); + continue; + } let fork_markers = graph .fork_markers(dist.name(), &dist.version, dist.dist.version_or_url().url()) .cloned() @@ -108,6 +113,10 @@ impl Lock { else { continue; }; + // Prune edges leading to 
unreachable nodes. + if graph.reachability[&edge.target()].is_false() { + continue; + } let marker = edge.weight().clone(); package.add_dependency(&requires_python, dependency_dist, marker, root)?; } @@ -126,6 +135,9 @@ impl Lock { let ResolutionGraphNode::Dist(dist) = &graph.petgraph[node_index] else { continue; }; + if graph.reachability[&node_index].is_false() { + continue; + } if let Some(extra) = dist.extra.as_ref() { let id = PackageId::from_annotated_dist(dist, root)?; let Some(package) = packages.get_mut(&id) else { @@ -140,6 +152,10 @@ impl Lock { else { continue; }; + // Prune edges leading to unreachable nodes. + if graph.reachability[&edge.target()].is_false() { + continue; + } let marker = edge.weight().clone(); package.add_optional_dependency( &requires_python, @@ -164,6 +180,10 @@ impl Lock { else { continue; }; + // Prune edges leading to unreachable nodes. + if graph.reachability[&edge.target()].is_false() { + continue; + } let marker = edge.weight().clone(); package.add_dev_dependency( &requires_python, diff --git a/crates/uv-resolver/src/resolution/graph.rs b/crates/uv-resolver/src/resolution/graph.rs index 75da6a43bce7..97d637a6c43a 100644 --- a/crates/uv-resolver/src/resolution/graph.rs +++ b/crates/uv-resolver/src/resolution/graph.rs @@ -1,17 +1,20 @@ -use indexmap::IndexSet; -use petgraph::{ - graph::{Graph, NodeIndex}, - Directed, Direction, -}; -use rustc_hash::{FxBuildHasher, FxHashMap, FxHashSet}; - use distribution_types::{ Dist, DistributionMetadata, Name, ResolutionDiagnostic, ResolvedDist, VersionId, VersionOrUrlRef, }; +use indexmap::IndexSet; use pep440_rs::{Version, VersionSpecifier}; use pep508_rs::{MarkerEnvironment, MarkerTree, MarkerTreeKind, VerbatimUrl}; +use petgraph::prelude::EdgeRef; +use petgraph::{ + graph::{Graph, NodeIndex}, + Directed, Direction, +}; use pypi_types::{HashDigest, ParsedUrlError, Requirement, VerbatimParsedUrl, Yanked}; +use rustc_hash::{FxBuildHasher, FxHashMap, FxHashSet}; +use 
std::collections::hash_map::Entry; +use std::collections::HashMap; +use std::fmt::{Display, Formatter}; use uv_configuration::{Constraints, Overrides}; use uv_distribution::Metadata; use uv_git::GitResolver; @@ -54,6 +57,8 @@ pub struct ResolutionGraph { /// If there are multiple options for a package, track which fork they belong to so we /// can write that to the lockfile and later get the correct preference per fork back. pub(crate) package_markers: FxHashMap, + /// The markers under which a package is reachable in the dependency tree. + pub(crate) reachability: FxHashMap, } #[derive(Debug)] @@ -62,6 +67,15 @@ pub(crate) enum ResolutionGraphNode { Dist(AnnotatedDist), } +impl Display for ResolutionGraphNode { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match self { + ResolutionGraphNode::Root => f.write_str("root"), + ResolutionGraphNode::Dist(dist) => Display::fmt(dist, f), + } + } +} + #[derive(Eq, PartialEq, Hash)] struct PackageRef<'a> { package_name: &'a PackageName, @@ -197,6 +211,8 @@ impl ResolutionGraph { .collect() }; + let reachability = Self::reachability(&petgraph, &fork_markers); + if matches!(resolution_strategy, ResolutionStrategy::Lowest) { report_missing_lower_bounds(&petgraph, &mut diagnostics); } @@ -211,9 +227,84 @@ impl ResolutionGraph { overrides: overrides.clone(), options, fork_markers, + reachability, }) } + /// Determine the markers under which a package is reachable in the dependency tree. + /// + /// The algorithm is a variant of Dijkstra's algorithm for not totally ordered distances: + /// Whenever we find a shorter distance to a node (a marker that is not a subset of the existing + /// marker), we re-queue the node and update all its children. This implicitly handles cycles, + /// whenever we re-reach a node through a cycle the marker we have is a more + /// specific marker/longer path, so we don't update the node and don't re-queue it. 
+ fn reachability( + petgraph: &Graph, + fork_markers: &[MarkerTree], + ) -> HashMap { + // Note that we build including the virtual packages due to how we propagate markers through + // the graph, even though we then only read the markers for base packages. + let mut reachability = FxHashMap::default(); + + // Collect the root nodes. + // + // Besides the actual virtual root node, virtual dev dependencies packages are also root + // nodes since the edges don't cover dev dependencies. + let mut queue: Vec<_> = petgraph + .node_indices() + .filter(|node_index| { + petgraph + .edges_directed(*node_index, Direction::Incoming) + .next() + .is_none() + }) + .collect(); + + // The root nodes are always applicable, unless the user has restricted resolver + // environments with `tool.uv.environments`. + let root_markers: MarkerTree = if fork_markers.is_empty() { + MarkerTree::TRUE + } else { + fork_markers + .iter() + .fold(MarkerTree::FALSE, |mut acc, marker| { + acc.or(marker.clone()); + acc + }) + }; + for root_index in &queue { + reachability.insert(*root_index, root_markers.clone()); + } + + // Propagate all markers through the graph, so that the eventual marker for each node is the + // union of the markers of each path we can reach the node by. + while let Some(parent_index) = queue.pop() { + let marker = reachability[&parent_index].clone(); + for child_edge in petgraph.edges_directed(parent_index, Direction::Outgoing) { + // The marker for all paths to the child through the parent. + let mut child_marker = child_edge.weight().clone(); + child_marker.and(marker.clone()); + match reachability.entry(child_edge.target()) { + Entry::Occupied(mut existing) => { + // If the marker is a subset of the existing marker (A ⊆ B exactly if + // A ∪ B = B), updating the child wouldn't change child's marker. 
+ child_marker.or(existing.get().clone()); + if &child_marker != existing.get() { + existing.insert(child_marker); + queue.push(child_edge.target()); + } + } + Entry::Vacant(vacant) => { + vacant.insert(child_marker.clone()); + queue.push(child_edge.target()); + } + } + } + } + + reachability + } + fn add_edge( petgraph: &mut Graph, inverse: &mut FxHashMap, NodeIndex>, diff --git a/crates/uv/tests/snapshots/ecosystem__transformers-lock-file.snap b/crates/uv/tests/snapshots/ecosystem__transformers-lock-file.snap index 59aec34fd392..c59427a9d414 100644 --- a/crates/uv/tests/snapshots/ecosystem__transformers-lock-file.snap +++ b/crates/uv/tests/snapshots/ecosystem__transformers-lock-file.snap @@ -4793,40 +4793,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/bd/cb/1358e60835aad684311cfab10e36375c09a8a627ed22f357ddc9f0556ca3/tensorflow_cpu-2.15.1-cp39-cp39-win_amd64.whl", hash = "sha256:951d78693b61239464bee5ae9c20b6c845d82ae0a2092ee5abebb96b5e2db02e", size = 2133 }, ] -[[package]] -name = "tensorflow-cpu-aws" -version = "2.15.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "absl-py", marker = "python_full_version >= '3.13' and platform_system != 'Darwin'" }, - { name = "astunparse", marker = "python_full_version >= '3.13' and platform_system != 'Darwin'" }, - { name = "flatbuffers", version = "24.3.25", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.13' and platform_system != 'Darwin'" }, - { name = "gast", version = "0.6.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.13' and platform_system != 'Darwin'" }, - { name = "google-pasta", marker = "python_full_version >= '3.13' and platform_system != 'Darwin'" }, - { name = "grpcio", marker = "python_full_version >= '3.13' and platform_system != 'Darwin'" }, - { name = "h5py", marker = "python_full_version >= '3.13' and platform_system != 'Darwin'" }, - { name = "keras", version = "2.15.0", 
source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.13' and platform_system != 'Darwin'" }, - { name = "libclang", marker = "python_full_version >= '3.13' and platform_system != 'Darwin'" }, - { name = "ml-dtypes", version = "0.3.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.13' and platform_system != 'Darwin'" }, - { name = "numpy", marker = "python_full_version >= '3.13' and platform_system != 'Darwin'" }, - { name = "opt-einsum", marker = "python_full_version >= '3.13' and platform_system != 'Darwin'" }, - { name = "packaging", marker = "python_full_version >= '3.13' and platform_system != 'Darwin'" }, - { name = "protobuf", marker = "python_full_version >= '3.13' and platform_system != 'Darwin'" }, - { name = "setuptools", marker = "python_full_version >= '3.13' and platform_system != 'Darwin'" }, - { name = "six", marker = "python_full_version >= '3.13' and platform_system != 'Darwin'" }, - { name = "tensorboard", version = "2.15.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.13' and platform_system != 'Darwin'" }, - { name = "tensorflow-estimator", version = "2.15.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.13' and platform_system != 'Darwin'" }, - { name = "tensorflow-io-gcs-filesystem", marker = "python_full_version >= '3.13' and platform_system != 'Darwin'" }, - { name = "termcolor", marker = "python_full_version >= '3.13' and platform_system != 'Darwin'" }, - { name = "typing-extensions", marker = "python_full_version >= '3.13' and platform_system != 'Darwin'" }, - { name = "wrapt", marker = "python_full_version >= '3.13' and platform_system != 'Darwin'" }, -] -wheels = [ - { url = "https://files.pythonhosted.org/packages/f4/9c/74e7c37e2de31fb5ada8f3a166ceedacdb99fc9bcd88f606ec97bfc2b22e/tensorflow_cpu_aws-2.15.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:c781d95cb8c58d47cb012b7b4e77b2f3e8d4d47b45926bc54976506fa0c037cc", size = 211831219 }, - { url = "https://files.pythonhosted.org/packages/06/d5/05cd02db299821fd68ef5f8857506c21aeeddd024daf519d8643f0260952/tensorflow_cpu_aws-2.15.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d4c3a3a9363bf42999adedbbd514e3a133be2d62f61fee9cfa46aaefb087c09e", size = 211874120 }, - { url = "https://files.pythonhosted.org/packages/e0/b2/44b4492303ea458f1c97d1c5ebd412dd799827f6fafd7938dd45be8f70a6/tensorflow_cpu_aws-2.15.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e9a25f2b9da4740074fdd89bd2a4cf280a9d40b1d26a973ef079e6673c1bf7de", size = 211831639 }, -] - [[package]] name = "tensorflow-estimator" version = "2.7.0" @@ -4876,40 +4842,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e5/50/00dba77925bf2a0a1e45d7bcf8a69a1d2534fb4bb277d9010bd148d2235e/tensorflow_hub-0.16.1-py2.py3-none-any.whl", hash = "sha256:e10c184b3d08daeafada11ffea2dd46781725b6bef01fad1f74d6634ad05311f", size = 30771 }, ] -[[package]] -name = "tensorflow-intel" -version = "2.15.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "absl-py", marker = "(python_full_version >= '3.13' and platform_machine != 'aarch64' and platform_system != 'Darwin') or (python_full_version >= '3.13' and platform_system != 'Darwin' and platform_system != 'Linux')" }, - { name = "astunparse", marker = "(python_full_version >= '3.13' and platform_machine != 'aarch64' and platform_system != 'Darwin') or (python_full_version >= '3.13' and platform_system != 'Darwin' and platform_system != 'Linux')" }, - { name = "flatbuffers", version = "24.3.25", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.13' and platform_machine != 'aarch64' and platform_system != 'Darwin') or (python_full_version >= '3.13' and platform_system != 'Darwin' and platform_system != 'Linux')" }, - { name = "gast", 
version = "0.6.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.13' and platform_machine != 'aarch64' and platform_system != 'Darwin') or (python_full_version >= '3.13' and platform_system != 'Darwin' and platform_system != 'Linux')" }, - { name = "google-pasta", marker = "(python_full_version >= '3.13' and platform_machine != 'aarch64' and platform_system != 'Darwin') or (python_full_version >= '3.13' and platform_system != 'Darwin' and platform_system != 'Linux')" }, - { name = "grpcio", marker = "(python_full_version >= '3.13' and platform_machine != 'aarch64' and platform_system != 'Darwin') or (python_full_version >= '3.13' and platform_system != 'Darwin' and platform_system != 'Linux')" }, - { name = "h5py", marker = "(python_full_version >= '3.13' and platform_machine != 'aarch64' and platform_system != 'Darwin') or (python_full_version >= '3.13' and platform_system != 'Darwin' and platform_system != 'Linux')" }, - { name = "keras", version = "2.15.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.13' and platform_machine != 'aarch64' and platform_system != 'Darwin') or (python_full_version >= '3.13' and platform_system != 'Darwin' and platform_system != 'Linux')" }, - { name = "libclang", marker = "(python_full_version >= '3.13' and platform_machine != 'aarch64' and platform_system != 'Darwin') or (python_full_version >= '3.13' and platform_system != 'Darwin' and platform_system != 'Linux')" }, - { name = "ml-dtypes", version = "0.3.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.13' and platform_machine != 'aarch64' and platform_system != 'Darwin') or (python_full_version >= '3.13' and platform_system != 'Darwin' and platform_system != 'Linux')" }, - { name = "numpy", marker = "(python_full_version >= '3.13' and platform_machine != 'aarch64' and platform_system != 'Darwin') or (python_full_version >= '3.13' and platform_system != 
'Darwin' and platform_system != 'Linux')" }, - { name = "opt-einsum", marker = "(python_full_version >= '3.13' and platform_machine != 'aarch64' and platform_system != 'Darwin') or (python_full_version >= '3.13' and platform_system != 'Darwin' and platform_system != 'Linux')" }, - { name = "packaging", marker = "(python_full_version >= '3.13' and platform_machine != 'aarch64' and platform_system != 'Darwin') or (python_full_version >= '3.13' and platform_system != 'Darwin' and platform_system != 'Linux')" }, - { name = "protobuf", marker = "(python_full_version >= '3.13' and platform_machine != 'aarch64' and platform_system != 'Darwin') or (python_full_version >= '3.13' and platform_system != 'Darwin' and platform_system != 'Linux')" }, - { name = "setuptools", marker = "(python_full_version >= '3.13' and platform_machine != 'aarch64' and platform_system != 'Darwin') or (python_full_version >= '3.13' and platform_system != 'Darwin' and platform_system != 'Linux')" }, - { name = "six", marker = "(python_full_version >= '3.13' and platform_machine != 'aarch64' and platform_system != 'Darwin') or (python_full_version >= '3.13' and platform_system != 'Darwin' and platform_system != 'Linux')" }, - { name = "tensorboard", version = "2.15.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.13' and platform_machine != 'aarch64' and platform_system != 'Darwin') or (python_full_version >= '3.13' and platform_system != 'Darwin' and platform_system != 'Linux')" }, - { name = "tensorflow-estimator", version = "2.15.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.13' and platform_machine != 'aarch64' and platform_system != 'Darwin') or (python_full_version >= '3.13' and platform_system != 'Darwin' and platform_system != 'Linux')" }, - { name = "tensorflow-io-gcs-filesystem", marker = "(python_full_version >= '3.13' and platform_machine != 'aarch64' and platform_system != 'Darwin') or 
(python_full_version >= '3.13' and platform_system != 'Darwin' and platform_system != 'Linux')" }, - { name = "termcolor", marker = "(python_full_version >= '3.13' and platform_machine != 'aarch64' and platform_system != 'Darwin') or (python_full_version >= '3.13' and platform_system != 'Darwin' and platform_system != 'Linux')" }, - { name = "typing-extensions", marker = "(python_full_version >= '3.13' and platform_machine != 'aarch64' and platform_system != 'Darwin') or (python_full_version >= '3.13' and platform_system != 'Darwin' and platform_system != 'Linux')" }, - { name = "wrapt", marker = "(python_full_version >= '3.13' and platform_machine != 'aarch64' and platform_system != 'Darwin') or (python_full_version >= '3.13' and platform_system != 'Darwin' and platform_system != 'Linux')" }, -] -wheels = [ - { url = "https://files.pythonhosted.org/packages/02/13/681487f4f5241d8213fbf3f8940988054d97e213fcc3390921682dfc691f/tensorflow_intel-2.15.1-cp310-cp310-win_amd64.whl", hash = "sha256:9f305142b3c5e239c82c463429b1f88726dd27d9f23523871f825493a9ffc5f4", size = 300872148 }, - { url = "https://files.pythonhosted.org/packages/90/32/d0ec8fe173e8e1c38cd13d23d640c46232d211fbd6f3485d17a1950f3c38/tensorflow_intel-2.15.1-cp311-cp311-win_amd64.whl", hash = "sha256:4f05059493f8203285ac5cea3b1955887a7903c1ca6f7a29e4b6ef912b1f934b", size = 300918101 }, - { url = "https://files.pythonhosted.org/packages/a9/61/c746e82becb7f14aac327220d5dd1a086b94b605cfab24b3e5991fa26cdf/tensorflow_intel-2.15.1-cp39-cp39-win_amd64.whl", hash = "sha256:921f18f7eb9cf59769e9668b3935fe178c990e2973d8013870dae5e3b58de079", size = 300800813 }, -] - [[package]] name = "tensorflow-io-gcs-filesystem" version = "0.37.1" @@ -4937,10 +4869,6 @@ wheels = [ name = "tensorflow-macos" version = "2.15.1" source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "tensorflow-cpu-aws", marker = "(python_full_version >= '3.13' and platform_machine == 'aarch64' and platform_system == 'Linux') or 
(python_full_version >= '3.13' and platform_machine == 'arm64' and platform_system == 'Linux')" }, - { name = "tensorflow-intel", marker = "python_full_version >= '3.13' and platform_system == 'Windows'" }, -] wheels = [ { url = "https://files.pythonhosted.org/packages/b3/c8/b90dc41b1eefc2894801a120cf268b1f25440981fcf966fb055febce8348/tensorflow_macos-2.15.1-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:b8f01d7615fe4ff3b15a12f84471bd5344fed187543c4a091da3ddca51b6dc26", size = 2158 }, { url = "https://files.pythonhosted.org/packages/bc/11/b73387ad260614ec43c313a630d14fe5522455084abc207fce864aaa3d73/tensorflow_macos-2.15.1-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:58fca6399665f19e599c591c421672d9bc8b705409d43ececd0931d1d3bc6a7e", size = 2159 },