diff --git a/docs/source/api.rst b/docs/source/api.rst index 6dc8117e8..339424a33 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -155,6 +155,7 @@ Connectivity and Cycles retworkx.chain_decomposition retworkx.all_simple_paths retworkx.all_pairs_all_simple_paths + retworkx.densest_subgraph_of_size .. _graph-ops: diff --git a/releasenotes/notes/densest_subgraph-1b068f69f80facd4.yaml b/releasenotes/notes/densest_subgraph-1b068f69f80facd4.yaml new file mode 100644 index 000000000..cfa209d12 --- /dev/null +++ b/releasenotes/notes/densest_subgraph-1b068f69f80facd4.yaml @@ -0,0 +1,31 @@ +--- +features: + - | + Added a new function, :func:`~.densest_subgraph_of_size`, which is used to return a + subgraph of a given size that has the highest degree of connectivity between the nodes. + For example, if you wanted to find the densest subgraph of 5 nodes in a hexagonal lattice + graph: + + .. jupyter-execute:: + + import retworkx + from retworkx.visualization import mpl_draw + + graph = retworkx.generators.hexagonal_lattice_graph(4, 5) + + subgraph, node_map = retworkx.densest_subgraph_of_size(graph, 5) + subgraph_edge_set = set(subgraph.edge_list()) + node_colors = [] + for node in graph.node_indices(): + if node in node_map: + node_colors.append('red') + else: + node_colors.append('blue') + graph[node] = node + edge_colors = [] + for edge in graph.edge_list(): + if edge[0] in node_map and edge[1] in node_map: + edge_colors.append('red') + else: + edge_colors.append('blue') + mpl_draw(graph, with_labels=True, node_color=node_colors, edge_color=edge_colors, labels=str) diff --git a/retworkx/__init__.py b/retworkx/__init__.py index 1be867d87..92036828f 100644 --- a/retworkx/__init__.py +++ b/retworkx/__init__.py @@ -2334,3 +2334,37 @@ def _digraph_all_pairs_bellman_ford_shortest_path(graph, edge_cost_fn): @all_pairs_bellman_ford_shortest_paths.register(PyGraph) def _graph_all_pairs_bellman_ford_shortest_path(graph, edge_cost_fn): return
graph_all_pairs_bellman_ford_shortest_paths(graph, edge_cost_fn) + + +@functools.singledispatch +def densest_subgraph_of_size(graph, num_nodes, weight_callback=None): + """Find densest subgraph in a :class:`~.PyGraph` or :class:`~.PyDiGraph` + + This method does not provide any guarantees on the approximation as it + does a naive search using BFS traversal. + + :param graph: The graph to find the densest subgraph in. This can be a + :class:`~retworkx.PyGraph` or a :class:`~retworkx.PyDiGraph`. + :param int num_nodes: The number of nodes in the subgraph to find + :param func weight_callback: An optional callable that if specified will be + passed the node indices of each edge in the graph and it is expected to + return a float value. If specified the lowest avg weight for edges in + a found subgraph will be a criterion for selection in addition to the + connectivity of the subgraph. + :returns: A tuple of the subgraph found and a :class:`~.NodeMap` of the + mapping of node indices in the input ``graph`` to the index in the + output subgraph. + + :rtype: (subgraph, node_map) + """ + raise TypeError("Invalid Input Type %s for graph" % type(graph)) + + +@densest_subgraph_of_size.register(PyDiGraph) +def _digraph_densest_subgraph_of_size(graph, num_nodes, weight_callback=None): + return digraph_densest_subgraph_of_size(graph, num_nodes, weight_callback=weight_callback) + + +@densest_subgraph_of_size.register(PyGraph) +def _graph_densest_subgraph_of_size(graph, num_nodes, weight_callback=None): + return graph_densest_subgraph_of_size(graph, num_nodes, weight_callback=weight_callback) diff --git a/src/dense_subgraph.rs b/src/dense_subgraph.rs new file mode 100644 index 000000000..b33eb9ff8 --- /dev/null +++ b/src/dense_subgraph.rs @@ -0,0 +1,213 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License.
You may obtain +// a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +// License for the specific language governing permissions and limitations +// under the License. + +use hashbrown::{HashMap, HashSet}; + +use petgraph::algo; +use petgraph::graph::NodeIndex; +use petgraph::prelude::*; +use petgraph::visit::{IntoEdgeReferences, NodeFiltered}; +use petgraph::EdgeType; + +use rayon::prelude::*; + +use pyo3::prelude::*; +use pyo3::Python; + +use retworkx_core::dictmap::*; + +use crate::digraph; +use crate::graph; +use crate::iterators::NodeMap; +use crate::StablePyGraph; + +struct SubsetResult { + pub count: usize, + pub error: f64, + pub map: Vec, + pub subgraph: Vec<[NodeIndex; 2]>, +} + +pub fn densest_subgraph( + py: Python, + graph: &StablePyGraph, + num_nodes: usize, + weight_callback: Option, +) -> PyResult<(StablePyGraph, NodeMap)> +where + Ty: EdgeType + Sync, +{ + let node_indices: Vec = graph.node_indices().collect(); + let float_callback = + |callback: PyObject, source_node: usize, target_node: usize| -> PyResult { + let res = callback.as_ref(py).call1((source_node, target_node))?; + res.extract() + }; + let mut weight_map: Option> = None; + + if weight_callback.is_some() { + let mut inner_weight_map: HashMap<[NodeIndex; 2], f64> = + HashMap::with_capacity(graph.edge_count()); + let callback = weight_callback.as_ref().unwrap(); + for edge in graph.edge_references() { + let source: NodeIndex = edge.source(); + let target: NodeIndex = edge.target(); + let weight = float_callback(callback.clone_ref(py), source.index(), target.index())?; + inner_weight_map.insert([source, target], weight); + } + weight_map = Some(inner_weight_map); + } + let reduce_identity_fn = || -> SubsetResult { + SubsetResult { + 
count: 0, + map: Vec::new(), + error: std::f64::INFINITY, + subgraph: Vec::new(), + } + }; + + let reduce_fn = |best: SubsetResult, curr: SubsetResult| -> SubsetResult { + if weight_callback.is_some() { + if curr.count >= best.count && curr.error <= best.error { + curr + } else { + best + } + } else if curr.count > best.count { + curr + } else { + best + } + }; + + let best_result = node_indices + .into_par_iter() + .map(|index| { + let mut subgraph: Vec<[NodeIndex; 2]> = Vec::with_capacity(num_nodes); + let mut bfs = Bfs::new(&graph, index); + let mut bfs_vec: Vec = Vec::with_capacity(num_nodes); + let mut bfs_set: HashSet = HashSet::with_capacity(num_nodes); + + let mut count = 0; + while let Some(node) = bfs.next(&graph) { + bfs_vec.push(node); + bfs_set.insert(node); + count += 1; + if count >= num_nodes { + break; + } + } + let mut connection_count = 0; + for node in &bfs_vec { + for j in graph.node_indices().filter(|j| bfs_set.contains(j)) { + if graph.contains_edge(*node, j) { + connection_count += 1; + subgraph.push([*node, j]); + } + } + } + let error = match &weight_map { + Some(map) => subgraph.iter().map(|edge| map[edge]).sum::() / num_nodes as f64, + None => 0., + }; + SubsetResult { + count: connection_count, + error, + map: bfs_vec, + subgraph, + } + }) + .reduce(reduce_identity_fn, reduce_fn); + + let mut subgraph = StablePyGraph::::with_capacity(num_nodes, best_result.subgraph.len()); + let mut node_map: DictMap = DictMap::with_capacity(num_nodes); + for node in best_result.map { + let new_index = subgraph.add_node(graph[node].clone_ref(py)); + node_map.insert(node.index(), new_index.index()); + } + let node_filter = |node: NodeIndex| -> bool { node_map.contains_key(&node.index()) }; + let filtered = NodeFiltered(graph, node_filter); + for edge in filtered.edge_references() { + let new_source = NodeIndex::new(*node_map.get(&edge.source().index()).unwrap()); + let new_target = NodeIndex::new(*node_map.get(&edge.target().index()).unwrap()); + 
subgraph.add_edge(new_source, new_target, edge.weight().clone_ref(py)); + } + Ok((subgraph, NodeMap { node_map })) +} + +/// Find densest subgraph in a :class:`~.PyGraph` +/// +/// This method does not provide any guarantees on the approximation as it +/// does a naive search using BFS traversal. +/// +/// :param PyGraph graph: The graph to find densest subgraph in. +/// :param int num_nodes: The number of nodes in the subgraph to find +/// :param func weight_callback: An optional callable that if specified will be +/// passed the node indices of each edge in the graph and it is expected to +/// return a float value. If specified the lowest avg weight for edges in +/// a found subgraph will be a criteria for selection in addition to the +/// connectivity of the subgraph. +/// :returns: A tuple of the subgraph found and a :class:`~.NodeMap` of the +/// mapping of node indices in the input ``graph`` to the index in the +/// output subgraph. +/// :rtype: (PyGraph, NodeMap) +#[pyfunction] +#[pyo3(text_signature = "(graph. num_nodes, /, weight_callback=None)")] +pub fn graph_densest_subgraph_of_size( + py: Python, + graph: &graph::PyGraph, + num_nodes: usize, + weight_callback: Option, +) -> PyResult<(graph::PyGraph, NodeMap)> { + let (inner_graph, node_map) = densest_subgraph(py, &graph.graph, num_nodes, weight_callback)?; + let out_graph = graph::PyGraph { + graph: inner_graph, + node_removed: false, + multigraph: graph.multigraph, + }; + Ok((out_graph, node_map)) +} + +/// Find densest subgraph in a :class:`~.PyDiGraph` +/// +/// This method does not provide any guarantees on the approximation as it +/// does a naive search using BFS traversal. +/// +/// :param PyDiGraph graph: The graph to find the densest subgraph in. 
+/// :param int num_nodes: The number of nodes in the subgraph to find +/// :param func weight_callback: An optional callable that if specified will be +/// passed the node indices of each edge in the graph and it is expected to +/// return a float value. If specified the lowest avg weight for edges in +/// a found subgraph will be a criterion for selection in addition to the +/// connectivity of the subgraph. +/// :returns: A tuple of the subgraph found and a :class:`~.NodeMap` of the +/// mapping of node indices in the input ``graph`` to the index in the +/// output subgraph. +/// :rtype: (PyDiGraph, NodeMap) +#[pyfunction] +#[pyo3(text_signature = "(graph, num_nodes, /, weight_callback=None)")] +pub fn digraph_densest_subgraph_of_size( + py: Python, + graph: &digraph::PyDiGraph, + num_nodes: usize, + weight_callback: Option<PyObject>, +) -> PyResult<(digraph::PyDiGraph, NodeMap)> { + let (inner_graph, node_map) = densest_subgraph(py, &graph.graph, num_nodes, weight_callback)?; + let out_graph = digraph::PyDiGraph { // fresh wrapper; check_cycle and multigraph are copied from the input graph + graph: inner_graph, + node_removed: false, + cycle_state: algo::DfsSpace::default(), + check_cycle: graph.check_cycle, + multigraph: graph.multigraph, + }; + Ok((out_graph, node_map)) +} diff --git a/src/lib.rs b/src/lib.rs index de051ed7d..0b74ab6aa 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -15,6 +15,7 @@ mod centrality; mod coloring; mod connectivity; mod dag_algo; +mod dense_subgraph; mod digraph; mod dot_utils; mod generators; @@ -39,6 +40,7 @@ use centrality::*; use coloring::*; use connectivity::*; use dag_algo::*; +use dense_subgraph::*; use graphml::*; use isomorphism::*; use layout::*; @@ -461,6 +463,8 @@ fn retworkx(py: Python<'_>, m: &PyModule) -> PyResult<()> { m.add_wrapped(wrap_pyfunction!(biconnected_components))?; m.add_wrapped(wrap_pyfunction!(chain_decomposition))?; m.add_wrapped(wrap_pyfunction!(read_graphml))?; + m.add_wrapped(wrap_pyfunction!(digraph_densest_subgraph_of_size))?; +
m.add_wrapped(wrap_pyfunction!(graph_densest_subgraph_of_size))?; m.add_class::()?; m.add_class::()?; m.add_class::()?; diff --git a/tests/graph/test_densest_subgraph.py b/tests/graph/test_densest_subgraph.py new file mode 100644 index 000000000..b8041aa31 --- /dev/null +++ b/tests/graph/test_densest_subgraph.py @@ -0,0 +1,31 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import unittest + +import retworkx + + +class TestDensestSubgraph(unittest.TestCase):  # exercises retworkx.densest_subgraph_of_size on grid_graph(3, 3) + def test_simple_grid_three_nodes(self):  # 3-node request: checks the exact edge list and node map returned + graph = retworkx.generators.grid_graph(3, 3) + subgraph, node_map = retworkx.densest_subgraph_of_size(graph, 3) + expected_subgraph_edge_list = [(0, 2), (0, 1)] + self.assertEqual(expected_subgraph_edge_list, subgraph.edge_list()) + self.assertEqual(node_map, {0: 0, 1: 1, 3: 2}) + + def test_simple_grid_six_nodes(self):  # 6-node request: checks the exact edge list and node map returned + graph = retworkx.generators.grid_graph(3, 3) + subgraph, node_map = retworkx.densest_subgraph_of_size(graph, 6) + expected_subgraph_edge_list = [(5, 2), (5, 3), (3, 0), (3, 4), (4, 1), (2, 0), (0, 1)] + self.assertEqual(expected_subgraph_edge_list, subgraph.edge_list()) + self.assertEqual(node_map, {7: 0, 8: 1, 6: 2, 4: 3, 5: 4, 3: 5})