From 944df0ff042f999de2dc5519b9d97364806f469c Mon Sep 17 00:00:00 2001 From: Matthew Treinish Date: Sun, 27 Mar 2022 13:43:34 -0400 Subject: [PATCH] Add function to find densest subgraph Fixes #570 --- src/dense_subgraph.rs | 199 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 199 insertions(+) create mode 100644 src/dense_subgraph.rs diff --git a/src/dense_subgraph.rs b/src/dense_subgraph.rs new file mode 100644 index 0000000000..322b3da014 --- /dev/null +++ b/src/dense_subgraph.rs @@ -0,0 +1,199 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. You may obtain +// a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +// License for the specific language governing permissions and limitations +// under the License. + +use hashbrown::{HashMap, HashSet}; + +use petgraph::graph::NodeIndex; +use petgraph::prelude::*; +use petgraph::visit::IntoEdgeReferences; +use petgraph::EdgeType; + +use rayon::prelude::*; + +use numpy::ToPyArray; +use pyo3::prelude::*; +use pyo3::Python; + +use crate::digraph; +use crate::graph; +use crate::StablePyGraph; + +struct SubsetResult { + pub count: usize, + pub error: f64, + pub map: Vec, + pub subgraph: Vec<[NodeIndex; 2]>, +} + +pub fn densest_subgraph( + py: Python, + graph: &StablePyGraph, + num_nodes: usize, + weight_callback: Option, +) -> PyResult<(PyObject, PyObject, PyObject)> +where + Ty: EdgeType + Sync, +{ + let node_indices: Vec = graph.node_indices().collect(); + let float_callback = + |callback: PyObject, source_node: usize, target_node: usize| -> PyResult { + let res = callback.as_ref(py).call1((source_node, target_node))?; + res.extract() + }; + let mut weight_map: Option> = None; + + if weight_callback.is_some() { + let mut inner_weight_map: HashMap<[NodeIndex; 2], f64> = + HashMap::with_capacity(graph.edge_count()); + let callback = weight_callback.as_ref().unwrap(); + for edge in graph.edge_references() { + let source: NodeIndex = edge.source(); + let target: NodeIndex = edge.target(); + let weight = float_callback(callback.clone_ref(py), source.index(), target.index())?; + inner_weight_map.insert([source, target], weight); + } + weight_map = Some(inner_weight_map); + } + let reduce_identity_fn = || -> SubsetResult { + SubsetResult { + count: 0, + map: Vec::new(), + error: std::f64::INFINITY, + subgraph: Vec::new(), + } + }; + + let reduce_fn = |best: SubsetResult, curr: SubsetResult| -> SubsetResult { + if weight_callback.is_some() { + if curr.count >= best.count && curr.error <= best.error { + curr + } else { + best + } + } else if curr.count > best.count { + curr + } else { + best + } + }; + + let best_result = node_indices + .into_par_iter() + .map(|index| { + let mut subgraph: Vec<[NodeIndex; 2]> = Vec::with_capacity(num_nodes); + let mut bfs = Bfs::new(&graph, index); + let mut bfs_vec: Vec = Vec::with_capacity(num_nodes); + let mut bfs_set: HashSet = HashSet::with_capacity(num_nodes); + + let mut count = 0; + while let Some(node) = bfs.next(&graph) { + bfs_vec.push(node); + bfs_set.insert(node); + count += 1; + if count >= num_nodes { + break; + } + } + let mut connection_count = 0; + for node in &bfs_vec { + for j in graph.node_indices() { + if graph.contains_edge(*node, j) { + connection_count += 1; + subgraph.push([*node, j]); + } + } + } + let error = match &weight_map { + Some(map) => subgraph.iter().map(|edge| map[edge]).sum::() / num_nodes as f64, + None => 0., + }; + SubsetResult { + count: connection_count, + error, + map: bfs_vec, + subgraph, + } + }) + .reduce(reduce_identity_fn, reduce_fn); + let best_map: Vec = best_result.map.iter().map(|x| x.index()).collect(); + let mapping: HashMap = best_map + .iter() + .enumerate() + .map(|(best_edge, edge)| (*edge, best_edge)) + .collect(); + let new_cmap: Vec<[usize; 2]> = best_result + .subgraph + .iter() + .map(|c| [mapping[&c[0].index()], mapping[&c[1].index()]]) + .collect(); + let rows: Vec = new_cmap.iter().map(|edge| edge[0]).collect(); + let cols: Vec = new_cmap.iter().map(|edge| edge[1]).collect(); + Ok(( + rows.to_pyarray(py).into(), + cols.to_pyarray(py).into(), + best_map.to_pyarray(py).into(), + )) +} + +/// Find densest subgraph in a :class:`~.PyGraph` +/// +/// This method does not provide any guarantees on the approximation as it +/// does a naive search using BFS traversal. +/// +/// :param PyDigraph graph: The graph to find +/// :param int num_nodes: The number of nodes in the subgraph to find +/// :param func weight_callback: An optional callable that if specified will be +/// passed the node indices of each edge in the graph and it is expected to +/// return a float value. If specified the lowest avg weight for edges in +/// a found subgraph will be a criteria for selection in addition to the +/// connectivity of the subgraph. +/// :returns: A tuple of 3 numpy arrays for efficient sparse matrix creation +/// of the adjacency matrix of the subgraph mapping the values back to the +/// node indices on the original graph. +/// :rtype: (rows, cols, value) +#[pyfunction] +#[pyo3(text_signature = "(graph. num_nodes, /, weight_callback=None)")] +pub fn graph_dense_subgraph( + py: Python, + graph: &graph::PyGraph, + num_nodes: usize, + weight_callback: Option, +) -> PyResult<(PyObject, PyObject, PyObject)> { + densest_subgraph(py, &graph.graph, num_nodes, weight_callback) +} + +/// Find densest subgraph in a :class:`~.PyDiGraph` +/// +/// This method does not provide any guarantees on the approximation as it +/// does a naive search using BFS traversal. +/// +/// :param PyDigraph graph: The graph to find +/// :param int num_nodes: The number of nodes in the subgraph to find +/// :param func weight_callback: An optional callable that if specified will be +/// passed the node indices of each edge in the graph and it is expected to +/// return a float value. If specified the lowest avg weight for edges in +/// a found subgraph will be a criteria for selection in addition to the +/// connectivity of the subgraph. +/// :returns: A tuple of 3 numpy arrays for efficient sparse matrix creation +/// of the adjacency matrix of the subgraph mapping the values back to the +/// node indices on the original graph. +/// :rtype: (rows, cols, value) +#[pyfunction] +#[pyo3(text_signature = "(graph. num_nodes, /, weight_callback=None)")] +pub fn digraph_dense_subgraph( + py: Python, + graph: &digraph::PyDiGraph, + num_nodes: usize, + weight_callback: Option, +) -> PyResult<(PyObject, PyObject, PyObject)> { + densest_subgraph(py, &graph.graph, num_nodes, weight_callback) +}