Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable multiple edges in the same direction between two nodes for pandas #156

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 2 additions & 4 deletions src/gurobi_optimods/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,9 +92,7 @@ def load_berlin_metro_graph_data():


def _load_simple_graph_pandas(drop_pos=True, capacity=True, cost=True, demand=True):
edge_data = pd.read_csv(DATA_FILE_DIR / "graphs/simple_graph_edges.csv").set_index(
["source", "target"]
)
edge_data = pd.read_csv(DATA_FILE_DIR / "graphs/simple_graph_edges.csv")
node_data = pd.read_csv(
DATA_FILE_DIR / "graphs/simple_graph_nodes.csv", index_col=0
)
Expand Down Expand Up @@ -160,7 +158,7 @@ def _convert_pandas_to_scipy(
the graph structure, the capacity and cost values per edge, and the demand
values per node.
"""
coords = edge_data.index.to_numpy()
coords = edge_data[["source", "target"]].to_numpy()

a0 = np.array([c[0] for c in coords])
a1 = np.array([c[1] for c in coords])
Expand Down
15 changes: 7 additions & 8 deletions src/gurobi_optimods/max_flow.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,30 +60,29 @@ def _remove_dummy_edge(graph, source, sink):
graph = graph.tolil()
graph = graph[:-1, :]
elif isinstance(graph, pd.Series) or isinstance(graph, pd.DataFrame):
graph.drop((sink, source), inplace=True)
f, t = ("source", "target")
graph.drop(graph[(graph[f] == sink) & (graph[t] == source)].index, inplace=True)
elif nx is not None and isinstance(graph, nx.Graph):
graph.remove_edge(sink, source)
return graph


def _max_flow_pandas(arc_data, source, sink, **kwargs):
f, t = arc_data.index.names
f, t = ("source", "target")
arc_data["cost"] = [0] * len(arc_data)
    # Find maximum flow through (minimum of sum of all outgoing/incoming
# capacities at the source/sink)
max_flow = min(
arc_data.loc[([source], slice(None)),]["capacity"].sum(),
arc_data.loc[(slice(None), [sink]),]["capacity"].sum(),
arc_data[arc_data[f] == source]["capacity"].sum(),
arc_data[arc_data[t] == sink]["capacity"].sum(),
)
# Add dummy edge
arc_data = pd.concat(
[
arc_data,
pd.DataFrame(
[{f: sink, t: source, "capacity": max_flow, "cost": -1}]
).set_index([f, t]),
pd.DataFrame([{f: sink, t: source, "capacity": max_flow, "cost": -1}]),
]
)
).reset_index(drop=True)
demand_data = pd.DataFrame([{"node": source, "demand": 0}]).set_index("node")
# Solve
obj, flow = min_cost_flow_pandas(arc_data, demand_data, **kwargs)
Expand Down
23 changes: 14 additions & 9 deletions src/gurobi_optimods/min_cost_flow.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,10 @@ def min_cost_flow_pandas(

arc_data = pd.DataFrame(
[
{"from": 0, "to": 1, "capacity": 16, "cost": 0}, {"from": 1,
"to": 2, "capacity": 10, "cost": 0},
{"source": 0, "target": 1, "capacity": 16, "cost": 0}, {"source": 1,
"target": 2, "capacity": 10, "cost": 0},
]
).set_index(["from", "to"])
)

demand_data = pd.DataFrame(
[{"node": 0, "demand": -1}, {"node": 2, "demand": 1}]
Expand All @@ -45,7 +45,7 @@ def min_cost_flow_pandas(
----------
arc_data : DataFrame
DataFrame with graph and respective attributes. These must include
``"from"``, ``"to"`` nodes used as index as well as ``"capacity"``, and
``"source"``, ``"target"``, ``"capacity"``, and
``"cost"`` columns.
demand_data : DataFrame
DataFrame with node demand information. These must include indexed by
Expand All @@ -60,15 +60,19 @@ def min_cost_flow_pandas(
"""
with create_env() as env, gp.Model(env=env) as model:
model.ModelSense = GRB.MINIMIZE

arc_data = arc_data.reset_index().drop(columns=["index"], errors="ignore")
arc_df = arc_data.gppd.add_vars(model, ub="capacity", obj="cost", name="flow")

source_label, target_label = arc_data.index.names
source_label, target_label = ("source", "target")
balance_df = (
pd.DataFrame(
{
"inflow": arc_df["flow"].groupby(target_label).sum(),
"outflow": arc_df["flow"].groupby(source_label).sum(),
"inflow": arc_df[["flow", target_label]]
.groupby(target_label)
.sum()["flow"],
"outflow": arc_df[["flow", source_label]]
.groupby(source_label)
.sum()["flow"],
"demand": demand_data["demand"],
}
)
Expand All @@ -84,7 +88,8 @@ def min_cost_flow_pandas(
if model.Status in [GRB.INFEASIBLE, GRB.INF_OR_UNBD]:
raise ValueError("Unsatisfiable flows")

return model.ObjVal, arc_df["flow"].gppd.X
arc_df["flow"] = arc_df["flow"].gppd.X
return model.ObjVal, arc_df


@optimod()
Expand Down
34 changes: 21 additions & 13 deletions src/gurobi_optimods/min_cut.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,31 +77,29 @@ def min_cut(graph, source, sink, *, create_env):


def _min_cut_pandas(arc_data, source, sink, create_env):
f, t = arc_data.index.names
f, t = ("source", "target")
arc_data["cost"] = [0] * len(arc_data)
# Create dummy edge to find maximum flow through (minimum of sum of all
# outgoing/incoming capacities at the source/sink)
max_flow = min(
arc_data.loc[([source], slice(None)),]["capacity"].sum(),
arc_data.loc[(slice(None), [sink]),]["capacity"].sum(),
arc_data[arc_data[f] == source]["capacity"].sum(),
arc_data[arc_data[t] == sink]["capacity"].sum(),
)
arc_data = pd.concat(
[
arc_data,
pd.DataFrame(
[{f: sink, t: source, "capacity": max_flow, "cost": 1}]
).set_index([f, t]),
pd.DataFrame([{f: sink, t: source, "capacity": max_flow, "cost": 1}]),
]
)
).reset_index(drop=True)

with create_env() as env, gp.Model(env=env) as model:
# Solve max-flow problem
model.ModelSense = GRB.MAXIMIZE
arc_df = arc_data.gppd.add_vars(model, obj="cost", name="flow")
balance_df = pd.DataFrame(
{
"inflow": arc_df["flow"].groupby(t).sum(),
"outflow": arc_df["flow"].groupby(f).sum(),
"inflow": arc_df.groupby(t)["flow"].sum(),
"outflow": arc_df.groupby(f)["flow"].sum(),
}
).gppd.add_constrs(model, "inflow == outflow", name="balance")
capacity_constrs = gppd.add_constrs(
Expand All @@ -125,7 +123,13 @@ def _min_cut_pandas(arc_data, source, sink, create_env):
cap_pi = capacity_constrs.gppd.Pi

    # Find edges in the cutset (excluding the dummy (sink, source) edge).
cutset = set([a for a in arc_data.index if cap_pi[a] > 1e-3 if a[0] != sink])
cutset = set(
[
tuple(arc_data.loc[a, ["source", "target"]])
for a in arc_data.index
if cap_pi[a] > 1e-3
]
)

if len(cutset) == 0: # No arc in the cutset
return MinCutResult(0.0, (set(), set()), set())
Expand All @@ -138,9 +142,13 @@ def _min_cut_pandas(arc_data, source, sink, create_env):
# Add successors of `node` that are not in the cutset
queue.extend(
[
a[1]
for a in arc_data.loc[([node], slice(None)),].index
if a not in cutset and a[1] not in p1 and a[1] not in queue
a["target"]
for _, a in arc_data.loc[arc_data["source"] == node][
["source", "target"]
].iterrows()
if tuple(a) not in cutset
and a["target"] not in p1
and a["target"] not in queue
]
)
p2 = set([n for n in balance_df.index if n not in p1])
Expand Down
5 changes: 4 additions & 1 deletion tests/test_graph_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,10 @@
def check_solution_pandas(solution, candidates):
# Checks whether the solution (`pd.Series`) matches any of the list of
# candidates (containing `dict`)
if any(solution.to_dict() == c for c in candidates):
if any(
solution.reset_index(drop=True).equals(c.reset_index(drop=True))
for c in candidates
):
return True
return False

Expand Down
57 changes: 29 additions & 28 deletions tests/test_max_flow.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import unittest

import numpy as np
import pandas as pd

try:
import networkx as nx
Expand Down Expand Up @@ -31,23 +32,25 @@ def test_pandas(self):
obj, sol = max_flow(edge_data, 0, 5)
sol = sol[sol > 0]
self.assertEqual(obj, self.expected_max_flow)
candidate = {
(0, 1): 1.0,
(0, 2): 2.0,
(1, 3): 1.0,
(2, 3): 1.0,
(2, 4): 1.0,
(3, 5): 2.0,
(4, 5): 1.0,
}
candidate2 = {
(0, 1): 1.0,
(0, 2): 2.0,
(1, 3): 1.0,
(2, 4): 2.0,
(3, 5): 1.0,
(4, 5): 2.0,
}

candidate = pd.DataFrame(
{
"source": [np.NaN, np.NaN, 1.0, 2.0, 2.0, 3.0, 4.0],
"target": [1, 2, 3, 3, 4, 5, 5],
"capacity": [2, 2, 1, 1, 2, 2, 2],
"cost": [np.NaN] * 7,
"flow": [1.0, 2.0, 1.0, 1.0, 1.0, 2.0, 1.0],
}
)
candidate2 = pd.DataFrame(
{
"source": [np.NaN, np.NaN, 1.0, 2.0, 3.0, 4.0],
"target": [1, 2, 3, 4, 5, 5],
"capacity": [2, 2, 1, 2, 2, 2],
"cost": [np.NaN] * 6,
"flow": [1.0, 2.0, 1.0, 2.0, 1.0, 2.0],
}
)
self.assertTrue(check_solution_pandas(sol, [candidate, candidate2]))

def test_empty_pandas(self):
Expand Down Expand Up @@ -118,18 +121,16 @@ def setUp(self):
def test_pandas(self):
edge_data, _ = load_graph2_pandas()
obj, sol = max_flow(edge_data, 0, 4)
sol = sol[sol > 0]
sol = sol[sol > 0].drop(columns=["cost"])
self.assertEqual(obj, self.expected_max_flow)
candidate = {
(0, 1): 15.0,
(0, 2): 8.0,
(1, 3): 4.0,
(1, 2): 1.0,
(1, 4): 10.0,
(2, 3): 4.0,
(2, 4): 5.0,
(3, 4): 8.0,
}
candidate = pd.DataFrame(
{
"source": [np.NaN, np.NaN, 1.0, 1.0, 1.0, 2.0, 2.0, 3.0],
"target": [1, 2, 3, 2, 4, 3, 4, 4],
"capacity": [15, 8, 4, 20, 10, 15, 5, 20],
"flow": [15.0, 8.0, 4.0, 1.0, 10.0, 4.0, 5.0, 8.0],
}
)
self.assertTrue(check_solution_pandas(sol, [candidate]))

def test_scipy(self):
Expand Down
58 changes: 34 additions & 24 deletions tests/test_min_cost_flow.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@

def load_graph2_pandas():
return (
pd.read_csv(io.StringIO(edge_data2)).set_index(["source", "target"]),
pd.read_csv(io.StringIO(edge_data2)),
pd.read_csv(io.StringIO(node_data2), index_col=0),
)

Expand All @@ -63,10 +63,19 @@ class TestMinCostFlow(unittest.TestCase):
def test_pandas(self):
edge_data, node_data = datasets.simple_graph_pandas()
cost, sol = mcf.min_cost_flow_pandas(edge_data, node_data)
sol = sol[sol > 0]
sol = sol[sol["flow"] > 0]
self.assertEqual(cost, 31)
candidate = {(0, 1): 1.0, (0, 2): 1.0, (1, 3): 1.0, (2, 4): 2.0, (4, 5): 2.0}
self.assertIsInstance(sol, pd.Series)
candidate = pd.DataFrame(
{
"source": [0, 0, 1, 2, 4],
"target": [1, 2, 3, 4, 5],
"capacity": [2, 2, 1, 2, 2],
"cost": [9, 7, 1, 6, 1],
"flow": [1.0, 1.0, 1.0, 2.0, 2.0],
}
)

self.assertIsInstance(sol, pd.DataFrame)
self.assertTrue(check_solution_pandas(sol, [candidate]))

def test_infeasible(self):
Expand Down Expand Up @@ -128,27 +137,28 @@ class TestMinCostFlow2(unittest.TestCase):
def test_pandas(self):
edge_data, node_data = load_graph2_pandas()
cost, sol = mcf.min_cost_flow_pandas(edge_data, node_data)
sol = sol[sol > 0]
sol = sol[sol["flow"] > 0]
self.assertEqual(cost, 150)
candidate = {
(0, 1): 12.0,
(0, 2): 8.0,
(1, 3): 4.0,
(1, 2): 8.0,
(2, 3): 15.0,
(2, 4): 1.0,
(3, 4): 14.0,
}
candidate2 = {
(0, 1): 12.0,
(0, 2): 8.0,
(1, 3): 4.0,
(1, 2): 8.0,
(2, 3): 11.0,
(2, 4): 5.0,
(3, 4): 10.0,
}
self.assertTrue(check_solution_pandas(sol, [candidate, candidate2]))
candidate1 = pd.DataFrame(
{
"source": [0, 0, 1, 1, 2, 2, 3],
"target": [1, 2, 3, 2, 3, 4, 4],
"capacity": [15, 8, 4, 20, 15, 5, 20],
"cost": [4, 4, 2, 2, 1, 3, 2],
"flow": [12.0, 8.0, 4.0, 8.0, 15.0, 1.0, 14.0],
}
)
candidate2 = pd.DataFrame(
{
"source": [0, 0, 1, 1, 2, 2, 3],
"target": [1, 2, 3, 2, 3, 4, 4],
"capacity": [15, 8, 4, 20, 15, 5, 20],
"cost": [4, 4, 2, 2, 1, 3, 2],
"flow": [12.0, 8.0, 4.0, 8.0, 11.0, 5.0, 10.0],
}
)

self.assertTrue(check_solution_pandas(sol, [candidate1, candidate2]))

def test_scipy(self):
G, cap, cost, demands = load_graph2_scipy()
Expand Down
2 changes: 1 addition & 1 deletion tests/test_min_cut.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ def setUp(self):
{"source": 3, "target": 6, "capacity": 2.0},
{"source": 5, "target": 6, "capacity": 3.0},
]
).set_index(["source", "target"])
)

def test_pandas(self):
res = min_cut(self.arc_data, 0, 6)
Expand Down
Loading