Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable multiple edges in the same direction between two nodes for pandas #156

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 2 additions & 4 deletions src/gurobi_optimods/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,9 +92,7 @@ def load_berlin_metro_graph_data():


def _load_simple_graph_pandas(drop_pos=True, capacity=True, cost=True, demand=True):
edge_data = pd.read_csv(DATA_FILE_DIR / "graphs/simple_graph_edges.csv").set_index(
["source", "target"]
)
edge_data = pd.read_csv(DATA_FILE_DIR / "graphs/simple_graph_edges.csv")
node_data = pd.read_csv(
DATA_FILE_DIR / "graphs/simple_graph_nodes.csv", index_col=0
)
Expand Down Expand Up @@ -160,7 +158,7 @@ def _convert_pandas_to_scipy(
the graph structure, the capacity and cost values per edge, and the demand
values per node.
"""
coords = edge_data.index.to_numpy()
coords = edge_data[["source", "target"]].to_numpy()

a0 = np.array([c[0] for c in coords])
a1 = np.array([c[1] for c in coords])
Expand Down
15 changes: 7 additions & 8 deletions src/gurobi_optimods/max_flow.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,30 +60,29 @@ def _remove_dummy_edge(graph, source, sink):
graph = graph.tolil()
graph = graph[:-1, :]
elif isinstance(graph, pd.Series) or isinstance(graph, pd.DataFrame):
graph.drop((sink, source), inplace=True)
f, t = ("source", "target")
graph.drop(graph[(graph[f] == sink) & (graph[t] == source)].index, inplace=True)
elif nx is not None and isinstance(graph, nx.Graph):
graph.remove_edge(sink, source)
return graph


def _max_flow_pandas(arc_data, source, sink, **kwargs):
f, t = arc_data.index.names
f, t = ("source", "target")
arc_data["cost"] = [0] * len(arc_data)
    # Find maximum flow through (minimum of sum of all outgoing/incoming
# capacities at the source/sink)
max_flow = min(
arc_data.loc[([source], slice(None)),]["capacity"].sum(),
arc_data.loc[(slice(None), [sink]),]["capacity"].sum(),
arc_data[arc_data[f] == source]["capacity"].sum(),
arc_data[arc_data[t] == sink]["capacity"].sum(),
)
# Add dummy edge
arc_data = pd.concat(
[
arc_data,
pd.DataFrame(
[{f: sink, t: source, "capacity": max_flow, "cost": -1}]
).set_index([f, t]),
pd.DataFrame([{f: sink, t: source, "capacity": max_flow, "cost": -1}]),
]
)
).reset_index(drop=True)
demand_data = pd.DataFrame([{"node": source, "demand": 0}]).set_index("node")
# Solve
obj, flow = min_cost_flow_pandas(arc_data, demand_data, **kwargs)
Expand Down
23 changes: 14 additions & 9 deletions src/gurobi_optimods/min_cost_flow.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,10 @@ def min_cost_flow_pandas(

arc_data = pd.DataFrame(
[
{"from": 0, "to": 1, "capacity": 16, "cost": 0}, {"from": 1,
"to": 2, "capacity": 10, "cost": 0},
{"source": 0, "target": 1, "capacity": 16, "cost": 0}, {"source": 1,
"target": 2, "capacity": 10, "cost": 0},
]
).set_index(["from", "to"])
)

demand_data = pd.DataFrame(
[{"node": 0, "demand": -1}, {"node": 2, "demand": 1}]
Expand All @@ -45,7 +45,7 @@ def min_cost_flow_pandas(
----------
arc_data : DataFrame
DataFrame with graph and respective attributes. These must include
``"from"``, ``"to"`` nodes used as index as well as ``"capacity"``, and
``"source"``, ``"target"``, ``"capacity"``, and
``"cost"`` columns.
demand_data : DataFrame
DataFrame with node demand information. These must include indexed by
Expand All @@ -60,15 +60,19 @@ def min_cost_flow_pandas(
"""
with create_env() as env, gp.Model(env=env) as model:
model.ModelSense = GRB.MINIMIZE

arc_data = arc_data.reset_index().drop(columns=["index"], errors="ignore")
arc_df = arc_data.gppd.add_vars(model, ub="capacity", obj="cost", name="flow")

source_label, target_label = arc_data.index.names
source_label, target_label = ("source", "target")
balance_df = (
pd.DataFrame(
{
"inflow": arc_df["flow"].groupby(target_label).sum(),
"outflow": arc_df["flow"].groupby(source_label).sum(),
"inflow": arc_df[["flow", target_label]]
.groupby(target_label)
.sum()["flow"],
"outflow": arc_df[["flow", source_label]]
.groupby(source_label)
.sum()["flow"],
"demand": demand_data["demand"],
}
)
Expand All @@ -84,7 +88,8 @@ def min_cost_flow_pandas(
if model.Status in [GRB.INFEASIBLE, GRB.INF_OR_UNBD]:
raise ValueError("Unsatisfiable flows")

return model.ObjVal, arc_df["flow"].gppd.X
arc_df["flow"] = arc_df["flow"].gppd.X
return model.ObjVal, arc_df


@optimod()
Expand Down
34 changes: 21 additions & 13 deletions src/gurobi_optimods/min_cut.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,31 +77,29 @@ def min_cut(graph, source, sink, *, create_env):


def _min_cut_pandas(arc_data, source, sink, create_env):
f, t = arc_data.index.names
f, t = ("source", "target")
arc_data["cost"] = [0] * len(arc_data)
# Create dummy edge to find maximum flow through (minimum of sum of all
# outgoing/incoming capacities at the source/sink)
max_flow = min(
arc_data.loc[([source], slice(None)),]["capacity"].sum(),
arc_data.loc[(slice(None), [sink]),]["capacity"].sum(),
arc_data[arc_data[f] == source]["capacity"].sum(),
arc_data[arc_data[t] == sink]["capacity"].sum(),
)
arc_data = pd.concat(
[
arc_data,
pd.DataFrame(
[{f: sink, t: source, "capacity": max_flow, "cost": 1}]
).set_index([f, t]),
pd.DataFrame([{f: sink, t: source, "capacity": max_flow, "cost": 1}]),
]
)
).reset_index(drop=True)

with create_env() as env, gp.Model(env=env) as model:
# Solve max-flow problem
model.ModelSense = GRB.MAXIMIZE
arc_df = arc_data.gppd.add_vars(model, obj="cost", name="flow")
balance_df = pd.DataFrame(
{
"inflow": arc_df["flow"].groupby(t).sum(),
"outflow": arc_df["flow"].groupby(f).sum(),
"inflow": arc_df.groupby(t)["flow"].sum(),
"outflow": arc_df.groupby(f)["flow"].sum(),
}
).gppd.add_constrs(model, "inflow == outflow", name="balance")
capacity_constrs = gppd.add_constrs(
Expand All @@ -125,7 +123,13 @@ def _min_cut_pandas(arc_data, source, sink, create_env):
cap_pi = capacity_constrs.gppd.Pi

    # Find edges in the cutset (excluding the dummy (sink, source) edge).
cutset = set([a for a in arc_data.index if cap_pi[a] > 1e-3 if a[0] != sink])
cutset = set(
[
tuple(arc_data.loc[a, ["source", "target"]])
for a in arc_data.index
if cap_pi[a] > 1e-3
]
)

if len(cutset) == 0: # No arc in the cutset
return MinCutResult(0.0, (set(), set()), set())
Expand All @@ -138,9 +142,13 @@ def _min_cut_pandas(arc_data, source, sink, create_env):
# Add successors of `node` that are not in the cutset
queue.extend(
[
a[1]
for a in arc_data.loc[([node], slice(None)),].index
if a not in cutset and a[1] not in p1 and a[1] not in queue
a["target"]
for _, a in arc_data.loc[arc_data["source"] == node][
["source", "target"]
].iterrows()
if tuple(a) not in cutset
and a["target"] not in p1
and a["target"] not in queue
]
)
p2 = set([n for n in balance_df.index if n not in p1])
Expand Down
5 changes: 4 additions & 1 deletion tests/test_graph_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,10 @@
def check_solution_pandas(solution, candidates):
# Checks whether the solution (`pd.Series`) matches any of the list of
# candidates (containing `dict`)
if any(solution.to_dict() == c for c in candidates):
if any(
solution.reset_index(drop=True).equals(c.reset_index(drop=True))
for c in candidates
):
return True
return False

Expand Down
57 changes: 29 additions & 28 deletions tests/test_max_flow.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import unittest

import numpy as np
import pandas as pd

try:
import networkx as nx
Expand Down Expand Up @@ -31,23 +32,25 @@ def test_pandas(self):
obj, sol = max_flow(edge_data, 0, 5)
sol = sol[sol > 0]
self.assertEqual(obj, self.expected_max_flow)
candidate = {
(0, 1): 1.0,
(0, 2): 2.0,
(1, 3): 1.0,
(2, 3): 1.0,
(2, 4): 1.0,
(3, 5): 2.0,
(4, 5): 1.0,
}
candidate2 = {
(0, 1): 1.0,
(0, 2): 2.0,
(1, 3): 1.0,
(2, 4): 2.0,
(3, 5): 1.0,
(4, 5): 2.0,
}

candidate = pd.DataFrame(
{
"source": [np.NaN, np.NaN, 1.0, 2.0, 2.0, 3.0, 4.0],
"target": [1, 2, 3, 3, 4, 5, 5],
"capacity": [2, 2, 1, 1, 2, 2, 2],
"cost": [np.NaN] * 7,
"flow": [1.0, 2.0, 1.0, 1.0, 1.0, 2.0, 1.0],
}
)
candidate2 = pd.DataFrame(
{
"source": [np.NaN, np.NaN, 1.0, 2.0, 3.0, 4.0],
"target": [1, 2, 3, 4, 5, 5],
"capacity": [2, 2, 1, 2, 2, 2],
"cost": [np.NaN] * 6,
"flow": [1.0, 2.0, 1.0, 2.0, 1.0, 2.0],
}
)
self.assertTrue(check_solution_pandas(sol, [candidate, candidate2]))

def test_empty_pandas(self):
Expand Down Expand Up @@ -118,18 +121,16 @@ def setUp(self):
def test_pandas(self):
edge_data, _ = load_graph2_pandas()
obj, sol = max_flow(edge_data, 0, 4)
sol = sol[sol > 0]
sol = sol[sol > 0].drop(columns=["cost"])
self.assertEqual(obj, self.expected_max_flow)
candidate = {
(0, 1): 15.0,
(0, 2): 8.0,
(1, 3): 4.0,
(1, 2): 1.0,
(1, 4): 10.0,
(2, 3): 4.0,
(2, 4): 5.0,
(3, 4): 8.0,
}
candidate = pd.DataFrame(
{
"source": [np.NaN, np.NaN, 1.0, 1.0, 1.0, 2.0, 2.0, 3.0],
"target": [1, 2, 3, 2, 4, 3, 4, 4],
"capacity": [15, 8, 4, 20, 10, 15, 5, 20],
"flow": [15.0, 8.0, 4.0, 1.0, 10.0, 4.0, 5.0, 8.0],
}
)
self.assertTrue(check_solution_pandas(sol, [candidate]))

def test_scipy(self):
Expand Down
58 changes: 34 additions & 24 deletions tests/test_min_cost_flow.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@

def load_graph2_pandas():
return (
pd.read_csv(io.StringIO(edge_data2)).set_index(["source", "target"]),
pd.read_csv(io.StringIO(edge_data2)),
pd.read_csv(io.StringIO(node_data2), index_col=0),
)

Expand All @@ -63,10 +63,19 @@ class TestMinCostFlow(unittest.TestCase):
def test_pandas(self):
edge_data, node_data = datasets.simple_graph_pandas()
cost, sol = mcf.min_cost_flow_pandas(edge_data, node_data)
sol = sol[sol > 0]
sol = sol[sol["flow"] > 0]
self.assertEqual(cost, 31)
candidate = {(0, 1): 1.0, (0, 2): 1.0, (1, 3): 1.0, (2, 4): 2.0, (4, 5): 2.0}
self.assertIsInstance(sol, pd.Series)
candidate = pd.DataFrame(
{
"source": [0, 0, 1, 2, 4],
"target": [1, 2, 3, 4, 5],
"capacity": [2, 2, 1, 2, 2],
"cost": [9, 7, 1, 6, 1],
"flow": [1.0, 1.0, 1.0, 2.0, 2.0],
}
)

self.assertIsInstance(sol, pd.DataFrame)
self.assertTrue(check_solution_pandas(sol, [candidate]))

def test_infeasible(self):
Expand Down Expand Up @@ -128,27 +137,28 @@ class TestMinCostFlow2(unittest.TestCase):
def test_pandas(self):
edge_data, node_data = load_graph2_pandas()
cost, sol = mcf.min_cost_flow_pandas(edge_data, node_data)
sol = sol[sol > 0]
sol = sol[sol["flow"] > 0]
self.assertEqual(cost, 150)
candidate = {
(0, 1): 12.0,
(0, 2): 8.0,
(1, 3): 4.0,
(1, 2): 8.0,
(2, 3): 15.0,
(2, 4): 1.0,
(3, 4): 14.0,
}
candidate2 = {
(0, 1): 12.0,
(0, 2): 8.0,
(1, 3): 4.0,
(1, 2): 8.0,
(2, 3): 11.0,
(2, 4): 5.0,
(3, 4): 10.0,
}
self.assertTrue(check_solution_pandas(sol, [candidate, candidate2]))
candidate1 = pd.DataFrame(
{
"source": [0, 0, 1, 1, 2, 2, 3],
"target": [1, 2, 3, 2, 3, 4, 4],
"capacity": [15, 8, 4, 20, 15, 5, 20],
"cost": [4, 4, 2, 2, 1, 3, 2],
"flow": [12.0, 8.0, 4.0, 8.0, 15.0, 1.0, 14.0],
}
)
candidate2 = pd.DataFrame(
{
"source": [0, 0, 1, 1, 2, 2, 3],
"target": [1, 2, 3, 2, 3, 4, 4],
"capacity": [15, 8, 4, 20, 15, 5, 20],
"cost": [4, 4, 2, 2, 1, 3, 2],
"flow": [12.0, 8.0, 4.0, 8.0, 11.0, 5.0, 10.0],
}
)

self.assertTrue(check_solution_pandas(sol, [candidate1, candidate2]))

def test_scipy(self):
G, cap, cost, demands = load_graph2_scipy()
Expand Down
2 changes: 1 addition & 1 deletion tests/test_min_cut.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ def setUp(self):
{"source": 3, "target": 6, "capacity": 2.0},
{"source": 5, "target": 6, "capacity": 3.0},
]
).set_index(["source", "target"])
)

def test_pandas(self):
res = min_cut(self.arc_data, 0, 6)
Expand Down
Loading