Skip to content

Commit

Permalink
Merge pull request #1123 from rezunli96:stackelberg
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 574116764
Change-Id: Ic1d18d4ecd7de9c54cf40cd8f09f7b6f00e32191
  • Loading branch information
lanctot committed Oct 17, 2023
2 parents 41bdb68 + 40aad58 commit 411ad80
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 30 deletions.
54 changes: 25 additions & 29 deletions open_spiel/python/algorithms/stackelberg_lp.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,9 @@
EC'06
"""

import cvxpy as cp
import numpy as np

from open_spiel.python.algorithms import lp_solver
from open_spiel.python.algorithms.projected_replicator_dynamics import _simplex_projection
from open_spiel.python.egt.utils import game_payoffs_array

Expand Down Expand Up @@ -51,39 +51,35 @@ def solve_stackelberg(game, is_first_leader=True):
follower_eq_strategy = None

for t in range(num_follower_strategies):
lp = lp_solver.LinearProgram(objective=lp_solver.OBJ_MAX)
for s in range(num_leader_strategies):
lp.add_or_reuse_variable("s_{}".format(s))
lp.set_obj_coeff("s_{}".format(s), leader_payoff[s, t])

p_s = cp.Variable(num_leader_strategies, nonneg=True)
constraints = [p_s <= 1, cp.sum(p_s) == 1]
for t_ in range(num_follower_strategies):
if t_ == t:
continue
lp.add_or_reuse_constraint("t_{}".format(t_), lp_solver.CONS_TYPE_GEQ)
for s in range(num_leader_strategies):
lp.set_cons_coeff("t_{}".format(t_), "s_{}".format(s),
follower_payoff[s, t] - follower_payoff[s, t_])
lp.set_cons_rhs("t_{}".format(t_), 0.0)
lp.add_or_reuse_constraint("sum_to_one", lp_solver.CONS_TYPE_EQ)
for s in range(num_leader_strategies):
lp.set_cons_coeff("sum_to_one", "s_{}".format(s), 1.0)
lp.set_cons_rhs("sum_to_one", 1.0)
try:
leader_strategy = np.array(lp.solve())
leader_strategy = _simplex_projection(
leader_strategy.reshape(-1)).reshape(-1, 1)
leader_value = leader_strategy.T.dot(leader_payoff)[0, t]
if leader_value > leader_eq_value:
leader_eq_strategy = leader_strategy
follower_eq_strategy = t
leader_eq_value = leader_value
follower_eq_value = leader_strategy.T.dot(follower_payoff)[0, t]
except: # pylint: disable=bare-except
constraints.append(
p_s @ follower_payoff[:, t_] <= p_s @ follower_payoff[:, t]
)
prob = cp.Problem(cp.Maximize(p_s @ leader_payoff[:, t]), constraints)
prob.solve()
p_s_value = p_s.value
if p_s_value is None:
continue
leader_strategy = _simplex_projection(p_s.value.reshape(-1)).reshape(-1, 1)
leader_value = leader_strategy.T.dot(leader_payoff)[0, t]
if leader_value > leader_eq_value:
leader_eq_strategy = leader_strategy
follower_eq_strategy = t
leader_eq_value = leader_value
follower_eq_value = leader_strategy.T.dot(follower_payoff)[0, t]

assert leader_eq_strategy is not None, p_mat
if is_first_leader:
return (leader_eq_strategy.reshape(-1), np.identity(
num_follower_strategies)[follower_eq_strategy],
leader_eq_value, follower_eq_value)
return (
leader_eq_strategy.reshape(-1),
np.identity(num_follower_strategies)[follower_eq_strategy],
leader_eq_value,
follower_eq_value,
)
else:
return (np.identity(num_follower_strategies)[follower_eq_strategy],
leader_eq_strategy.reshape(-1), follower_eq_value, leader_eq_value)
11 changes: 10 additions & 1 deletion open_spiel/python/algorithms/stackelberg_lp_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@
from open_spiel.python.egt.utils import game_payoffs_array
import pyspiel

# Numerical tolerance for tests: floating-point comparisons are allowed to be
# off by at most this amount (e.g. the leader's Stackelberg value may fall
# below the Nash value by no more than EPS).
EPS = 1e-6

# Game instances based on the Conitzer & Sandholm (EC'06) paper.
# Each game is paired with the commitment strategy (and value) handed to
# test_simple_games alongside it -- presumably the expected leader outcome
# from solve_stackelberg; confirm against the test's assertions.
game0 = pyspiel.create_matrix_game([[2, 4], [1, 3]], [[1, 0], [0, 1]])
commit_strategy0 = np.array([0.5, 0.5])
Expand All @@ -32,12 +35,18 @@
commit_strategy1 = np.array([1 / 3, 2 / 3])
commit_value1 = 4 / 3

# A game with a dominated strategy.
# NOTE(review): assuming the first matrix holds the leader's (row player's)
# payoffs and the second the follower's -- confirm against
# pyspiel.create_matrix_game. Under that reading the follower's second action
# weakly dominates its first (0 >= 0, 8 >= 1), and committing to the first row
# leaves the follower indifferent, so the leader can obtain 9.
game2 = pyspiel.create_matrix_game([[3, 9], [9, 1]], [[0, 0], [1, 8]])
commit_strategy2 = np.array([1.0, 0.0])
commit_value2 = 9.0


class StackelbergLPTest(parameterized.TestCase):

@parameterized.named_parameters(
("game0", game0, commit_strategy0, commit_value0),
("game1", game1, commit_strategy1, commit_value1),
("game2", game2, commit_strategy2, commit_value2),
)
def test_simple_games(self, game, commit_strategy, commit_value):
leader_eq_strategy, _, leader_eq_value, _ = solve_stackelberg(game)
Expand All @@ -53,7 +62,7 @@ def test_simple_games(self, game, commit_strategy, commit_value):
leader_nash_value = eq[0].reshape(1,
-1).dot(p_mat[0]).dot(eq[1].reshape(
-1, 1))
self.assertGreaterEqual(leader_eq_value, leader_nash_value)
self.assertGreaterEqual(leader_eq_value - leader_nash_value, -EPS)


if __name__ == "__main__":
Expand Down

0 comments on commit 411ad80

Please sign in to comment.