Skip to content

Commit

Permalink
Merge pull request #74 from intelligent-systems-course/minimax
Browse files Browse the repository at this point in the history
Implementation of minimax.
  • Loading branch information
miselico authored Nov 14, 2023
2 parents 2802313 + d525cd0 commit 742f014
Show file tree
Hide file tree
Showing 5 changed files with 611 additions and 5 deletions.
3 changes: 2 additions & 1 deletion src/schnapsen/bots/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,6 @@
from .rdeep import RdeepBot
from .ml_bot import MLDataBot, MLPlayingBot, train_ML_model
from .gui.guibot import SchnapsenServer
from .minimax import MiniMaxBot

# Names exported by this package when a caller does `from ... import *`.
# NOTE(review): the first assignment below is immediately overwritten by the second one,
# which lists the same names plus "MiniMaxBot" - confirm the first line is leftover and can go.
__all__ = ["RandBot", "AlphaBetaBot", "RdeepBot", "MLDataBot", "MLPlayingBot", "train_ML_model", "SchnapsenServer"]
__all__ = ["RandBot", "AlphaBetaBot", "RdeepBot", "MLDataBot", "MLPlayingBot", "train_ML_model", "SchnapsenServer", "MiniMaxBot"]
130 changes: 126 additions & 4 deletions src/schnapsen/bots/alphabeta.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,133 @@
from typing import Optional

from schnapsen.game import Bot, Move, PlayerPerspective
from schnapsen.game import (
Bot,
Move,
PlayerPerspective,
GamePhase,
GameState,
FollowerPerspective,
LeaderPerspective,
GamePlayEngine,
SchnapsenTrickScorer,
)


class AlphaBetaBot(Bot):
    """
    A bot playing the alphabeta strategy in the second phase of the game.

    It cannot be used for the first phase. What you can do is delegate from your own bot
    to this one in the second phase.
    This would look something like:

    class YourBot(Bot):
        def __init__(self):
            self.delegate_phase2 = AlphaBetaBot()

        def get_move(self, state: PlayerPerspective, leader_move: Optional[Move]) -> Move:
            if state.get_phase() == GamePhase.TWO:
                return self.delegate_phase2.get_move(state, leader_move)
            else:
                # The logic of your bot
    """

    def __init__(self, name: Optional[str] = None) -> None:
        """
        Create the bot.

        :param name: Optional name of the bot, passed on unchanged to the Bot base class.
        """
        super().__init__(name)

    def get_move(self, perspective: PlayerPerspective, leader_move: Optional[Move]) -> Move:
        """
        Pick the move with the best alphabeta value for the player owning the perspective.

        :param perspective: The perspective of the player who has to move; must be in phase two.
        :param leader_move: The move already played by the leader, or None if we are the leader.
        :returns: The move found by the search started with maximizing=True.
        :raises AssertionError: if the game is not in its second phase.
        """
        assert (perspective.get_phase() == GamePhase.TWO), "AlphaBetaBot can only work in the second phase of the game."
        _, move = self.value(
            perspective.get_state_in_phase_two(),
            perspective.get_engine(),
            leader_move=leader_move,
            maximizing=True,
        )
        return move

    def value(
        self,
        state: GameState,
        engine: GamePlayEngine,
        leader_move: Optional[Move],
        maximizing: bool,
        alpha: float = float("-inf"),
        beta: float = float("inf"),
    ) -> tuple[float, Move]:
        """
        Get the score and the corresponding move which either maximizes or minimizes the objective,
        skipping moves which can no longer influence the result (alphabeta pruning).

        The score of a finished game is the number of game points reported by the trick scorer,
        counted positive when the maximizing side wins and negative when the minimizing side wins.

        :param state: The state of the game to search from.
        :param engine: The engine used to determine valid moves and to play tricks.
        :param leader_move: The move the leader already played in the current trick,
            or None when the player to move is the leader.
        :param maximizing: True if the player to move tries to maximize the score, False if it tries to minimize it.
        :param alpha: The best score the maximizing side is already guaranteed higher up in the search.
        :param beta: The best score the minimizing side is already guaranteed higher up in the search.
        :returns: A tuple with the best score found and the move leading to it.
        :raises AssertionError: if no move was selected, which happens when there are no valid moves.
        """
        my_perspective: PlayerPerspective
        if leader_move is None:
            # we are the leader
            my_perspective = LeaderPerspective(state, engine)
        else:
            my_perspective = FollowerPerspective(state, engine, leader_move)
        valid_moves = my_perspective.valid_moves()

        best_value = float("-inf") if maximizing else float("inf")
        best_move: Optional[Move] = None
        for move in valid_moves:
            if leader_move is None:
                # we are leader, call self to get the follower to play
                value, _ = self.value(
                    state=state,
                    engine=engine,
                    leader_move=move,
                    maximizing=not maximizing,
                    alpha=alpha,
                    beta=beta,
                )
            else:
                # We are the follower. We need to complete the trick and then call self to play the next trick,
                # with the correct maximizing, depending on who is the new leader
                leader = OneFixedMoveBot(leader_move)
                follower = OneFixedMoveBot(move)
                new_game_state = engine.play_one_trick(game_state=state, new_leader=leader, new_follower=follower)
                winning_info = SchnapsenTrickScorer().declare_winner(new_game_state)
                if winning_info:
                    # The game ended with this trick: score it from the point of view of the maximizing side.
                    winner = winning_info[0].implementation
                    points = winning_info[1]
                    follower_wins = winner == follower

                    if not follower_wins:
                        points = -points
                    if not maximizing:
                        points = -points
                    value = points
                else:
                    # play the next round by doing a recursive call
                    leader_stayed = leader == new_game_state.leader.implementation

                    if leader_stayed:
                        # At the next step, the leader is our opponent, and it will be doing the opposite of what we do.
                        next_maximizing = not maximizing
                    else:  # if not leader_stayed
                        # At the next step we will have become the leader, so we will keep doing what we did
                        next_maximizing = maximizing
                    # implementation note: the previous two cases could be written with a xor, but this seemed more readable
                    value, _ = self.value(new_game_state, engine, None, next_maximizing, alpha, beta)
            if maximizing:
                if value > best_value:
                    best_move = move
                    best_value = value
                alpha = max(alpha, best_value)  # alphabeta pruning
                if beta <= alpha:
                    break
            else:
                if value < best_value:
                    best_move = move
                    best_value = value
                beta = min(beta, best_value)  # alphabeta pruning
                if beta <= alpha:
                    break
        # We are sure the best_move can no longer be None. We assert to make sure we did not make a logical mistake
        assert best_move is not None
        return best_value, best_move


class OneFixedMoveBot(Bot):
    """
    A bot which plays exactly one move, fixed at construction time.

    Asking it for a second move fails with an AssertionError.
    """

    def __init__(self, move: Move) -> None:
        """
        :param move: The single move this bot will play.
        """
        super().__init__()
        # Set to None once the move has been handed out, so a second request is detected.
        self.first_move: Optional[Move] = move

    def get_move(self, perspective: PlayerPerspective, leader_move: Optional[Move]) -> Move:
        """
        Return the fixed move; both arguments are ignored.

        :param perspective: Not used.
        :param leader_move: Not used.
        :returns: The move given to the constructor.
        :raises AssertionError: if the move has already been played.
        """
        assert self.first_move is not None, "This bot can only play one move, after that it ends"
        move = self.first_move
        self.first_move = None
        return move
130 changes: 130 additions & 0 deletions src/schnapsen/bots/minimax.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
from typing import Optional

from schnapsen.game import (
Bot,
Move,
PlayerPerspective,
GamePhase,
GameState,
FollowerPerspective,
LeaderPerspective,
GamePlayEngine,
SchnapsenTrickScorer,
)


class MiniMaxBot(Bot):
    """
    A bot playing the minimax strategy in the second phase of the game.

    It cannot be used for the first phase. What you can do is delegate from your own bot to this one in the second phase.
    This would look something like:

    class YourBot(Bot):
        def __init__(self):
            self.delegate_phase2 = MiniMaxBot()

        def get_move(self, state: PlayerPerspective, leader_move: Optional[Move]) -> Move:
            if state.get_phase() == GamePhase.TWO:
                return self.delegate_phase2.get_move(state, leader_move)
            else:
                # The logic of your bot
    """

    def __init__(self, name: Optional[str] = None) -> None:
        """Create the bot.

        Args:
            name (Optional[str]): Optional name of the bot, passed on unchanged to the Bot base class.
        """
        super().__init__(name)

    def get_move(self, perspective: PlayerPerspective, leader_move: Optional[Move]) -> Move:
        """Pick the move with the best minimax value for the player owning the perspective.

        Args:
            perspective (PlayerPerspective): The perspective of the player who has to move; must be in phase two.
            leader_move (Optional[Move]): The move already played by the leader, or None if we are the leader.

        Returns:
            Move: The move found by the search started with maximizing=True.

        Raises:
            AssertionError: If the game is not in its second phase.
        """
        assert (perspective.get_phase() == GamePhase.TWO), "MiniMaxBot can only work in the second phase of the game."
        _, move = self.value(
            perspective.get_state_in_phase_two(),
            perspective.get_engine(),
            leader_move=leader_move,
            maximizing=True,
        )
        return move

    def value(
        self,
        state: GameState,
        engine: GamePlayEngine,
        leader_move: Optional[Move],
        maximizing: bool,
    ) -> tuple[float, Move]:
        """Get the score and the corresponding move which either maximizes or minimizes the objective.

        The score of a finished game is the number of game points reported by the trick scorer,
        counted positive when the maximizing side wins and negative when the minimizing side wins.

        Args:
            state (GameState): The current state of the game
            engine (GamePlayEngine): The engine used to determine valid moves and to play tricks
            leader_move (Optional[Move]): The move the leader already played in the current trick,
                or None when the player to move is the leader
            maximizing (bool): True if the player to move tries to maximize the score,
                False if it tries to minimize it

        Returns:
            tuple[float, Move]: The best score found and the move leading to it

        Raises:
            AssertionError: If no move was selected, which happens when there are no valid moves.
        """
        my_perspective: PlayerPerspective
        if leader_move is None:
            # we are the leader
            my_perspective = LeaderPerspective(state, engine)
        else:
            my_perspective = FollowerPerspective(state, engine, leader_move)
        valid_moves = my_perspective.valid_moves()

        best_value = float("-inf") if maximizing else float("inf")
        best_move: Optional[Move] = None
        for move in valid_moves:
            if leader_move is None:
                # we are leader, call self to get the follower to play
                value, _ = self.value(
                    state=state,
                    engine=engine,
                    leader_move=move,
                    maximizing=not maximizing,
                )
            else:
                # We are the follower. We need to complete the trick and then call self to play the next trick,
                # with the correct maximizing, depending on who is the new leader
                leader = OneFixedMoveBot(leader_move)
                follower = OneFixedMoveBot(move)
                new_game_state = engine.play_one_trick(game_state=state, new_leader=leader, new_follower=follower)
                winning_info = SchnapsenTrickScorer().declare_winner(new_game_state)
                if winning_info:
                    # The game ended with this trick: score it from the point of view of the maximizing side.
                    winner = winning_info[0].implementation
                    points = winning_info[1]
                    follower_wins = winner == follower

                    if not follower_wins:
                        points = -points
                    if not maximizing:
                        points = -points
                    value = points
                else:
                    # play the next round by doing a recursive call
                    leader_stayed = leader == new_game_state.leader.implementation

                    if leader_stayed:
                        # At the next step, the leader is our opponent, and it will be doing the opposite of what we do.
                        next_maximizing = not maximizing
                    else:  # if not leader_stayed
                        # At the next step we will have become the leader, so we will keep doing what we did
                        next_maximizing = maximizing
                    # implementation note: the previous two cases could be written with a xor, but this seemed more readable
                    value, _ = self.value(new_game_state, engine, None, next_maximizing)
            if maximizing and value > best_value:
                best_move = move
                best_value = value
            elif not maximizing and value < best_value:
                best_move = move
                best_value = value
        # We are sure the best_move can no longer be None. We assert to make sure we did not make a logical mistake
        assert best_move is not None
        return best_value, best_move


class OneFixedMoveBot(Bot):
    """
    A bot which plays exactly one move, fixed at construction time.

    Asking it for a second move fails with an AssertionError.
    """

    def __init__(self, move: Move) -> None:
        """
        Args:
            move (Move): The single move this bot will play.
        """
        super().__init__()
        # Set to None once the move has been handed out, so a second request is detected.
        self.first_move: Optional[Move] = move

    def get_move(self, perspective: PlayerPerspective, leader_move: Optional[Move]) -> Move:
        """Return the fixed move; both arguments are ignored.

        Args:
            perspective (PlayerPerspective): Not used.
            leader_move (Optional[Move]): Not used.

        Returns:
            Move: The move given to the constructor.

        Raises:
            AssertionError: If the move has already been played.
        """
        assert self.first_move is not None, "This bot can only play one move, after that it ends"
        move = self.first_move
        self.first_move = None
        return move
52 changes: 52 additions & 0 deletions src/schnapsen/game.py
Original file line number Diff line number Diff line change
Expand Up @@ -1525,6 +1525,40 @@ def play_game(self, bot1: Bot, bot2: Bot, rng: Random) -> tuple[Bot, int, Score]
winner, points, score = self.play_game_from_state(game_state=game_state, leader_move=None)
return winner, points, score

def get_random_phase_two_state(self, rng: Random) -> GameState:
    """
    Produce a game state in the second phase by letting random bots play from a fresh deal.

    A new deck is shuffled and dealt, and at most 5 tricks are played with bots choosing
    random valid moves. A deal is discarded and a new attempt is made when the trick scorer
    already declares a winner, or when the reached state is not in phase two.

    :param rng: The random number generator used for shuffling and for the moves of the bots.
    :returns: A GameState which is in phase two and for which no winner has been declared.
    """

    class RandBot(Bot):
        # Plays a valid move picked with the random number generator handed to the constructor.
        def __init__(self, rand: Random, name: Optional[str] = None) -> None:
            super().__init__(name)
            self.rng = rand

        def get_move(
            self,
            perspective: PlayerPerspective,
            leader_move: Optional[Move],
        ) -> Move:
            options: list[Move] = perspective.valid_moves()
            return self.rng.choice(options)

    while True:
        fresh_deck = self.deck_generator.get_initial_deck()
        deck = self.deck_generator.shuffle_deck(fresh_deck, rng)
        leader_hand, follower_hand, talon = self.hand_generator.generateHands(deck)
        start_state = GameState(
            leader=BotState(implementation=RandBot(rand=rng), hand=leader_hand),
            follower=BotState(implementation=RandBot(rand=rng), hand=follower_hand),
            talon=talon,
            previous=None
        )
        candidate, _ = self.play_at_most_n_tricks(start_state, RandBot(rand=rng), RandBot(rand=rng), 5)
        if self.trick_scorer.declare_winner(candidate):
            # The game was already decided while playing the tricks; try again with a new deal.
            continue
        if candidate.game_phase() == GamePhase.TWO:
            return candidate

def play_game_from_state_with_new_bots(self, game_state: GameState, new_leader: Bot, new_follower: Bot, leader_move: Optional[Move]) -> tuple[Bot, int, Score]:
"""
Continue a game which might have started before with other bots, with new bots.
Expand Down Expand Up @@ -1571,6 +1605,23 @@ def play_game_from_state(self, game_state: GameState, leader_move: Optional[Move

return winner.implementation, points, winner.score

def play_one_trick(self, game_state: GameState, new_leader: Bot, new_follower: Bot) -> GameState:
    """
    Plays exactly one trick on a game which might have started before.
    The new bots are new_leader and new_follower.

    This method does not make changes to the provided game_state.

    :param game_state: The state of the game to start from
    :param new_leader: The bot which will take the leader role in the game.
    :param new_follower: The bot which will take the follower in the game.
    :returns: The GameState reached after playing the trick.
    :raises AssertionError: if the number of tricks reported as played is not exactly one.
    """
    state, rounds = self.play_at_most_n_tricks(game_state, new_leader, new_follower, 1)
    # play_at_most_n_tricks may play fewer tricks than requested; this method promises exactly one.
    assert rounds == 1, f"Expected exactly one trick to be played, but {rounds} were played"
    return state

def play_at_most_n_tricks(self, game_state: GameState, new_leader: Bot, new_follower: Bot, n: int) -> tuple[GameState, int]:
"""
Plays up to n tricks (including the one started by the leader, if provided) on a game which might have started before.
Expand All @@ -1582,6 +1633,7 @@ def play_at_most_n_tricks(self, game_state: GameState, new_leader: Bot, new_foll
:param game_state: The state of the game to start from
:param new_leader: The bot which will take the leader role in the game.
:param new_follower: The bot which will take the follower in the game.
:param n: the maximum number of tricks to play
:returns: The GameState reached and the number of steps actually taken.
"""
Expand Down
Loading

0 comments on commit 742f014

Please sign in to comment.