Merge pull request #74 from intelligent-systems-course/minimax

Implementation of minimax.
intelligent-systems-course · Nov 14, 2023 · 742f014 · 742f014
2 parents 2802313 + d525cd0
commit 742f014
Show file tree

Hide file tree

Showing 5 changed files with 611 additions and 5 deletions.
diff --git a/src/schnapsen/bots/__init__.py b/src/schnapsen/bots/__init__.py
@@ -6,5 +6,6 @@
 from .rdeep import RdeepBot
 from .ml_bot import MLDataBot, MLPlayingBot, train_ML_model
 from .gui.guibot import SchnapsenServer
+from .minimax import MiniMaxBot
 
-__all__ = ["RandBot", "AlphaBetaBot", "RdeepBot", "MLDataBot", "MLPlayingBot", "train_ML_model", "SchnapsenServer"]
+__all__ = ["RandBot", "AlphaBetaBot", "RdeepBot", "MLDataBot", "MLPlayingBot", "train_ML_model", "SchnapsenServer", "MiniMaxBot"]
diff --git a/src/schnapsen/bots/alphabeta.py b/src/schnapsen/bots/alphabeta.py
@@ -1,11 +1,133 @@
 from typing import Optional
 
-from schnapsen.game import Bot, Move, PlayerPerspective
+from schnapsen.game import (
+    Bot,
+    Move,
+    PlayerPerspective,
+    GamePhase,
+    GameState,
+    FollowerPerspective,
+    LeaderPerspective,
+    GamePlayEngine,
+    SchnapsenTrickScorer,
+)
 
 
 class AlphaBetaBot(Bot):
-    def __init__(self, name: Optional[str] = None) -> None:
-        super().__init__(name)
+    """
+    A bot playing the alphabeta strategy in the second phase of the game.
+    It cannot be used for the first phase. What you can do is delegate from your own bot
+    to this one in the second phase.
+
+    This would look something like:
+
+    class YourBot(Bot):
+        def __init__(self):
+            self.delegate_phase2 = AlphaBetaBot()
+        def get_move(self, state: PlayerPerspective, leader_move: Optional[Move]) -> Move:
+            if state.get_phase() == GamePhase.TWO:
+                return self.delegate_phase2.get_move(state, leader_move)
+            else:
+                # The logic of your bot
+    """
+
+    def __init__(self) -> None:
+        super().__init__()
+
+    def get_move(self, perspective: PlayerPerspective, leader_move: Optional[Move]) -> Move:
+        assert (perspective.get_phase() == GamePhase.TWO), "AlphaBetaBot can only work in the second phase of the game."
+        _, move = self.value(
+            perspective.get_state_in_phase_two(),
+            perspective.get_engine(),
+            leader_move=leader_move,
+            maximizing=True,
+        )
+        return move
+
+    def value(
+        self,
+        state: GameState,
+        engine: GamePlayEngine,
+        leader_move: Optional[Move],
+        maximizing: bool,
+        alpha: float = float("-inf"),
+        beta: float = float("inf"),
+    ) -> tuple[float, Move]:
+        my_perspective: PlayerPerspective
+        if leader_move is None:
+            # we are the leader
+            my_perspective = LeaderPerspective(state, engine)
+        else:
+            my_perspective = FollowerPerspective(state, engine, leader_move)
+        valid_moves = my_perspective.valid_moves()
+
+        best_value = float("-inf") if maximizing else float("inf")
+        best_move: Optional[Move] = None
+        for move in valid_moves:
+            leader: Bot
+            follower: Bot
+            if leader_move is None:
+                # we are leader, call self to get the follower to play
+                value, _ = self.value(
+                    state=state,
+                    engine=engine,
+                    leader_move=move,
+                    maximizing=not maximizing,
+                    alpha=alpha,
+                    beta=beta,
+                )
+            else:
+                # We are the follower. We need to complete the trick and then call self to play the next trick, with the correct maximizing, depending on who is the new leader
+                leader = OneFixedMoveBot(leader_move)
+                follower = OneFixedMoveBot(move)
+                new_game_state = engine.play_one_trick(game_state=state, new_leader=leader, new_follower=follower)
+                winning_info = SchnapsenTrickScorer().declare_winner(new_game_state)
+                if winning_info:
+                    winner = winning_info[0].implementation
+                    points = winning_info[1]
+                    follower_wins = winner == follower
+
+                    if not follower_wins:
+                        points = -points
+                    if not maximizing:
+                        points = -points
+                    value = points
+                else:
+                    # play the next round by doing a recursive call
+                    leader_stayed = leader == new_game_state.leader.implementation
+
+                    if leader_stayed:
+                        # At the next step, the leader is our opponent, and it will be doing the opposite of what we do.
+                        next_maximizing = not maximizing
+                    else:  # if not leader_stayed
+                        # At the next step we will have become the leader, so we will keep doing what we did
+                        next_maximizing = maximizing
+                    # implementation note: the previous two cases could be written with a xor, but this seemed more readable
+                    value, _ = self.value(new_game_state, engine, None, next_maximizing, alpha, beta)
+            if maximizing:
+                if value > best_value:
+                    best_move = move
+                    best_value = value
+                alpha = max(alpha, best_value)  # alphabeta pruning
+                if beta <= alpha:
+                    break
+            else:
+                if value < best_value:
+                    best_move = move
+                    best_value = value
+                beta = min(beta, best_value)  # alphabeta pruning
+                if beta <= alpha:
+                    break
+        assert best_move  # We are sure the best_move can no longer be None. We assert to make sure we did not make a logical mistake
+        return best_value, best_move
+
+
+class OneFixedMoveBot(Bot):
+    def __init__(self, move: Move) -> None:
+        self.first_move: Optional[Move] = move
 
     def get_move(self, perspective: PlayerPerspective, leader_move: Optional[Move]) -> Move:
-        raise NotImplementedError()
+        assert self.first_move, "This bot can only play one move, after that it ends"
+        move = self.first_move
+        self.first_move = None
+        return move
diff --git a/src/schnapsen/bots/minimax.py b/src/schnapsen/bots/minimax.py
@@ -0,0 +1,130 @@
+from typing import Optional
+
+from schnapsen.game import (
+    Bot,
+    Move,
+    PlayerPerspective,
+    GamePhase,
+    GameState,
+    FollowerPerspective,
+    LeaderPerspective,
+    GamePlayEngine,
+    SchnapsenTrickScorer,
+)
+
+
+class MiniMaxBot(Bot):
+    """
+    A bot playing the minimax strategy in the second phase of the game.
+    It cannot be used for the first phase. What you can do is delegate from your own bot to this one in the second phase.
+    This would look something like:
+
+    class YourBot(Bot):
+        def __init__(self):
+            self.delegate_phase2 = MiniMaxBot()
+        def get_move(self, state: PlayerPerspective, leader_move: Optional[Move]) -> Move:
+            if state.get_phase() == GamePhase.TWO:
+                return self.delegate_phase2.get_move(state, leader_move)
+            else:
+                # The logic of your bot
+    """
+
+    def __init__(self) -> None:
+        super().__init__()
+
+    def get_move(self, perspective: PlayerPerspective, leader_move: Optional[Move]) -> Move:
+        assert (perspective.get_phase() == GamePhase.TWO), "MiniMaxBot can only work in the second phase of the game."
+        _, move = self.value(
+            perspective.get_state_in_phase_two(),
+            perspective.get_engine(),
+            leader_move=leader_move,
+            maximizing=True,
+        )
+        return move
+
+    def value(
+        self,
+        state: GameState,
+        engine: GamePlayEngine,
+        leader_move: Optional[Move],
+        maximizing: bool,
+    ) -> tuple[float, Move]:
+        """Get the score and the corresponding move which either maximizes or minimizes the objective.
+
+        Args:
+            state (GameState): The current state of the game
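+            engine (GamePlayEngine): The engine used to create the perspectives and to play out each trick
+            leader_move (Optional[Move]): The move already played by the leader, or None if the player to move is the leader
+            maximizing (bool): True if the player to move maximizes the score, False if it minimizes it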
+
+        Returns:
+            tuple[float, Move]: The best value found and the move which leads to it
+        """
+        my_perspective: PlayerPerspective
+        if leader_move is None:
+            # we are the leader
+            my_perspective = LeaderPerspective(state, engine)
+        else:
+            my_perspective = FollowerPerspective(state, engine, leader_move)
+        valid_moves = my_perspective.valid_moves()
+
+        best_value = float("-inf") if maximizing else float("inf")
+        best_move: Optional[Move] = None
+        for move in valid_moves:
+            leader: Bot
+            follower: Bot
+            if leader_move is None:
+                # we are leader, call self to get the follower to play
+                value, _ = self.value(
+                    state=state,
+                    engine=engine,
+                    leader_move=move,
+                    maximizing=not maximizing,
+                )
+            else:
+                # We are the follower. We need to complete the trick and then call self to play the next trick, with the correct maximizing, depending on who is the new leader
+                leader = OneFixedMoveBot(leader_move)
+                follower = OneFixedMoveBot(move)
+                new_game_state = engine.play_one_trick(game_state=state, new_leader=leader, new_follower=follower)
+                winning_info = SchnapsenTrickScorer().declare_winner(new_game_state)
+                if winning_info:
+                    winner = winning_info[0].implementation
+                    points = winning_info[1]
+                    follower_wins = winner == follower
+
+                    if not follower_wins:
+                        points = -points
+                    if not maximizing:
+                        points = -points
+                    value = points
+                else:
+                    # play the next round by doing a recursive call
+                    leader_stayed = leader == new_game_state.leader.implementation
+
+                    if leader_stayed:
+                        # At the next step, the leader is our opponent, and it will be doing the opposite of what we do.
+                        next_maximizing = not maximizing
+                    else:  # if not leader_stayed
+                        # At the next step we will have become the leader, so we will keep doing what we did
+                        next_maximizing = maximizing
+                    # implementation note: the previous two cases could be written with a xor, but this seemed more readable
+                    value, _ = self.value(new_game_state, engine, None, next_maximizing)
+            if maximizing and value > best_value:
+                best_move = move
+                best_value = value
+            elif not maximizing and value < best_value:
+                best_move = move
+                best_value = value
+        assert best_move  # We are sure the best_move can no longer be None. We assert to make sure we did not make a logical mistake
+        return best_value, best_move
+
+
+class OneFixedMoveBot(Bot):
+    def __init__(self, move: Move) -> None:
+        self.first_move: Optional[Move] = move
+
+    def get_move(self, perspective: PlayerPerspective, leader_move: Optional[Move]) -> Move:
+        assert self.first_move, "This bot can only play one move, after that it ends"
+        move = self.first_move
+        self.first_move = None
+        return move
diff --git a/src/schnapsen/game.py b/src/schnapsen/game.py
@@ -1525,6 +1525,40 @@ def play_game(self, bot1: Bot, bot2: Bot, rng: Random) -> tuple[Bot, int, Score]
         winner, points, score = self.play_game_from_state(game_state=game_state, leader_move=None)
         return winner, points, score
 
+    def get_random_phase_two_state(self, rng: Random) -> GameState:
+        class RandBot(Bot):
+            def __init__(self, rand: Random, name: Optional[str] = None) -> None:
+                super().__init__(name)
+                self.rng = rand
+
+            def get_move(
+                self,
+                perspective: PlayerPerspective,
+                leader_move: Optional[Move],
+            ) -> Move:
+                moves: list[Move] = perspective.valid_moves()
+                move = self.rng.choice(moves)
+                return move
+
+        while True:
+            cards = self.deck_generator.get_initial_deck()
+            shuffled = self.deck_generator.shuffle_deck(cards, rng)
+            hand1, hand2, talon = self.hand_generator.generateHands(shuffled)
+            leader_state = BotState(implementation=RandBot(rand=rng), hand=hand1)
+            follower_state = BotState(implementation=RandBot(rand=rng), hand=hand2)
+            game_state = GameState(
+                leader=leader_state,
+                follower=follower_state,
+                talon=talon,
+                previous=None
+            )
+            second_phase_state, _ = self.play_at_most_n_tricks(game_state, RandBot(rand=rng), RandBot(rand=rng), 5)
+            winner = self.trick_scorer.declare_winner(second_phase_state)
+            if winner:
+                continue
+            if second_phase_state.game_phase() == GamePhase.TWO:
+                return second_phase_state
+
     def play_game_from_state_with_new_bots(self, game_state: GameState, new_leader: Bot, new_follower: Bot, leader_move: Optional[Move]) -> tuple[Bot, int, Score]:
         """
         Continue a game  which might have started before with other bots, with new bots.
@@ -1571,6 +1605,23 @@ def play_game_from_state(self, game_state: GameState, leader_move: Optional[Move
 
         return winner.implementation, points, winner.score
 
+    def play_one_trick(self, game_state: GameState, new_leader: Bot, new_follower: Bot) -> GameState:
+        """
+        Plays one trick (including the one started by the leader, if provided) on a game which might have started before.
+        The new bots are new_leader and new_follower.
+
+        This method does not make changes to the provided game_state.
+
+        :param game_state: The state of the game to start from
+        :param new_leader: The bot which will take the leader role in the game.
+        :param new_follower: The bot which will take the follower role in the game.
+
+        :returns: The GameState reached after playing the trick.
+        """
+        state, rounds = self.play_at_most_n_tricks(game_state, new_leader, new_follower, 1)
+        assert rounds == 1
+        return state
+
     def play_at_most_n_tricks(self, game_state: GameState, new_leader: Bot, new_follower: Bot, n: int) -> tuple[GameState, int]:
         """
         Plays up to n tricks (including the one started by the leader, if provided) on a game which might have started before.
@@ -1582,6 +1633,7 @@ def play_at_most_n_tricks(self, game_state: GameState, new_leader: Bot, new_foll
         :param game_state: The state of the game to start from
         :param new_leader: The bot which will take the leader role in the game.
         :param new_follower: The bot which will take the follower in the game.
+        :param n: the maximum number of tricks to play
 
         :returns: The GameState reached and the number of steps actually taken.
         """