Cleaned up the implementation of minimax and alphabeta. Added one more test. Trigger CI
intelligent-systems-course · Nov 14, 2023 · d525cd0 · d525cd0
1 parent dabf4be
commit d525cd0
Show file tree

Hide file tree

Showing 4 changed files with 130 additions and 89 deletions.
diff --git a/src/schnapsen/bots/alphabeta.py b/src/schnapsen/bots/alphabeta.py
@@ -11,7 +11,7 @@
     GamePlayEngine,
     SchnapsenTrickScorer,
 )
-from .minimax import OneFixedMoveBot
+
 
 class AlphaBetaBot(Bot):
     """
@@ -35,16 +35,13 @@ def __init__(self) -> None:
         super().__init__()
 
     def get_move(self, perspective: PlayerPerspective, leader_move: Optional[Move]) -> Move:
-        assert (
-            perspective.get_phase() == GamePhase.TWO
-        ), "AlphaBetaBot can only work in the second phase of the game."
+        assert (perspective.get_phase() == GamePhase.TWO), "AlphaBetaBot can only work in the second phase of the game."
         _, move = self.value(
             perspective.get_state_in_phase_two(),
             perspective.get_engine(),
             leader_move=leader_move,
             maximizing=True,
         )
-        assert move
         return move
 
     def value(
@@ -55,7 +52,7 @@ def value(
         maximizing: bool,
         alpha: float = float("-inf"),
         beta: float = float("inf"),
-    ) -> tuple[float, Optional[Move]]:
+    ) -> tuple[float, Move]:
         my_perspective: PlayerPerspective
         if leader_move is None:
             # we are the leader
@@ -70,7 +67,7 @@ def value(
             leader: Bot
             follower: Bot
             if leader_move is None:
-                # we are leader,
+                # we are leader, call self to get the follower to play
                 value, _ = self.value(
                     state=state,
                     engine=engine,
@@ -80,20 +77,16 @@ def value(
                     beta=beta,
                 )
             else:
-                # We are the follower.
+                # We are the follower. We need to complete the trick and then call self to play the next trick, with the correct maximizing, depending on who is the new leader
                 leader = OneFixedMoveBot(leader_move)
                 follower = OneFixedMoveBot(move)
-                new_game_state, rounds = engine.play_at_most_n_tricks(
-                    game_state=state, new_leader=leader, new_follower=follower, n=1
-                )
-                assert rounds == 1
+                new_game_state = engine.play_one_trick(game_state=state, new_leader=leader, new_follower=follower)
                 winning_info = SchnapsenTrickScorer().declare_winner(new_game_state)
                 if winning_info:
                     winner = winning_info[0].implementation
                     points = winning_info[1]
                     follower_wins = winner == follower
-                    if not follower_wins:
-                        assert winner == leader
+
                     if not follower_wins:
                         points = -points
                     if not maximizing:
@@ -104,12 +97,13 @@ def value(
                     leader_stayed = leader == new_game_state.leader.implementation
 
                     if leader_stayed:
+                        # At the next step, the leader is our opponent, and it will be doing the opposite of what we do.
                         next_maximizing = not maximizing
                     else:  # if not leader_stayed
+                        # At the next step we will have become the leader, so we will keep doing what we did
                         next_maximizing = maximizing
-                    value, _ = self.value(
-                        new_game_state, engine, None, next_maximizing, alpha, beta
-                    )
+                    # implementation note: the previous two cases could be written with an xor, but this seemed more readable
+                    value, _ = self.value(new_game_state, engine, None, next_maximizing, alpha, beta)
             if maximizing:
                 if value > best_value:
                     best_move = move
@@ -124,5 +118,16 @@ def value(
                 beta = min(beta, best_value)  # alphabeta pruning
                 if beta <= alpha:
                     break
+        assert best_move  # We are sure the best_move can no longer be None. We assert to make sure we did not make a logical mistake
         return best_value, best_move
 
+
+class OneFixedMoveBot(Bot):
+    def __init__(self, move: Move) -> None:
+        self.first_move: Optional[Move] = move
+
+    def get_move(self, perspective: PlayerPerspective, leader_move: Optional[Move]) -> Move:
+        assert self.first_move, "This bot can only play one move, after that it ends"
+        move = self.first_move
+        self.first_move = None
+        return move
diff --git a/src/schnapsen/bots/minimax.py b/src/schnapsen/bots/minimax.py
@@ -12,6 +12,7 @@
     SchnapsenTrickScorer,
 )
 
+
 class MiniMaxBot(Bot):
     """
     A bot playing the minimax strategy in the second phase of the game.
@@ -39,7 +40,6 @@ def get_move(self, perspective: PlayerPerspective, leader_move: Optional[Move])
             leader_move=leader_move,
             maximizing=True,
         )
-        assert move
         return move
 
     def value(
@@ -48,7 +48,7 @@ def value(
         engine: GamePlayEngine,
         leader_move: Optional[Move],
         maximizing: bool,
-    ) -> tuple[float, Optional[Move]]:
+    ) -> tuple[float, Move]:
         """Get the score and the corresponding move which eithers maxmizes or minimizes the objective.
 
         Args:
@@ -74,7 +74,7 @@ def value(
             leader: Bot
             follower: Bot
             if leader_move is None:
-                # call self to get the follower to play
+                # we are leader, call self to get the follower to play
                 value, _ = self.value(
                     state=state,
                     engine=engine,
@@ -85,17 +85,13 @@ def value(
                 # We are the follower. We need to complete the trick and then call self to play the next trick, with the correct maximizing, depending on who is the new leader
                 leader = OneFixedMoveBot(leader_move)
                 follower = OneFixedMoveBot(move)
-                new_game_state, rounds = engine.play_at_most_n_tricks(
-                    game_state=state, new_leader=leader, new_follower=follower, n=1
-                )
-                assert rounds == 1
+                new_game_state = engine.play_one_trick(game_state=state, new_leader=leader, new_follower=follower)
                 winning_info = SchnapsenTrickScorer().declare_winner(new_game_state)
                 if winning_info:
                     winner = winning_info[0].implementation
                     points = winning_info[1]
                     follower_wins = winner == follower
-                    if not follower_wins:
-                        assert winner == leader
+
                     if not follower_wins:
                         points = -points
                     if not maximizing:
@@ -119,6 +115,7 @@ def value(
             elif not maximizing and value < best_value:
                 best_move = move
                 best_value = value
+        assert best_move  # We are sure the best_move can no longer be None. We assert to make sure we did not make a logical mistake
         return best_value, best_move
 
 
@@ -131,4 +128,3 @@ def get_move(self, perspective: PlayerPerspective, leader_move: Optional[Move])
         move = self.first_move
         self.first_move = None
         return move
-
diff --git a/src/schnapsen/game.py b/src/schnapsen/game.py
@@ -1525,6 +1525,40 @@ def play_game(self, bot1: Bot, bot2: Bot, rng: Random) -> tuple[Bot, int, Score]
         winner, points, score = self.play_game_from_state(game_state=game_state, leader_move=None)
         return winner, points, score
 
+    def get_random_phase_two_state(self, rng: Random) -> GameState:
+        class RandBot(Bot):
+            def __init__(self, rand: Random, name: Optional[str] = None) -> None:
+                super().__init__(name)
+                self.rng = rand
+
+            def get_move(
+                self,
+                perspective: PlayerPerspective,
+                leader_move: Optional[Move],
+            ) -> Move:
+                moves: list[Move] = perspective.valid_moves()
+                move = self.rng.choice(moves)
+                return move
+
+        while True:
+            cards = self.deck_generator.get_initial_deck()
+            shuffled = self.deck_generator.shuffle_deck(cards, rng)
+            hand1, hand2, talon = self.hand_generator.generateHands(shuffled)
+            leader_state = BotState(implementation=RandBot(rand=rng), hand=hand1)
+            follower_state = BotState(implementation=RandBot(rand=rng), hand=hand2)
+            game_state = GameState(
+                leader=leader_state,
+                follower=follower_state,
+                talon=talon,
+                previous=None
+            )
+            second_phase_state, _ = self.play_at_most_n_tricks(game_state, RandBot(rand=rng), RandBot(rand=rng), 5)
+            winner = self.trick_scorer.declare_winner(second_phase_state)
+            if winner:
+                continue
+            if second_phase_state.game_phase() == GamePhase.TWO:
+                return second_phase_state
+
     def play_game_from_state_with_new_bots(self, game_state: GameState, new_leader: Bot, new_follower: Bot, leader_move: Optional[Move]) -> tuple[Bot, int, Score]:
         """
         Continue a game  which might have started before with other bots, with new bots.
@@ -1571,6 +1605,23 @@ def play_game_from_state(self, game_state: GameState, leader_move: Optional[Move
 
         return winner.implementation, points, winner.score
 
+    def play_one_trick(self, game_state: GameState, new_leader: Bot, new_follower: Bot) -> GameState:
+        """
+        Plays one trick (including the one started by the leader, if provided) on a game which might have started before.
+        The new bots are new_leader and new_follower.
+
+        This method does not make changes to the provided game_state.
+
+        :param game_state: The state of the game to start from
+        :param new_leader: The bot which will take the leader role in the game.
+        :param new_follower: The bot which will take the follower role in the game.
+
+        :returns: The GameState reached after playing exactly one trick.
+        """
+        state, rounds = self.play_at_most_n_tricks(game_state, new_leader, new_follower, 1)
+        assert rounds == 1
+        return state
+
     def play_at_most_n_tricks(self, game_state: GameState, new_leader: Bot, new_follower: Bot, n: int) -> tuple[GameState, int]:
         """
         Plays up to n tricks (including the one started by the leader, if provided) on a game which might have started before.
@@ -1582,6 +1633,7 @@ def play_at_most_n_tricks(self, game_state: GameState, new_leader: Bot, new_foll
         :param game_state: The state of the game to start from
         :param new_leader: The bot which will take the leader role in the game.
         :param new_follower: The bot which will take the follower in the game.
+        :param n: the maximum number of tricks to play
 
         :returns: The GameState reached and the number of steps actually taken.
         """