From d525cd05831c6f5ca2edbddeaf2b9f00d17d3c50 Mon Sep 17 00:00:00 2001
From: Michael Cochez <miselico@users.noreply.github.com>
Date: Tue, 14 Nov 2023 21:14:48 +0100
Subject: [PATCH] Cleaned up the implementation of minimax and alphabeta. Added
 one more test. Trigger CI

---
 src/schnapsen/bots/alphabeta.py          |  39 ++++----
 src/schnapsen/bots/minimax.py            |  16 ++--
 src/schnapsen/game.py                    |  52 +++++++++++
 tests/bots/test_minimax_alphabeta_bot.py | 112 ++++++++++-------------
 4 files changed, 130 insertions(+), 89 deletions(-)

diff --git a/src/schnapsen/bots/alphabeta.py b/src/schnapsen/bots/alphabeta.py
index 29043ca..11d3d7a 100644
--- a/src/schnapsen/bots/alphabeta.py
+++ b/src/schnapsen/bots/alphabeta.py
@@ -11,7 +11,7 @@
     GamePlayEngine,
     SchnapsenTrickScorer,
 )
-from .minimax import OneFixedMoveBot
+
 
 class AlphaBetaBot(Bot):
     """
@@ -35,16 +35,13 @@ def __init__(self) -> None:
         super().__init__()
 
     def get_move(self, perspective: PlayerPerspective, leader_move: Optional[Move]) -> Move:
-        assert (
-            perspective.get_phase() == GamePhase.TWO
-        ), "AlphaBetaBot can only work in the second phase of the game."
+        assert (perspective.get_phase() == GamePhase.TWO), "AlphaBetaBot can only work in the second phase of the game."
         _, move = self.value(
             perspective.get_state_in_phase_two(),
             perspective.get_engine(),
             leader_move=leader_move,
             maximizing=True,
         )
-        assert move
         return move
 
     def value(
@@ -55,7 +52,7 @@ def value(
         maximizing: bool,
         alpha: float = float("-inf"),
         beta: float = float("inf"),
-    ) -> tuple[float, Optional[Move]]:
+    ) -> tuple[float, Move]:
         my_perspective: PlayerPerspective
         if leader_move is None:
             # we are the leader
@@ -70,7 +67,7 @@ def value(
             leader: Bot
             follower: Bot
             if leader_move is None:
-                # we are leader,
+                # we are leader, call self to get the follower to play
                 value, _ = self.value(
                     state=state,
                     engine=engine,
@@ -80,20 +77,16 @@ def value(
                     beta=beta,
                 )
             else:
-                # We are the follower.
+                # We are the follower. We need to complete the trick and then call self to play the next trick, with the correct maximizing, depending on who is the new leader
                 leader = OneFixedMoveBot(leader_move)
                 follower = OneFixedMoveBot(move)
-                new_game_state, rounds = engine.play_at_most_n_tricks(
-                    game_state=state, new_leader=leader, new_follower=follower, n=1
-                )
-                assert rounds == 1
+                new_game_state = engine.play_one_trick(game_state=state, new_leader=leader, new_follower=follower)
                 winning_info = SchnapsenTrickScorer().declare_winner(new_game_state)
                 if winning_info:
                     winner = winning_info[0].implementation
                     points = winning_info[1]
                     follower_wins = winner == follower
-                    if not follower_wins:
-                        assert winner == leader
+
                     if not follower_wins:
                         points = -points
                     if not maximizing:
@@ -104,12 +97,13 @@ def value(
                     leader_stayed = leader == new_game_state.leader.implementation
 
                     if leader_stayed:
+                        # At the next step, the leader is our opponent, and it will be doing the opposite of what we do.
                         next_maximizing = not maximizing
                     else:  # if not leader_stayed
+                        # At the next step we will have become the leader, so we will keep doing what we did
                         next_maximizing = maximizing
-                    value, _ = self.value(
-                        new_game_state, engine, None, next_maximizing, alpha, beta
-                    )
+                    # implementation note: the previous two cases could be written with an xor, but this seemed more readable
+                    value, _ = self.value(new_game_state, engine, None, next_maximizing, alpha, beta)
             if maximizing:
                 if value > best_value:
                     best_move = move
@@ -124,5 +118,16 @@ def value(
                 beta = min(beta, best_value)  # alphabeta pruning
                 if beta <= alpha:
                     break
+        assert best_move  # We are sure the best_move can no longer be None. We assert to make sure we did not make a logical mistake
         return best_value, best_move
 
+
+class OneFixedMoveBot(Bot):
+    def __init__(self, move: Move) -> None:
+        self.first_move: Optional[Move] = move
+
+    def get_move(self, perspective: PlayerPerspective, leader_move: Optional[Move]) -> Move:
+        assert self.first_move, "This bot can only play one move, after that it ends"
+        move = self.first_move
+        self.first_move = None
+        return move
diff --git a/src/schnapsen/bots/minimax.py b/src/schnapsen/bots/minimax.py
index d2ece12..0574bd2 100644
--- a/src/schnapsen/bots/minimax.py
+++ b/src/schnapsen/bots/minimax.py
@@ -12,6 +12,7 @@
     SchnapsenTrickScorer,
 )
 
+
 class MiniMaxBot(Bot):
     """
     A bot playing the minimax strategy in the second phase of the game.
@@ -39,7 +40,6 @@ def get_move(self, perspective: PlayerPerspective, leader_move: Optional[Move])
             leader_move=leader_move,
             maximizing=True,
         )
-        assert move
         return move
 
     def value(
@@ -48,7 +48,7 @@ def value(
         engine: GamePlayEngine,
         leader_move: Optional[Move],
         maximizing: bool,
-    ) -> tuple[float, Optional[Move]]:
+    ) -> tuple[float, Move]:
         """Get the score and the corresponding move which eithers maxmizes or minimizes the objective.
 
         Args:
@@ -74,7 +74,7 @@ def value(
             leader: Bot
             follower: Bot
             if leader_move is None:
-                # call self to get the follower to play
+                # we are leader, call self to get the follower to play
                 value, _ = self.value(
                     state=state,
                     engine=engine,
@@ -85,17 +85,13 @@ def value(
                 # We are the follower. We need to complete the trick and then call self to play the next trick, with the correct maximizing, depending on who is the new leader
                 leader = OneFixedMoveBot(leader_move)
                 follower = OneFixedMoveBot(move)
-                new_game_state, rounds = engine.play_at_most_n_tricks(
-                    game_state=state, new_leader=leader, new_follower=follower, n=1
-                )
-                assert rounds == 1
+                new_game_state = engine.play_one_trick(game_state=state, new_leader=leader, new_follower=follower)
                 winning_info = SchnapsenTrickScorer().declare_winner(new_game_state)
                 if winning_info:
                     winner = winning_info[0].implementation
                     points = winning_info[1]
                     follower_wins = winner == follower
-                    if not follower_wins:
-                        assert winner == leader
+
                     if not follower_wins:
                         points = -points
                     if not maximizing:
@@ -119,6 +115,7 @@ def value(
             elif not maximizing and value < best_value:
                 best_move = move
                 best_value = value
+        assert best_move  # We are sure the best_move can no longer be None. We assert to make sure we did not make a logical mistake
         return best_value, best_move
 
 
@@ -131,4 +128,3 @@ def get_move(self, perspective: PlayerPerspective, leader_move: Optional[Move])
         move = self.first_move
         self.first_move = None
         return move
-
diff --git a/src/schnapsen/game.py b/src/schnapsen/game.py
index b31c8be..bc8418a 100644
--- a/src/schnapsen/game.py
+++ b/src/schnapsen/game.py
@@ -1525,6 +1525,40 @@ def play_game(self, bot1: Bot, bot2: Bot, rng: Random) -> tuple[Bot, int, Score]
         winner, points, score = self.play_game_from_state(game_state=game_state, leader_move=None)
         return winner, points, score
 
+    def get_random_phase_two_state(self, rng: Random) -> GameState:
+        class RandBot(Bot):
+            def __init__(self, rand: Random, name: Optional[str] = None) -> None:
+                super().__init__(name)
+                self.rng = rand
+
+            def get_move(
+                self,
+                perspective: PlayerPerspective,
+                leader_move: Optional[Move],
+            ) -> Move:
+                moves: list[Move] = perspective.valid_moves()
+                move = self.rng.choice(moves)
+                return move
+
+        while True:
+            cards = self.deck_generator.get_initial_deck()
+            shuffled = self.deck_generator.shuffle_deck(cards, rng)
+            hand1, hand2, talon = self.hand_generator.generateHands(shuffled)
+            leader_state = BotState(implementation=RandBot(rand=rng), hand=hand1)
+            follower_state = BotState(implementation=RandBot(rand=rng), hand=hand2)
+            game_state = GameState(
+                leader=leader_state,
+                follower=follower_state,
+                talon=talon,
+                previous=None
+            )
+            second_phase_state, _ = self.play_at_most_n_tricks(game_state, RandBot(rand=rng), RandBot(rand=rng), 5)
+            winner = self.trick_scorer.declare_winner(second_phase_state)
+            if winner:
+                continue
+            if second_phase_state.game_phase() == GamePhase.TWO:
+                return second_phase_state
+
     def play_game_from_state_with_new_bots(self, game_state: GameState, new_leader: Bot, new_follower: Bot, leader_move: Optional[Move]) -> tuple[Bot, int, Score]:
         """
         Continue a game  which might have started before with other bots, with new bots.
@@ -1571,6 +1605,23 @@ def play_game_from_state(self, game_state: GameState, leader_move: Optional[Move
 
         return winner.implementation, points, winner.score
 
+    def play_one_trick(self, game_state: GameState, new_leader: Bot, new_follower: Bot) -> GameState:
+        """
+        Plays one trick (including the one started by the leader, if provided) on a game which might have started before.
+        The new bots are new_leader and new_follower.
+
+        This method does not make changes to the provided game_state.
+
+        :param game_state: The state of the game to start from
+        :param new_leader: The bot which will take the leader role in the game.
+        :param new_follower: The bot which will take the follower role in the game.
+
+        :returns: The GameState reached after playing exactly one trick.
+        """
+        state, rounds = self.play_at_most_n_tricks(game_state, new_leader, new_follower, 1)
+        assert rounds == 1
+        return state
+
     def play_at_most_n_tricks(self, game_state: GameState, new_leader: Bot, new_follower: Bot, n: int) -> tuple[GameState, int]:
         """
         Plays up to n tricks (including the one started by the leader, if provided) on a game which might have started before.
@@ -1582,6 +1633,7 @@ def play_at_most_n_tricks(self, game_state: GameState, new_leader: Bot, new_foll
         :param game_state: The state of the game to start from
         :param new_leader: The bot which will take the leader role in the game.
         :param new_follower: The bot which will take the follower in the game.
+        :param n: the maximum number of tricks to play
 
         :returns: The GameState reached and the number of steps actually taken.
         """
diff --git a/tests/bots/test_minimax_alphabeta_bot.py b/tests/bots/test_minimax_alphabeta_bot.py
index 2094308..1e84a19 100644
--- a/tests/bots/test_minimax_alphabeta_bot.py
+++ b/tests/bots/test_minimax_alphabeta_bot.py
@@ -20,76 +20,53 @@
 from schnapsen.deck import Card, Suit
 
 
-class RandMiniMaxBot(Bot):
+class TwoStageBot(Bot):
+    """Bot which plays one strategy in phase one, and another strategy in phase two."""
+
+    def __init__(self, name: str, bot1: Bot, bot2: Bot) -> None:
+        super().__init__(name)
+        self.bot_phase1: Bot = bot1
+        self.bot_phase2: Bot = bot2
+
+    def get_move(self, perspective: PlayerPerspective, leader_move: Optional[Move]) -> Move:
+        if perspective.get_phase() == GamePhase.ONE:
+            return self.bot_phase1.get_move(perspective, leader_move)
+        elif perspective.get_phase() == GamePhase.TWO:
+            return self.bot_phase2.get_move(perspective, leader_move)
+        else:
+            raise AssertionError("Phase ain't right.")
+
+
+class RandMiniMaxBot(TwoStageBot):
     """In the phase1, this bot plays random, and in the phase2, it plays minimax.
     The opponent is random."""
 
     def __init__(self, rand: random.Random, name: str = "rand_minimax_bot") -> None:
-        super().__init__(name)
-        self.bot_phase1 = RandBot(rand=rand)
-        self.bot_phase2 = MiniMaxBot()
-
-    def get_move(self, state: PlayerPerspective, leader_move: Optional[Move]) -> Move:
-        if state.get_phase() == GamePhase.ONE:
-            return self.bot_phase1.get_move(state, leader_move)
-        elif state.get_phase() == GamePhase.TWO:
-            return self.bot_phase2.get_move(state, leader_move)
-        else:
-            raise ValueError("Phase ain't right.")
+        super().__init__(name, RandBot(rand=rand), MiniMaxBot())
 
 
-class RdeepMiniMaxBot(Bot):
+class RdeepMiniMaxBot(TwoStageBot):
     """In the phase1, this bot plays rdeep, and in the phase2, it plays minimax.
     The opponent is random."""
 
     def __init__(self, rand: random.Random, name: str = "rdeep_minimax_bot") -> None:
-        super().__init__(name)
-        self.bot_phase1 = RdeepBot(num_samples=16, depth=4, rand=rand)
-        self.bot_phase2 = MiniMaxBot()
-
-    def get_move(self, state: PlayerPerspective, leader_move: Optional[Move]) -> Move:
-        if state.get_phase() == GamePhase.ONE:
-            return self.bot_phase1.get_move(state, leader_move)
-        elif state.get_phase() == GamePhase.TWO:
-            return self.bot_phase2.get_move(state, leader_move)
-        else:
-            raise ValueError("Phase ain't right.")
+        super().__init__(name, RdeepBot(num_samples=16, depth=4, rand=rand), MiniMaxBot())
 
 
-class RandAlphaBetaBot(Bot):
+class RandAlphaBetaBot(TwoStageBot):
     """In the phase1, this bot plays random, and in the phase2, it plays AlphaBeta.
     The opponent is random."""
 
     def __init__(self, rand: random.Random, name: str = "rand_alphabeta_bot") -> None:
-        super().__init__(name)
-        self.bot_phase1 = RandBot(rand=rand)
-        self.bot_phase2 = AlphaBetaBot()
-
-    def get_move(self, state: PlayerPerspective, leader_move: Optional[Move]) -> Move:
-        if state.get_phase() == GamePhase.ONE:
-            return self.bot_phase1.get_move(state, leader_move)
-        elif state.get_phase() == GamePhase.TWO:
-            return self.bot_phase2.get_move(state, leader_move)
-        else:
-            raise ValueError("Phase ain't right.")
+        super().__init__(name, RandBot(rand=rand), AlphaBetaBot())
 
 
-class RdeepAlphaBetaBot(Bot):
+class RdeepAlphaBetaBot(TwoStageBot):
     """In the phase1, this bot plays rdeep, and in the phase2, it plays alphabeta.
     The opponent is random."""
 
     def __init__(self, rand: random.Random, name: str = "rdeep_alphabeta_bot") -> None:
-        super().__init__(name)
-        self.bot_phase1 = RdeepBot(num_samples=16, depth=4, rand=rand)
-        self.bot_phase2 = AlphaBetaBot()
-
-    def get_move(self, state: PlayerPerspective, leader_move: Optional[Move]) -> Move:
-        if state.get_phase() == GamePhase.ONE:
-            return self.bot_phase1.get_move(state, leader_move)
-        elif state.get_phase() == GamePhase.TWO:
-            return self.bot_phase2.get_move(state, leader_move)
-        else:
-            raise ValueError("Phase ain't right.")
+        super().__init__(name, RdeepBot(num_samples=16, depth=4, rand=rand), AlphaBetaBot())
 
 
 class MiniMaxBotTest(TestCase):
@@ -106,7 +83,7 @@ def test_run_1(self) -> None:
         winners = {str(self.bot1): 0, str(self.bot3): 0}
         num_games = 50
         for i in range(num_games):
-            winner, points, score = self.engine.play_game(
+            winner, _, _ = self.engine.play_game(
                 self.bot1, self.bot3, random.Random(i)
             )
 
@@ -115,22 +92,10 @@ def test_run_1(self) -> None:
         self.assertTrue(winners[str(self.bot1)] > num_games // 2)
 
     def test_run_2(self) -> None:
-        winners = {str(self.bot2): 0, str(self.bot3): 0}
-        num_games = 50
-        for i in range(num_games):
-            winner, points, score = self.engine.play_game(
-                self.bot2, self.bot3, random.Random(i)
-            )
-
-            winners[str(winner)] += 1
-
-        self.assertTrue(winners[str(self.bot2)] > num_games // 2)
-
-    def test_run_3(self) -> None:
         winners = {str(self.bot1): 0, str(self.bot2): 0}
         num_games = 50
         for i in range(num_games):
-            winner, points, score = self.engine.play_game(
+            winner, _, _ = self.engine.play_game(
                 self.bot1, self.bot2, random.Random(i)
             )
 
@@ -138,6 +103,29 @@ def test_run_3(self) -> None:
 
         self.assertTrue(winners[str(self.bot2)] > num_games // 2)
 
+    def test_second_phase(self) -> None:
+        num_games = 10
+        engine = SchnapsenGamePlayEngine()
+
+        for i in range(num_games):
+            state = engine.get_random_phase_two_state(random.Random(i))
+            # We first play one game from this state: minimaxA vs minimaxB.
+            # We then let the winning side play several games against rand. The winning side must never lose.
+
+            minimaxA = MiniMaxBot()
+            minimaxB = MiniMaxBot()
+            outcome = engine.play_game_from_state_with_new_bots(state, new_leader=minimaxA, new_follower=minimaxB, leader_move=None)
+
+            for j in range(10):
+                randbot = RandBot(random.Random(j))
+                if outcome[0] == minimaxA:
+                    outcome2 = engine.play_game_from_state_with_new_bots(state, new_leader=minimaxA, new_follower=randbot, leader_move=None)
+                    assert outcome2[0] == minimaxA
+                else:
+                    # minimaxB won
+                    outcome2 = engine.play_game_from_state_with_new_bots(state, new_leader=randbot, new_follower=minimaxB, leader_move=None)
+                    assert outcome2[0] == minimaxB
+
 
 class MiniMaxBotAlphaBetaPhaseTwoEasy(TestCase):
     def setUp(self) -> None: