From d525cd05831c6f5ca2edbddeaf2b9f00d17d3c50 Mon Sep 17 00:00:00 2001 From: Michael Cochez Date: Tue, 14 Nov 2023 21:14:48 +0100 Subject: [PATCH] Cleaned up the implementation of minimax and alphabeta. Added one more test. Trigger CI --- src/schnapsen/bots/alphabeta.py | 39 ++++---- src/schnapsen/bots/minimax.py | 16 ++-- src/schnapsen/game.py | 52 +++++++++++ tests/bots/test_minimax_alphabeta_bot.py | 112 ++++++++++------------- 4 files changed, 130 insertions(+), 89 deletions(-) diff --git a/src/schnapsen/bots/alphabeta.py b/src/schnapsen/bots/alphabeta.py index 29043ca..11d3d7a 100644 --- a/src/schnapsen/bots/alphabeta.py +++ b/src/schnapsen/bots/alphabeta.py @@ -11,7 +11,7 @@ GamePlayEngine, SchnapsenTrickScorer, ) -from .minimax import OneFixedMoveBot + class AlphaBetaBot(Bot): """ @@ -35,16 +35,13 @@ def __init__(self) -> None: super().__init__() def get_move(self, perspective: PlayerPerspective, leader_move: Optional[Move]) -> Move: - assert ( - perspective.get_phase() == GamePhase.TWO - ), "AlphaBetaBot can only work in the second phase of the game." + assert (perspective.get_phase() == GamePhase.TWO), "AlphaBetaBot can only work in the second phase of the game." _, move = self.value( perspective.get_state_in_phase_two(), perspective.get_engine(), leader_move=leader_move, maximizing=True, ) - assert move return move def value( @@ -55,7 +52,7 @@ def value( maximizing: bool, alpha: float = float("-inf"), beta: float = float("inf"), - ) -> tuple[float, Optional[Move]]: + ) -> tuple[float, Move]: my_perspective: PlayerPerspective if leader_move is None: # we are the leader @@ -70,7 +67,7 @@ def value( leader: Bot follower: Bot if leader_move is None: - # we are leader, + # we are leader, call self to get the follower to play value, _ = self.value( state=state, engine=engine, @@ -80,20 +77,16 @@ def value( beta=beta, ) else: - # We are the follower. + # We are the follower. 
We need to complete the trick and then call self to play the next trick, with the correct maximizing, depending on who is the new leader leader = OneFixedMoveBot(leader_move) follower = OneFixedMoveBot(move) - new_game_state, rounds = engine.play_at_most_n_tricks( - game_state=state, new_leader=leader, new_follower=follower, n=1 - ) - assert rounds == 1 + new_game_state = engine.play_one_trick(game_state=state, new_leader=leader, new_follower=follower) winning_info = SchnapsenTrickScorer().declare_winner(new_game_state) if winning_info: winner = winning_info[0].implementation points = winning_info[1] follower_wins = winner == follower - if not follower_wins: - assert winner == leader + if not follower_wins: points = -points if not maximizing: @@ -104,12 +97,13 @@ def value( leader_stayed = leader == new_game_state.leader.implementation if leader_stayed: + # At the next step, the leader is our opponent, and it will be doing the opposite of what we do. next_maximizing = not maximizing else: # if not leader_stayed + # At the next step we will have become the leader, so we will keep doing what we did next_maximizing = maximizing - value, _ = self.value( - new_game_state, engine, None, next_maximizing, alpha, beta - ) + # implementation note: the previous two cases could be written with an xor, but this seemed more readable + value, _ = self.value(new_game_state, engine, None, next_maximizing, alpha, beta) if maximizing: if value > best_value: best_move = move @@ -124,5 +118,16 @@ def value( beta = min(beta, best_value) # alphabeta pruning if beta <= alpha: break + assert best_move # We are sure the best_move can no longer be None. 
We assert to make sure we did not make a logical mistake return best_value, best_move + +class OneFixedMoveBot(Bot): + def __init__(self, move: Move) -> None: + self.first_move: Optional[Move] = move + + def get_move(self, perspective: PlayerPerspective, leader_move: Optional[Move]) -> Move: + assert self.first_move, "This bot can only play one move, after that it ends" + move = self.first_move + self.first_move = None + return move diff --git a/src/schnapsen/bots/minimax.py b/src/schnapsen/bots/minimax.py index d2ece12..0574bd2 100644 --- a/src/schnapsen/bots/minimax.py +++ b/src/schnapsen/bots/minimax.py @@ -12,6 +12,7 @@ SchnapsenTrickScorer, ) + class MiniMaxBot(Bot): """ A bot playing the minimax strategy in the second phase of the game. @@ -39,7 +40,6 @@ def get_move(self, perspective: PlayerPerspective, leader_move: Optional[Move]) leader_move=leader_move, maximizing=True, ) - assert move return move def value( @@ -48,7 +48,7 @@ def value( engine: GamePlayEngine, leader_move: Optional[Move], maximizing: bool, - ) -> tuple[float, Optional[Move]]: + ) -> tuple[float, Move]: """Get the score and the corresponding move which eithers maxmizes or minimizes the objective. Args: @@ -74,7 +74,7 @@ def value( leader: Bot follower: Bot if leader_move is None: - # call self to get the follower to play + # we are leader, call self to get the follower to play value, _ = self.value( state=state, engine=engine, @@ -85,17 +85,13 @@ def value( # We are the follower. 
We need to complete the trick and then call self to play the next trick, with the correct maximizing, depending on who is the new leader leader = OneFixedMoveBot(leader_move) follower = OneFixedMoveBot(move) - new_game_state, rounds = engine.play_at_most_n_tricks( - game_state=state, new_leader=leader, new_follower=follower, n=1 - ) - assert rounds == 1 + new_game_state = engine.play_one_trick(game_state=state, new_leader=leader, new_follower=follower) winning_info = SchnapsenTrickScorer().declare_winner(new_game_state) if winning_info: winner = winning_info[0].implementation points = winning_info[1] follower_wins = winner == follower - if not follower_wins: - assert winner == leader + if not follower_wins: points = -points if not maximizing: @@ -119,6 +115,7 @@ def value( elif not maximizing and value < best_value: best_move = move best_value = value + assert best_move # We are sure the best_move can no longer be None. We assert to make sure we did not make a logical mistake return best_value, best_move @@ -131,4 +128,3 @@ def get_move(self, perspective: PlayerPerspective, leader_move: Optional[Move]) move = self.first_move self.first_move = None return move - diff --git a/src/schnapsen/game.py b/src/schnapsen/game.py index b31c8be..bc8418a 100644 --- a/src/schnapsen/game.py +++ b/src/schnapsen/game.py @@ -1525,6 +1525,40 @@ def play_game(self, bot1: Bot, bot2: Bot, rng: Random) -> tuple[Bot, int, Score] winner, points, score = self.play_game_from_state(game_state=game_state, leader_move=None) return winner, points, score + def get_random_phase_two_state(self, rng: Random) -> GameState: + class RandBot(Bot): + def __init__(self, rand: Random, name: Optional[str] = None) -> None: + super().__init__(name) + self.rng = rand + + def get_move( + self, + perspective: PlayerPerspective, + leader_move: Optional[Move], + ) -> Move: + moves: list[Move] = perspective.valid_moves() + move = self.rng.choice(moves) + return move + + while True: + cards = 
self.deck_generator.get_initial_deck() + shuffled = self.deck_generator.shuffle_deck(cards, rng) + hand1, hand2, talon = self.hand_generator.generateHands(shuffled) + leader_state = BotState(implementation=RandBot(rand=rng), hand=hand1) + follower_state = BotState(implementation=RandBot(rand=rng), hand=hand2) + game_state = GameState( + leader=leader_state, + follower=follower_state, + talon=talon, + previous=None + ) + second_phase_state, _ = self.play_at_most_n_tricks(game_state, RandBot(rand=rng), RandBot(rand=rng), 5) + winner = self.trick_scorer.declare_winner(second_phase_state) + if winner: + continue + if second_phase_state.game_phase() == GamePhase.TWO: + return second_phase_state + def play_game_from_state_with_new_bots(self, game_state: GameState, new_leader: Bot, new_follower: Bot, leader_move: Optional[Move]) -> tuple[Bot, int, Score]: """ Continue a game which might have started before with other bots, with new bots. @@ -1571,6 +1605,23 @@ def play_game_from_state(self, game_state: GameState, leader_move: Optional[Move return winner.implementation, points, winner.score + def play_one_trick(self, game_state: GameState, new_leader: Bot, new_follower: Bot) -> GameState: + """ + Plays one trick (including the one started by the leader, if provided) on a game which might have started before. + The new bots are new_leader and new_follower. + + This method does not make changes to the provided game_state. + + :param game_state: The state of the game to start from + :param new_leader: The bot which will take the leader role in the game. + :param new_follower: The bot which will take the follower role in the game. + + :returns: The GameState reached after the trick has been played. 
+ """ + state, rounds = self.play_at_most_n_tricks(game_state, new_leader, new_follower, 1) + assert rounds == 1 + return state + def play_at_most_n_tricks(self, game_state: GameState, new_leader: Bot, new_follower: Bot, n: int) -> tuple[GameState, int]: """ Plays up to n tricks (including the one started by the leader, if provided) on a game which might have started before. @@ -1582,6 +1633,7 @@ def play_at_most_n_tricks(self, game_state: GameState, new_leader: Bot, new_foll :param game_state: The state of the game to start from :param new_leader: The bot which will take the leader role in the game. :param new_follower: The bot which will take the follower in the game. + :param n: the maximum number of tricks to play :returns: The GameState reached and the number of steps actually taken. """ diff --git a/tests/bots/test_minimax_alphabeta_bot.py b/tests/bots/test_minimax_alphabeta_bot.py index 2094308..1e84a19 100644 --- a/tests/bots/test_minimax_alphabeta_bot.py +++ b/tests/bots/test_minimax_alphabeta_bot.py @@ -20,76 +20,53 @@ from schnapsen.deck import Card, Suit -class RandMiniMaxBot(Bot): +class TwoStageBot(Bot): + """Bot which plays first the one, than the other startegy""" + + def __init__(self, name: str, bot1: Bot, bot2: Bot) -> None: + super().__init__(name) + self.bot_phase1: Bot = bot1 + self.bot_phase2: Bot = bot2 + + def get_move(self, perspective: PlayerPerspective, leader_move: Optional[Move]) -> Move: + if perspective.get_phase() == GamePhase.ONE: + return self.bot_phase1.get_move(perspective, leader_move) + elif perspective.get_phase() == GamePhase.TWO: + return self.bot_phase2.get_move(perspective, leader_move) + else: + raise AssertionError("Phase ain't right.") + + +class RandMiniMaxBot(TwoStageBot): """In the phase1, this bot plays random, and in the phase2, it plays minimax. 
The opponent is random.""" def __init__(self, rand: random.Random, name: str = "rand_minimax_bot") -> None: - super().__init__(name) - self.bot_phase1 = RandBot(rand=rand) - self.bot_phase2 = MiniMaxBot() - - def get_move(self, state: PlayerPerspective, leader_move: Optional[Move]) -> Move: - if state.get_phase() == GamePhase.ONE: - return self.bot_phase1.get_move(state, leader_move) - elif state.get_phase() == GamePhase.TWO: - return self.bot_phase2.get_move(state, leader_move) - else: - raise ValueError("Phase ain't right.") + super().__init__(name, RandBot(rand=rand), MiniMaxBot()) -class RdeepMiniMaxBot(Bot): +class RdeepMiniMaxBot(TwoStageBot): """In the phase1, this bot plays rdeep, and in the phase2, it plays minimax. The opponent is random.""" def __init__(self, rand: random.Random, name: str = "rdeep_minimax_bot") -> None: - super().__init__(name) - self.bot_phase1 = RdeepBot(num_samples=16, depth=4, rand=rand) - self.bot_phase2 = MiniMaxBot() - - def get_move(self, state: PlayerPerspective, leader_move: Optional[Move]) -> Move: - if state.get_phase() == GamePhase.ONE: - return self.bot_phase1.get_move(state, leader_move) - elif state.get_phase() == GamePhase.TWO: - return self.bot_phase2.get_move(state, leader_move) - else: - raise ValueError("Phase ain't right.") + super().__init__(name, RdeepBot(num_samples=16, depth=4, rand=rand), MiniMaxBot()) -class RandAlphaBetaBot(Bot): +class RandAlphaBetaBot(TwoStageBot): """In the phase1, this bot plays random, and in the phase2, it plays AlphaBeta. 
The opponent is random.""" def __init__(self, rand: random.Random, name: str = "rand_alphabeta_bot") -> None: - super().__init__(name) - self.bot_phase1 = RandBot(rand=rand) - self.bot_phase2 = AlphaBetaBot() - - def get_move(self, state: PlayerPerspective, leader_move: Optional[Move]) -> Move: - if state.get_phase() == GamePhase.ONE: - return self.bot_phase1.get_move(state, leader_move) - elif state.get_phase() == GamePhase.TWO: - return self.bot_phase2.get_move(state, leader_move) - else: - raise ValueError("Phase ain't right.") + super().__init__(name, RandBot(rand=rand), AlphaBetaBot()) -class RdeepAlphaBetaBot(Bot): +class RdeepAlphaBetaBot(TwoStageBot): """In the phase1, this bot plays rdeep, and in the phase2, it plays alphabeta. The opponent is random.""" def __init__(self, rand: random.Random, name: str = "rdeep_alphabeta_bot") -> None: - super().__init__(name) - self.bot_phase1 = RdeepBot(num_samples=16, depth=4, rand=rand) - self.bot_phase2 = AlphaBetaBot() - - def get_move(self, state: PlayerPerspective, leader_move: Optional[Move]) -> Move: - if state.get_phase() == GamePhase.ONE: - return self.bot_phase1.get_move(state, leader_move) - elif state.get_phase() == GamePhase.TWO: - return self.bot_phase2.get_move(state, leader_move) - else: - raise ValueError("Phase ain't right.") + super().__init__(name, RdeepBot(num_samples=16, depth=4, rand=rand), AlphaBetaBot()) class MiniMaxBotTest(TestCase): @@ -106,7 +83,7 @@ def test_run_1(self) -> None: winners = {str(self.bot1): 0, str(self.bot3): 0} num_games = 50 for i in range(num_games): - winner, points, score = self.engine.play_game( + winner, _, _ = self.engine.play_game( self.bot1, self.bot3, random.Random(i) ) @@ -115,22 +92,10 @@ def test_run_1(self) -> None: self.assertTrue(winners[str(self.bot1)] > num_games // 2) def test_run_2(self) -> None: - winners = {str(self.bot2): 0, str(self.bot3): 0} - num_games = 50 - for i in range(num_games): - winner, points, score = self.engine.play_game( - self.bot2, 
self.bot3, random.Random(i) - ) - - winners[str(winner)] += 1 - - self.assertTrue(winners[str(self.bot2)] > num_games // 2) - - def test_run_3(self) -> None: winners = {str(self.bot1): 0, str(self.bot2): 0} num_games = 50 for i in range(num_games): - winner, points, score = self.engine.play_game( + winner, _, _ = self.engine.play_game( self.bot1, self.bot2, random.Random(i) ) @@ -138,6 +103,29 @@ def test_run_3(self) -> None: self.assertTrue(winners[str(self.bot2)] > num_games // 2) + def test_second_phase(self) -> None: + num_games = 10 + engine = SchnapsenGamePlayEngine() + + for i in range(num_games): + state = engine.get_random_phase_two_state(random.Random(i)) + # We play two games from this state, one minimaxA vs minimaxB. + # We let the winning side play many games against rand. This winning side must never lose + + minimaxA = MiniMaxBot() + minimaxB = MiniMaxBot() + outcome = engine.play_game_from_state_with_new_bots(state, new_leader=minimaxA, new_follower=minimaxB, leader_move=None) + + for j in range(10): + randbot = RandBot(random.Random(j)) + if outcome[0] == minimaxA: + outcome2 = engine.play_game_from_state_with_new_bots(state, new_leader=minimaxA, new_follower=randbot, leader_move=None) + assert outcome2[0] == minimaxA + else: + # minimaxB won + outcome2 = engine.play_game_from_state_with_new_bots(state, new_leader=randbot, new_follower=minimaxB, leader_move=None) + assert outcome2[0] == minimaxB + class MiniMaxBotAlphaBetaPhaseTwoEasy(TestCase): def setUp(self) -> None: