Implement ExpanderAgent

Add an agent that prioritizes expanding moves, i.e., moves that capture opponent or neutral tiles.

Signed-off-by: Jonas Dujava <[email protected]>
Co-authored-by: Matej Straka <[email protected]>
strakam · Sep 23, 2024 · 553b243 · 553b243
1 parent 14f6f98
commit 553b243
Show file tree

Hide file tree

Showing 2 changed files with 61 additions and 5 deletions.
diff --git a/examples/pettingzoo_example.py b/examples/pettingzoo_example.py
@@ -1,18 +1,18 @@
 from generals.env import pz_generals
-from generals.agents import RandomAgent
+from generals.agents import ExpanderAgent, RandomAgent
 from generals.config import GameConfig
 
 # Initialize agents - their names are then called for actions
 agents = {
-    "Red": RandomAgent("Red"),
-    "Blue": RandomAgent("Blue")
+    "Random": RandomAgent("Random"),
+    "Expander": ExpanderAgent("Expander")
 }
 
 game_config = GameConfig(
     grid_size=16,
     mountain_density=0.2,
     city_density=0.05,
-    general_positions=[(2, 12), (8, 9)],
+    general_positions=[(4, 12), (12, 4)],
     agent_names=list(agents.keys()),
 )
 
@@ -21,7 +21,7 @@
 observations, info = env.reset(options={"replay_file": "test"})
 
 # How fast we want rendering to be
-actions_per_second = 2
+actions_per_second = 6
 
 while not env.game.is_done():
     actions = {}

diff --git a/generals/agents.py b/generals/agents.py
@@ -1,5 +1,7 @@
 import numpy as np
 
+from generals.config import DIRECTIONS
+
 class Agent:
     """
     Base class for all agents.
@@ -32,3 +34,57 @@ def play(self, observation):
         # append 1 or 0 randomly to the action (to say whether to send half of troops or all troops)
         action = np.append(valid_actions[action_index], np.random.choice([0, 1]))
         return action
+
+class ExpanderAgent(Agent):
+    def __init__(self, name):
+        super().__init__(name)
+
+    def play(self, observation):
+        """
+        Heuristically selects a valid (expanding) action.
+        Prioritizes capturing opponent and then neutral cells.
+        """
+        mask = observation["action_mask"]
+        army = observation["army"]
+
+        valid_actions = np.argwhere(mask == 1)
+        actions_with_more_than_1_army = (
+            army[valid_actions[:, 0], valid_actions[:, 1]] > 1
+        )
+        if np.sum(actions_with_more_than_1_army) == 0:
+            return [-1, -1, 0, 0]  # IDLE move
+
+        valid_actions = valid_actions[actions_with_more_than_1_army]
+
+        opponent = observation["ownership_opponent"]
+        neutral = observation["ownership_neutral"]
+
+        # find actions that capture opponent or neutral cells
+        actions_to_opponent = np.zeros(len(valid_actions))
+        actions_to_neutral = np.zeros(len(valid_actions))
+        for i, action in enumerate(valid_actions):
+            destination = action[:-1] + DIRECTIONS[action[-1]]
+            if army[action[0], action[1]] <= army[destination[0], destination[1]] + 1:
+                continue
+            elif opponent[destination[0], destination[1]]:
+                actions_to_opponent[i] = 1
+            if neutral[destination[0], destination[1]]:
+                actions_to_neutral[i] = 1
+
+        actions_to_neutral_indices = np.argwhere(actions_to_neutral == 1).flatten()
+        actions_to_opponent_indices = np.argwhere(actions_to_opponent == 1).flatten()
+        if len(actions_to_opponent_indices) > 0:
+            # pick random action that captures an opponent cell
+            action_index = np.random.choice(len(actions_to_opponent_indices))
+            action = valid_actions[actions_to_opponent_indices[action_index]]
+        elif len(actions_to_neutral_indices) > 0:
+            # or pick random action that captures a neutral cell
+            action_index = np.random.choice(len(actions_to_neutral_indices))
+            action = valid_actions[actions_to_neutral_indices[action_index]]
+        else:  # otherwise pick a random action
+            action_index = np.random.choice(len(valid_actions))
+            action = valid_actions[action_index]
+
+        # append 0 to the action (to send all available troops)
+        action = np.append(action, 0)
+        return action