diff --git a/examples/ray_example.py b/examples/ray_example.py
deleted file mode 100644
index 65af950..0000000
--- a/examples/ray_example.py
+++ /dev/null
@@ -1,36 +0,0 @@
-from generals.agents import RandomAgent, ExpanderAgent
-from ray.rllib.algorithms.ppo import PPOConfig
-import gymnasium as gym
-from ray import tune
-from pprint import pprint
-
-#########################################################################################
-# Currently, it seems like RLLIB uses only gymnasium 0.x, but we support gymnasium 1.0+.#
-# Therefore this example may not always work.                                           #
-#########################################################################################
-
-
-def env_creator(env_config):
-    agent = RandomAgent()  # Initialize your custom agent
-    npc = ExpanderAgent()  # Initialize an NPC agent
-    env = gym.make("gym-generals-rllib-v0", agent=agent, npc=npc)  # Create the environment
-    return env
-
-tune.register_env("generals_env", env_creator)
-
-config = (
-    PPOConfig()
-    .api_stack(
-        enable_rl_module_and_learner=True,
-        enable_env_runner_and_connector_v2=True,
-    )
-    .environment("generals_env")  # Use the generals environment
-    .env_runners(num_env_runners=1)
-)
-
-algo = config.build()
-
-for i in range(10):
-    result = algo.train()
-    result.pop("config")
-    pprint(result)
diff --git a/examples/record_replay_example.py b/examples/record_replay_example.py
index 8b0bcde..a3485c7 100644
--- a/examples/record_replay_example.py
+++ b/examples/record_replay_example.py
@@ -1,6 +1,4 @@
-import gymnasium as gym
-
-from generals import GridFactory
+from generals import GridFactory, PettingZooGenerals
 from generals.agents import RandomAgent, ExpanderAgent
 
 # Initialize agents
@@ -17,11 +15,15 @@
     # seed=38,  # Seed to generate the same map every time
 )
 
-env = gym.make(
-    "gym-generals-v0",  # Environment name
-    grid_factory=grid_factory,  # Grid factory
-    agent=agent,
-    npc=npc,  # NPC that will play against the agent
+agents = {
+    npc.id: npc,
+    agent.id: agent
+}
+
+env = PettingZooGenerals(
+    agents=[npc.id, agent.id],
+    grid_factory=grid_factory,
+    render_mode=None
 )
 
 # Options are used only for the next game
@@ -29,10 +31,12 @@
    "replay_file": "my_replay",  # Save replay as my_replay.pkl
 }
 
-observation, info = env.reset(options=options)
-
+observations, info = env.reset(options=options)
 terminated = truncated = False
 while not (terminated or truncated):
-    action = agent.act(observation)
-    observation, reward, terminated, truncated, info = env.step(action)
-    env.render()
+    actions = {}
+    for agent in env.agents:
+        # Ask agent for action
+        actions[agent] = agents[agent].act(observations[agent])
+    # All agents perform their actions
+    observations, rewards, terminated, truncated, info = env.step(actions)
diff --git a/generals/envs/gymnasium_generals.py b/generals/envs/gymnasium_generals.py
index ae46ee0..7f93a32 100644
--- a/generals/envs/gymnasium_generals.py
+++ b/generals/envs/gymnasium_generals.py
@@ -67,7 +67,8 @@ def __init__(
 
         # Initialize game state
         self.prior_observations: dict[str, Observation] | None = None
-        self.game = self._create_new_game()
+        grid = self.grid_factory.generate()
+        self.game = Game(grid, self.agents)
 
         # Set up spaces
         self.observation_space = self._create_observation_space()
@@ -78,11 +79,6 @@ def _setup_agent_data(self) -> dict[str, dict[str, Any]]:
         colors = [(255, 107, 108), (0, 130, 255)]
         return {id: {"color": color} for id, color in zip(self.agents, colors)}
 
-    def _create_new_game(self) -> Game:
-        """Create a new game instance."""
-        grid = self.grid_factory.generate()
-        return Game(grid, self.agents)
-
     def _create_observation_space(self) -> spaces.Space:
        """Create the observation space based on grid dimensions."""
        dim = self.pad_observations_to