diff --git a/DSSE/environment/coverage_env.py b/DSSE/environment/coverage_env.py index f1b37d8d..7ce8e599 100644 --- a/DSSE/environment/coverage_env.py +++ b/DSSE/environment/coverage_env.py @@ -1,3 +1,4 @@ +import datetime from gymnasium.spaces import Discrete from .env_base import DroneSwarmSearchBase from .simulation.particle_simulation import ParticleSimulation @@ -10,14 +11,13 @@ class CoverageDroneSwarmSearch(DroneSwarmSearchBase): metadata = { "name": "DroneSwarmSearchCPP", } - reward_scheme = Reward( - default=0, - leave_grid=-10, - exceed_timestep=-100, - drones_collision=-10, - search_cell=10, - search_and_find=100, - ) + reward_scheme = { + "default": -0.2, + "exceed_timestep": 0.0, + "search_cell": 1.0, + "done": 60, + "reward_poc": 45.0 + } def __init__( self, @@ -31,13 +31,24 @@ def __init__( drone_probability_of_detection=0.9, pre_render_time=10, prob_matrix_path=None, + particle_amount=50_000, + particle_radius=800, + num_particle_to_filter_as_noise=1, + start_time: datetime = None, + grid_cell_size=130, ) -> None: - - # Prob matrix + + if start_time is None: + start_time = datetime.datetime.now() + self.probability_matrix = ParticleSimulation( disaster_lat=disaster_position[0], disaster_long=disaster_position[1], + start_time=start_time, duration_hours=pre_render_time, + particle_amount=particle_amount, + particle_radius=particle_radius, + num_particle_to_filter_as_noise=num_particle_to_filter_as_noise ) if prob_matrix_path is not None: if not isinstance(prob_matrix_path, str): @@ -56,6 +67,7 @@ def __init__( drone_amount=drone_amount, drone_speed=drone_speed, probability_of_detection=drone_probability_of_detection, + grid_cell_size=grid_cell_size, ) self.disaster_position = disaster_position # Sets used to keep track of the seen and not seen states for reward calculation @@ -72,6 +84,8 @@ def reset(self, seed=None, options=None): self.reset_search_state() + self.reward_scheme["done"] = len(self.not_seen_states) / len(self.agents) + self.reward_scheme["reward_poc"] = len(self.not_seen_states) self.cumm_pos = 0 self.repeated_coverage = 0 infos = self.compute_infos(False) @@ -79,26 +93,30 @@ def reset(self, seed=None, options=None): def reset_search_state(self): # This is in (x, y) - self.seen_states = {pos for pos in self.agents_positions} + self.seen_states = set() + self.not_seen_states: set = self.all_states.copy() mat = self.probability_matrix.get_matrix() # (row, col) close_to_zero = np.argwhere(np.abs(mat) < 1e-10) - + # Remove the need to visit cells with POC near to 0 for y, x in close_to_zero: - self.seen_states.add((x, y)) + point = (x, y) + if point in self.not_seen_states: + self.not_seen_states.remove(point) - self.not_seen_states = self.all_states - self.seen_states def create_observations(self): observations = {} probability_matrix = self.probability_matrix.get_matrix() + prob_max = probability_matrix.max() + norm = probability_matrix / prob_max for idx, agent in enumerate(self.agents): observation = ( self.agents_positions[idx], - probability_matrix, + norm, ) observations[agent] = observation @@ -112,7 +130,7 @@ def step(self, actions: dict[str, int]) -> tuple: raise ValueError("Please reset the env before interacting with it") terminations = {a: False for a in self.agents} - rewards = {a: self.reward_scheme.default for a in self.agents} + rewards = {a: self.reward_scheme["default"] for a in self.agents} truncations = {a: False for a in self.agents} self.timestep += 1 @@ -127,25 +145,21 @@ def step(self, actions: dict[str, int]) -> tuple: if self.timestep 
>= self.timestep_limit: - rewards[agent] = self.reward_scheme.exceed_timestep + rewards[agent] = self.reward_scheme["exceed_timestep"] truncations[agent] = True continue - - # Action 8 is to stay in the same position, default reward. - if drone_action == 8: - continue drone_x, drone_y = self.agents_positions[idx] new_position = self.move_drone((drone_x, drone_y), drone_action) if not self.is_valid_position(new_position): - rewards[agent] = self.reward_scheme.leave_grid continue self.agents_positions[idx] = new_position new_x, new_y = new_position if new_position in self.not_seen_states: - reward_poc = (1 / (self.timestep)) * prob_matrix[new_y, new_x] * 1_000 - rewards[agent] = self.reward_scheme.search_cell + reward_poc + time_multiplier = (1 - self.timestep / self.timestep_limit) + reward_poc = time_multiplier * prob_matrix[new_y, new_x] * self.reward_scheme["reward_poc"] + rewards[agent] = self.reward_scheme["search_cell"] + reward_poc self.seen_states.add(new_position) self.not_seen_states.remove(new_position) # Probability of sucess (POS) = POC * POD @@ -153,6 +167,7 @@ def step(self, actions: dict[str, int]) -> tuple: # Remove the probability of the visited cell. prob_matrix[new_y, new_x] = 0.0 else: + # rewards[agent] = - self.repeated_coverage += 1 # Get dummy infos @@ -161,14 +176,15 @@ def step(self, actions: dict[str, int]) -> tuple: self.render() if is_completed: - # TODO: Proper define reward for completing the search (R_done) + # (R_done) + time_adjusted = (1 - self.timestep / self.timestep_limit) * self.reward_scheme["done"] + r_done = self.reward_scheme["done"] + time_adjusted rewards = { - drone: self.reward_scheme.search_and_find for drone in self.agents + drone: r_done for drone in self.agents } terminations = {drone: True for drone in self.agents} infos = self.compute_infos(is_completed) - self.compute_drone_collision(terminations, rewards) # Get observations observations = self.create_observations() # If terminted, reset the agents (pettingzoo parallel env requirement) @@ -177,14 +193,13 @@ def step(self, actions: dict[str, int]) -> tuple: return observations, rewards, terminations, truncations, infos def compute_infos(self, is_completed: bool) -> dict[str, dict]: - # TODO: Is this the best way to inform the coverage rate, Cum_pos and repetitions? 
total_states = len(self.seen_states) + len(self.not_seen_states) coverage_rate = len(self.seen_states) / total_states infos = { "is_completed": is_completed, "coverage_rate": coverage_rate, "repeated_coverage": self.repeated_coverage / total_states, - "acumulated_pos": self.cumm_pos, + "accumulated_pos": self.cumm_pos, } return {drone: infos for drone in self.agents} diff --git a/DSSE/environment/env.py b/DSSE/environment/env.py index abd22757..c0ab14f6 100644 --- a/DSSE/environment/env.py +++ b/DSSE/environment/env.py @@ -19,12 +19,12 @@ class DroneSwarmSearch(DroneSwarmSearchBase): } reward_scheme = Reward( - default=0.1, - leave_grid=-200, - exceed_timestep=-200, - drones_collision=-200, - search_cell=1, - search_and_find=200, + default=0.0, + leave_grid=0, + exceed_timestep=0, + drones_collision=0, + search_cell=0, + search_and_find=1, ) def __init__( @@ -41,8 +41,9 @@ def __init__( person_initial_position=(0, 0), drone_amount=1, drone_speed=10, - probability_of_detection=1, + probability_of_detection=1.0, pre_render_time=0, + grid_cell_size=130, ): if person_amount <= 0: raise ValueError("The number of persons must be greater than 0.") @@ -57,6 +58,7 @@ def __init__( drone_amount=drone_amount, drone_speed=drone_speed, probability_of_detection=probability_of_detection, + grid_cell_size=grid_cell_size, ) self.pre_render_steps = round( @@ -241,8 +243,6 @@ def step(self, actions): # Check truncation conditions (overwrites termination conditions) if self.timestep >= self.timestep_limit: rewards[agent] = self.reward_scheme.exceed_timestep - if self.rewards_sum[agent] > 0: - rewards[agent] += self.rewards_sum[agent] // 2 truncations[agent] = True terminations[agent] = True continue @@ -288,11 +288,6 @@ def step(self, actions): for agent in self.agents: terminations[agent] = True truncations[agent] = True - elif is_searching: - prob_matrix = self.probability_matrix.get_matrix() - rewards[agent] = ( - prob_matrix[drone_y][drone_x] - ) self.rewards_sum[agent] += rewards[agent] @@ -301,7 +296,8 @@ def step(self, actions): infos = {drone: {"Found": person_found} for drone in self.agents} # CHECK COLISION - Drone - self.compute_drone_collision(terminations, rewards) + # self.compute_drone_collision(terminations, rewards) + self.render_step(any(terminations.values()), person_found) diff --git a/DSSE/environment/env_base.py b/DSSE/environment/env_base.py index 1d2fb5da..e4780a83 100644 --- a/DSSE/environment/env_base.py +++ b/DSSE/environment/env_base.py @@ -4,7 +4,6 @@ from pettingzoo import ParallelEnv from .entities.drone import DroneData from .pygame_interface import PygameInterface -from .simulation.dynamic_probability import ProbabilityMatrix from .constants import Actions from gymnasium.spaces import MultiDiscrete, Discrete, Tuple, Box from copy import copy @@ -21,8 +20,9 @@ def __init__( drone_amount=1, drone_speed=10, probability_of_detection=1, + grid_cell_size=130, ) -> None: - self.cell_size = 130 # in meters + self.cell_size = grid_cell_size # in meters self.grid_size = grid_size self._was_reset = False if not isinstance(drone_amount, int): @@ -60,7 +60,7 @@ def __init__( # Initializing render self.pygame_renderer = PygameInterface( - self.grid_size, render_gradient, render_grid + self.grid_size, render_gradient, render_grid, self.metadata["name"] ) def calculate_simulation_time_step( @@ -163,21 +163,6 @@ def create_observations(self): def step(self, actions): raise NotImplementedError("Method not implemented") - def compute_drone_collision(self, terminations, rewards): - """ - Check 
for drone collision and compute terminations, rewards and truncations. - """ - for drone_1_id in range(len(self.agents)): - for drone_2_id in range(drone_1_id + 1, len(self.agents)): - drone_1_name = self.agents[drone_1_id] - drone_2_name = self.agents[drone_2_id] - if self.agents_positions[drone_1_id] == self.agents_positions[drone_2_id]: - terminations[drone_1_name] = True - terminations[drone_2_name] = True - rewards[drone_1_name] = self.reward_scheme.drones_collision - rewards[drone_2_name] = self.reward_scheme.drones_collision - - def move_drone(self, position, action): """ Returns a tuple with (is_terminal, new_position, reward) @@ -199,6 +184,8 @@ def move_drone(self, position, action): new_position = (position[0] - 1, position[1] + 1) case Actions.DOWN_RIGHT.value: # DOWN_RIGHT new_position = (position[0] + 1, position[1] + 1) + case _: + new_position = position return new_position @@ -217,7 +204,7 @@ def observation_space(self, agent): low=0, high=1, shape=(self.grid_size, self.grid_size), - dtype=np.float32, + dtype=np.float64, ), ) ) diff --git a/DSSE/environment/pygame_interface.py b/DSSE/environment/pygame_interface.py index 2b5e74ec..8cc15ed2 100644 --- a/DSSE/environment/pygame_interface.py +++ b/DSSE/environment/pygame_interface.py @@ -13,7 +13,7 @@ class PygameInterface: FPS = 5 def __init__( - self, grid_size: int, render_gradient: bool, render_grid: bool + self, grid_size: int, render_gradient: bool, render_grid: bool, env_name: str ) -> None: pygame.init() self.grid_size = grid_size @@ -23,6 +23,7 @@ def __init__( self.screen = None self.render_on = False self.probability_matrix = None + self.env_name = env_name self.block_size = self.window_size / self.grid_size self.drone_img = None @@ -126,7 +127,13 @@ def compute_cell_color(self, normalized_prob): elif normalized_prob >= 0.25: red = 255 green = 255 - return (red, green, 0) + + if self.env_name == "DroneSwarmSearchCPP": + blue = 255 if normalized_prob > 0 else 0 + else: + blue = 0 + + return (red, green, blue) def render_episode_end_screen(self, message: str, color: tuple): font = pygame.font.SysFont(None, 50) diff --git a/DSSE/environment/simulation/particle_simulation.py b/DSSE/environment/simulation/particle_simulation.py index 723878f5..6d68019c 100644 --- a/DSSE/environment/simulation/particle_simulation.py +++ b/DSSE/environment/simulation/particle_simulation.py @@ -3,7 +3,6 @@ from datetime import datetime, timedelta from typing import List, Tuple - EARTH_MEAN_RADIUS = 6373.0 @@ -12,23 +11,34 @@ def __init__( self, disaster_lat: float, disaster_long: float, + start_time: datetime, duration_hours: int = 10, loglevel: int = 20, animate: bool = False, cell_size: int = 130, + particle_amount: int = 50_000, + particle_radius: int = 1000, + num_particle_to_filter_as_noise: int = 0, ) -> None: try: from opendrift.models.oceandrift import OceanDrift + self.ocean_drift = OceanDrift except ImportError: - raise ImportError("OpenDrift not installed. Install the environment with the 'coverage' extra: pip install DSSE[coverage]") - + raise ImportError( + "OpenDrift not installed. 
Install the environment with the 'coverage' extra: pip install DSSE[coverage]" + ) + self.disaster_lat = disaster_lat self.disaster_long = disaster_long + self.start_time = start_time self.loglevel = loglevel self.animate = animate self.duration_hours = duration_hours self.cell_size = cell_size + self.particle_amount = particle_amount + self.particle_radius = particle_radius + self.particles_as_noise = num_particle_to_filter_as_noise # Internal variables self.map_size = 0 @@ -42,34 +52,29 @@ def run_or_get_simulation(self): def run_simulation(self): duration = timedelta(hours=self.duration_hours) - start_time = datetime.now() - duration - number = 50_000 - radius = 1000 - coordinates = self.simulate(start_time, number, radius, duration) + coordinates = self.simulate(duration) self.map_size = self.calculate_map_size(coordinates) cartesian = self.convert_lat_lon_to_xy(coordinates) self.probability_map = self.create_probability_map(cartesian) # Maintain always a copy of the original map self.original_map = self.probability_map.copy() - def simulate( - self, - time: datetime, - number: int, - radius: int, - duration: timedelta, - ) -> List[Tuple[float, float]]: + def simulate(self, duration: timedelta) -> List[Tuple[float, float]]: o = self.ocean_drift(loglevel=self.loglevel) + # Add Wind & Ocean data o.add_readers_from_list( - ["https://tds.hycom.org/thredds/dodsC/GLBy0.08/expt_93.0/uv3z"] + [ + "https://tds.hycom.org/thredds/dodsC/GLBy0.08/expt_93.0/uv3z", + "https://pae-paha.pacioos.hawaii.edu/thredds/dodsC/ncep_global/NCEP_Global_Atmospheric_Model_best.ncd", + ] ) o.seed_elements( lat=self.disaster_lat, lon=self.disaster_long, - time=time, - number=number, - radius=radius, + time=self.start_time, + number=self.particle_amount, + radius=self.particle_radius, ) o.run(duration=duration, time_step=1800) @@ -168,29 +173,58 @@ def create_probability_map( """ Creates a probability map based on the coordinates of the particles. """ - prob_map = np.zeros((self.map_size, self.map_size)) + prob_map = np.zeros((self.map_size, self.map_size), dtype=np.float64) for x, y in cartesian_coords: prob_map[y][x] += 1 - particle_sum = max(np.sum(prob_map), 1) + prob_map[prob_map <= self.particles_as_noise] = 0.0 + prob_map = self.trimm_map(prob_map) + self.map_size = len(prob_map) + particle_sum = max(np.sum(prob_map), 1) probability_map = prob_map / particle_sum return probability_map + def trimm_map(self, prob_map) -> np.ndarray: + """ + Trims map to fit cells with particles. 
+ """ + zero_values = np.argwhere(prob_map > 0) + row_min, col_min = zero_values.min(axis=0) + row_max, col_max = zero_values.max(axis=0) + + new_width = row_max - row_min + new_height = col_max - col_min + + # Pad the map to make it square + padding = ((0, 0), (0, 0)) + if new_width > new_height: + padding = ((0, 0), (0, new_width - new_height)) + elif new_height > new_width: + padding = ((0, new_height - new_width), (0, 0)) + + # Pads with zeros (there were no particles there anyway) + res = np.pad( + prob_map[row_min:row_max, col_min:col_max], + padding, + mode="constant", + constant_values=0.0, + ) + return res + def get_matrix(self): return self.probability_map def get_map_size(self): return self.map_size - + def save_state(self, output_path: str): with open(output_path, "wb") as f: np.save(f, self.original_map) - - + def load_state(self, input_path: str): with open(input_path, "rb") as f: self.probability_map = np.load(f) self.original_map = self.probability_map.copy() - self.map_size = len(self.probability_map) \ No newline at end of file + self.map_size = len(self.probability_map) diff --git a/DSSE/environment/wrappers/__init__.py b/DSSE/environment/wrappers/__init__.py new file mode 100644 index 00000000..812191ad --- /dev/null +++ b/DSSE/environment/wrappers/__init__.py @@ -0,0 +1,13 @@ +from .all_positions_wrapper import AllPositionsWrapper +from .matrix_encode_wrapper import MatrixEncodeWrapper +from .top_n_cells_wrapper import TopNProbsWrapper +from .retain_drone_pos_wrapper import RetainDronePosWrapper +from .all_flatten_wrapper import AllFlattenWrapper + +__all__ = [ + "AllPositionsWrapper", + "MatrixEncodeWrapper", + "TopNProbsWrapper", + "RetainDronePosWrapper", + "AllFlattenWrapper", +] \ No newline at end of file diff --git a/DSSE/environment/wrappers/all_flatten_wrapper.py b/DSSE/environment/wrappers/all_flatten_wrapper.py new file mode 100644 index 00000000..e6457372 --- /dev/null +++ b/DSSE/environment/wrappers/all_flatten_wrapper.py @@ -0,0 +1,48 @@ +import numpy as np +from pettingzoo.utils.wrappers import BaseParallelWrapper +from gymnasium.spaces import Box +from DSSE import DroneSwarmSearch + + +class AllFlattenWrapper(BaseParallelWrapper): + """ + Wrapper that modifies the observation space to include the positions of all agents + the flatten matrix. 
+ """ + def __init__(self, env: DroneSwarmSearch): + super().__init__(env) + + self.observation_spaces = { + agent: self.observation_space(agent) + for agent in self.env.possible_agents + } + + def step(self, actions): + obs, reward, terminated, truncated, infos = self.env.step(actions) + self.flatten_obs(obs) + return obs, reward, terminated, truncated, infos + + def flatten_obs(self, obs): + for idx, agent in enumerate(obs.keys()): + agents_positions = np.array(self.env.agents_positions) / (self.env.grid_size - 1) + agents_positions[[0, idx]] = agents_positions[[idx, 0]] + obs[agent] = ( + np.concatenate((agents_positions.flatten(), obs[agent][1].flatten())) + ) + + + def reset(self, **kwargs): + obs, infos = self.env.reset(**kwargs) + self.flatten_obs(obs) + return obs, infos + + def observation_space(self, agent): + return Box( + low=0, + high=1, + shape=(len(self.env.possible_agents) * 2 + self.env.grid_size * self.env.grid_size, ), + dtype=np.float64, + ) + + + + diff --git a/DSSE/environment/wrappers/all_positions_wrapper.py b/DSSE/environment/wrappers/all_positions_wrapper.py index 844dc60a..e8ff5d42 100644 --- a/DSSE/environment/wrappers/all_positions_wrapper.py +++ b/DSSE/environment/wrappers/all_positions_wrapper.py @@ -10,6 +10,11 @@ class AllPositionsWrapper(BaseParallelWrapper): """ def __init__(self, env: DroneSwarmSearch): super().__init__(env) + + self.observation_spaces = { + agent: self.observation_space(agent) + for agent in self.env.possible_agents + } def step(self, actions): obs, reward, terminated, truncated, infos = self.env.step(actions) @@ -17,13 +22,12 @@ def step(self, actions): return obs, reward, terminated, truncated, infos def add_other_positions_obs(self, obs): - prob_matrix = obs["drone0"][1] - for idx, agent in enumerate(self.env.agents): + for idx, agent in enumerate(obs.keys()): agents_positions = np.array(self.env.agents_positions, dtype=np.int64) - agents_positions[[0, idx]] = agents_positions[[idx, 0]] + agents_positions[[0, idx]] = agents_positions[[idx, 0]] obs[agent] = ( - agents_positions, - prob_matrix + agents_positions.flatten(), + obs[agent][1] ) @@ -35,12 +39,12 @@ def reset(self, **kwargs): def observation_space(self, agent): return Tuple( ( - Box(0, self.env.grid_size, shape=(len(self.env.possible_agents), 2), dtype=np.int64), + Box(0, self.env.grid_size, shape=(len(self.env.possible_agents) * 2, ), dtype=np.int64), Box( low=0, high=1, shape=(self.env.grid_size, self.env.grid_size), - dtype=np.float32, + dtype=np.float64, ) ) ) diff --git a/DSSE/environment/wrappers/communication_wrapper.py b/DSSE/environment/wrappers/communication_wrapper.py new file mode 100644 index 00000000..48c02496 --- /dev/null +++ b/DSSE/environment/wrappers/communication_wrapper.py @@ -0,0 +1,54 @@ +from pettingzoo.utils.wrappers import BaseParallelWrapper +from DSSE import DroneSwarmSearch +from numba import njit +import numpy as np + + +class CommunicationWrapper(BaseParallelWrapper): + """ + Ads tracking of seen cells to the observation space + """ + def __init__(self, env: DroneSwarmSearch, n_steps: int = 20): + super().__init__(env) + self.n_steps = n_steps + self.passed_map = None + + + def reset(self, **kwargs): + obs, infos = self.env.reset(**kwargs) + self.passed_map = np.zeros((self.env.grid_size, self.env.grid_size)) + return obs, infos + + def step(self, actions): + obs, reward, terminated, truncated, infos = self.env.step(actions) + for pos in self.env.agents_positions: + # (x, y) to (row, col) + self.passed_map[pos[1], pos[0]] = self.env.timestep + 
obs = self.add_communication(obs) + return obs, reward, terminated, truncated, infos + + def add_communication(self, obs): + # All observations have the same matrix, so we can just calculate it once + if len(obs) > 0: + new_matrix = self.modify_matrix(obs["drone0"][1], self.n_steps, self.passed_map, self.env.timestep) + for idx, agent in enumerate(obs.keys()): + obs[agent] = ( + obs[agent][0], + new_matrix + ) + return obs + + @staticmethod + @njit(cache=True, fastmath=True) + def modify_matrix(matrix, n_steps, passed_map, curr_ts): + new_matrix = matrix.copy() + height, width = matrix.shape + for i in range(height): + for j in range(width): + if matrix[i, j] == 0: + continue + multiplier = min((curr_ts - passed_map[i, j]) / n_steps, 1.0) + new_matrix[i, j] = new_matrix[i, j] * multiplier + return new_matrix + + \ No newline at end of file diff --git a/DSSE/environment/wrappers/gaussian_wrapper.py b/DSSE/environment/wrappers/gaussian_wrapper.py new file mode 100644 index 00000000..b5cc2fab --- /dev/null +++ b/DSSE/environment/wrappers/gaussian_wrapper.py @@ -0,0 +1,44 @@ +import numpy as np +from pettingzoo.utils.wrappers import BaseParallelWrapper +from gymnasium.spaces import Box +from DSSE import DroneSwarmSearch + + +class GaussianWrapper(BaseParallelWrapper): + """ + Wrapper that modifies the observation space to be the parameters of the gaussian + the drones positions + """ + def __init__(self, env: DroneSwarmSearch): + super().__init__(env) + + self.observation_spaces = { + agent: self.observation_space(agent) + for agent in self.env.possible_agents + } + + def step(self, actions): + obs, reward, terminated, truncated, infos = self.env.step(actions) + self.modify_obs(obs) + return obs, reward, terminated, truncated, infos + + def modify_obs(self, obs): + for idx, agent in enumerate(obs.keys()): + agents_positions = np.array(self.env.agents_positions, dtype=np.int64) + agents_positions[[0, idx]] = agents_positions[[idx, 0]] + obs[agent] = ( + agents_positions.flatten(), + obs[agent][1] + ) + + + def reset(self, **kwargs): + obs, infos = self.env.reset(**kwargs) + self.modify_obs(obs) + return obs, infos + + def observation_space(self, agent): + return Box(low=0, high=1, shape=(3 + len(self.env.possible_agents) * 2, ), dtype=np.float64) + + + + diff --git a/DSSE/environment/wrappers/matrix_encode_wrapper.py b/DSSE/environment/wrappers/matrix_encode_wrapper.py new file mode 100644 index 00000000..783c53f8 --- /dev/null +++ b/DSSE/environment/wrappers/matrix_encode_wrapper.py @@ -0,0 +1,46 @@ +import numpy as np +from pettingzoo.utils.wrappers import BaseParallelWrapper +from gymnasium.spaces import Box +from DSSE import DroneSwarmSearch + + +class MatrixEncodeWrapper(BaseParallelWrapper): + """ + Wrapper that modifies the observation space to include the positions of all agents encoded in the probability matrix. 
+ """ + + def __init__(self, env: DroneSwarmSearch): + super().__init__(env) + + self.observation_spaces = { + agent: self.observation_space(agent) for agent in self.env.possible_agents + } + + def reset(self, **kwargs): + obs, infos = self.env.reset(**kwargs) + self.encode_matrix_obs(obs) + return obs, infos + + def step(self, actions): + obs, reward, terminated, truncated, infos = self.env.step(actions) + self.encode_matrix_obs(obs) + return obs, reward, terminated, truncated, infos + + def encode_matrix_obs(self, obs): + for idx, agent in enumerate(obs.keys()): + prob_matrix = obs[agent][1].copy() + for i, pos in enumerate(self.env.agents_positions): + x, y = pos + if i == idx: + prob_matrix[y, x] += 2 + else: + prob_matrix[y, x] = -1 + obs[agent] = prob_matrix + + def observation_space(self, agent): + return Box( + low=-1, + high=3, + shape=(self.env.grid_size, self.env.grid_size), + dtype=np.float64, + ) diff --git a/DSSE/environment/wrappers/retain_drone_pos_wrapper.py b/DSSE/environment/wrappers/retain_drone_pos_wrapper.py new file mode 100644 index 00000000..d0d38a7f --- /dev/null +++ b/DSSE/environment/wrappers/retain_drone_pos_wrapper.py @@ -0,0 +1,30 @@ +from pettingzoo.utils.wrappers import BaseParallelWrapper +from DSSE import DroneSwarmSearch + + +class RetainDronePosWrapper(BaseParallelWrapper): + """ + Wrapper that modifies the reset function to retain the drone positions + """ + def __init__(self, env: DroneSwarmSearch, drone_positions: list): + super().__init__(env) + if len(drone_positions) != len(self.env.possible_agents): + raise ValueError("Drone positions must have the same length as the number of possible agents") + self.drone_positions = drone_positions + + + def reset(self, **kwargs): + opt = kwargs.get("options", {}) + if not opt: + options = { + "drones_positions": self.drone_positions + } + kwargs["options"] = options + else: + opt["drones_positions"] = self.drone_positions + kwargs["options"] = opt + obs, infos = self.env.reset(**kwargs) + return obs, infos + + + diff --git a/DSSE/environment/wrappers/top_n_cells_wrapper.py b/DSSE/environment/wrappers/top_n_cells_wrapper.py new file mode 100644 index 00000000..ce5024be --- /dev/null +++ b/DSSE/environment/wrappers/top_n_cells_wrapper.py @@ -0,0 +1,51 @@ +import numpy as np +from pettingzoo.utils.wrappers import BaseParallelWrapper +from gymnasium.spaces import Box +from DSSE import DroneSwarmSearch + + +class TopNProbsWrapper(BaseParallelWrapper): + """ + Wrapper that modifies the observation space to include the positions of all agents, and the top n positions with highest probability in the observation + """ + def __init__(self, env: DroneSwarmSearch, n_positions: int = 10): + super().__init__(env) + self.n_positions = n_positions + + self.observation_spaces = { + agent: self.observation_space(agent) + for agent in self.env.possible_agents + } + + def step(self, actions): + obs, reward, terminated, truncated, infos = self.env.step(actions) + self.modify_obs(obs) + return obs, reward, terminated, truncated, infos + + def reset(self, **kwargs): + obs, infos = self.env.reset(**kwargs) + self.modify_obs(obs) + return obs, infos + + def modify_obs(self, obs): + for idx, agent in enumerate(obs.keys()): + agents_positions = np.array(self.env.agents_positions, dtype=np.int64) + agents_positions[[0, idx]] = agents_positions[[idx, 0]] + obs[agent] = ( + np.concatenate((agents_positions.flatten(), self.get_top_prob_positions(obs[agent][1]))) + ) + + def get_top_prob_positions(self, probability_matrix): + 
flattened_probs = probability_matrix.flatten() + indices = flattened_probs.argsort()[-self.n_positions:][::-1] + positions = np.unravel_index(indices, probability_matrix.shape) + positions = np.stack((positions[1], positions[0]), axis=-1) + return positions.flatten() + + def observation_space(self, agent): + agents_pos_len = len(self.env.possible_agents) * 2 + return Box(low=0, high=self.env.grid_size, shape=(agents_pos_len + self.n_positions * 2, ), dtype=np.int64) + + + + diff --git a/DSSE/tests/matrix.npy b/DSSE/tests/matrix.npy index 7c8e958c..6d567c1b 100644 Binary files a/DSSE/tests/matrix.npy and b/DSSE/tests/matrix.npy differ diff --git a/DSSE/tests/test_env.py b/DSSE/tests/test_env.py index bfd80754..abd4c7ff 100644 --- a/DSSE/tests/test_env.py +++ b/DSSE/tests/test_env.py @@ -80,32 +80,6 @@ def test_maximum_drones_allowed(grid_size, drone_amount): ), f"There should be {drone_amount} drones, but found {len(env.get_agents())}." -@pytest.mark.parametrize( - "drone_amount", - [ - 2, - ], -) -def test_drone_collision_termination(drone_amount): - - env = init_drone_swarm_search(drone_amount=drone_amount) - _ = env.reset() - - done = False - while not done: - actions = {"drone0": Actions.SEARCH.value, "drone1": Actions.LEFT.value} - _, reward, terminations, truncations, _ = env.step(actions) - done = any(truncations.values()) or any(terminations.values()) - - assert done, "The simulation should terminate upon drone collision." - assert any( - terminations.values() - ), "There should be a termination flag set due to the collision." - assert ( - sum(reward.values()) < 0 - ), "The total reward should be negative after a collision." - - @pytest.mark.parametrize( "timestep_limit", [ @@ -144,35 +118,33 @@ def test_timeout_termination(timestep_limit): @pytest.mark.parametrize( - "grid_size, person_initial_position", + "grid_size, person_initial_position, timestep_limit", [ - (15, (4, 4)), - (20, (10, 10)), - (25, (15, 15)), - (30, (20, 20)), + (15, (4, 4), 200), + (20, (10, 10), 100), + (25, (15, 15), 50), + (30, (20, 20), 25), ], ) -def test_leave_grid_get_negative_reward(grid_size, person_initial_position): +def test_leave_grid_dont_finish(grid_size, person_initial_position, timestep_limit): env = init_drone_swarm_search( - grid_size=grid_size, person_initial_position=person_initial_position + render_mode="ansi", grid_size=grid_size, person_initial_position=person_initial_position, timestep_limit=timestep_limit ) opt = {"drones_positions": [(0, 0)]} _ = env.reset(options=opt) done = False reward_sum = 0 - while not done and reward_sum >= env.reward_scheme.leave_grid * (env.timestep_limit - 1): + steps = 0 + while not done and steps < timestep_limit: actions = {"drone0": Actions.UP.value} _, reward, terminations, done, _ = env.step(actions) done = any(done.values()) - reward_sum += sum(reward.values()) + steps += 1 assert ( not done ), "The simulation should not end, indicating the drone left the grid or another termination condition was met." - assert ( - sum(reward.values()) < 0 - ), "The total reward should be negative, indicating a penalty was applied." assert not any( terminations.values() ), "There not should be at least one termination condition met." 
@@ -428,6 +400,215 @@ def test_get_wrong_if_scale_mult_is_not_greater_than_0(person_amount, mult): _ = env.reset(options=opt) +@pytest.mark.parametrize( + "person_amount, mult", + [ + (1, [1, 0.1]), + (2, [1]), + (3, [1, 0.8, 0.7, 1]), + (4, [1, 0.8]), + (5, [1, 0.8, 0.7, 0.6, 0.5, 0.6, 0.5]), + ], +) +def test_get_wrong_if_number_of_mults_is_not_equal_to_person_amount( + person_amount, mult +): + with pytest.raises(Exception): + env = init_drone_swarm_search(person_amount=person_amount) + opt = {"person_pod_multipliers": mult} + _ = env.reset(options=opt) +@pytest.mark.parametrize( + "drone_amount, drones_positions", + [ + (1, [(-1, 3)]), + (2, [(12, 0), (25, 13)]), + (3, [(0, 0), (19, 19), (25, -10)]), + (4, [(5, 0), (0, 0), (10, 10), (10, 10)]), + ], +) +def test_invalid_drone_position_raises_error(drone_amount, drones_positions): + with pytest.raises(ValueError): + env = init_drone_swarm_search(drone_amount=drone_amount) + opt = {"drones_positions": drones_positions} + _ = env.reset(options=opt) + + +@pytest.mark.parametrize( + "drone_amount", + [ + 1, + 20, + 35, + 48, + ], +) +def test_if_all_drones_are_created_with_default_positions(drone_amount): + env = init_drone_swarm_search(drone_amount=drone_amount) + + _ = env.reset() + + assert ( + len(env.get_agents()) == drone_amount + ), f"Should have {drone_amount} drones, but found {len(env.get_agents())}." + + +@pytest.mark.parametrize( + "drone_amount, grid_size", + [ + (1, 10), + (2, 15), + (5, 20), + (15, 25), + ], +) +def test_with_the_observation_size_is_correct_for_all_drones(drone_amount, grid_size): + env = init_drone_swarm_search(grid_size=grid_size, drone_amount=drone_amount) + + observations, _ = env.reset() + + for drone in range(drone_amount): + drone_id = f"drone{drone}" + observation_matriz = observations[drone_id][1] + + assert observation_matriz.shape == ( + grid_size, + grid_size, + ), f"The observation matrix for {drone_id} should have a shape of ({grid_size}, {grid_size}), but was {observation_matriz.shape}." + + +def test_petting_zoo_interface_works(): + env = init_drone_swarm_search() + parallel_api_test(env) + env.close() + + +@pytest.mark.parametrize( + "person_initial_position, person_amount", + [ + ((10, 10), 10), + ((10, 10), 15), + ((10, 10), 20), + ((10, 10), 25), + ], +) +def test_castaway_count_after_reset(person_initial_position, person_amount): + env = init_drone_swarm_search( + person_amount=person_amount, person_initial_position=person_initial_position + ) + _ = env.reset() + + assert ( + len(env.get_persons()) == person_amount + ), f"Should have {person_amount} castaways, but found {len(env.get_persons())}." + + +@pytest.mark.parametrize( + "person_initial_position, person_amount, drone_amount", + [ + ((10, 10), 1, 1), + ((1, 10), 5, 1), + ((19, 5), 10, 1), + ((5, 16), 15, 1), + ], +) +def test_castaway_count_after_reset( + person_initial_position, person_amount, drone_amount +): + env = init_drone_swarm_search( + person_amount=person_amount, + person_initial_position=person_initial_position, + drone_amount=drone_amount, + ) + observations = env.reset() + + rewards = 0 + done = False + while not done: + actions = policy(observations, env.get_agents(), env) + observations, reward, _, done, info = env.step(actions) + rewards += sum(reward.values()) + done = any(done.values()) + + _ = env.reset() + + assert ( + rewards >= DroneSwarmSearch.reward_scheme.search_and_find * person_amount + ), f"The total reward should be positive after finding all castaways. But the total reward was: {rewards}." 
+ assert done, "The simulation should end after finding all castaways." + assert ( + len(env.get_persons()) == person_amount + ), f"Should have {person_amount} castaways, but found {len(env.get_persons())}." + assert ( + len(env.get_agents()) == drone_amount + ), f"Should have {drone_amount} drones, but found {len(env.get_agents())}." + + +@pytest.mark.parametrize( + "pre_render_time, cell_size, drone_max_speed, wind_resistance", + [ + (1, 130, 10, 0.0), + (5, 130, 20, 0.0), + (10, 130, 30, 0.0), + (15, 130, 40, 0.0), + (20, 130, 50, 0.0), + ], +) +def test_pre_render_work_after_reset( + pre_render_time, cell_size, drone_max_speed, wind_resistance +): + env = init_drone_swarm_search( + pre_render_time=pre_render_time, drone_speed=drone_max_speed + ) + _ = env.reset() + pre_render_steps = round( + (pre_render_time * 60) / (cell_size / (drone_max_speed - wind_resistance)) + ) + + assert ( + env.pre_render_steps == pre_render_steps + ), f"The pre-render time should be {pre_render_steps}, but was {env.pre_render_time}." + + _ = env.reset() + + assert ( + env.pre_render_steps == pre_render_steps + ), f"The pre-render time should be {pre_render_steps}, but was {env.pre_render_time}." + + +@pytest.mark.parametrize( + "person_amount, mult", + [ + (1, ["1"]), + (2, [1, "0.8"]), + (3, [1, "0.8", 0.7]), + (4, ["1", 0.8, "0.7", 0.6]), + (5, ["1", "0.8", "0.7", "0.6", "0.5"]), + ], +) +def test_get_wrong_if_scale_pod_is_not_a_number(person_amount, mult): + with pytest.raises(Exception): + env = init_drone_swarm_search(person_amount=person_amount) + opt = {"person_pod_multipliers": mult} + _ = env.reset(options=opt) + + +@pytest.mark.parametrize( + "person_amount, mult", + [ + (1, [-1.2]), + (2, [1, -0.8]), + (3, [1, -0.8, 1.7]), + (4, [1, 0.8, -0.7, 0.6]), + (5, [1, 0.8, -0, 0.6, -3.5]), + ], +) +def test_get_wrong_if_scale_mult_is_not_greater_than_0(person_amount, mult): + with pytest.raises(Exception): + env = init_drone_swarm_search(person_amount=person_amount) + opt = {"person_pod_multipliers": mult} + _ = env.reset(options=opt) + + @pytest.mark.parametrize( "person_amount, mult", [ diff --git a/DSSE/tests/test_env_coverage.py b/DSSE/tests/test_env_coverage.py index c51a56af..214cbbf4 100644 --- a/DSSE/tests/test_env_coverage.py +++ b/DSSE/tests/test_env_coverage.py @@ -45,60 +45,33 @@ def test_wrong_drone_number(drone_amount): with pytest.raises(ValueError): init_Coverage_drone_swarm_search(drone_amount=drone_amount) - @pytest.mark.parametrize( - "drone_amount, drones_positions", + "drone_amount, drones_positions, timestep_limit", [ - (2, [(0, 0), (2, 0),]), + (1, [(0, 0)], 100), ], ) -def test_drone_collision_termination(drone_amount, drones_positions): - - env = init_Coverage_drone_swarm_search(drone_amount=drone_amount) - opt = { - "drones_positions": drones_positions, - } - _ = env.reset(options=opt) - - done = False - while not done: - actions = {"drone0": Actions.RIGHT.value, "drone1": Actions.LEFT.value} - _, reward, terminations, truncations, _ = env.step(actions) - done = any(terminations.values()) or any(truncations.values()) - - assert done, "The simulation should terminate upon drone collision." - assert any( - terminations.values() - ), "There should be a termination flag set due to the collision." - assert ( - sum(reward.values()) < 0 - ), "The total reward should be negative after a collision." 
- -@pytest.mark.parametrize( - "drone_amount, drones_positions", - [ - (1, [(0, 0)]), - ], -) -def test_leave_grid_get_negative_reward(drone_amount, drones_positions): - env = init_Coverage_drone_swarm_search(drone_amount=drone_amount) +def test_leave_grid_get_negative_reward(drone_amount, drones_positions, timestep_limit): + env = init_Coverage_drone_swarm_search(drone_amount=drone_amount, timestep_limit=timestep_limit) opt = {"drones_positions": drones_positions} _ = env.reset(options=opt) done = False reward_sum = 0 - while not done and reward_sum >= (env.reward_scheme.leave_grid * (env.timestep_limit-1)) +1: + steps = 0 + while not done and steps < timestep_limit - 1: actions = {"drone0": Actions.UP.value} _, reward, terminations, done, _ = env.step(actions) done = any(done.values()) reward_sum += sum(reward.values()) + steps += 1 assert ( not done ), "The simulation should not end, indicating the drone left the grid or another termination condition was met." assert ( sum(reward.values()) < 0 - ), "The total reward should be negative, indicating a penalty was applied." + ), f"The total reward should be negative, indicating a penalty was applied. reward: {reward_sum}" assert not any( terminations.values() ), "There not should be at least one termination condition met." diff --git a/pyproject.toml b/pyproject.toml index 2b29fa93..f52abc16 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,7 +33,7 @@ dependencies = [ "gymnasium>=0.27.1", "pygame>=2.3.0", "pettingzoo>=1.22.3", - "matplotlib>=3.7.0", + "matplotlib==3.8.4", "numba>=0.59.0", ] @@ -43,7 +43,7 @@ all = [ "gymnasium>=0.27.1", "pygame>=2.3.0", "pettingzoo>=1.22.3", - "matplotlib>=3.7.0", + "matplotlib==3.8.4", "numba>=0.59.0", "GDAL==3.4.1", "opendrift", diff --git a/requirements.txt b/requirements.txt index fcfb2596..8c2230f3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,7 +2,7 @@ numpy>=1.23.1 gymnasium>=0.27.1 pygame>=2.3.0 pettingzoo>=1.22.3 -matplotlib>=3.7.0 +matplotlib==3.8.4 pytest>=8.0.0 numba>=0.59.0 wheel
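
Reviewer note — a minimal usage sketch of the reworked coverage environment and the new wrappers introduced in this patch. This is an assumption-based sketch, not part of the diff: it assumes `CoverageDroneSwarmSearch` is exported from the `DSSE` package root, that the keyword names below match the constructor parameters added in `coverage_env.py`, and that the environment follows the standard PettingZoo parallel API (`reset`, `step`, `action_space`, `agents`). The disaster coordinates and start time are placeholders, and actually running it requires the `coverage` extra (OpenDrift) plus network access to the HYCOM/NCEP readers.

```python
# Hypothetical smoke test for the updated coverage environment (not in this diff).
import datetime

from DSSE import CoverageDroneSwarmSearch                      # assumed export path
from DSSE.environment.wrappers import AllPositionsWrapper, RetainDronePosWrapper

env = CoverageDroneSwarmSearch(
    drone_amount=2,
    disaster_position=(-24.04, -46.17),           # placeholder lat/long
    pre_render_time=10,
    particle_amount=50_000,                        # new parameter
    particle_radius=800,                           # new parameter
    num_particle_to_filter_as_noise=1,             # new parameter
    grid_cell_size=130,                            # new parameter
    start_time=datetime.datetime(2023, 5, 1, 12),  # new parameter, placeholder date
)

# Wrappers added under DSSE/environment/wrappers/ in this patch.
env = AllPositionsWrapper(env)
env = RetainDronePosWrapper(env, drone_positions=[(0, 0), (5, 5)])

observations, infos = env.reset()
done = False
while not done:
    # Random policy, just to exercise the step/reward path.
    actions = {agent: env.action_space(agent).sample() for agent in env.agents}
    observations, rewards, terminations, truncations, infos = env.step(actions)
    done = any(terminations.values()) or any(truncations.values())

# compute_infos() now reports "accumulated_pos" (renamed from "acumulated_pos").
print(infos["drone0"]["coverage_rate"], infos["drone0"]["accumulated_pos"])
```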