Commit cb8b3c2
Merge branch 'main' into vitepress_docs
Pedro2712 authored May 23, 2024
2 parents ed2034e + 53f356d commit cb8b3c2
Showing 18 changed files with 655 additions and 172 deletions.
73 changes: 44 additions & 29 deletions DSSE/environment/coverage_env.py
@@ -1,3 +1,4 @@
import datetime
from gymnasium.spaces import Discrete
from .env_base import DroneSwarmSearchBase
from .simulation.particle_simulation import ParticleSimulation
@@ -10,14 +11,13 @@ class CoverageDroneSwarmSearch(DroneSwarmSearchBase):
metadata = {
"name": "DroneSwarmSearchCPP",
}
reward_scheme = Reward(
default=0,
leave_grid=-10,
exceed_timestep=-100,
drones_collision=-10,
search_cell=10,
search_and_find=100,
)
reward_scheme = {
"default": -0.2,
"exceed_timestep": 0.0,
"search_cell": 1.0,
"done": 60,
"reward_poc": 45.0
}

def __init__(
self,
@@ -31,13 +31,24 @@ def __init__(
drone_probability_of_detection=0.9,
pre_render_time=10,
prob_matrix_path=None,
particle_amount=50_000,
particle_radius=800,
num_particle_to_filter_as_noise=1,
start_time: datetime = None,
grid_cell_size=130,
) -> None:

# Probability matrix built from a particle drift simulation

if start_time is None:
start_time = datetime.datetime.now()

self.probability_matrix = ParticleSimulation(
disaster_lat=disaster_position[0],
disaster_long=disaster_position[1],
start_time=start_time,
duration_hours=pre_render_time,
particle_amount=particle_amount,
particle_radius=particle_radius,
num_particle_to_filter_as_noise=num_particle_to_filter_as_noise
)
if prob_matrix_path is not None:
if not isinstance(prob_matrix_path, str):
@@ -56,6 +67,7 @@ def __init__(
drone_amount=drone_amount,
drone_speed=drone_speed,
probability_of_detection=drone_probability_of_detection,
grid_cell_size=grid_cell_size,
)
self.disaster_position = disaster_position
# Sets used to keep track of the seen and not seen states for reward calculation
@@ -72,33 +84,39 @@ def reset(self, seed=None, options=None):

self.reset_search_state()
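# Scale this episode's rewards to the search area: "done" is the number of uncovered cells per agent and "reward_poc" is the total number of uncovered cells.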

self.reward_scheme["done"] = len(self.not_seen_states) / len(self.agents)
self.reward_scheme["reward_poc"] = len(self.not_seen_states)
self.cumm_pos = 0
self.repeated_coverage = 0
infos = self.compute_infos(False)
return obs, infos

def reset_search_state(self):
# This is in (x, y)
self.seen_states = {pos for pos in self.agents_positions}
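# Begin the episode with no cells marked as seen; every grid cell still needs to be covered.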
self.seen_states = set()
self.not_seen_states: set = self.all_states.copy()

mat = self.probability_matrix.get_matrix()
# (row, col)
close_to_zero = np.argwhere(np.abs(mat) < 1e-10)

# Cells with POC close to zero do not need to be visited
for y, x in close_to_zero:
self.seen_states.add((x, y))
point = (x, y)
if point in self.not_seen_states:
self.not_seen_states.remove(point)

self.not_seen_states = self.all_states - self.seen_states

def create_observations(self):
observations = {}

probability_matrix = self.probability_matrix.get_matrix()
prob_max = probability_matrix.max()
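# Normalize by the matrix maximum so observation values lie in [0, 1]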
norm = probability_matrix / prob_max
for idx, agent in enumerate(self.agents):
observation = (
self.agents_positions[idx],
probability_matrix,
norm,
)
observations[agent] = observation

@@ -112,7 +130,7 @@ def step(self, actions: dict[str, int]) -> tuple:
raise ValueError("Please reset the env before interacting with it")

terminations = {a: False for a in self.agents}
rewards = {a: self.reward_scheme.default for a in self.agents}
rewards = {a: self.reward_scheme["default"] for a in self.agents}
truncations = {a: False for a in self.agents}
self.timestep += 1

@@ -127,32 +145,29 @@ def step(self, actions: dict[str, int]) -> tuple:


if self.timestep >= self.timestep_limit:
rewards[agent] = self.reward_scheme.exceed_timestep
rewards[agent] = self.reward_scheme["exceed_timestep"]
truncations[agent] = True
continue

# Action 8 is to stay in the same position, default reward.
if drone_action == 8:
continue

drone_x, drone_y = self.agents_positions[idx]
new_position = self.move_drone((drone_x, drone_y), drone_action)
if not self.is_valid_position(new_position):
rewards[agent] = self.reward_scheme.leave_grid
continue

self.agents_positions[idx] = new_position
new_x, new_y = new_position
if new_position in self.not_seen_states:
reward_poc = (1 / (self.timestep)) * prob_matrix[new_y, new_x] * 1_000
rewards[agent] = self.reward_scheme.search_cell + reward_poc
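# The POC bonus shrinks linearly as the episode approaches its timestep limit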
time_multiplier = (1 - self.timestep / self.timestep_limit)
reward_poc = time_multiplier * prob_matrix[new_y, new_x] * self.reward_scheme["reward_poc"]
rewards[agent] = self.reward_scheme["search_cell"] + reward_poc
self.seen_states.add(new_position)
self.not_seen_states.remove(new_position)
# Probability of success (POS) = POC * POD
self.cumm_pos += prob_matrix[new_y, new_x] * self.drone.pod
# Remove the probability of the visited cell.
prob_matrix[new_y, new_x] = 0.0
else:
# rewards[agent] = -
self.repeated_coverage += 1

# Get dummy infos
@@ -161,14 +176,15 @@ def step(self, actions: dict[str, int]) -> tuple:
self.render()

if is_completed:
# TODO: Proper define reward for completing the search (R_done)
# (R_done)
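# Completion bonus: the base "done" reward plus a time-adjusted term that grows the earlier coverage finishes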
time_adjusted = (1 - self.timestep / self.timestep_limit) * self.reward_scheme["done"]
r_done = self.reward_scheme["done"] + time_adjusted
rewards = {
drone: self.reward_scheme.search_and_find for drone in self.agents
drone: r_done for drone in self.agents
}
terminations = {drone: True for drone in self.agents}
infos = self.compute_infos(is_completed)

self.compute_drone_collision(terminations, rewards)
# Get observations
observations = self.create_observations()
# If terminated, reset the agents (PettingZoo parallel env requirement)
@@ -177,14 +193,13 @@ def step(self, actions: dict[str, int]) -> tuple:
return observations, rewards, terminations, truncations, infos

def compute_infos(self, is_completed: bool) -> dict[str, dict]:
# TODO: Is this the best way to inform the coverage rate, Cum_pos and repetitions?
total_states = len(self.seen_states) + len(self.not_seen_states)
coverage_rate = len(self.seen_states) / total_states
infos = {
"is_completed": is_completed,
"coverage_rate": coverage_rate,
"repeated_coverage": self.repeated_coverage / total_states,
"acumulated_pos": self.cumm_pos,
"accumulated_pos": self.cumm_pos,
}
return {drone: infos for drone in self.agents}

26 changes: 11 additions & 15 deletions DSSE/environment/env.py
@@ -19,12 +19,12 @@ class DroneSwarmSearch(DroneSwarmSearchBase):
}

reward_scheme = Reward(
default=0.1,
leave_grid=-200,
exceed_timestep=-200,
drones_collision=-200,
search_cell=1,
search_and_find=200,
default=0.0,
leave_grid=0,
exceed_timestep=0,
drones_collision=0,
search_cell=0,
search_and_find=1,
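# Sparse scheme: only search_and_find yields a nonzero reward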
)

def __init__(
@@ -41,8 +41,9 @@ def __init__(
person_initial_position=(0, 0),
drone_amount=1,
drone_speed=10,
probability_of_detection=1,
probability_of_detection=1.0,
pre_render_time=0,
grid_cell_size=130,
):
if person_amount <= 0:
raise ValueError("The number of persons must be greater than 0.")
@@ -57,6 +58,7 @@ def __init__(
drone_amount=drone_amount,
drone_speed=drone_speed,
probability_of_detection=probability_of_detection,
grid_cell_size=grid_cell_size,
)

self.pre_render_steps = round(
@@ -241,8 +243,6 @@ def step(self, actions):
# Check truncation conditions (overwrites termination conditions)
if self.timestep >= self.timestep_limit:
rewards[agent] = self.reward_scheme.exceed_timestep
if self.rewards_sum[agent] > 0:
rewards[agent] += self.rewards_sum[agent] // 2
truncations[agent] = True
terminations[agent] = True
continue
@@ -288,11 +288,6 @@ def step(self, actions):
for agent in self.agents:
terminations[agent] = True
truncations[agent] = True
elif is_searching:
prob_matrix = self.probability_matrix.get_matrix()
rewards[agent] = (
prob_matrix[drone_y][drone_x]
)

self.rewards_sum[agent] += rewards[agent]

@@ -301,7 +296,8 @@ def step(self, actions):
infos = {drone: {"Found": person_found} for drone in self.agents}

# CHECK COLLISION - Drone
self.compute_drone_collision(terminations, rewards)
# self.compute_drone_collision(terminations, rewards)


self.render_step(any(terminations.values()), person_found)

25 changes: 6 additions & 19 deletions DSSE/environment/env_base.py
@@ -4,7 +4,6 @@
from pettingzoo import ParallelEnv
from .entities.drone import DroneData
from .pygame_interface import PygameInterface
from .simulation.dynamic_probability import ProbabilityMatrix
from .constants import Actions
from gymnasium.spaces import MultiDiscrete, Discrete, Tuple, Box
from copy import copy
@@ -21,8 +20,9 @@ def __init__(
drone_amount=1,
drone_speed=10,
probability_of_detection=1,
grid_cell_size=130,
) -> None:
self.cell_size = 130 # in meters
self.cell_size = grid_cell_size # in meters
self.grid_size = grid_size
self._was_reset = False
if not isinstance(drone_amount, int):
@@ -60,7 +60,7 @@ def __init__(

# Initializing render
self.pygame_renderer = PygameInterface(
self.grid_size, render_gradient, render_grid
self.grid_size, render_gradient, render_grid, self.metadata["name"]
)

def calculate_simulation_time_step(
@@ -163,21 +163,6 @@ def create_observations(self):
def step(self, actions):
raise NotImplementedError("Method not implemented")

def compute_drone_collision(self, terminations, rewards):
"""
Check for drone collision and compute terminations, rewards and truncations.
"""
for drone_1_id in range(len(self.agents)):
for drone_2_id in range(drone_1_id + 1, len(self.agents)):
drone_1_name = self.agents[drone_1_id]
drone_2_name = self.agents[drone_2_id]
if self.agents_positions[drone_1_id] == self.agents_positions[drone_2_id]:
terminations[drone_1_name] = True
terminations[drone_2_name] = True
rewards[drone_1_name] = self.reward_scheme.drones_collision
rewards[drone_2_name] = self.reward_scheme.drones_collision


def move_drone(self, position, action):
"""
Returns the drone's new position for the given action
@@ -199,6 +184,8 @@ def move_drone(self, position, action):
new_position = (position[0] - 1, position[1] + 1)
case Actions.DOWN_RIGHT.value: # DOWN_RIGHT
new_position = (position[0] + 1, position[1] + 1)
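# Any other action value leaves the drone in its current cell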
case _:
new_position = position

return new_position

@@ -217,7 +204,7 @@ def observation_space(self, agent):
low=0,
high=1,
shape=(self.grid_size, self.grid_size),
dtype=np.float32,
dtype=np.float64,
),
)
)
11 changes: 9 additions & 2 deletions DSSE/environment/pygame_interface.py
@@ -13,7 +13,7 @@ class PygameInterface:
FPS = 5

def __init__(
self, grid_size: int, render_gradient: bool, render_grid: bool
self, grid_size: int, render_gradient: bool, render_grid: bool, env_name: str
) -> None:
pygame.init()
self.grid_size = grid_size
@@ -23,6 +23,7 @@ def __init__(
self.screen = None
self.render_on = False
self.probability_matrix = None
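# Store the environment name so cell coloring can be adapted for the coverage (CPP) environment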
self.env_name = env_name

self.block_size = self.window_size / self.grid_size
self.drone_img = None
@@ -126,7 +127,13 @@ def compute_cell_color(self, normalized_prob):
elif normalized_prob >= 0.25:
red = 255
green = 255
return (red, green, 0)
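# The coverage (CPP) environment adds a blue component to cells that still have nonzero probability; covered cells (probability zeroed) keep blue at 0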

if self.env_name == "DroneSwarmSearchCPP":
blue = 255 if normalized_prob > 0 else 0
else:
blue = 0

return (red, green, blue)

def render_episode_end_screen(self, message: str, color: tuple):
font = pygame.font.SysFont(None, 50)