Commit cb8b3c2
Merge branch 'main' into vitepress_docs
Pedro2712 authored May 23, 2024
2 parents ed2034e + 53f356d commit cb8b3c2
Showing 18 changed files with 655 additions and 172 deletions.
73 changes: 44 additions & 29 deletions DSSE/environment/coverage_env.py
@@ -1,3 +1,4 @@
import datetime
from gymnasium.spaces import Discrete
from .env_base import DroneSwarmSearchBase
from .simulation.particle_simulation import ParticleSimulation
@@ -10,14 +11,13 @@ class CoverageDroneSwarmSearch(DroneSwarmSearchBase):
metadata = {
"name": "DroneSwarmSearchCPP",
}
reward_scheme = Reward(
default=0,
leave_grid=-10,
exceed_timestep=-100,
drones_collision=-10,
search_cell=10,
search_and_find=100,
)
reward_scheme = {
"default": -0.2,
"exceed_timestep": 0.0,
"search_cell": 1.0,
"done": 60,
"reward_poc": 45.0
}

def __init__(
self,
@@ -31,13 +31,24 @@ def __init__(
drone_probability_of_detection=0.9,
pre_render_time=10,
prob_matrix_path=None,
particle_amount=50_000,
particle_radius=800,
num_particle_to_filter_as_noise=1,
start_time: datetime = None,
grid_cell_size=130,
) -> None:

# Probability matrix built from a particle drift simulation

if start_time is None:
start_time = datetime.datetime.now()

self.probability_matrix = ParticleSimulation(
disaster_lat=disaster_position[0],
disaster_long=disaster_position[1],
start_time=start_time,
duration_hours=pre_render_time,
particle_amount=particle_amount,
particle_radius=particle_radius,
num_particle_to_filter_as_noise=num_particle_to_filter_as_noise
)
if prob_matrix_path is not None:
if not isinstance(prob_matrix_path, str):
@@ -56,6 +67,7 @@ def __init__(
drone_amount=drone_amount,
drone_speed=drone_speed,
probability_of_detection=drone_probability_of_detection,
grid_cell_size=grid_cell_size,
)
self.disaster_position = disaster_position
# Sets used to keep track of the seen and not seen states for reward calculation
@@ -72,33 +84,39 @@ def reset(self, seed=None, options=None):

self.reset_search_state()
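# Scale this episode's rewards to the search area: "done" is the number of uncovered cells per agent and "reward_poc" is the total number of uncovered cells.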

self.reward_scheme["done"] = len(self.not_seen_states) / len(self.agents)
self.reward_scheme["reward_poc"] = len(self.not_seen_states)
self.cumm_pos = 0
self.repeated_coverage = 0
infos = self.compute_infos(False)
return obs, infos

def reset_search_state(self):
# This is in (x, y)
self.seen_states = {pos for pos in self.agents_positions}
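# Begin the episode with no cells marked as seen; every grid cell still needs to be covered.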
self.seen_states = set()
self.not_seen_states: set = self.all_states.copy()

mat = self.probability_matrix.get_matrix()
# (row, col)
close_to_zero = np.argwhere(np.abs(mat) < 1e-10)

# Cells with POC close to zero do not need to be visited
for y, x in close_to_zero:
self.seen_states.add((x, y))
point = (x, y)
if point in self.not_seen_states:
self.not_seen_states.remove(point)

self.not_seen_states = self.all_states - self.seen_states

def create_observations(self):
observations = {}

probability_matrix = self.probability_matrix.get_matrix()
prob_max = probability_matrix.max()
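# Normalize by the matrix maximum so observation values lie in [0, 1]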
norm = probability_matrix / prob_max
for idx, agent in enumerate(self.agents):
observation = (
self.agents_positions[idx],
probability_matrix,
norm,
)
observations[agent] = observation

@@ -112,7 +130,7 @@ def step(self, actions: dict[str, int]) -> tuple:
raise ValueError("Please reset the env before interacting with it")

terminations = {a: False for a in self.agents}
rewards = {a: self.reward_scheme.default for a in self.agents}
rewards = {a: self.reward_scheme["default"] for a in self.agents}
truncations = {a: False for a in self.agents}
self.timestep += 1

@@ -127,32 +145,29 @@ def step(self, actions: dict[str, int]) -> tuple:


if self.timestep >= self.timestep_limit:
rewards[agent] = self.reward_scheme.exceed_timestep
rewards[agent] = self.reward_scheme["exceed_timestep"]
truncations[agent] = True
continue

# Action 8 is to stay in the same position, default reward.
if drone_action == 8:
continue

drone_x, drone_y = self.agents_positions[idx]
new_position = self.move_drone((drone_x, drone_y), drone_action)
if not self.is_valid_position(new_position):
rewards[agent] = self.reward_scheme.leave_grid
continue

self.agents_positions[idx] = new_position
new_x, new_y = new_position
if new_position in self.not_seen_states:
reward_poc = (1 / (self.timestep)) * prob_matrix[new_y, new_x] * 1_000
rewards[agent] = self.reward_scheme.search_cell + reward_poc
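# The POC bonus shrinks linearly as the episode approaches its timestep limit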
time_multiplier = (1 - self.timestep / self.timestep_limit)
reward_poc = time_multiplier * prob_matrix[new_y, new_x] * self.reward_scheme["reward_poc"]
rewards[agent] = self.reward_scheme["search_cell"] + reward_poc
self.seen_states.add(new_position)
self.not_seen_states.remove(new_position)
# Probability of success (POS) = POC * POD
self.cumm_pos += prob_matrix[new_y, new_x] * self.drone.pod
# Remove the probability of the visited cell.
prob_matrix[new_y, new_x] = 0.0
else:
# rewards[agent] = -
self.repeated_coverage += 1

# Get dummy infos
@@ -161,14 +176,15 @@ def step(self, actions: dict[str, int]) -> tuple:
self.render()

if is_completed:
# TODO: Proper define reward for completing the search (R_done)
# (R_done)
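# Completion bonus: the base "done" reward plus a time-adjusted term that grows the earlier coverage finishes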
time_adjusted = (1 - self.timestep / self.timestep_limit) * self.reward_scheme["done"]
r_done = self.reward_scheme["done"] + time_adjusted
rewards = {
drone: self.reward_scheme.search_and_find for drone in self.agents
drone: r_done for drone in self.agents
}
terminations = {drone: True for drone in self.agents}
infos = self.compute_infos(is_completed)

self.compute_drone_collision(terminations, rewards)
# Get observations
observations = self.create_observations()
# If terminated, reset the agents (PettingZoo parallel env requirement)
@@ -177,14 +193,13 @@ def step(self, actions: dict[str, int]) -> tuple:
return observations, rewards, terminations, truncations, infos

def compute_infos(self, is_completed: bool) -> dict[str, dict]:
# TODO: Is this the best way to inform the coverage rate, Cum_pos and repetitions?
total_states = len(self.seen_states) + len(self.not_seen_states)
coverage_rate = len(self.seen_states) / total_states
infos = {
"is_completed": is_completed,
"coverage_rate": coverage_rate,
"repeated_coverage": self.repeated_coverage / total_states,
"acumulated_pos": self.cumm_pos,
"accumulated_pos": self.cumm_pos,
}
return {drone: infos for drone in self.agents}

26 changes: 11 additions & 15 deletions DSSE/environment/env.py
@@ -19,12 +19,12 @@ class DroneSwarmSearch(DroneSwarmSearchBase):
}

reward_scheme = Reward(
default=0.1,
leave_grid=-200,
exceed_timestep=-200,
drones_collision=-200,
search_cell=1,
search_and_find=200,
default=0.0,
leave_grid=0,
exceed_timestep=0,
drones_collision=0,
search_cell=0,
search_and_find=1,
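# Sparse scheme: only search_and_find yields a nonzero reward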
)

def __init__(
@@ -41,8 +41,9 @@ def __init__(
person_initial_position=(0, 0),
drone_amount=1,
drone_speed=10,
probability_of_detection=1,
probability_of_detection=1.0,
pre_render_time=0,
grid_cell_size=130,
):
if person_amount <= 0:
raise ValueError("The number of persons must be greater than 0.")
@@ -57,6 +58,7 @@ def __init__(
drone_amount=drone_amount,
drone_speed=drone_speed,
probability_of_detection=probability_of_detection,
grid_cell_size=grid_cell_size,
)

self.pre_render_steps = round(
@@ -241,8 +243,6 @@ def step(self, actions):
# Check truncation conditions (overwrites termination conditions)
if self.timestep >= self.timestep_limit:
rewards[agent] = self.reward_scheme.exceed_timestep
if self.rewards_sum[agent] > 0:
rewards[agent] += self.rewards_sum[agent] // 2
truncations[agent] = True
terminations[agent] = True
continue
@@ -288,11 +288,6 @@ def step(self, actions):
for agent in self.agents:
terminations[agent] = True
truncations[agent] = True
elif is_searching:
prob_matrix = self.probability_matrix.get_matrix()
rewards[agent] = (
prob_matrix[drone_y][drone_x]
)

self.rewards_sum[agent] += rewards[agent]

@@ -301,7 +296,8 @@ def step(self, actions):
infos = {drone: {"Found": person_found} for drone in self.agents}

# CHECK COLLISION - Drone
self.compute_drone_collision(terminations, rewards)
# self.compute_drone_collision(terminations, rewards)


self.render_step(any(terminations.values()), person_found)

25 changes: 6 additions & 19 deletions DSSE/environment/env_base.py
@@ -4,7 +4,6 @@
from pettingzoo import ParallelEnv
from .entities.drone import DroneData
from .pygame_interface import PygameInterface
from .simulation.dynamic_probability import ProbabilityMatrix
from .constants import Actions
from gymnasium.spaces import MultiDiscrete, Discrete, Tuple, Box
from copy import copy
@@ -21,8 +20,9 @@ def __init__(
drone_amount=1,
drone_speed=10,
probability_of_detection=1,
grid_cell_size=130,
) -> None:
self.cell_size = 130 # in meters
self.cell_size = grid_cell_size # in meters
self.grid_size = grid_size
self._was_reset = False
if not isinstance(drone_amount, int):
@@ -60,7 +60,7 @@ def __init__(

# Initializing render
self.pygame_renderer = PygameInterface(
self.grid_size, render_gradient, render_grid
self.grid_size, render_gradient, render_grid, self.metadata["name"]
)

def calculate_simulation_time_step(
@@ -163,21 +163,6 @@ def create_observations(self):
def step(self, actions):
raise NotImplementedError("Method not implemented")

def compute_drone_collision(self, terminations, rewards):
"""
Check for drone collision and compute terminations, rewards and truncations.
"""
for drone_1_id in range(len(self.agents)):
for drone_2_id in range(drone_1_id + 1, len(self.agents)):
drone_1_name = self.agents[drone_1_id]
drone_2_name = self.agents[drone_2_id]
if self.agents_positions[drone_1_id] == self.agents_positions[drone_2_id]:
terminations[drone_1_name] = True
terminations[drone_2_name] = True
rewards[drone_1_name] = self.reward_scheme.drones_collision
rewards[drone_2_name] = self.reward_scheme.drones_collision


def move_drone(self, position, action):
"""
Returns the drone's new position for the given action
@@ -199,6 +184,8 @@ def move_drone(self, position, action):
new_position = (position[0] - 1, position[1] + 1)
case Actions.DOWN_RIGHT.value: # DOWN_RIGHT
new_position = (position[0] + 1, position[1] + 1)
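# Any other action value leaves the drone in its current cell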
case _:
new_position = position

return new_position

@@ -217,7 +204,7 @@ def observation_space(self, agent):
low=0,
high=1,
shape=(self.grid_size, self.grid_size),
dtype=np.float32,
dtype=np.float64,
),
)
)
11 changes: 9 additions & 2 deletions DSSE/environment/pygame_interface.py
@@ -13,7 +13,7 @@ class PygameInterface:
FPS = 5

def __init__(
self, grid_size: int, render_gradient: bool, render_grid: bool
self, grid_size: int, render_gradient: bool, render_grid: bool, env_name: str
) -> None:
pygame.init()
self.grid_size = grid_size
@@ -23,6 +23,7 @@ def __init__(
self.screen = None
self.render_on = False
self.probability_matrix = None
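# Store the environment name so cell coloring can be adapted for the coverage (CPP) environment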
self.env_name = env_name

self.block_size = self.window_size / self.grid_size
self.drone_img = None
@@ -126,7 +127,13 @@ def compute_cell_color(self, normalized_prob):
elif normalized_prob >= 0.25:
red = 255
green = 255
return (red, green, 0)
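# The coverage (CPP) environment adds a blue component to cells that still have nonzero probability; covered cells (probability zeroed) keep blue at 0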

if self.env_name == "DroneSwarmSearchCPP":
blue = 255 if normalized_prob > 0 else 0
else:
blue = 0

return (red, green, blue)

def render_episode_end_screen(self, message: str, color: tuple):
font = pygame.font.SysFont(None, 50)