From 1d8431b35b1d080d038342893ee9f946bedce445 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Renato=20Laffranchi=20Falc=C3=A3o?= <66042074+renatex333@users.noreply.github.com> Date: Mon, 3 Jun 2024 08:48:07 -0300 Subject: [PATCH 01/10] Update paper.bib Trying to fix citations --- paper.bib | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/paper.bib b/paper.bib index d891425..b537cb1 100644 --- a/paper.bib +++ b/paper.bib @@ -18,13 +18,15 @@ @misc{who note = {Accessed: 2024-04-24} } -@manual{iamsar, - title = {International Aeronautical and Maritime Search and Rescue Manual - Volume II - Mission Co-ordination}, +@inbook{iamsar, + title = {International Aeronautical and Maritime Search and Rescue Manual - }, organization = {International Maritime Organization and International Civil Aviation Organization}, + publisher = {International Maritime Organization}, year = {2022}, volume = {II}, edition = {9th}, chapter = {5}, + isbn = {9789280117356}, url = {https://store.icao.int/en/international-aeronautical-and-maritime-search-and-rescue-manual-volume-ii-mission-co-ordination-doc-9731-2} } @@ -37,7 +39,7 @@ @article{terry2021pettingzoo year = {2021} } -@software{Terry_PettingZoo_Gym_for, +@misc{Terry_PettingZoo_Gym_for, author = {Terry, Jordan and Black, Benjamin and Grammel, Nathaniel and Jayakumar, Mario and Hari, Ananth and Sullivan, Ryan and Santos, Luis and Perez, Rodrigo and Horsch, Caroline and Dieffendahl, Clemens and Williams, Niall and Lokesh, Yashas}, license = {MIT}, title = {{PettingZoo: Gym for multi-agent reinforcement learning}}, @@ -138,4 +140,4 @@ @article{WU2023113444 author = {Jie Wu and Liang Cheng and Sensen Chu}, keywords = {Persons-in-water, Drift characteristics, Sea-area-scale models, Probability of +CWL, Search and rescue}, abstract = {Drowning events are becoming more common. It is important to accurately predict the drift trajectory of floating objects. This study proposes a sea-area-scale drift modeling method to determine the drift trajectory of persons-in-water in offshore China. The proposed method divided offshore China into 18 areas, and field experiments were carried out in the northern East China Sea (N_ES) and the Taiwan Strait (TS). First, a series of models were created, namely N_ES_I (N_ES upright), N_ES_II (N_ES facedown), TS_I (TS upright), TS_II (TS facedown), C_I (comprehensive upright), and C_II (comprehensive facedown). The leeway coefficients and jibing frequencies were then determined, and the positive crosswind speed (+CWL) probabilities under different marine environments were analyzed. A large volume of information from open-source channels was used to establish a database of historical drift cases. The driven model was validated using a combination of field experiments and historical drift information. The results indicate that the trajectories simulated using the sea-area-scale models proposed in this study are more consistent with the actual trajectories than those using the constant coefficients. The prediction areas are sensitive to the +CWL probability and the jibing frequencies. 
This study can help to improve the ability of the drift trajectory in China.}
-}
\ No newline at end of file
+}

From 64b50bd2924f83c648de261693430dcdcc807fdd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Renato=20Laffranchi=20Falc=C3=A3o?= <66042074+renatex333@users.noreply.github.com>
Date: Mon, 3 Jun 2024 08:56:43 -0300
Subject: [PATCH 02/10] Update paper.bib

---
 paper.bib | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/paper.bib b/paper.bib
index b537cb1..6d86050 100644
--- a/paper.bib
+++ b/paper.bib
@@ -19,27 +19,29 @@ @misc{who
 }
 
 @inbook{iamsar,
- title = {International Aeronautical and Maritime Search and Rescue Manual - },
+ title = {International Aeronautical and Maritime Search and Rescue Manual},
  organization = {International Maritime Organization and International Civil Aviation Organization},
  publisher = {International Maritime Organization},
  year = {2022},
  volume = {II},
- edition = {9th},
  chapter = {5},
  isbn = {9789280117356},
  url = {https://store.icao.int/en/international-aeronautical-and-maritime-search-and-rescue-manual-volume-ii-mission-co-ordination-doc-9731-2}
 }
 
-@article{terry2021pettingzoo,
- title = {Pettingzoo: Gym for multi-agent reinforcement learning},
- author = {Terry, J and Black, Benjamin and Grammel, Nathaniel and Jayakumar, Mario and Hari, Ananth and Sullivan, Ryan and Santos, Luis S and Dieffendahl, Clemens and Horsch, Caroline and Perez-Vicente, Rodrigo and others},
- journal = {Advances in Neural Information Processing Systems},
- volume = {34},
- pages = {15032--15043},
- year = {2021}
+@inproceedings{terry2021pettingzoo,
+ author = {Terry, J and Black, Benjamin and Grammel, Nathaniel and Jayakumar, Mario and Hari, Ananth and Sullivan, Ryan and Santos, Luis S and Dieffendahl, Clemens and Horsch, Caroline and Perez-Vicente, Rodrigo and Williams, Niall and Lokesh, Yashas and Ravi, Praveen},
+ booktitle = {Advances in Neural Information Processing Systems},
+ editor = {M. Ranzato and A. Beygelzimer and Y. Dauphin and P.S. Liang and J. 
Wortman Vaughan}, + pages = {15032--15043}, + publisher = {Curran Associates, Inc.}, + title = {PettingZoo: Gym for Multi-Agent Reinforcement Learning}, + url = {https://proceedings.neurips.cc/paper_files/paper/2021/file/7ed2d3454c5eea71148b11d0c25104ff-Paper.pdf}, + volume = {34}, + year = {2021} } -@misc{Terry_PettingZoo_Gym_for, +@software{Terry_PettingZoo_Gym_for, author = {Terry, Jordan and Black, Benjamin and Grammel, Nathaniel and Jayakumar, Mario and Hari, Ananth and Sullivan, Ryan and Santos, Luis and Perez, Rodrigo and Horsch, Caroline and Dieffendahl, Clemens and Williams, Niall and Lokesh, Yashas}, license = {MIT}, title = {{PettingZoo: Gym for multi-agent reinforcement learning}}, From 3f0aabd77cfd7f5631eda0ad2e53aa9cb953bed4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Renato=20Laffranchi=20Falc=C3=A3o?= <66042074+renatex333@users.noreply.github.com> Date: Mon, 3 Jun 2024 09:05:24 -0300 Subject: [PATCH 03/10] Update paper.md Fix figure sourcing --- paper.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paper.md b/paper.md index 319d36a..a2b1387 100644 --- a/paper.md +++ b/paper.md @@ -56,7 +56,7 @@ To achieve this objective, traditional SAR operations used path planning algorit In order to contribute to researches on the effectiveness of integrating reinforcement learning techniques into SAR path planning, the Drone Swarm Search Environment (`DSSE`), distributed as a Python package, was designed to provide a training environment using the PettingZoo [@terry2021pettingzoo] interface. Its purpose is to facilitate the training and evaluation of single or multi-agent reinforcement learning algorithms. Additionally, it has been included as a third-party environment in the official PettingZoo documentation [@Terry_PettingZoo_Gym_for]. -![Simulation environment showcasing the algorithm's execution.\label{fig:example}](docs/pics/dsse-example.png){ width=50% } +![Simulation environment showcasing the algorithm's execution.\label{fig:example}](docs/public/pics/dsse-example.png){ width=50% } The environment depicted in \autoref{fig:example} comprises a grid, a probability matrix, drones, and an arbitrary number of persons-in-water (PIW). The movement of the PIW is influenced by, but not identical to, the dynamics of the probability matrix, which models the drift of sea currents impacting the PIW [@WU2023113444]. The probability matrix itself is defined using a two-dimensional Gaussian distribution, which expands over time, thus broadening the potential search area. This expansion simulates the diffusion of the PIW, approximating the zone where drones are most likely to detect them. Moreover, the environment employs a reward function that incentivizes the speed of the search, rewarding the agents for shorter successful search durations. From 1aa315cb1c24b62829320328620955f9978288ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Renato=20Laffranchi=20Falc=C3=A3o?= <66042074+renatex333@users.noreply.github.com> Date: Mon, 3 Jun 2024 09:07:32 -0300 Subject: [PATCH 04/10] Update paper.bib --- paper.bib | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/paper.bib b/paper.bib index 6d86050..b0cb303 100644 --- a/paper.bib +++ b/paper.bib @@ -19,9 +19,9 @@ @misc{who } @inbook{iamsar, - title = {International Aeronautical and Maritime Search and Rescue Manual}, + title = {Chapter 5. 
Search techniques and operations}, + booktitle = {International Aeronautical and Maritime Search and Rescue Manual}, organization = {International Maritime Organization and International Civil Aviation Organization}, - publisher = {International Maritime Organization}, year = {2022}, volume = {II}, chapter = {5}, From 96b52d65b03d13123d28b1588362885d4f7bb26e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Renato=20Laffranchi=20Falc=C3=A3o?= <66042074+renatex333@users.noreply.github.com> Date: Mon, 3 Jun 2024 10:25:42 -0300 Subject: [PATCH 05/10] Update paper.bib --- paper.bib | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/paper.bib b/paper.bib index b0cb303..40adcbe 100644 --- a/paper.bib +++ b/paper.bib @@ -77,6 +77,20 @@ @article{WU2024116403 abstract = {The prevalence of maritime transportation and operations is increasing, leading to a gradual increase in drowning accidents at sea. In the context of maritime search and rescue (SAR), it is essential to develop effective search plans to improve the survival probability of persons-in-water (PIWs). However, conventional SAR search plans typically use predetermined patterns to ensure complete coverage of the search area, disregarding the varying probabilities associated with the PIW distribution. To address this issue, this study has proposed a maritime SAR vessel coverage path planning framework (SARCPPF) suitable for multiple PIWs. This framework comprises three modules, namely, drift trajectory prediction, the establishment of a multilevel search area environment model, and coverage search. First, sea area-scale drift trajectory prediction models were employed using the random particle simulation method to forecast drift trajectories. A hierarchical probability environment map model was established to guide the SAR of multiple SAR units. Subsequently, we integrated deep reinforcement learning with a reward function that encompasses multiple variables to guide the navigation behavior of ship agents. We developed a coverage path planning algorithm aimed at maximizing the success rates within a limited timeframe. The experimental results have demonstrated that our model enables vessel agents to prioritize high-probability regions while avoiding repeated coverage.} } +@article{SILVER2021103535, + title = {Reward is enough}, + journal = {Artificial Intelligence}, + volume = {299}, + pages = {103535}, + year = {2021}, + issn = {0004-3702}, + doi = {https://doi.org/10.1016/j.artint.2021.103535}, + url = {https://www.sciencedirect.com/science/article/pii/S0004370221000862}, + author = {David Silver and Satinder Singh and Doina Precup and Richard S. Sutton}, + keywords = {Artificial intelligence, Artificial general intelligence, Reinforcement learning, Reward}, + abstract = {In this article we hypothesise that intelligence, and its associated abilities, can be understood as subserving the maximisation of reward. Accordingly, reward is enough to drive behaviour that exhibits abilities studied in natural and artificial intelligence, including knowledge, learning, perception, social intelligence, language, generalisation and imitation. This is in contrast to the view that specialised problem formulations are needed for each ability, based on other signals or objectives. 
Furthermore, we suggest that agents that learn through trial and error experience to maximise reward could learn behaviour that exhibits most if not all of these abilities, and therefore that powerful reinforcement learning agents could constitute a solution to artificial general intelligence.} +} + @article{gmd-11-1405-2018, author = {Dagestad, K.-F. and R\"ohrs, J. and Breivik, {\O}. and {\AA}dlandsvik, B.}, title = {OpenDrift v1.0: a generic framework for trajectory modelling}, From bea538b72b33bf529b32635d914bea356c7335df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Renato=20Laffranchi=20Falc=C3=A3o?= <66042074+renatex333@users.noreply.github.com> Date: Mon, 3 Jun 2024 10:46:32 -0300 Subject: [PATCH 06/10] Update paper.bib --- paper.bib | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/paper.bib b/paper.bib index 40adcbe..e25d1fc 100644 --- a/paper.bib +++ b/paper.bib @@ -29,6 +29,17 @@ @inbook{iamsar url = {https://store.icao.int/en/international-aeronautical-and-maritime-search-and-rescue-manual-volume-ii-mission-co-ordination-doc-9731-2} } +@article{trummel1986, + title={The complexity of the optimal searcher path problem}, + author={Trummel, KE and Weisinger, JR}, + journal={Operations Research}, + volume={34}, + number={2}, + pages={324--327}, + year={1986}, + publisher={INFORMS} +} + @inproceedings{terry2021pettingzoo, author = {Terry, J and Black, Benjamin and Grammel, Nathaniel and Jayakumar, Mario and Hari, Ananth and Sullivan, Ryan and Santos, Luis S and Dieffendahl, Clemens and Horsch, Caroline and Perez-Vicente, Rodrigo and Williams, Niall and Lokesh, Yashas and Ravi , Praveen }, booktitle = {Advances in Neural Information Processing Systems}, From 2bcc6feb537a7582f766acd338346118f15e12ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Renato=20Laffranchi=20Falc=C3=A3o?= <66042074+renatex333@users.noreply.github.com> Date: Mon, 3 Jun 2024 11:27:30 -0300 Subject: [PATCH 07/10] Update paper.md --- paper.md | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/paper.md b/paper.md index a2b1387..88f56ff 100644 --- a/paper.md +++ b/paper.md @@ -46,15 +46,23 @@ bibliography: paper.bib # Summary -The goal of this project is to contribute to the research of solutions that employ reinforcement learning techniques to maritime search and rescue missions of shipwrecked people. The software equip's researchers with a simulation of shipwrecked people casted away according to maritime currents to producing a stochastic environment to be used to train and evaluate autonomous agents. +The goal of this project is to advance research in maritime search and rescue missions using Reinforcement Learning techniques. The software provides researchers with two distinct environments: one simulates shipwrecked people drifting with maritime currents, creating a stochastic setting for training and evaluating autonomous agents; the other features a realistic particle simulation for mapping and optimizing search area coverage by autonomous agents. + +Both environments adhere to open-source standards and offer extensive customization options, allowing users to tailor them to specific research needs. These tools enable Reinforcement Learning agents to learn efficient policies for locating shipwrecked individuals or maximizing search area coverage, thereby enhancing the effectiveness of maritime rescue operations. # Statement of need Maritime navigation plays a crucial role across various domains, including leisure activities and commercial fishing. 
However, maritime transportation is particularly significant as it accounts for 80% to 90% of global trade [@allianz]. While maritime navigation is essential for global trade, it also poses significant safety risks, as evidenced by the World Health Organization's report [@who] of approximately 236,000 annual drowning deaths worldwide. Therefore, maritime safety is essential, demanding significant enhancements in search and rescue (SAR) missions. It is imperative that SAR missions minimize the search area and maximize the chances of locating the search object.
 
-To achieve this objective, traditional SAR operations used path planning algorithms such as parallel sweep, expanding square, and sector searches [@iamsar]. But recent researches propose a different approach to this problem using reinforcement learning techniques over pre-determined search patterns [@AI2021110098; @WU2024116403].
+To achieve this objective, traditional SAR operations have utilized path planning algorithms such as parallel sweep, expanding square, and sector searches [@iamsar]. However, these methods have not been optimal. Trummel and Weisinger [@trummel1986] demonstrated that finding an optimal search path, where the agent must search all sub-areas using the shortest possible path, is NP-complete. Recent research, however, proposes a different approach using Reinforcement Learning (RL) algorithms instead of pre-determined search patterns [@AI2021110098; @WU2024116403]. This is based on the belief that RL can develop new, more efficient search patterns tailored to specific applications. The hypothesis is that maximizing reward fosters generalization abilities, thereby creating powerful agents [@SILVER2021103535]. Such advancements could potentially save more lives.
+
+The two primary metrics for evaluating an efficient search are Coverage Rate and Time to Detection. Coverage Rate is the proportion of the search area covered by the search units over a specific period. Higher coverage rates typically indicate more effective search strategies. Time to Detection is the time taken from the start of the search operation to the successful detection of the target. Minimizing this time is often a critical objective in SAR missions.
+
+Expanding on the state-of-the-art research presented in [@AI2021110098; @WU2024116403], this project introduces a unique simulation environment that has not been made available by other researchers. Additionally, this new environment enables experiments on search areas that are significantly larger than those used in existing research.
+
+# Functionality
 
-In order to contribute to researches on the effectiveness of integrating reinforcement learning techniques into SAR path planning, the Drone Swarm Search Environment (`DSSE`), distributed as a Python package, was designed to provide a training environment using the PettingZoo [@terry2021pettingzoo] interface. Its purpose is to facilitate the training and evaluation of single or multi-agent reinforcement learning algorithms. Additionally, it has been included as a third-party environment in the official PettingZoo documentation [@Terry_PettingZoo_Gym_for].
+In order to contribute to research on the effectiveness of integrating RL techniques into SAR path planning, the Drone Swarm Search Environment (`DSSE`), distributed as a Python package, was designed to provide a training environment using the PettingZoo [@terry2021pettingzoo] interface. Its purpose is to facilitate the training and evaluation of single or multi-agent RL algorithms. 
Additionally, it has been included as a third-party environment in the official PettingZoo documentation [@Terry_PettingZoo_Gym_for]. ![Simulation environment showcasing the algorithm's execution.\label{fig:example}](docs/public/pics/dsse-example.png){ width=50% } @@ -78,10 +86,10 @@ while not done: The package also includes a second environment option. Similar to the first, this alternative setup is designed for training agents, but with key differences in its objectives and mechanics. Unlike the first environment, which rewards agents for speed in their searches, this second option rewards agents that cover the largest area without repetition. It incorporates a trade-off by using a stationary probability matrix, but enhances the simulation with a more advanced Lagrangian particle model [@gmd-11-1405-2018] for pinpointing the PIW's position. Moreover, this environment omits the inclusion of shipwrecked individuals, focusing instead on promoting research into how agents can learn to efficiently expand their search coverage over broader areas. -The grid is divided into square cells, each representing a quadrant with sides measuring 130 meters in the real world. This correlation with real-world dimensions is crucial for developing agents capable of learning from realistic motion patterns. The drones, which are controlled by reinforcement learning algorithms, serve as these agents. During the environment's instantiation, users define the drones' nominal speeds. These drones can move both orthogonally and diagonally across the grid, and they are equipped to search each cell for the presence of the PIW. +The grid is divided into square cells, each representing a quadrant with sides measuring 130 meters in the real world. This correlation with real-world dimensions is crucial for developing agents capable of learning from realistic motion patterns. The drones, which are controlled by RL algorithms, serve as these agents. During the environment's instantiation, users define the drones' nominal speeds. These drones can move both orthogonally and diagonally across the grid, and they are equipped to search each cell for the presence of the PIW. Several works have been developed over the past few years to define better algorithms for the search and rescue of shipwrecks [@AI2021110098; @WU2024116403]. However, no environment for agent training is made available publicly. For this reason, the development and provision of this environment as a Python library and open-source project are expected to have significant relevance to the machine learning community and ocean safety. -This new library makes it possible to implement and evaluate new reinforcement learning algorithms, such as Deep Q-Networks (DQN) [@dqn2015] and Proximal Policy Optimization (PPO) [@ppo2017], with little effort. An earlier iteration of this software was utilized in research that compared the Reinforce algorithm with the parallel sweep path planning algorithm [@dsse2023]. +This new library makes it possible to implement and evaluate new RL algorithms, such as Deep Q-Networks (DQN) [@dqn2015] and Proximal Policy Optimization (PPO) [@ppo2017], with little effort. An earlier iteration of this software was utilized in research that compared the Reinforce algorithm with the parallel sweep path planning algorithm [@dsse2023]. 
# References

From 3bb1a9d4dfe11581f92bacd4769480aab158da3e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Renato=20Laffranchi=20Falc=C3=A3o?= <66042074+renatex333@users.noreply.github.com>
Date: Mon, 3 Jun 2024 11:45:57 -0300
Subject: [PATCH 08/10] Update paper.bib

---
 paper.bib | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/paper.bib b/paper.bib
index e25d1fc..55a6b0f 100644
--- a/paper.bib
+++ b/paper.bib
@@ -124,6 +124,12 @@ @conference{dsse2023
  issn = {1983-7402}
 }
 
+@unpublished{algorithmsDSSE2024,
+ author = {Ricardo Ribeiro Rodrigues and Jorás Custódio Campos de Oliveira and Pedro Henrique Britto Aragão Andrade and Renato Laffranchi Falcão},
+ title = {Algorithms for Drone Swarm Search (DSSE)},
+ url = {https://github.com/pfeinsper/drone-swarm-search-algorithms}
+}
+
 @article{dqn2015,
  title = {Human-level control through deep reinforcement learning},
  author = {Mnih, Volodymyr and Kavukcuoglu, Koray and Silver, David and

From 5a8a0f305ce40f1a2d03f976245b5868bbdbbece Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Renato=20Laffranchi=20Falc=C3=A3o?= <66042074+renatex333@users.noreply.github.com>
Date: Mon, 3 Jun 2024 11:46:05 -0300
Subject: [PATCH 09/10] Update paper.md

---
 paper.md | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/paper.md b/paper.md
index 88f56ff..be0e0b8 100644
--- a/paper.md
+++ b/paper.md
@@ -54,9 +54,9 @@ Both environments adhere to open-source standards and offer extensive customizat
 Maritime navigation plays a crucial role across various domains, including leisure activities and commercial fishing. However, maritime transportation is particularly significant as it accounts for 80% to 90% of global trade [@allianz]. While maritime navigation is essential for global trade, it also poses significant safety risks, as evidenced by the World Health Organization's report [@who] of approximately 236,000 annual drowning deaths worldwide. Therefore, maritime safety is essential, demanding significant enhancements in search and rescue (SAR) missions. It is imperative that SAR missions minimize the search area and maximize the chances of locating the search object.
 
-To achieve this objective, traditional SAR operations have utilized path planning algorithms such as parallel sweep, expanding square, and sector searches [@iamsar]. However, these methods have not been optimal. Trummel and Weisinger [@trummel1986] demonstrated that finding an optimal search path, where the agent must search all sub-areas using the shortest possible path, is NP-complete. Recent research, however, proposes a different approach using Reinforcement Learning (RL) algorithms instead of pre-determined search patterns [@AI2021110098; @WU2024116403]. This is based on the belief that RL can develop new, more efficient search patterns tailored to specific applications. The hypothesis is that maximizing reward fosters generalization abilities, thereby creating powerful agents [@SILVER2021103535]. Such advancements could potentially save more lives.
+To achieve this objective, traditional SAR operations have utilized path planning algorithms such as parallel sweep, expanding square, and sector searches [@iamsar]. However, these methods have not been optimal. @trummel1986 demonstrated that finding an optimal search path, where the agent must search all sub-areas using the shortest possible path, is NP-complete. 
Recent research, however, proposes a different approach using Reinforcement Learning (RL) algorithms instead of pre-determined search patterns [@AI2021110098; @WU2024116403]. This is based on the belief that RL can develop new, more efficient search patterns tailored to specific applications. The hypothesis is that maximizing reward fosters generalization abilities, thereby creating powerful agents [@SILVER2021103535]. Such advancements could potentially save more lives.
 
-The two primary metrics for evaluating an efficient search are Coverage Rate and Time to Detection. Coverage Rate is the proportion of the search area covered by the search units over a specific period. Higher coverage rates typically indicate more effective search strategies. Time to Detection is the time taken from the start of the search operation to the successful detection of the target. Minimizing this time is often a critical objective in SAR missions.
+The two primary metrics for evaluating an efficient search are coverage rate and time to detection. Coverage rate is the proportion of the search area covered by the search units over a specific period. Higher coverage rates typically indicate more effective search strategies. Time to detection is the time taken from the start of the search operation to the successful detection of the target. Minimizing this time is often a critical objective in SAR missions.
 
 Expanding on the state-of-the-art research presented in [@AI2021110098; @WU2024116403], this project introduces a unique simulation environment that has not been made available by other researchers. Additionally, this new environment enables experiments on search areas that are significantly larger than those used in existing research.
 
@@ -68,6 +68,8 @@ In order to contribute to research on the effectiveness of integrating RL techni
 
 The environment depicted in \autoref{fig:example} comprises a grid, a probability matrix, drones, and an arbitrary number of persons-in-water (PIW). The movement of the PIW is influenced by, but not identical to, the dynamics of the probability matrix, which models the drift of sea currents impacting the PIW [@WU2023113444]. The probability matrix itself is defined using a two-dimensional Gaussian distribution, which expands over time, thus broadening the potential search area. This expansion simulates the diffusion of the PIW, approximating the zone where drones are most likely to detect them. Moreover, the environment employs a reward function that incentivizes the speed of the search, rewarding the agents for shorter successful search durations.
 
+The package also includes a second environment option. Similar to the first, this alternative setup is designed for training agents, but with key differences in its objectives and mechanics. Unlike the first environment, which rewards agents for speed in their searches, this second option rewards agents that cover the largest area without repetition. It incorporates a trade-off by using a stationary probability matrix, but enhances the simulation with a more advanced Lagrangian particle model [@gmd-11-1405-2018] for pinpointing the PIW's position. Moreover, this environment omits the inclusion of shipwrecked individuals, focusing instead on promoting research into how agents can learn to efficiently expand their search coverage over broader areas.
+
 Using this environment, any researcher or practitioner can write code and execute an agent's training, such as the source code presented below. 
```python @@ -76,20 +78,19 @@ from DSSE import DroneSwarmSearch env = DroneSwarmSearch() observations, info = env.reset() + rewards = 0 done = False while not done: -      actions = policy(observations, env.get_agents()) -      observations, rewards, terminations, truncations, infos = env.step(actions) -      done = any(terminations.values()) or any(truncations.values()) + actions = policy(observations, env.get_agents()) + observations, rewards, terminations, truncations, infos = env.step(actions) + done = any(terminations.values()) or any(truncations.values()) ``` -The package also includes a second environment option. Similar to the first, this alternative setup is designed for training agents, but with key differences in its objectives and mechanics. Unlike the first environment, which rewards agents for speed in their searches, this second option rewards agents that cover the largest area without repetition. It incorporates a trade-off by using a stationary probability matrix, but enhances the simulation with a more advanced Lagrangian particle model [@gmd-11-1405-2018] for pinpointing the PIW's position. Moreover, this environment omits the inclusion of shipwrecked individuals, focusing instead on promoting research into how agents can learn to efficiently expand their search coverage over broader areas. - The grid is divided into square cells, each representing a quadrant with sides measuring 130 meters in the real world. This correlation with real-world dimensions is crucial for developing agents capable of learning from realistic motion patterns. The drones, which are controlled by RL algorithms, serve as these agents. During the environment's instantiation, users define the drones' nominal speeds. These drones can move both orthogonally and diagonally across the grid, and they are equipped to search each cell for the presence of the PIW. Several works have been developed over the past few years to define better algorithms for the search and rescue of shipwrecks [@AI2021110098; @WU2024116403]. However, no environment for agent training is made available publicly. For this reason, the development and provision of this environment as a Python library and open-source project are expected to have significant relevance to the machine learning community and ocean safety. -This new library makes it possible to implement and evaluate new RL algorithms, such as Deep Q-Networks (DQN) [@dqn2015] and Proximal Policy Optimization (PPO) [@ppo2017], with little effort. An earlier iteration of this software was utilized in research that compared the Reinforce algorithm with the parallel sweep path planning algorithm [@dsse2023]. +This new library makes it possible to implement and evaluate new RL algorithms, such as Deep Q-Networks (DQN) [@dqn2015] and Proximal Policy Optimization (PPO) [@ppo2017], with little effort. Additionally, several state-of-the-art RL algorithms have already been implemented and are available [@algorithmsDSSE2024]. An earlier iteration of this software was utilized in research that compared the Reinforce algorithm with the parallel sweep path planning algorithm [@dsse2023]. 
# References From 534e6b46c7fb48027f08da97c7ebd78ff14711e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Renato=20Laffranchi=20Falc=C3=A3o?= <66042074+renatex333@users.noreply.github.com> Date: Mon, 3 Jun 2024 11:49:20 -0300 Subject: [PATCH 10/10] Update paper.bib --- paper.bib | 1 + 1 file changed, 1 insertion(+) diff --git a/paper.bib b/paper.bib index 55a6b0f..1c512b2 100644 --- a/paper.bib +++ b/paper.bib @@ -127,6 +127,7 @@ @conference{dsse2023 @unpublished{algorithmsDSSE2024, author = {Ricardo Ribeiro Rodrigues and Jorás Custódio Campos de Oliveira and Pedro Henrique Britto Aragão Andrade and Renato Laffranchi Falcão}, title = {Algorithms for Drone Swarm Search (DSSE)}, + year = {2024}, url = {https://github.com/pfeinsper/drone-swarm-search-algorithms} }
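The revised paper.md above describes the probability matrix as a two-dimensional Gaussian distribution that expands over time, broadening the potential search area. A minimal sketch of such a matrix follows; the grid size, centre, and growth parameters are illustrative assumptions, not values taken from the `DSSE` source.

```python
import numpy as np

def expanding_gaussian_matrix(size, center, t, base_sigma=2.0, growth=0.5):
    """Probability matrix over a size x size grid whose Gaussian spread
    widens with time step t, mimicking the diffusion of a person-in-water."""
    sigma = base_sigma + growth * t          # spread grows as time passes
    rows, cols = np.mgrid[0:size, 0:size]    # cell coordinates
    cy, cx = center
    density = np.exp(-((rows - cy) ** 2 + (cols - cx) ** 2) / (2 * sigma ** 2))
    return density / density.sum()           # normalize so all cells sum to 1
```

At `t = 0` the probability mass is concentrated around the last known position; as `t` grows, it spreads into neighbouring quadrants, which is the widening search zone the paper describes.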
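The training loop shown in paper.md leaves `policy` undefined. A minimal way to run that snippet end to end is the random stub below. It assumes only that `DSSE` exposes the standard PettingZoo parallel API (`env.action_space(agent).sample()`), consistent with the paper's statement that the environment uses the PettingZoo interface; `random_policy` itself is a placeholder, not part of the package.

```python
from DSSE import DroneSwarmSearch

env = DroneSwarmSearch()
observations, info = env.reset()

def random_policy(observations, agents):
    # One action per drone, keyed by agent name (PettingZoo parallel convention).
    return {agent: env.action_space(agent).sample() for agent in agents}

done = False
while not done:
    actions = random_policy(observations, env.get_agents())
    observations, rewards, terminations, truncations, infos = env.step(actions)
    done = any(terminations.values()) or any(truncations.values())
```

A trained policy, for example one produced with DQN [@dqn2015] or PPO [@ppo2017], can be swapped in by replacing `random_policy` with a function that maps each drone's observation to an action.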
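Of the classical IAMSAR patterns the paper cites as baselines, parallel sweep reduces on a grid to a boustrophedon traversal. The sketch below is a generic illustration of that pattern under the assumption of a single agent with orthogonal moves; it is not taken from the `DSSE` or its algorithms repository.

```python
def parallel_sweep(rows, cols):
    """Visit every cell of a rows x cols grid exactly once, sweeping each
    row and alternating direction (the classical parallel sweep pattern)."""
    path = []
    for r in range(rows):
        cells = range(cols) if r % 2 == 0 else range(cols - 1, -1, -1)
        path.extend((r, c) for c in cells)
    return path
```

Because the path visits each of the `rows * cols` cells exactly once, no coverage path over the same grid can be shorter, which is what makes parallel sweep a natural fixed baseline against which learned search policies are compared.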
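Finally, the two evaluation metrics named in the statement of need, coverage rate and time to detection, can be logged per episode roughly as follows; the set-of-visited-cells bookkeeping is an assumption made for illustration.

```python
def coverage_rate(visited_cells, grid_size):
    """Fraction of the grid's cells visited by the search units so far."""
    return len(visited_cells) / (grid_size * grid_size)

def time_to_detection(steps_until_found, seconds_per_step):
    """Elapsed simulated time from search start to target detection."""
    return steps_until_found * seconds_per_step
```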