Merge pull request #261 from pfeinsper/paper-writing
Addressing paper feedbacks on JOSS Review
JorasOliveira authored Jun 11, 2024
2 parents f7d3eb6 + 534e6b4 commit 6b78af8
Showing 2 changed files with 67 additions and 22 deletions.
58 changes: 47 additions & 11 deletions paper.bib
@@ -18,23 +18,38 @@ @misc{who
note = {Accessed: 2024-04-24}
}

@manual{iamsar,
title = {International Aeronautical and Maritime Search and Rescue Manual - Volume II - Mission Co-ordination},
@inbook{iamsar,
title = {Chapter 5. Search techniques and operations},
booktitle = {International Aeronautical and Maritime Search and Rescue Manual},
organization = {International Maritime Organization and International Civil Aviation Organization},
year = {2022},
volume = {II},
edition = {9th},
chapter = {5},
isbn = {9789280117356},
url = {https://store.icao.int/en/international-aeronautical-and-maritime-search-and-rescue-manual-volume-ii-mission-co-ordination-doc-9731-2}
}

@article{terry2021pettingzoo,
title = {Pettingzoo: Gym for multi-agent reinforcement learning},
author = {Terry, J and Black, Benjamin and Grammel, Nathaniel and Jayakumar, Mario and Hari, Ananth and Sullivan, Ryan and Santos, Luis S and Dieffendahl, Clemens and Horsch, Caroline and Perez-Vicente, Rodrigo and others},
journal = {Advances in Neural Information Processing Systems},
volume = {34},
pages = {15032--15043},
year = {2021}
@article{trummel1986,
title = {The complexity of the optimal searcher path problem},
author = {Trummel, KE and Weisinger, JR},
journal = {Operations Research},
volume = {34},
number = {2},
pages = {324--327},
year = {1986},
publisher = {INFORMS}
}

@inproceedings{terry2021pettingzoo,
author = {Terry, J and Black, Benjamin and Grammel, Nathaniel and Jayakumar, Mario and Hari, Ananth and Sullivan, Ryan and Santos, Luis S and Dieffendahl, Clemens and Horsch, Caroline and Perez-Vicente, Rodrigo and Williams, Niall and Lokesh, Yashas and Ravi, Praveen},
booktitle = {Advances in Neural Information Processing Systems},
editor = {M. Ranzato and A. Beygelzimer and Y. Dauphin and P.S. Liang and J. Wortman Vaughan},
pages = {15032--15043},
publisher = {Curran Associates, Inc.},
title = {PettingZoo: Gym for Multi-Agent Reinforcement Learning},
url = {https://proceedings.neurips.cc/paper_files/paper/2021/file/7ed2d3454c5eea71148b11d0c25104ff-Paper.pdf},
volume = {34},
year = {2021}
}

@software{Terry_PettingZoo_Gym_for,
@@ -73,6 +88,20 @@ @article{WU2024116403
abstract = {The prevalence of maritime transportation and operations is increasing, leading to a gradual increase in drowning accidents at sea. In the context of maritime search and rescue (SAR), it is essential to develop effective search plans to improve the survival probability of persons-in-water (PIWs). However, conventional SAR search plans typically use predetermined patterns to ensure complete coverage of the search area, disregarding the varying probabilities associated with the PIW distribution. To address this issue, this study has proposed a maritime SAR vessel coverage path planning framework (SARCPPF) suitable for multiple PIWs. This framework comprises three modules, namely, drift trajectory prediction, the establishment of a multilevel search area environment model, and coverage search. First, sea area-scale drift trajectory prediction models were employed using the random particle simulation method to forecast drift trajectories. A hierarchical probability environment map model was established to guide the SAR of multiple SAR units. Subsequently, we integrated deep reinforcement learning with a reward function that encompasses multiple variables to guide the navigation behavior of ship agents. We developed a coverage path planning algorithm aimed at maximizing the success rates within a limited timeframe. The experimental results have demonstrated that our model enables vessel agents to prioritize high-probability regions while avoiding repeated coverage.}
}

@article{SILVER2021103535,
title = {Reward is enough},
journal = {Artificial Intelligence},
volume = {299},
pages = {103535},
year = {2021},
issn = {0004-3702},
doi = {10.1016/j.artint.2021.103535},
url = {https://www.sciencedirect.com/science/article/pii/S0004370221000862},
author = {David Silver and Satinder Singh and Doina Precup and Richard S. Sutton},
keywords = {Artificial intelligence, Artificial general intelligence, Reinforcement learning, Reward},
abstract = {In this article we hypothesise that intelligence, and its associated abilities, can be understood as subserving the maximisation of reward. Accordingly, reward is enough to drive behaviour that exhibits abilities studied in natural and artificial intelligence, including knowledge, learning, perception, social intelligence, language, generalisation and imitation. This is in contrast to the view that specialised problem formulations are needed for each ability, based on other signals or objectives. Furthermore, we suggest that agents that learn through trial and error experience to maximise reward could learn behaviour that exhibits most if not all of these abilities, and therefore that powerful reinforcement learning agents could constitute a solution to artificial general intelligence.}
}

@article{gmd-11-1405-2018,
author = {Dagestad, K.-F. and R\"ohrs, J. and Breivik, {\O}. and {\AA}dlandsvik, B.},
title = {OpenDrift v1.0: a generic framework for trajectory modelling},
@@ -95,6 +124,13 @@ @conference{dsse2023
issn = {1983-7402}
}

@unpublished{algorithmsDSSE2024,
author = {Ricardo Ribeiro Rodrigues and Jorás Custódio Campos de Oliveira and Pedro Henrique Britto Aragão Andrade and Renato Laffranchi Falcão},
title = {Algorithms for Drone Swarm Search (DSSE)},
year = {2024},
url = {https://github.com/pfeinsper/drone-swarm-search-algorithms}
}

@article{dqn2015,
title = {Human-level control through deep reinforcement learning},
author = {Mnih, Volodymyr and Kavukcuoglu, Koray and Silver, David and
@@ -138,4 +174,4 @@ @article{WU2023113444
author = {Jie Wu and Liang Cheng and Sensen Chu},
keywords = {Persons-in-water, Drift characteristics, Sea-area-scale models, Probability of +CWL, Search and rescue},
abstract = {Drowning events are becoming more common. It is important to accurately predict the drift trajectory of floating objects. This study proposes a sea-area-scale drift modeling method to determine the drift trajectory of persons-in-water in offshore China. The proposed method divided offshore China into 18 areas, and field experiments were carried out in the northern East China Sea (N_ES) and the Taiwan Strait (TS). First, a series of models were created, namely N_ES_I (N_ES upright), N_ES_II (N_ES facedown), TS_I (TS upright), TS_II (TS facedown), C_I (comprehensive upright), and C_II (comprehensive facedown). The leeway coefficients and jibing frequencies were then determined, and the positive crosswind speed (+CWL) probabilities under different marine environments were analyzed. A large volume of information from open-source channels was used to establish a database of historical drift cases. The driven model was validated using a combination of field experiments and historical drift information. The results indicate that the trajectories simulated using the sea-area-scale models proposed in this study are more consistent with the actual trajectories than those using the constant coefficients. The prediction areas are sensitive to the +CWL probability and the jibing frequencies. This study can help to improve the ability of the drift trajectory in China.}
}
}
31 changes: 20 additions & 11 deletions paper.md
@@ -46,20 +46,30 @@ bibliography: paper.bib

# Summary

The goal of this project is to contribute to the research of solutions that employ reinforcement learning techniques to maritime search and rescue missions of shipwrecked people. The software equip's researchers with a simulation of shipwrecked people casted away according to maritime currents to producing a stochastic environment to be used to train and evaluate autonomous agents.
The goal of this project is to advance research in maritime search and rescue missions using Reinforcement Learning techniques. The software provides researchers with two distinct environments: one simulates shipwrecked people drifting with maritime currents, creating a stochastic setting for training and evaluating autonomous agents; the other features a realistic particle simulation for mapping and optimizing search area coverage by autonomous agents.

Both environments adhere to open-source standards and offer extensive customization options, allowing users to tailor them to specific research needs. These tools enable Reinforcement Learning agents to learn efficient policies for locating shipwrecked individuals or maximizing search area coverage, thereby enhancing the effectiveness of maritime rescue operations.

# Statement of need

Maritime navigation plays a crucial role across various domains, including leisure activities and commercial fishing. However, maritime transportation is particularly significant as it accounts for 80% to 90% of global trade [@allianz]. While maritime navigation is essential for global trade, it also poses significant safety risks, as evidenced by the World Health Organization's report [@who] of approximately 236,000 annual drowning deaths worldwide. Therefore, maritime safety is essential, demanding significant enhancements in search and rescue (SAR) missions. It is imperative that SAR missions minimize the search area and maximize the chances of locating the search object.

To achieve this objective, traditional SAR operations used path planning algorithms such as parallel sweep, expanding square, and sector searches [@iamsar]. But recent researches propose a different approach to this problem using reinforcement learning techniques over pre-determined search patterns [@AI2021110098; @WU2024116403].
To achieve this objective, traditional SAR operations have relied on path planning algorithms such as parallel sweep, expanding square, and sector searches [@iamsar]. These methods, however, do not guarantee optimal results: @trummel1986 demonstrated that finding an optimal search path, in which the agent must visit every sub-area along the shortest possible route, is NP-complete. Recent research instead proposes replacing pre-determined search patterns with Reinforcement Learning (RL) algorithms [@AI2021110098; @WU2024116403], on the premise that RL can discover new, more efficient search patterns tailored to specific applications. The hypothesis is that maximizing reward fosters generalization abilities, thereby producing powerful agents [@SILVER2021103535]. Such advancements could potentially save more lives.
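
For concreteness, the parallel sweep pattern can be written as a simple waypoint generator, as in the sketch below. This is only an illustration of the classical pattern, not code from the package; the function name and grid representation are assumed for the example.

```python
# Illustrative sketch (not part of DSSE): waypoints for a parallel sweep
# ("lawnmower") pattern over a width x height grid of search cells.
def parallel_sweep(width: int, height: int) -> list[tuple[int, int]]:
    waypoints = []
    for row in range(height):
        # Alternate sweep direction on every row so the path stays contiguous.
        cols = range(width) if row % 2 == 0 else range(width - 1, -1, -1)
        for col in cols:
            waypoints.append((col, row))
    return waypoints

print(parallel_sweep(4, 2))
# [(0, 0), (1, 0), (2, 0), (3, 0), (3, 1), (2, 1), (1, 1), (0, 1)]
```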

The two primary metrics for evaluating an efficient search are coverage rate and time to detection. Coverage rate is the proportion of the search area covered by the search units over a specific period. Higher coverage rates typically indicate more effective search strategies. Time to detection is the time taken from the start of the search operation to the successful detection of the target. Minimizing this time is often a critical objective in SAR missions.
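
Both metrics can be computed directly from a recorded search trajectory. The sketch below uses hypothetical helper functions and a simplified trajectory format, a list of `(timestep, cell)` pairs, purely to make the definitions concrete.

```python
# Hypothetical helpers for the two evaluation metrics described above.
def coverage_rate(trajectory, total_cells: int) -> float:
    """Fraction of the search area visited at least once."""
    visited = {cell for _, cell in trajectory}
    return len(visited) / total_cells

def time_to_detection(trajectory, target_cell):
    """First timestep at which the target's cell is searched, or None if never."""
    for timestep, cell in trajectory:
        if cell == target_cell:
            return timestep
    return None
```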

Expanding on the state-of-the-art research presented by [@AI2021110098; @WU2024116403], this project introduces a unique simulation environment that has not been made available by other researchers. Additionally, this new environment enables experiments on search areas that are significantly larger than those used in existing research.

In order to contribute to researches on the effectiveness of integrating reinforcement learning techniques into SAR path planning, the Drone Swarm Search Environment (`DSSE`), distributed as a Python package, was designed to provide a training environment using the PettingZoo [@terry2021pettingzoo] interface. Its purpose is to facilitate the training and evaluation of single or multi-agent reinforcement learning algorithms. Additionally, it has been included as a third-party environment in the official PettingZoo documentation [@Terry_PettingZoo_Gym_for].
# Functionality

![Simulation environment showcasing the algorithm's execution.\label{fig:example}](docs/pics/dsse-example.png){ width=50% }
In order to contribute to research on the effectiveness of integrating RL techniques into SAR path planning, the Drone Swarm Search Environment (`DSSE`), distributed as a Python package, was designed to provide a training environment using the PettingZoo [@terry2021pettingzoo] interface. Its purpose is to facilitate the training and evaluation of single or multi-agent RL algorithms. Additionally, it has been included as a third-party environment in the official PettingZoo documentation [@Terry_PettingZoo_Gym_for].

![Simulation environment showcasing the algorithm's execution.\label{fig:example}](docs/public/pics/dsse-example.png){ width=50% }

The environment depicted in \autoref{fig:example} comprises a grid, a probability matrix, drones, and an arbitrary number of persons-in-water (PIW). The movement of the PIW is influenced by, but not identical to, the dynamics of the probability matrix, which models the drift of sea currents impacting the PIW [@WU2023113444]. The probability matrix itself is defined using a two-dimensional Gaussian distribution, which expands over time, thus broadening the potential search area. This expansion simulates the diffusion of the PIW, approximating the zone where drones are most likely to detect them. Moreover, the environment employs a reward function that incentivizes the speed of the search, rewarding the agents for shorter successful search durations.
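
A minimal sketch of this idea is shown below: a two-dimensional Gaussian over the grid whose spread grows with time, renormalized into a probability map. The parameter values are arbitrary and do not correspond to the package's internal model.

```python
import numpy as np

def probability_matrix(size: int, center: tuple[int, int], t: int,
                       sigma0: float = 1.0, growth: float = 0.1) -> np.ndarray:
    """Toy 2D Gaussian over a size x size grid whose spread grows with time t."""
    sigma = sigma0 + growth * t  # the likely search area widens as time passes
    ys, xs = np.mgrid[0:size, 0:size]
    cx, cy = center
    density = np.exp(-((xs - cx) ** 2 + (ys - cy) ** 2) / (2 * sigma ** 2))
    return density / density.sum()  # normalize so all cells sum to 1
```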

The package also includes a second environment option. Similar to the first, this alternative setup is designed for training agents, but with key differences in its objectives and mechanics. Unlike the first environment, which rewards agents for speed in their searches, this second option rewards agents that cover the largest area without repetition. It incorporates a trade-off by using a stationary probability matrix, but enhances the simulation with a more advanced Lagrangian particle model [@gmd-11-1405-2018] for pinpointing the PIW's position. Moreover, this environment omits the inclusion of shipwrecked individuals, focusing instead on promoting research into how agents can learn to efficiently expand their search coverage over broader areas.
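
The incentive of covering the largest area without repetition can be illustrated schematically as follows; this is not the package's actual reward function, only a sketch of the behavior it encourages.

```python
# Schematic coverage-style reward: newly searched cells are rewarded, revisits are not.
visited_cells = set()

def coverage_reward(cell, reward_new: float = 1.0, penalty_revisit: float = -0.1) -> float:
    if cell in visited_cells:
        return penalty_revisit  # discourage searching the same quadrant twice
    visited_cells.add(cell)
    return reward_new  # encourage expanding the searched area
```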

Using this environment, any researcher or practitioner can write code to train and evaluate an agent, as in the source code presented below.

@@ -68,20 +78,19 @@
```python
from DSSE import DroneSwarmSearch
env = DroneSwarmSearch()

observations, info = env.reset()

rewards = 0
done = False
while not done:
      actions = policy(observations, env.get_agents())
      observations, rewards, terminations, truncations, infos = env.step(actions)
      done = any(terminations.values()) or any(truncations.values())
    actions = policy(observations, env.get_agents())
    observations, rewards, terminations, truncations, infos = env.step(actions)
    done = any(terminations.values()) or any(truncations.values())
```
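
In the snippet above, `policy` is a user-supplied stand-in for a trained model. A minimal placeholder that keeps the loop runnable could sample a random action for each drone, assuming the standard PettingZoo `action_space(agent)` accessor is available on the environment:

```python
def policy(observations, agents):
    # Placeholder policy: one random action per drone.
    # A trained RL model would map each drone's observation to an action instead.
    return {agent: env.action_space(agent).sample() for agent in agents}
```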

The package also includes a second environment option. Similar to the first, this alternative setup is designed for training agents, but with key differences in its objectives and mechanics. Unlike the first environment, which rewards agents for speed in their searches, this second option rewards agents that cover the largest area without repetition. It incorporates a trade-off by using a stationary probability matrix, but enhances the simulation with a more advanced Lagrangian particle model [@gmd-11-1405-2018] for pinpointing the PIW's position. Moreover, this environment omits the inclusion of shipwrecked individuals, focusing instead on promoting research into how agents can learn to efficiently expand their search coverage over broader areas.

The grid is divided into square cells, each representing a quadrant with sides measuring 130 meters in the real world. This correlation with real-world dimensions is crucial for developing agents capable of learning from realistic motion patterns. The drones, which are controlled by reinforcement learning algorithms, serve as these agents. During the environment's instantiation, users define the drones' nominal speeds. These drones can move both orthogonally and diagonally across the grid, and they are equipped to search each cell for the presence of the PIW.
The grid is divided into square cells, each representing a quadrant with sides measuring 130 meters in the real world. This correlation with real-world dimensions is crucial for developing agents capable of learning from realistic motion patterns. The drones, which are controlled by RL algorithms, serve as these agents. During the environment's instantiation, users define the drones' nominal speeds. These drones can move both orthogonally and diagonally across the grid, and they are equipped to search each cell for the presence of the PIW.
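
As an illustration of how the grid abstraction maps to real-world distances, the sketch below enumerates orthogonal and diagonal displacements and converts a cell-to-cell path into meters. The 130-meter cell side comes from the text; the displacement encoding itself is purely illustrative and is not the package's actual action space.

```python
CELL_SIDE_M = 130  # each grid cell corresponds to a 130 m x 130 m quadrant

# Illustrative displacement vectors (dx, dy): four orthogonal and four diagonal moves.
MOVES = [(-1, 0), (1, 0), (0, -1), (0, 1),
         (-1, -1), (-1, 1), (1, -1), (1, 1)]

def path_length_m(path_cells: list[tuple[int, int]]) -> float:
    """Approximate length of a cell-to-cell path in meters (diagonals count as sqrt(2) cells)."""
    total = 0.0
    for (x0, y0), (x1, y1) in zip(path_cells, path_cells[1:]):
        total += CELL_SIDE_M * ((x1 - x0) ** 2 + (y1 - y0) ** 2) ** 0.5
    return total
```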

Several works over the past few years have sought better algorithms for the search and rescue of shipwrecked people [@AI2021110098; @WU2024116403]. However, none of them has made an environment for agent training publicly available. For this reason, the development and provision of this environment as an open-source Python library is expected to be of significant relevance to the machine learning community and to ocean safety.

This new library makes it possible to implement and evaluate new reinforcement learning algorithms, such as Deep Q-Networks (DQN) [@dqn2015] and Proximal Policy Optimization (PPO) [@ppo2017], with little effort. An earlier iteration of this software was utilized in research that compared the Reinforce algorithm with the parallel sweep path planning algorithm [@dsse2023].
This new library makes it possible to implement and evaluate new RL algorithms, such as Deep Q-Networks (DQN) [@dqn2015] and Proximal Policy Optimization (PPO) [@ppo2017], with little effort. Additionally, several state-of-the-art RL algorithms have already been implemented and are available [@algorithmsDSSE2024]. An earlier iteration of this software was utilized in research that compared the Reinforce algorithm with the parallel sweep path planning algorithm [@dsse2023].

# References
