-
Notifications
You must be signed in to change notification settings - Fork 4
/
paper.bib
247 lines (225 loc) · 10.7 KB
/
paper.bib
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
@inproceedings{johnson2016malmo,
  author    = {Johnson, Matthew and Hofmann, Katja and Hutton, Tim and Bignell, David},
  title     = {The {Malmo} Platform for Artificial Intelligence Experimentation},
  booktitle = {Proceedings of the Twenty-Fifth International Joint Conference on Artificial Intelligence ({IJCAI})},
  pages     = {4246--4247},
  year      = {2016}
}
@misc{UPF,
  title        = {Unified Planning Framework},
  year         = {2023},
  howpublished = {GitHub repository},
  url          = {https://github.com/aiplan4eu/unified-planning}
}
@inproceedings{hafner2022benchmarking,
  author    = {Hafner, Danijar},
  title     = {Benchmarking the Spectrum of Agent Capabilities},
  booktitle = {International Conference on Learning Representations},
  year      = {2022},
  url       = {https://openreview.net/forum?id=1W0z96MFEoH}
}
@article{sutton1999between,
  author    = {Sutton, Richard S. and Precup, Doina and Singh, Satinder},
  title     = {Between {MDPs} and {semi-MDPs}: A framework for temporal abstraction in reinforcement learning},
  journal   = {Artificial Intelligence},
  volume    = {112},
  number    = {1-2},
  pages     = {181--211},
  year      = {1999},
  publisher = {Elsevier}
}
@software{minigrid,
  author = {Chevalier-Boisvert, Maxime and Willems, Lucas and Pal, Suman},
  title  = {Minimalistic Gridworld Environment for {Gymnasium}},
  year   = {2018},
  url    = {https://github.com/Farama-Foundation/Minigrid}
}
@article{machado2018revisiting,
  author  = {Machado, Marlos C. and Bellemare, Marc G. and Talvitie, Erik and Veness, Joel and Hausknecht, Matthew and Bowling, Michael},
  title   = {Revisiting the {Arcade Learning Environment}: Evaluation protocols and open problems for general agents},
  journal = {Journal of Artificial Intelligence Research},
  volume  = {61},
  pages   = {523--562},
  year    = {2018}
}
@article{kuttler2020nethack,
  author  = {K{\"u}ttler, Heinrich and Nardelli, Nantas and Miller, Alexander and Raileanu, Roberta and Selvatici, Marco and Grefenstette, Edward and Rockt{\"a}schel, Tim},
  title   = {The {NetHack} Learning Environment},
  journal = {Advances in Neural Information Processing Systems},
  volume  = {33},
  pages   = {7671--7684},
  year    = {2020}
}
@inproceedings{milani2020minerl2019,
  title     = {Retrospective Analysis of the 2019 {MineRL} Competition on Sample Efficient Reinforcement Learning},
  author    = {Milani, Stephanie and Topin, Nicholay and Houghton, Brandon and Guss, William H. and Mohanty, Sharada P. and Nakata, Keisuke and Vinyals, Oriol and Kuno, Noboru Sean},
  booktitle = {Proceedings of the {NeurIPS} 2019 Competition and Demonstration Track},
  pages     = {203--214},
  year      = {2020},
  editor    = {Escalante, Hugo Jair and Hadsell, Raia},
  volume    = {123},
  series    = {Proceedings of Machine Learning Research},
  month     = dec,
  publisher = {PMLR},
  pdf       = {http://proceedings.mlr.press/v123/milani20a/milani20a.pdf},
  url       = {https://proceedings.mlr.press/v123/milani20a.html},
  abstract  = {To facilitate research in the direction of sample efficient reinforcement learning, we held the MineRL Competition on Sample Efficient Reinforcement Learning Using Human Priors at the Thirty-third Conference on Neural Information Processing Systems (NeurIPS 2019). The primary goal of this competition was to promote the development of algorithms that use human demonstrations alongside reinforcement learning to reduce the number of samples needed to solve complex, hierarchical, and sparse environments. We describe the competition, outlining the primary challenge, the competition design, and the resources that we provided to the participants. We provide an overview of the top solutions, each of which use deep reinforcement learning and/or imitation learning. We also discuss the impact of our organizational decisions on the competition and future directions for improvement.}
}
@article{guss2021minerl2020,
  author     = {Guss, William H. and Castro, Mario Ynocente and Devlin, Sam and Houghton, Brandon and Kuno, Noboru Sean and Loomis, Crissman and Milani, Stephanie and Mohanty, Sharada P. and Nakata, Keisuke and Salakhutdinov, Ruslan and Schulman, John and Shiroshita, Shinya and Topin, Nicholay and Ummadisingu, Avinash and Vinyals, Oriol},
  title      = {The {MineRL} 2020 Competition on Sample Efficient Reinforcement Learning using Human Priors},
  journal    = {CoRR},
  volume     = {abs/2101.11071},
  year       = {2021},
  url        = {https://arxiv.org/abs/2101.11071},
  eprinttype = {arXiv},
  eprint     = {2101.11071},
  timestamp  = {Sun, 31 Jan 2021 17:23:50 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2101-11071.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{dreamerv3,
  author     = {Hafner, Danijar and Pasukonis, Jurgis and Ba, Jimmy and Lillicrap, Timothy},
  title      = {Mastering Diverse Domains through World Models},
  journal    = {CoRR},
  volume     = {abs/2301.04104},
  year       = {2023},
  url        = {https://arxiv.org/abs/2301.04104},
  eprinttype = {arXiv},
  eprint     = {2301.04104}
}
@inproceedings{procgen,
  author       = {Cobbe, Karl and Hesse, Chris and Hilton, Jacob and Schulman, John},
  title        = {Leveraging procedural generation to benchmark reinforcement learning},
  booktitle    = {International Conference on Machine Learning},
  pages        = {2048--2056},
  year         = {2020},
  organization = {PMLR}
}
@article{ALE,
  author  = {Bellemare, Marc G. and Naddaf, Yavar and Veness, Joel and Bowling, Michael},
  title   = {The {Arcade Learning Environment}: An evaluation platform for general agents},
  journal = {Journal of Artificial Intelligence Research},
  volume  = {47},
  pages   = {253--279},
  year    = {2013}
}
@article{deepmindlab,
  author     = {Beattie, Charles and Leibo, Joel Z. and Teplyashin, Denis and Ward, Tom and Wainwright, Marcus and K{\"{u}}ttler, Heinrich and Lefrancq, Andrew and Green, Simon and Vald{\'{e}}s, V{\'{\i}}ctor and Sadik, Amir and Schrittwieser, Julian and Anderson, Keith and York, Sarah and Cant, Max and Cain, Adam and Bolton, Adrian and Gaffney, Stephen and King, Helen and Hassabis, Demis and Legg, Shane and Petersen, Stig},
  title      = {DeepMind Lab},
  journal    = {CoRR},
  volume     = {abs/1612.03801},
  year       = {2016},
  url        = {http://arxiv.org/abs/1612.03801},
  eprinttype = {arXiv},
  eprint     = {1612.03801},
  timestamp  = {Mon, 13 Aug 2018 16:48:18 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/BeattieLTWWKLGV16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{gym,
  author     = {Brockman, Greg and Cheung, Vicki and Pettersson, Ludwig and Schneider, Jonas and Schulman, John and Tang, Jie and Zaremba, Wojciech},
  title      = {OpenAI Gym},
  journal    = {CoRR},
  volume     = {abs/1606.01540},
  year       = {2016},
  url        = {http://arxiv.org/abs/1606.01540},
  eprinttype = {arXiv},
  eprint     = {1606.01540},
  timestamp  = {Fri, 08 Nov 2019 12:51:06 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/BrockmanCPSSTZ16.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{PDDLgym,
  author     = {Silver, Tom and Chitnis, Rohan},
  title      = {{PDDLGym}: {Gym} Environments from {PDDL} Problems},
  journal    = {CoRR},
  volume     = {abs/2002.06432},
  year       = {2020},
  url        = {https://arxiv.org/abs/2002.06432},
  eprinttype = {arXiv},
  eprint     = {2002.06432}
}
@techreport{PDDL,
  author      = {McDermott, Drew and others},
  title       = {{PDDL} -- The Planning Domain Definition Language},
  institution = {Yale Center for Computational Vision and Control},
  number      = {CVC TR-98-003/DCS TR-1165},
  year        = {1998}
}
@incollection{ENSHP,
  author    = {Scala, Enrico and Haslum, Patrik and Thi{\'e}baux, Sylvie and Ramirez, Miquel},
  title     = {Interval-based relaxation for general numeric planning},
  booktitle = {ECAI 2016},
  publisher = {IOS Press},
  pages     = {655--663},
  year      = {2016}
}
@article{stable-baselines3,
  author  = {Raffin, Antonin and Hill, Ashley and Gleave, Adam and Kanervisto, Anssi and Ernestus, Maximilian and Dormann, Noah},
  title   = {{Stable-Baselines3}: Reliable Reinforcement Learning Implementations},
  journal = {Journal of Machine Learning Research},
  year    = {2021},
  volume  = {22},
  number  = {268},
  pages   = {1--8},
  url     = {http://jmlr.org/papers/v22/20-1364.html}
}
@misc{ANML,
  author = {Smith, David E. and Frank, Jeremy and Cushing, William},
  title  = {The {ANML} Language},
  year   = {2007},
  url    = {https://api.semanticscholar.org/CorpusID:14116191}
}
@inproceedings{2021NetHack,
  title     = {Insights From the {NeurIPS} 2021 {NetHack} Challenge},
  author    = {Hambro, Eric and Mohanty, Sharada and Babaev, Dmitrii and Byeon, Minwoo and Chakraborty, Dipam and Grefenstette, Edward and Jiang, Minqi and Jo, Daejin and Kanervisto, Anssi and Kim, Jongmin and Kim, Sungwoong and Kirk, Robert and Kurin, Vitaly and K{\"u}ttler, Heinrich and Kwon, Taehwon and Lee, Donghoon and Mella, Vegard and Nardelli, Nantas and Nazarov, Ivan and Ovsov, Nikita and Holder, Jack and Raileanu, Roberta and Ramanauskas, Karolis and Rockt{\"a}schel, Tim and Rothermel, Danielle and Samvelyan, Mikayel and Sorokin, Dmitry and Sypetkowski, Maciej and Sypetkowski, Micha\l{}},
  booktitle = {Proceedings of the {NeurIPS} 2021 Competitions and Demonstrations Track},
  pages     = {41--52},
  year      = {2022},
  editor    = {Kiela, Douwe and Ciccone, Marco and Caputo, Barbara},
  volume    = {176},
  series    = {Proceedings of Machine Learning Research},
  month     = dec,
  publisher = {PMLR},
  pdf       = {https://proceedings.mlr.press/v176/hambro22a/hambro22a.pdf},
  url       = {https://proceedings.mlr.press/v176/hambro22a.html},
  abstract  = {In this report, we summarize the takeaways from the first NeurIPS 2021 NetHack Challenge. Participants were tasked with developing a program or agent that can win (i.e., 'ascend' in) the popular dungeon-crawler game of NetHack by interacting with the NetHack Learning Environment (NLE), a scalable, procedurally generated, and challenging Gym environment for reinforcement learning (RL). The challenge showcased community-driven progress in AI with many diverse approaches significantly beating the previously best results on NetHack. Furthermore, it served as a direct comparison between neural (e.g., deep RL) and symbolic AI, as well as hybrid systems, demonstrating that on NetHack symbolic bots currently outperform deep RL by a large margin. Lastly, no agent got close to winning the game, illustrating NetHack's suitability as a long-term benchmark for AI research.}
}