If you find this code useful, consider citing our nocaps
paper:
@inproceedings{nocaps2019,
  author    = {Agrawal*, Harsh and
               Desai*, Karan and
               Wang, Yufei and
               Chen, Xinlei and
               Jain, Rishabh and
               Johnson, Mark and
               Batra, Dhruv and
               Parikh, Devi and
               Lee, Stefan and
               Anderson, Peter},
  title     = {{nocaps}: {n}ovel {o}bject {c}aptioning {a}t {s}cale},
  booktitle = {International Conference on Computer Vision (ICCV)},
  year      = {2019},
}
Please also consider citing the paper that proposed this model:
@inproceedings{Anderson2017up-down,
  author    = {Anderson, Peter and
               He, Xiaodong and
               Buehler, Chris and
               Teney, Damien and
               Johnson, Mark and
               Gould, Stephen and
               Zhang, Lei},
  title     = {Bottom-Up and Top-Down Attention for Image Captioning and Visual Question Answering},
  booktitle = {Computer Vision and Pattern Recognition (CVPR)},
  year      = {2018},
}
If you evaluate your models on our nocaps
benchmark, please consider citing
EvalAI — the platform which hosts our evaluation server:
@inproceedings{evalai,
  author    = {Yadav, Deshraj and
               Jain, Rishabh and
               Agrawal, Harsh and
               Chattopadhyay, Prithvijit and
               Singh, Taranjeet and
               Jain, Akash and
               Singh, Shiv Baran and
               Lee, Stefan and
               Batra, Dhruv},
  title     = {{EvalAI}: Towards Better Evaluation Systems for {AI} Agents},
  booktitle = {Workshop on AI Systems at SOSP 2019},
  year      = {2019},
}