Jeewa, A., Pillay, A., & Jembere, E. (2019). Directed curiosity-driven exploration in hard exploration, sparse reward environments. In Forum for Artificial Intelligence Research (FAIR). Cape Town: CEUR. Retrieved from http://ceur-ws.org/Vol-2540/FAIR2019_paper_42.pdf
Training agents in hard exploration, sparse reward environments is a difficult task since the reward feedback is insufficient for meaningful learning. In this work, we propose a new technique, called Directed Curiosity, that is a hybrid of Curiosity-Driven Exploration and distance-based reward shaping. The technique is evaluated in a custom navigation task where an agent tries to learn the shortest path to a distant target, in environments of varying difficulty. The technique is compared to agents trained with only a shaped reward signal, a curiosity signal as well as a sparse reward signal. It is shown that directed curiosity is the most successful in hard exploration environments, with the benefits of the approach being highlighted in environments with numerous obstacles and decision points. The limitations of the shaped reward function are also discussed.
@inproceedings{jeewa2019directed,
  author    = {Jeewa, Asad and Pillay, Anban and Jembere, Edgar},
  title     = {Directed Curiosity-Driven Exploration in Hard Exploration, Sparse Reward Environments},
  abstract  = {Training agents in hard exploration, sparse reward environments is a difficult task since the reward feedback is insufficient for meaningful learning. In this work, we propose a new technique, called Directed Curiosity, that is a hybrid of Curiosity-Driven Exploration and distance-based reward shaping. The technique is evaluated in a custom navigation task where an agent tries to learn the shortest path to a distant target, in environments of varying difficulty. The technique is compared to agents trained with only a shaped reward signal, a curiosity signal as well as a sparse reward signal. It is shown that directed curiosity is the most successful in hard exploration environments, with the benefits of the approach being highlighted in environments with numerous obstacles and decision points. The limitations of the shaped reward function are also discussed.},
  booktitle = {Forum for Artificial Intelligence Research ({FAIR})},
  year      = {2019},
  month     = dec,
  pages     = {12--24},
  publisher = {CEUR},
  address   = {Cape Town},
  url       = {http://ceur-ws.org/Vol-2540/FAIR2019_paper_42.pdf},
  internal-note = {event held 4--6 December 2019; original export had month = {4/12-6/12}},
}