@inproceedings{41c11c9a0dea4e0d90017f57630edecd,
title = "Exploration through reward biasing: Reward-biased maximum likelihood estimation for stochastic multi-armed bandits",
abstract = "Inspired by the Reward-Biased Maximum Likelihood Estimate method of adaptive control, we propose RBMLE - a novel family of learning algorithms for stochastic multi-armed bandits (SMABs). For a broad range of SMABs including both the parametric Exponential Family as well as the non-parametric sub-Gaussian/Exponential family, we show that RBMLE yields an index policy. To choose the bias-growth rate.",
author = "Xi Liu and Hsieh, {Ping-Chun} and Hung, {Yu-Heng} and Anirban Bhattacharya and Kumar, {P. R.}",
note = "Publisher Copyright: {\textcopyright} International Conference on Machine Learning, ICML 2020. All rights reserved. 37th International Conference on Machine Learning, ICML 2020; Conference date: 13-07-2020 through 18-07-2020",
year = "2020",
month = jul,
language = "English",
series = "37th International Conference on Machine Learning, ICML 2020",
publisher = "International Machine Learning Society (IMLS)",
pages = "6204--6214",
editor = "Hal Daum{\'e} and Aarti Singh",
booktitle = "37th International Conference on Machine Learning, ICML 2020",
}