@inproceedings{pettet2024decision,
  author       = {Pettet, Ava and Zhang, Yunuo and Luo, Baiting and Wray, Kyle and Baier, Hendrik and Laszka, Aron and Dubey, Abhishek and Mukhopadhyay, Ayan},
  title        = {Decision Making in Non-Stationary Environments with Policy-Augmented Search},
  booktitle    = {Proceedings of the 23rd International Conference on Autonomous Agents and Multiagent Systems},
  series       = {AAMAS '24},
  year         = {2024},
  pages        = {2417--2419},
  numpages     = {3},
  publisher    = {International Foundation for Autonomous Agents and Multiagent Systems},
  address      = {Richland, SC},
  location     = {Auckland, New Zealand},
  isbn         = {9798400704864},
  note         = {extended abstract},
  keywords     = {non-stationary MDPs, policy learning, Monte Carlo tree search, sequential decision-making, online planning, offline learning, policy augmentation},
  abstract     = {Sequential decision-making is challenging in non-stationary environments, where the environment in which an agent operates can change over time. Policies learned before execution become stale when the environment changes, and relearning takes time and computational effort. Online search, on the other hand, can return sub-optimal actions when there are limitations on allowed runtime. In this paper, we introduce Policy-Augmented Monte Carlo tree search (PA-MCTS), which combines action-value estimates from an out-of-date policy with an online search using an up-to-date model of the environment. We prove several theoretical results about PA-MCTS. We also compare and contrast our approach with AlphaZero, another hybrid planning approach, and Deep Q Learning on several OpenAI Gym environments and show that PA-MCTS outperforms these baselines.},
  acceptance   = {36},
  contribution = {lead},
}