% Conference paper (CIKM 2021). The citation key is left unchanged so that
% existing \cite commands keep resolving. Export boilerplate that used to sit
% in `note` is stored in `copyright` and `eventdate` instead, so classic
% BibTeX styles no longer print it.
@inproceedings{6fedf02372454896879d98076f2c6fdf,
  author    = {Rohmatillah, Mahdin and Chien, {Jen Tzung}},
  title     = {Corrective Guidance and Learning for Dialogue Management},
  booktitle = {{CIKM} 2021 -- Proceedings of the 30th {ACM} International Conference on Information and Knowledge Management},
  series    = {International Conference on Information and Knowledge Management, Proceedings},
  publisher = {Association for Computing Machinery},
  year      = {2021},
  month     = oct,
  day       = {26},
  pages     = {1548--1557},
  doi       = {10.1145/3459637.3482333},
  language  = {English},
  keywords  = {behavior cloning, dialogue management, human-in-the-loop, imitation learning, policy optimization, reinforcement learning},
  abstract  = {Establishing robust dialogue policy with low computation cost is challenging, especially for multi-domain task-oriented dialogue management due to the high complexity in state and action spaces. The previous works mostly using the deterministic policy optimization only attain moderate performance. Meanwhile, state-of-the-art result that uses end-to-end approach is computationally demanding since it utilizes a large-scaled language model based on the generative pre-trained transformer-2 (GPT-2). In this study, a new learning procedure consisting of three learning stages is presented to improve multi-domain dialogue management with corrective guidance. Firstly, the behavior cloning with an auxiliary task is developed to build a robust pre-trained model by mitigating the causal confusion problem in imitation learning. Next, the pre-trained model is rectified by using reinforcement learning via the proximal policy optimization. Lastly, human-in-the-loop learning strategy is fulfilled to enhance the agent performance by directly providing corrective feedback from rule-based agent so that the agent is prevented to trap in confounded states. The experiments on end-to-end evaluation show that the proposed learning method achieves state-of-the-art result by performing nearly identical to the rule-based agent. This method outperforms the second place of 9th dialog system technology challenge (DSTC9) track 2 that uses GPT-2 as the core model in dialogue management.},
  eventdate = {2021-11-01/2021-11-05},
  copyright = {{\textcopyright} 2021 ACM},
}