% Conference paper: PPO-Clip global-convergence analysis, 38th AAAI Conference
% on Artificial Intelligence (AAAI 2024).
% Review notes:
%  - The issue number lives in "number" only; it is not an edition, so no
%    "edition" field is recorded.
%  - NOTE(review): the DOI suffix "v38i11" suggests volume 38, issue 11.
%    "volume" is deliberately omitted because the standard BibTeX styles warn
%    when an inproceedings entry carries both volume and number.
%  - NOTE(review): "booktitle" is kept as exported; confirm the track name
%    against the publisher's page.
%  - The convergence rate in the abstract is written in LaTeX math so the file
%    stays pure ASCII.
@inproceedings{13b9ce01252c4a068c3b0c15f2647e45,
  author    = {Huang, {Nai Chieh} and Hsieh, {Ping Chun} and Ho, {Kuo Hao} and Wu, {I. Chen}},
  title     = {{PPO-Clip} Attains Global Optimality: Towards Deeper Understandings of Clipping},
  booktitle = {Technical Tracks 14},
  series    = {Proceedings of the {AAAI} Conference on Artificial Intelligence},
  number    = {11},
  editor    = {Wooldridge, Michael and Dy, Jennifer and Natarajan, Sriraam},
  pages     = {12600--12607},
  publisher = {Association for the Advancement of Artificial Intelligence},
  year      = {2024},
  month     = mar,
  day       = {25},
  doi       = {10.1609/aaai.v38i11.29154},
  language  = {English},
  note      = {Publisher Copyright: Copyright {\textcopyright} 2024, Association for the Advancement of Artificial Intelligence (www.aaai.org). All rights reserved.; 38th AAAI Conference on Artificial Intelligence, AAAI 2024 ; Conference date: 20-02-2024 Through 27-02-2024},
  abstract  = {Proximal Policy Optimization algorithm employing a clipped surrogate objective (PPO-Clip) is a prominent exemplar of the policy optimization methods. However, despite its remarkable empirical success, PPO-Clip lacks theoretical substantiation to date. In this paper, we contribute to the field by establishing the first global convergence results of a PPO-Clip variant in both tabular and neural function approximation settings. Our findings highlight the $O(1/\sqrt{T})$ min-iterate convergence rate specifically in the context of neural function approximation. We tackle the inherent challenges in analyzing PPO-Clip through three central concepts: (i) We introduce a generalized version of the PPO-Clip objective, illuminated by its connection with the hinge loss. (ii) Employing entropic mirror descent, we establish asymptotic convergence for tabular PPO-Clip with direct policy parameterization. (iii) Inspired by the tabular analysis, we streamline convergence analysis by introducing a two-step policy improvement approach. This decouples policy search from complex neural policy parameterization using a regression-based update scheme. Furthermore, we gain deeper insights into the efficacy of PPO-Clip by interpreting these generalized objectives. Our theoretical findings also mark the first characterization of the influence of the clipping mechanism on PPO-Clip convergence. Importantly, the clipping range affects only the pre-constant of the convergence rate.},
}