@inproceedings{51aae77a67f54032a65ce34e2d18fe74,
title = "{M}-ary quantized neural networks",
abstract = "Parameter quantization is crucial for model compression. This paper generalizes binary and ternary quantizations to M-ary quantization for adaptive learning of quantized neural networks. To compensate for the performance loss, the representation values and the quantization partitions of the model parameters are jointly trained to optimize the resolution of the gradients used for parameter updating, where the non-differentiable quantization function in the back-propagation algorithm is tackled. An asymmetric quantization is implemented, which sufficiently relaxes the restriction on parameter quantization. The resulting M-ary quantization scheme is general and adaptive for different values of M. Training of the M-ary quantized neural network (MQNN) can be tuned to balance the tradeoff between system performance and memory storage. Experimental results show that MQNN achieves image classification performance comparable to that of a full-precision neural network (FPNN) while requiring far less memory storage.",
keywords = "Adaptive quantization, Binarized neural network, Model compression, Quantized neural network",
author = "Chien, Jen-Tzung and Chang, {Su-Ting}",
note = "Publisher Copyright: {\textcopyright} 2020 IEEE; 2020 IEEE International Conference on Multimedia and Expo, ICME 2020; Conference date: 06-07-2020 through 10-07-2020",
year = "2020",
month = jul,
doi = "10.1109/ICME46284.2020.9102751",
language = "English",
isbn = "978-1-7281-1332-6",
series = "Proceedings - IEEE International Conference on Multimedia and Expo",
publisher = "IEEE Computer Society",
booktitle = "2020 IEEE International Conference on Multimedia and Expo, ICME 2020",
address = "United States",
}