@inproceedings{f784847facf64cf781def7facfb3a2a8,
  author    = {Liu, Yu-Yuan and Zheng, Hong-Sheng and Hu, Yu-Fang and Hsu, Chen-Fong and Yeh, Tsung Tai},
  title     = {{TinyTS}: Memory-Efficient {TinyML} Model Compiler Framework on Microcontrollers},
  booktitle = {Proceedings - 2024 {IEEE} International Symposium on High-Performance Computer Architecture, {HPCA} 2024},
  series    = {Proceedings - International Symposium on High-Performance Computer Architecture},
  pages     = {848--860},
  publisher = {IEEE Computer Society},
  address   = {United States},
  year      = {2024},
  doi       = {10.1109/HPCA57654.2024.00070},
  language  = {English},
  keywords  = {AIoT, Compiler, Deep Neural Network, TinyML},
  abstract  = {Deploying deep neural network (DNN) models on Microcontroller Units (MCUs) is typically limited by the tightness of the SRAM memory budget. Previously, machine learning system frameworks often allocated tensor memory layer-wise, but this will result in out-of-memory exceptions when a DNN model includes a large tensor. Patch-based inference, another past solution, reduces peak SRAM memory usage by dividing a tensor into small patches and storing one small patch at a time. However, executing these overlapping small patches requires significantly more time to complete the inference and is undesirable for MCUs. We resolve these problems by developing a novel DNN model compiler: TinyTS. In the TinyTS, our tensor partition method creates a tensor-splitting model that eliminates the redundant computation observed in the patch-based inference. Furthermore, the TinyTS memory planner significantly reduces peak SRAM memory usage by releasing the memory space of unused split tensors for other ready split tensors early before the completion of the entire tensor. Finally, TinyTS presents different optimization techniques to eliminate the metadata storage and runtime overhead when executing multiple fine-grained split tensors. Using the TensorFlow Lite for Microcontroller (TFLM) framework as a baseline, we tested the effectiveness of TinyTS. We found that TinyTS reduces the peak SRAM memory usage of 9 TinyML models up to 5.92X over the baseline. TinyTS also achieves a geometric mean of 8.83X speedup over the patch-based inference. In resolving the two key issues when deploying DNN models on MCUs, TinyTS substantially boosts memory usage efficiency for TinyML applications. The source code of TinyTS can be obtained from https://github.com/nycu-caslab/TinyTS},
  note      = {Publisher Copyright: {\textcopyright} 2024 IEEE.; 30th IEEE International Symposium on High-Performance Computer Architecture, HPCA 2024 ; Conference date: 02-03-2024 Through 06-03-2024},
}