% Conference paper "Lego" (ISOCC 2022, pp. 173--174), exported from a research portal.
% - The title braces {Lego} and {DNN} so sentence-casing styles keep their capitals.
% - The entry is pure ASCII: the multiplication sign in the abstract is written as
%   math, and the address gives the country name in English.
% - No series field: the exported value only repeated booktitle verbatim, which made
%   styles that print series show the proceedings name twice.
@inproceedings{68f27eedd77344b6863f10202daefc25,
title = "{Lego}: Dynamic Tensor-Splitting Multi-Tenant {DNN} Models on Multi-Chip-Module Architecture",
abstract = "Modern deep neural network (DNN) accelerators target the acceleration of a single DNN model and limit the throughput for multi-tenant DNN data center applications. The multi-chip-module (MCM) architecture breaks a monolithic accelerator into multiple small chiplets. The MCM is a promising approach that dispatches DNN models across chiplets with equal PEs. However, it is challenging to distribute data of DNN model layers with different parameters across chiplets while maximizing the chiplet utilization. This work proposes Lego MCM architecture that dynamically adapts to the size of DNN model layers and improves the throughput of multi-tenant DNN applications by increasing the chiplet utilization. Lego's dynamic scheduler achieves the geometric average 1.51{$\times$} speedup over a monolithic DNN accelerator.",
author = "Xuan, {Zhou Yu} and Lee, {Ching Jui} and Yeh, {Tsung Tai}",
note = "Publisher Copyright: {\textcopyright} 2022 IEEE.; 19th International System-on-Chip Design Conference, ISOCC 2022 ; Conference date: 19-10-2022 Through 22-10-2022",
year = "2022",
doi = "10.1109/ISOCC56007.2022.10031596",
language = "English",
publisher = "Institute of Electrical and Electronics Engineers Inc.",
pages = "173--174",
booktitle = "Proceedings - International SoC Design Conference 2022, ISOCC 2022",
address = "United States",
}