% Conference paper from the WOCC 2024 proceedings (IEEE), pages 192--197.
% The citation key is the hash assigned by the exporting system; it is kept
% unchanged so existing citations of this entry keep resolving.
% Review notes:
%   - every author is in "Last, First" form; "Chen, Chiang" is the same
%     First/Last split BibTeX derived from the original "Chiang Chen".
%   - the acronym AI in the title is braced so sentence-casing styles keep it.
%   - no series field: the exported value was a verbatim copy of booktitle
%     and made styles print the proceedings title twice.
%   - address holds the country given by the export, translated to English
%     (the original value was non-ASCII); the publisher city is not recorded
%     here -- TODO confirm if a style needs it.
%   - abstract and note are reproduced verbatim from the export.
@inproceedings{856aab1e94334648aae965236b5635dc,
  author    = "Chen, {Zheng Gen} and Chang, {Jin Wei} and Chen, Chiang and Li, {Chi Yu} and Huang, {Ching Chun} and Wang, {Li Chun}",
  title     = "Horizontal Pod Autoscaling for Precise Startup of {AI} Microservices at the Network Edge: A Hybrid Proactive and Reactive Approach",
  booktitle = "2024 33rd Wireless and Optical Communications Conference, WOCC 2024",
  publisher = "Institute of Electrical and Electronics Engineers Inc.",
  address   = "United States",
  pages     = "192--197",
  year      = "2024",
  doi       = "10.1109/WOCC61718.2024.10786044",
  language  = "English",
  keywords  = "Autoformer, Autoscaling, Edge AI, HPA, Hybrid scaling, Microservices",
  note      = "Publisher Copyright: {\textcopyright} 2024 IEEE.; 33rd Wireless and Optical Communications Conference, WOCC 2024 ; Conference date: 25-10-2024 Through 26-10-2024",
  abstract  = "Providing AI microservices at the network edge, such as YOLOv5 for detecting mini-subjects and Unet for segmentation, have resulted in a lot of potential business opportunities. However, numerous AI microservices will face the issue of resource competition, especially in a resource-limited environment. Thus, resource scaling for AI microservices at edge nodes, balancing their resource utilization, plays a key role in ensuring the QoS of providing multiple AI microservices but poses several challenges, including varying traffic requests, unstable node environments, and long startup times when scaling resources horizontally. To overcome these challenges, we propose a proactive and reactive hybrid auto-scaling policy called PRHAS. By predicting future traffic volume and the corresponding startup time of microservice, The proposed PRHAS mechanism can adapt computing resource to optimize scaling decisions in terms of normalizing the startup time of microservice. Compared to traditional k8s HPA methods, our policy not only enhances the QoS/SLO of edge AI microservices but also improves resource utilization in edge nodes.",
}