@inproceedings{6b39348766d944a293d32cef43c2b8e2,
title = "Temporally-Aggregating Multiple-Discontinuous-Image Saliency Prediction with Transformer-Based Attention",
abstract = "In this paper, we aim to apply deep saliency prediction to automatic drone exploration, which must consider not only a single image but multiple images captured from different view angles or locations in order to determine the exploration direction. However, little attention has been paid to such a saliency prediction problem over multiple discontinuous images, and none of the existing methods takes temporal information into consideration, so the currently predicted saliency map may be inconsistent with previously predicted results. To this end, we propose a method named Temporally-Aggregating Multiple-Discontinuous-Image Saliency Prediction Network (TA-MSNet). It utilizes a transformer-based attention module to correlate relative saliency information among multiple discontinuous images and, furthermore, applies a ConvLSTM module to capture temporal information. Experiments show that the proposed TA-MSNet estimates better and more consistent results than previous works on time series data.",
keywords = "deep learning, drone exploration, saliency prediction, time series",
author = "Huang, {Pin-Jie} and Lu, {Chi-An} and Chen, {Kuan-Wen}",
note = "Publisher Copyright: {\textcopyright} 2022 IEEE; 39th IEEE International Conference on Robotics and Automation, ICRA 2022; Conference date: 23-05-2022 through 27-05-2022",
year = "2022",
doi = "10.1109/ICRA46639.2022.9811544",
language = "English",
series = "Proceedings - IEEE International Conference on Robotics and Automation",
publisher = "Institute of Electrical and Electronics Engineers Inc.",
pages = "6571--6577",
booktitle = "2022 IEEE International Conference on Robotics and Automation, ICRA 2022",
address = "United States",
}
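
The abstract describes TA-MSNet as a transformer-based attention module that correlates saliency features across multiple discontinuous views, followed by a ConvLSTM that aggregates them over time. The sketch below illustrates that general idea only; the module names, feature dimensions, pooling scheme, and wiring are assumptions for illustration and are not the authors' implementation.

# Minimal PyTorch sketch of the idea summarized in the abstract (assumed design).
import torch
import torch.nn as nn

class CrossViewAttention(nn.Module):
    """Self-attention over per-view feature tokens (illustrative, not the paper's module)."""
    def __init__(self, dim=256, heads=4):
        super().__init__()
        self.attn = nn.MultiheadAttention(dim, heads, batch_first=True)

    def forward(self, view_feats):               # view_feats: (B, V, dim), V = number of views
        out, _ = self.attn(view_feats, view_feats, view_feats)
        return out

class ConvLSTMCellSketch(nn.Module):
    """Bare-bones ConvLSTM cell for temporal aggregation (illustrative)."""
    def __init__(self, in_ch, hid_ch, k=3):
        super().__init__()
        self.gates = nn.Conv2d(in_ch + hid_ch, 4 * hid_ch, k, padding=k // 2)

    def forward(self, x, state):                  # x: (B, C, H, W)
        h, c = state
        i, f, o, g = torch.chunk(self.gates(torch.cat([x, h], dim=1)), 4, dim=1)
        c = torch.sigmoid(f) * c + torch.sigmoid(i) * torch.tanh(g)
        h = torch.sigmoid(o) * torch.tanh(c)
        return h, (h, c)

if __name__ == "__main__":
    B, V, D, H, W = 2, 4, 256, 8, 8               # all sizes are arbitrary placeholders
    attn = CrossViewAttention(D)
    cell = ConvLSTMCellSketch(D, D)
    tokens = torch.randn(B, V, D)                 # one pooled feature token per view
    fused = attn(tokens)                          # correlate saliency cues across views
    x = fused.mean(dim=1)[..., None, None].expand(B, D, H, W)
    h = c = torch.zeros(B, D, H, W)
    h, (h, c) = cell(x, (h, c))                   # one temporal aggregation step
    print(h.shape)                                # torch.Size([2, 256, 8, 8])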