% Conference paper proposing an anchor-based detection model for Natural
% Language Localization (NLL) in ego-centric videos, from ICCE 2023.
%
% The citation key is an opaque hexadecimal identifier; it is kept unchanged
% so that existing \cite commands continue to resolve.
%
% Author names are all in "Last, First" form. The braces around the last
% author's given names are kept from the original record so that both words
% stay together as one given-name token.
%
% NOTE(review): the conference dates inside the note field look like
% DD-MM-YYYY (i.e. 6 to 8 January 2023) -- confirm against the proceedings.
% NOTE(review): the address field holds a country rather than the publisher's
% city -- confirm and replace with the city if it is known.
@inproceedings{93d643c44e4447c5a18e2c6e3607d234,
  author    = {Liu, Bei and Zheng, Sipeng and Fu, Jianlong and Cheng, {Wen Huang}},
  title     = {Anchor-Based Detection for Natural Language Localization in Ego-Centric Videos},
  booktitle = {2023 {IEEE} International Conference on Consumer Electronics, {ICCE} 2023},
  series    = {Digest of Technical Papers - {IEEE} International Conference on Consumer Electronics},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  year      = {2023},
  doi       = {10.1109/ICCE56470.2023.10043460},
  language  = {English},
  keywords  = {cross-modality, ego-centric video, Embodied AI, video understanding},
  abstract  = {The Natural Language Localization (NLL) task aims to localize a sentence in a video with starting and ending timestamps. It requires a comprehensive understanding of both language and videos. We have seen a lot of work conducted for third-person view videos, while the task on ego-centric videos is still under-explored, which is critical for the understanding of increasing ego-centric videos and further facilitating embodied AI tasks. Directly adapting existing methods of NLL to ego-centric video datasets is challenging due to two reasons. Firstly, there is a temporal duration gap between different datasets. Secondly, queries in ego-centric videos usually require a better understanding of more complex and long-term temporal orders. For the above reason, we propose an anchor-based detection model for NLL in ego-centric videos.},
  note      = {Publisher Copyright: {\textcopyright} 2023 IEEE.; 2023 IEEE International Conference on Consumer Electronics, ICCE 2023 ; Conference date: 06-01-2023 Through 08-01-2023},
}