% Conference paper from the EMBC 2022 proceedings (IEEE EMBS annual conference).
% The citation key is the exporter-generated identifier; it is kept as-is so
% existing citations keep resolving.
% The percent sign in the abstract is escaped: it is a literal character inside
% a BibTeX field but starts a comment once the field text reaches LaTeX.
@inproceedings{779703045da94943acd708ad56c20125,
  author    = {Hsiao, {Ching Ju} and Chen, Fei and Han, {Ji Yan} and Zheng, {Wei Zhong} and Lai, {Ying Hui}},
  title     = {A Deep Learning Based Approach to Synthesize Intelligible Speech with Limited Temporal Envelope Information},
  booktitle = {44th Annual International Conference of the {IEEE} Engineering in Medicine and Biology Society, {EMBC} 2022},
  series    = {Proceedings of the Annual International Conference of the {IEEE} Engineering in Medicine and Biology Society, {EMBS}},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  pages     = {1972--1976},
  year      = {2022},
  month     = jul,
  doi       = {10.1109/EMBC48229.2022.9871247},
  language  = {English},
  note      = {Publisher Copyright: {\textcopyright} 2022 IEEE. Conference date: 11-07-2022 through 15-07-2022},
  abstract  = {Envelope waveforms can be extracted from multiple frequency bands of a speech signal, and envelope waveforms carry important intelligibility information for human speech communication. This study aimed to investigate whether a deep learning-based model with features of temporal envelope information could synthesize an intelligible speech, and to study the effect of reducing the number (from 8 to 2 in this work) of temporal envelope information on the intelligibility of the synthesized speech. The objective evaluation metric of short-time objective intelligibility (STOI) showed that, on average, the synthesized speech of the proposed approach provided higher STOI (i.e., 0.8) scores in each test condition; and the human listening test showed that the average word correct rate of eight listeners was higher than 97.5\%. These findings indicated that the proposed deep learning-based system can be a potential approach to synthesize a highly intelligible speech with limited envelope information in the future.},
}