% Conference paper in the RADIOELEKTRONIKA 2022 proceedings (publisher: IEEE).
% NOTE(review): "address" holds a country name, not a publisher city; the city
% is not present in this record -- TODO confirm and replace.
@inproceedings{588dba696e184646b8bc39eb7bdf2894,
  author    = {Liao, {Yuan Fu} and Hsu, {Wen Han} and Pan, {Chen Ming} and Wang, {Wern Jun} and Pleva, Matus and Hladek, Daniel},
  title     = {Personalized {Taiwanese} Speech Synthesis using Cascaded {ASR} and {TTS} Framework},
  booktitle = {2022 32nd International Conference Radioelektronika, RADIOELEKTRONIKA 2022 - Proceedings},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  year      = {2022},
  doi       = {10.1109/RADIOELEKTRONIKA54537.2022.9764940},
  language  = {English},
  keywords  = {speech recognition, Taiwanese speech corpus, Taiwanese speech synthesis, voice conversion},
  abstract  = {To bring endangered Taiwanese language back to life, this paper leveraged a large-scale Taiwanese across Taiwan (TAT) corpus to construct cascaded automatic speech recognition (ASR) and text-to-speech (TTS)-based personalized Taiwanese speech synthesizers to help young people to learn how to speak Taiwanese. This paradigm not only alleviates the low resource, nonparallel corpus and cross-lingual training data problems but also dramatically reduces the fine-tuning data size and training time. Experimental results on a Taiwanese-to-Taiwanese and Mandarin-to-Taiwanese voice conversion tasks had shown that it allows us to successfully produce good personalized Taiwanese TTS with only approximately 3 minutes of data in both cases.},
  note      = {Publisher Copyright: {\textcopyright} 2022 IEEE.; 32nd International Conference Radioelektronika, RADIOELEKTRONIKA 2022 ; Conference date: 21-04-2022 Through 22-04-2022},
}