@inproceedings{96982734187e46ed90b6ec0378bf6e5d,
title = "{CNN} Based Two-stage Multi-resolution End-to-end Model for Singing Melody Extraction",
abstract = "Inspired by human hearing perception, we propose a two-stage multi-resolution end-to-end model for singing melody extraction in this paper. The convolutional neural network (CNN) is the core of the proposed model to generate multi-resolution representations. The 1-D and 2-D multi-resolution analysis on waveform and spectrogram-like graph are successively carried out by using 1-D and 2-D CNN kernels of different lengths and sizes. The 1-D CNNs with kernels of different lengths produce multi-resolution spectrogram-like graphs without suffering from the trade-off between spectral and temporal resolutions. The 2-D CNNs with kernels of different sizes extract features from spectro-temporal envelopes of different scales. Experiment results show the proposed model outperforms three compared systems in three out of five public databases.",
keywords = "Melody extraction, convolution neural network, end-to-end learning, multi-resolution, music information retrieval",
author = "Chen, {Ming Tso} and Li, {Bo Jun} and Chi, Tai-Shih",
year = "2019",
month = may,
day = "1",
doi = "10.1109/ICASSP.2019.8683630",
language = "English",
series = "ICASSP, IEEE International Conference on Acoustics, Speech and Signal Processing - Proceedings",
publisher = "Institute of Electrical and Electronics Engineers Inc.",
pages = "1005--1009",
booktitle = "2019 IEEE International Conference on Acoustics, Speech, and Signal Processing, ICASSP 2019 - Proceedings",
address = "United States",
note = "44th IEEE International Conference on Acoustics, Speech, and Signal Processing, ICASSP 2019 ; Conference date: 12-05-2019 Through 17-05-2019",
}