% Conference paper: MM-Hand, in the proceedings of the 28th ACM International
% Conference on Multimedia (2020). The citation key is kept unchanged so that
% existing \cite commands keep resolving.
% No `series` field on purpose: its value was identical to `booktitle`, which
% makes styles that print both fields repeat the proceedings title.
@inproceedings{9b9e0a3fdb224252a5fd4cf0deba5081,
title = "{MM-Hand}: {3D}-Aware Multi-Modal Guided Hand Generation for {3D} Hand Pose Synthesis",
abstract = "Estimating the 3D hand pose from a monocular RGB image is important but challenging. A solution is training on large-scale RGB hand images with accurate 3D hand keypoint annotations. However, it is too expensive in practice. Instead, we develop a learning-based approach to synthesize realistic, diverse, and 3D pose-preserving hand images under the guidance of 3D pose information. We propose a 3D-aware multi-modal guided hand generative network (MM-Hand), together with a novel geometry-based curriculum learning strategy. Our extensive experimental results demonstrate that the 3D-annotated images generated by MM-Hand qualitatively and quantitatively outperform existing options. Moreover, the augmented data can consistently improve the quantitative performance of the state-of-the-art 3D hand pose estimators on two benchmark datasets. The code will be available at https://github.com/ScottHoang/mm-hand.",
keywords = "3d hand-pose, conditional generative adversarial nets, curriculum learning, multi-modal",
author = "Wu, Zhenyu and Hoang, Duc and Lin, {Shih Yao} and Xie, Yusheng and Chen, Liangjian and Lin, {Yen Yu} and Wang, Zhangyang and Fan, Wei",
note = "Publisher Copyright: {\textcopyright} 2020 ACM.; 28th ACM International Conference on Multimedia, MM 2020 ; Conference date: 12-10-2020 Through 16-10-2020",
year = "2020",
month = oct,
day = "12",
doi = "10.1145/3394171.3413555",
language = "English",
publisher = "Association for Computing Machinery, Inc",
pages = "2508--2516",
booktitle = "{MM} 2020 - Proceedings of the 28th {ACM} International Conference on Multimedia",
}