@inproceedings{0cbfea33d20043e6afd9ba1f6709ef53,
title = "BATCHMIXUP: Improving Training by Interpolating Hidden States of the Entire Mini-batch",
abstract = "Usually, we train a neural system on a sequence of mini-batches of labeled instances. Each mini-batch is composed of k samples, and each sample is encoded into a representation vector. MIXUP implicitly generates synthetic samples by linearly interpolating the inputs and corresponding labels of random sample pairs within the same mini-batch. This means that MIXUP only generates new points on the edges connecting pairs of original points in the representation space. We observed that the new points produced by standard MIXUP cover only a limited region of the space spanned by the mini-batch. In this work, we propose BATCHMIXUP, which improves model learning by interpolating the hidden states of the entire mini-batch. BATCHMIXUP can generate new points scattered throughout the space corresponding to the mini-batch. In experiments, BATCHMIXUP outperforms competitive baselines on NLP tasks across different ratios of training data.",
author = "Wenpeng Yin and Huan Wang and Jin Qu and Caiming Xiong",
note = "Publisher Copyright: {\textcopyright} 2021 Association for Computational Linguistics; Findings of the Association for Computational Linguistics: ACL-IJCNLP 2021; Conference date: 01-08-2021 through 06-08-2021",
year = "2021",
language = "English (US)",
series = "Findings of the Association for Computational Linguistics: ACL-IJCNLP 2021",
publisher = "Association for Computational Linguistics (ACL)",
pages = "4908--4912",
editor = "Chengqing Zong and Fei Xia and Wenjie Li and Roberto Navigli",
booktitle = "Findings of the Association for Computational Linguistics: ACL-IJCNLP 2021",
}
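
The abstract contrasts standard pairwise MIXUP with batch-level interpolation of hidden states. The following is a minimal sketch (not the authors' released code) of the two ideas, assuming PyTorch, a mini-batch of hidden vectors h, and one-hot labels y; the function names and the Dirichlet choice of convex mixing weights in the batch-level variant are illustrative assumptions, not the paper's exact formulation.

import torch

def pairwise_mixup(h, y, alpha=0.4):
    # Standard MIXUP: interpolate random sample pairs within the mini-batch.
    # Each synthetic point lies on an edge connecting two original points.
    lam = torch.distributions.Beta(alpha, alpha).sample()
    perm = torch.randperm(h.size(0))
    h_mix = lam * h + (1 - lam) * h[perm]
    y_mix = lam * y + (1 - lam) * y[perm]
    return h_mix, y_mix

def batch_mixup(h, y, alpha=0.4):
    # Batch-level interpolation (illustrative): each synthetic point is a convex
    # combination of all hidden states in the mini-batch, so new points can fall
    # anywhere inside the convex hull spanned by the batch, not only on pairwise
    # edges. The Dirichlet-weight scheme here is an assumption for illustration.
    k = h.size(0)
    # One set of convex weights per synthetic sample: shape (k, k), rows sum to 1.
    w = torch.distributions.Dirichlet(torch.full((k,), alpha)).sample((k,))
    h_mix = w @ h  # (k, k) @ (k, d) -> (k, d)
    y_mix = w @ y  # (k, k) @ (k, c) -> (k, c)
    return h_mix, y_mix

if __name__ == "__main__":
    h = torch.randn(8, 16)                       # 8 hidden states of dimension 16
    y = torch.eye(4)[torch.randint(0, 4, (8,))]  # one-hot labels over 4 classes
    print(pairwise_mixup(h, y)[0].shape)         # torch.Size([8, 16])
    print(batch_mixup(h, y)[0].shape)            # torch.Size([8, 16])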