Oversampling is commonly used to improve classifier performance for small tabular imbalanced datasets. State-of-the-art linear interpolation approaches can be used to generate synthetic samples from the convex space of the minority class. Generative networks are common deep learning approaches for synthetic sample generation. However, their scope on synthetic tabular data generation in the context of imbalanced classification is not adequately explored. In this article, we show that existing deep generative models perform poorly compared to linear interpolation-based approaches for imbalanced classification problems on small tabular datasets. To overcome this, we propose a deep generative model, ConvGeN that combines the idea of convex space learning with deep generative models. ConvGeN learns coefficients for the convex combinations of the minority class samples, such that the synthetic data is distinct enough from the majority class. Our benchmarking experiments demonstrate that our proposed model ConvGeN improves imbalanced classification on such small datasets, as compared to existing deep generative models, while being on par with the existing linear interpolation approaches. Moreover, we discuss how our model can be used for synthetic tabular data generation in general, even outside the scope of data imbalance, and thus improves the overall applicability of convex space learning.
%0 Journal Article
%1 schultz_convgen_2024
%A Schultz, Kristian
%A Bej, Saptarshi
%A Hahn, Waldemar
%A Wolfien, Markus
%A Srivastava, Prashant
%A Wolkenhauer, Olaf
%D 2024
%J Pattern Recognition
%K topic_lifescience Convex GAN, Imbalanced LoRAS, Tabular data data, learning, space
%P 110138
%R https://doi.org/10.1016/j.patcog.2023.110138
%T ConvGeN: A convex space learning approach for deep-generative oversampling and imbalanced classification of small tabular datasets
%U https://www.sciencedirect.com/science/article/pii/S003132032300835X
%V 147
%X Oversampling is commonly used to improve classifier performance for small tabular imbalanced datasets. State-of-the-art linear interpolation approaches can be used to generate synthetic samples from the convex space of the minority class. Generative networks are common deep learning approaches for synthetic sample generation. However, their scope on synthetic tabular data generation in the context of imbalanced classification is not adequately explored. In this article, we show that existing deep generative models perform poorly compared to linear interpolation-based approaches for imbalanced classification problems on small tabular datasets. To overcome this, we propose a deep generative model, ConvGeN that combines the idea of convex space learning with deep generative models. ConvGeN learns coefficients for the convex combinations of the minority class samples, such that the synthetic data is distinct enough from the majority class. Our benchmarking experiments demonstrate that our proposed model ConvGeN improves imbalanced classification on such small datasets, as compared to existing deep generative models, while being on par with the existing linear interpolation approaches. Moreover, we discuss how our model can be used for synthetic tabular data generation in general, even outside the scope of data imbalance, and thus improves the overall applicability of convex space learning.
% ConvGeN journal article (Pattern Recognition, vol. 147, 2024; article number 110138).
% The doi field holds the bare DOI (no https://doi.org/ resolver prefix) so that
% bibliography styles can build the link themselves. The added-at, biburl,
% interhash, intrahash and timestamp fields are kept exactly as exported.
@article{schultz_convgen_2024,
  abstract  = {Oversampling is commonly used to improve classifier performance for small tabular imbalanced datasets. State-of-the-art linear interpolation approaches can be used to generate synthetic samples from the convex space of the minority class. Generative networks are common deep learning approaches for synthetic sample generation. However, their scope on synthetic tabular data generation in the context of imbalanced classification is not adequately explored. In this article, we show that existing deep generative models perform poorly compared to linear interpolation-based approaches for imbalanced classification problems on small tabular datasets. To overcome this, we propose a deep generative model, ConvGeN that combines the idea of convex space learning with deep generative models. ConvGeN learns coefficients for the convex combinations of the minority class samples, such that the synthetic data is distinct enough from the majority class. Our benchmarking experiments demonstrate that our proposed model ConvGeN improves imbalanced classification on such small datasets, as compared to existing deep generative models, while being on par with the existing linear interpolation approaches. Moreover, we discuss how our model can be used for synthetic tabular data generation in general, even outside the scope of data imbalance, and thus improves the overall applicability of convex space learning.},
  added-at  = {2024-09-10T10:41:24.000+0200},
  author    = {Schultz, Kristian and Bej, Saptarshi and Hahn, Waldemar and Wolfien, Markus and Srivastava, Prashant and Wolkenhauer, Olaf},
  biburl    = {https://puma.scadsai.uni-leipzig.de/bibtex/2a56e136da29e10b23e0552abbf577af0/scadsfct},
  doi       = {10.1016/j.patcog.2023.110138},
  interhash = {b2990251ba05974b2e1d4f9bfacb75da},
  intrahash = {a56e136da29e10b23e0552abbf577af0},
  issn      = {0031-3203},
  journal   = {Pattern Recognition},
  keywords  = {topic_lifescience Convex GAN, Imbalanced LoRAS, Tabular data data, learning, space},
  pages     = {110138},
  timestamp = {2024-11-22T15:49:00.000+0100},
  title     = {{ConvGeN}: {A} convex space learning approach for deep-generative oversampling and imbalanced classification of small tabular datasets},
  url       = {https://www.sciencedirect.com/science/article/pii/S003132032300835X},
  volume    = {147},
  year      = {2024},
}