% SIBGRAPI 2020 conference paper comparing graph-based semi-supervised
% learning with other data augmentation techniques.
% Each field name may occur only once per entry: `url` holds the DOI
% resolver link, while the repository (IBI) link is kept in the custom
% `ibi-url` field, which standard styles ignore.
@InProceedings{OliveiraPenaBert:2020:CoGrSe,
author = "Oliveira, Willian Dihanster G. de and Penatti, Ot{\'a}vio A. B.
and Berton, Lilian",
affiliation = "{Federal University of Sao Paulo} and {Samsung R\&D Institute}
and {Federal University of Sao Paulo}",
title = "A comparison of graph-based semi-supervised learning for data
augmentation",
booktitle = "Proceedings of the 33rd Conference on Graphics, Patterns and
Images ({SIBGRAPI})",
year = "2020",
editor = "Musse, Soraia Raupp and Cesar Junior, Roberto Marcondes and
Pelechano, Nuria and Wang, Zhangyang (Atlas)",
organization = "Conference on Graphics, Patterns and Images, 33. (SIBGRAPI)",
publisher = "IEEE Computer Society",
address = "Los Alamitos",
keywords = "Image classification, data augmentation, image transformation,
GANs, semi-supervised learning, machine learning",
abstract = "In supervised learning, the algorithm accuracy usually improves
with the size of the labeled dataset used for training the
classifier. However, in many real-life scenarios, obtaining enough
labeled data is costly or even not possible. In many
circumstances, Data Augmentation (DA) techniques are usually
employed, generating more labeled data for training machine
learning algorithms. The common DA techniques are applied to
already labeled data, generating simple variations of this data.
For example, for image classification, image samples are rotated,
cropped, flipped or other operators to generate variations of
input image samples, and keeping their original labels. Other
options are using Neural Networks algorithms that create new
synthetic data or to employ Semi-supervised Learning (SSL) that
label existing unlabeled data. In this paper, we perform a
comparison among graph-based semi-supervised learning (GSSL)
algorithms to augment the labeled dataset. The main advantage of
using GSSL is that we can increase the training set by adding
non-annotated images to the training set, therefore, we can
benefit from the huge amount of unlabeled data available.
Experiments are performed on five datasets for recognition of
handwritten digits and letters (MNIST and EMNIST), animals (Dogs
vs Cats), clothes (MNIST-Fashion) and remote sensing images
(Brazilian Coffee Scenes), in which we compare different
possibilities for DA, including the GSSL, Generative Adversarial
Networks (GANs) and traditional Image Transformations (IT) applied
on input labeled data. We also evaluated the impact of such
techniques on different convolutional neural networks (CNN).
Results indicate that, although all DA techniques performed well,
GSSL was more robust to different image properties, presenting
less accuracy variation across datasets.",
conference-location = "Porto de Galinhas (virtual)",
conference-year = "7-10 Nov. 2020",
doi = "10.1109/SIBGRAPI51738.2020.00043",
url = "http://dx.doi.org/10.1109/SIBGRAPI51738.2020.00043",
language = "en",
ibi = "8JMKD3MGPEW34M/43AH7MS",
ibi-url = "http://urlib.net/ibi/8JMKD3MGPEW34M/43AH7MS",
targetfile = "sibgrapi2020_ID30.pdf",
urlaccessdate = "2025, Jan. 15"
}