author = "Oliveira, Willian Dihanster G. de and Penatti, Ot{\'a}vio A. B. 
                         and Berton, Lilian",
          affiliation = "{Federal University of Sao Paulo} and {Samsung R\&D Institute} 
                         and {Federal University of Sao Paulo}",
                title = "A comparison of graph-based semi-supervised learning for data 
                         augmentation",
            booktitle = "Proceedings...",
                 year = "2020",
               editor = "Musse, Soraia Raupp and Cesar Junior, Roberto Marcondes and 
                         Pelechano, Nuria and Wang, Zhangyang (Atlas)",
         organization = "Conference on Graphics, Patterns and Images, 33. (SIBGRAPI)",
            publisher = "IEEE Computer Society",
              address = "Los Alamitos",
             keywords = "Image classification, data augmentation, image transformation, 
                         GANs, semi-supervised learning, machine learning",
             abstract = "In supervised learning, the algorithm accuracy usually improves 
                         with the size of the labeled dataset used for training the 
                         classifier. However, in many real-life scenarios, obtaining enough 
                         labeled data is costly or even not possible. In many 
                         circumstances, Data Augmentation (DA) techniques are usually 
                         employed, generating more labeled data for training machine 
                         learning algorithms. The common DA techniques are applied to 
                         already labeled data, generating simple variations of this data. 
                         For example, for image classification, image samples are rotated, 
                         cropped, flipped or other operators to generate variations of 
                         input image samples, and keeping their original labels. Other 
                         options are using Neural Networks algorithms that create new 
                         synthetic data or to employ Semi-supervised Learning (SSL) that 
                         label existing unlabeled data. In this paper, we perform a 
                         comparison among graph-based semi-supervised learning (GSSL) 
                         algorithms to augment the labeled dataset. The main advantage of 
                         using GSSL is that we can increase the training set by adding 
                         non-annotated images to the training set, therefore, we can 
                         benefit from the huge amount of unlabeled data available. 
                         Experiments are performed on five datasets for recognition of 
                         handwritten digits and letters (MNIST and EMNIST), animals (Dogs 
                         vs Cats), clothes (MNIST-Fashion) and remote sensing images 
                         (Brazilian Coffee Scenes), in which we compare different 
                         possibilities for DA, including the GSSL, Generative Adversarial 
                         Networks (GANs) and traditional Image Transformations (IT) applied 
                         on input labeled data. We also evaluated the impact of such 
                         techniques on different convolutional neural networks (CNN). 
                         Results indicate that, although all DA techniques performed well, 
                         GSSL was more robust to different image properties, presenting 
                         less accuracy variation across datasets.",
  conference-location = "Virtual",
      conference-year = "Nov. 7-10, 2020",
             language = "en",
           targetfile = "sibgrapi2020_ID30.pdf",
        urlaccessdate = "2020, Dec. 04"