author = "Santos, Fernando Pereira dos and Ponti, Moacir Antonelli",
          affiliation = "{Universidade de S{\~a}o Paulo} and {Universidade de S{\~a}o Paulo}",
                title = "Alignment of Local and Global Features from Multiple Layers of 
                         Convolutional Neural Network for Image Classification",
            booktitle = "Proceedings of the 32nd Conference on Graphics, Patterns and Images ({SIBGRAPI})",
                 year = "2019",
               editor = "Oliveira, Luciano Rebou{\c{c}}as de and Sarder, Pinaki and Lage, 
                         Marcos and Sadlo, Filip",
         organization = "Conference on Graphics, Patterns and Images, 32. (SIBGRAPI)",
            publisher = "IEEE Computer Society",
              address = "Los Alamitos",
             keywords = "feature learning, convolutional networks, fusion multiple maps, 
                         manifold alignment",
             abstract = "Convolutional networks have been extensively applied to obtain 
                         features spaces for classification tasks. Although those achieve 
                         high accuracy in many scenarios, typically only the top layers of 
                         the network are explored. Hence, a relevant question arises from 
                         this fact: are initial layers useful in terms of discriminative 
                         ability? In this paper, we leverage the complementary description 
                         offered by such first layers. Our method consists of features 
                         extraction in multiple layers, followed by feature selection, 
                         fusion of feature maps from the different layers, and space 
                         alignment. Through an extensive experimentation with different 
                         datasets and studying different training strategies, our results 
                         show that local information, coming from the first layers, may 
                         significantly improve the classification performance when merged 
                         with a global descriptor extracted from a top layer of the 
                         network. We report different methods for reducing the 
                         dimensionality of the local descriptors, and guidelines on how to 
                         align them so that to perform fusion. Our study encourages future 
                         studies on combining feature maps from multiple layers, which may 
                         be relevant in particular for transfer learning scenarios.",
  conference-location = "Rio de Janeiro, RJ, Brazil",
      conference-year = "Oct. 28 - 31, 2019",
             language = "en",
           targetfile = "camera_ready_92.pdf",
        urlaccessdate = "2022, Jan. 21"