author = "Gaio J{\'u}nior, Airton and Santos, Eulanda Miranda dos",
          affiliation = "{Federal University of Amazonas - UFAM} and {Federal University of 
                         Amazonas - UFAM}",
                title = "A method for opinion classification in video combining facial 
                         expressions and gestures",
            booktitle = "Proceedings...",
                 year = "2018",
               editor = "Ross, Arun and Gastal, Eduardo S. L. and Jorge, Joaquim A. and 
                         Queiroz, Ricardo L. de and Minetto, Rodrigo and Sarkar, Sudeep and 
                         Papa, Jo{\~a}o Paulo and Oliveira, Manuel M. and Arbel{\'a}ez, 
                         Pablo and Mery, Domingo and Oliveira, Maria Cristina Ferreira de 
                         and Spina, Thiago Vallin and Mendes, Caroline Mazetto and Costa, 
                         Henrique S{\'e}rgio Gutierrez and Mejail, Marta Estela and Geus, 
                         Klaus de and Scheer, Sergio",
         organization = "Conference on Graphics, Patterns and Images, 31. (SIBGRAPI)",
            publisher = "IEEE Computer Society",
              address = "Los Alamitos",
             keywords = "opinion classification, video, facial expression, gesture, body 
                          expression, FV, VLAD, encoder.",
             abstract = "Most of the researches dealing with video-based opinion 
                         recognition problems employ the combination of data from three 
                         different sources: video, audio and text. As a consequence, they 
                         are solutions based on complex and language-dependent models. 
                         Besides such complexity, it may be observed that these current 
                         solutions attain low performance in practical applications. 
                         Focusing on overcoming these drawbacks, this work presents a 
                         method for opinion classification that uses only video as data 
                         source, more precisely, facial expression and body gesture 
                         information are extracted from online videos and combined to lead 
                         to higher classification rates. The proposed method uses feature 
                         encoding strategies to improve data representation and to 
                         facilitate the classification task in order to predict user's 
                         opinion with high accuracy and independently of the language used 
                         in videos. Experiments were carried out using three public 
                         databases and three baselines to test the proposed method. The 
                         results of these experiments show that, even performing only 
                         visual analysis of the videos, the proposed method achieves 16\% 
                         higher accuracy and precision rates, when compared to baselines 
                          that analyze visual, audio and textual video data. Moreover, it is 
                         showed that the proposed method may identify emotions in videos 
                         whose language is other than the language used for training.",
  conference-location = "Foz do Igua{\c{c}}u, PR, Brazil",
      conference-year = "Oct. 29 - Nov. 1, 2018",
             language = "en",
           targetfile = "method-opinion-classification_id_94.pdf",
        urlaccessdate = "2020, Dec. 04"