author = "Otiniano-Rodr{\'{\i}}guez, K. and C{\'a}mara-Ch{\'a}vez, G.",
          affiliation = "{Federal University of Ouro Preto} and {Federal University of Ouro Preto}",
                title = "Finger Spelling Recognition from RGB-D Information using Kernel Descriptor",
            booktitle = "Proceedings...",
                 year = "2013",
               editor = "Boyer, Kim and Hirata, Nina and Nedel, Luciana and Silva, Claudio",
         organization = "Conference on Graphics, Patterns and Images, 26. (SIBGRAPI)",
            publisher = "IEEE Computer Society",
              address = "Los Alamitos",
             keywords = "sign language, finger spelling, support vector machine (SVM), kernel descriptor",
             abstract = "Deaf people use systems of communication based on sign language 
                         and finger spelling. Manual spelling, or finger spelling, is a 
                          system where each letter of the alphabet is represented by a 
                          unique and discrete movement of the hand. RGB and depth images can 
                         be used to characterize hand shapes corresponding to letters of 
                         the alphabet. The advantage of depth cameras over color cameras 
                         for gesture recognition is more evident when performing hand 
                         segmentation. In this paper, we propose a hybrid system approach 
                         for finger spelling recognition using RGB-D information from 
                         Kinect sensor. In a first stage, the hand area is segmented from 
                         background using depth map and precise hand shape is extracted 
                         using both depth data and color data from Kinect sensor. Motivated 
                          by the performance of kernel based features, due to their simplicity 
                         and the ability to turn any type of pixel attribute into 
                         patch-level features, we decided to use the gradient kernel 
                         descriptor for feature extraction from depth images. The 
                         Scale-Invariant Feature Transform (SIFT) is used for describing 
                         the content of the RGB image. Then, the Bag-of-Visual-Words 
                         approach is used to extract semantic information. Finally, these 
                         features are used as input of our Support Vector Machine (SVM) 
                         classifier. The performance of this approach is quantitatively and 
                         qualitatively evaluated on a dataset of real images of American 
                         Sign Language (ASL) hand shapes. Three experiments were performed, 
                         using a combination of RGB and depth information and also using 
                         only RGB or depth information separately. The database used is 
                         composed of 120,000 images. According to our experiments, our 
                         approach has an accuracy rate of 91.26% when RGB and depth 
                          information is used, outperforming other state-of-the-art 
                          methods.",
  conference-location = "Arequipa, Peru",
      conference-year = "Aug. 5-8, 2013",
             language = "en",
           targetfile = "final paper.pdf",
        urlaccessdate = "2020, Nov. 28"