Close

@comment{
  Conference paper record for SIBGRAPI 2020.
  The fields affiliation, conference-location, conference-year, ibi,
  targetfile and urlaccessdate are not standard BibTeX fields; standard
  styles ignore them and they are kept only as repository metadata.
  The doi field carries the publisher link, so url holds the repository
  record only (a field name may appear just once per entry).
}
@InProceedings{Schirmer:2020:Li2DPo,
               author = "Schirmer, Luiz",
          affiliation = "PUC-rio",
                title = "A lightweight {2D} Pose Machine with attention enhancement",
            booktitle = "Proceedings of the 33rd Conference on Graphics, Patterns and
                         Images ({SIBGRAPI})",
                 year = "2020",
               editor = "Musse, Soraia Raupp and Cesar Junior, Roberto Marcondes and
                         Pelechano, Nuria and Wang, Zhangyang (Atlas)",
         organization = "Conference on Graphics, Patterns and Images, 33. (SIBGRAPI)",
            publisher = "IEEE Computer Society",
              address = "Los Alamitos",
             keywords = "pose estimation, tensor decomposition, attention layer",
             abstract = "Pose estimation is a challenging task in computer vision that has
                         many applications, as for example: in motion capture, in medical
                         analysis, in human posture monitoring, and in robotics. In other
                         words, it is a main tool to enable machines do understand human
                         patterns in videos or images. Performing this task in real-time
                         while maintaining accuracy and precision is critical for many of
                         these applications. Several papers propose real time approaches
                         considering deep neural networks for pose estimation. However, in
                         most cases they fail when considering run-time performance or do
                         not achieve the precision needed. In this work, we propose a new
                         model for real-time pose estimation considering attention modules
                         for convolutional neural networks (CNNs). We introduce a
                         two-dimensional relative attention mechanism for feature
                         extraction in pose machines leading to improvements in accuracy.
                         We create a single shot architecture where both operations to
                         infer keypoints and part affinity fields share the information.
                         Also, for each stage, we use tensor decompositions to not only
                         reduce dimensionality, but also to improve performance. This
                         allows us to factorize each convolution and drastically reduce the
                         number of parameters in our network. Our experiments show that,
                         with this factorized approach, it is possible to achieve
                         state-of-art performance in terms of run-time while we have a
                         small reduction in accuracy.",
  conference-location = "Porto de Galinhas (virtual)",
      conference-year = "7-10 Nov. 2020",
                  doi = "10.1109/SIBGRAPI51738.2020.00051",
             language = "en",
                  ibi = "8JMKD3MGPEW34M/43B8A7P",
                  url = "http://urlib.net/ibi/8JMKD3MGPEW34M/43B8A7P",
           targetfile = "Pose_estimation_for_Sibgrapi_2020.pdf",
        urlaccessdate = "2024, Dec. 02"
}


Close