author = "Camillo, Mario and Wu, Shin-Ting",
          affiliation = "{University of Campinas} and {University of Campinas}",
                title = "Accessing {CUDA} features in the {OpenGL} rendering pipeline: A case 
                         study using {N-Body} simulation",
            booktitle = "Proceedings of the 30th Conference on Graphics, Patterns and Images ({SIBGRAPI})",
                 year = "2017",
               editor = "Torchelsen, Rafael Piccin and Nascimento, Erickson Rangel do and 
                         Panozzo, Daniele and Liu, Zicheng and Farias, Myl{\`e}ne and 
                         Viera, Thales and Sacht, Leonardo and Ferreira, Nivan and Comba, 
                         Jo{\~a}o Luiz Dihl and Hirata, Nina and Schiavon Porto, Marcelo 
                         and Vital, Creto and Pagot, Christian Azambuja and Petronetto, 
                         Fabiano and Clua, Esteban and Cardeal, Fl{\'a}vio",
         organization = "Conference on Graphics, Patterns and Images, 30. (SIBGRAPI)",
            publisher = "IEEE Computer Society",
              address = "Los Alamitos",
             keywords = "GLSL, CUDA, n-body simulation, OpenGL",
             abstract = "The advances of the graphics processing unit (GPU) architecture 
                         and its rapid evolution towards general purpose GPU make a series 
                         of applications adopt a general purpose (GPGPU) and a graphics 
                         computing interoperability approach in which the first is used for 
                         heavy calculations and the second for 3D graphics rendering. 
                         Because GPGPU exposes several hardware features, such as shared 
                         memory and thread synchronization mechanism, it allows a developer 
                         to write more efficient code. Nevertheless, we conjecture that 
                         such hardware features are also available in the graphics 
                         computing interface OpenGL 4.5 or later through the graphics 
                         concepts: blending, transform feedback, tessellation and 
                         instancing. In this paper we assess our conjecture by implementing 
                         an N-body simulation with both approaches. We indeed devise a 
                         novel non-graphics application to the tessellation hardware and 
                         the instanced rendering circuit. Instead of refining a mesh, we 
                         use the abstract patch for gaining direct accesses to shared 
                         memory. In the place of drawing multiple objects, we apply the 
                         instanced rendering technology for improving sequential data 
                         accesses. Comparative timing analysis is provided. We believe that 
                         these results provide better understanding of the graphics 
                         features that are useful for closing the performance gap between 
                         OpenGL and a GPGPU architecture, and open a new perspective on 
                         implementing solely with the OpenGL graphics applications that 
                         require both intense, but pre-specified, memory accesses and 3D 
                         graphics rendering.",
  conference-location = "Niter{\'o}i, RJ",
      conference-year = "Oct. 17-20, 2017",
             language = "en",
           targetfile = "30-camera-ready.pdf",
        urlaccessdate = "2021, Jan. 25"