@comment{Metadata}
@comment{Mesquita:2017:ViSeOb -- conference paper record; the url field points
  to the repository copy. The booktitle is spelled out from the conference name
  and edition given in the organization field. Percent signs in the abstract
  are LaTeX-escaped so the field can be typeset. The fields affiliation,
  conference-location, conference-year, ibi, targetfile and urlaccessdate are
  non-standard repository metadata; unknown fields are ignored by styles that
  do not define them. urldate repeats urlaccessdate in ISO form.}
@InProceedings{Mesquita:2017:ViSeOb,
               author = "Mesquita, Rafael Galv{\~a}o de",
          affiliation = "{Universidade Federal de Pernambuco}",
                title = "Visual Search for Object Instances Guided by Visual Attention 
                         Algorithms",
            booktitle = "Proceedings of the 30th Conference on Graphics, Patterns and 
                         Images ({SIBGRAPI})",
                 year = "2017",
               editor = "Torchelsen, Rafael Piccin and Nascimento, Erickson Rangel do and 
                         Panozzo, Daniele and Liu, Zicheng and Farias, Myl{\`e}ne and 
                         Viera, Thales and Sacht, Leonardo and Ferreira, Nivan and Comba, 
                         Jo{\~a}o Luiz Dihl and Hirata, Nina and Schiavon Porto, Marcelo 
                         and Vital, Creto and Pagot, Christian Azambuja and Petronetto, 
                         Fabiano and Clua, Esteban and Cardeal, Fl{\'a}vio",
         organization = "Conference on Graphics, Patterns and Images, 30. (SIBGRAPI)",
            publisher = "Sociedade Brasileira de Computa{\c{c}}{\~a}o",
              address = "Porto Alegre",
             keywords = "Visual search, saliency detection, visual attention, object 
                         recognition, local feature detectors/descriptors, matching",
             abstract = "Visual attention is the process by which the human brain 
                         prioritizes and controls visual stimuli and it is, among other 
                         characteristics of the visual system, responsible for the fast way 
                         in which human beings interact with the environment, even 
                         considering a large amount of information to be processed. Visual 
                         attention can be driven by a bottom-up mechanism, in which low 
                         level stimuli of the analysed scene, like color, guides the 
                         focused region to salient regions (regions that are distinguished 
                         from its neighborhood or from the whole scene); or by a top-down 
                         mechanism, in which cognitive factors, like expectations or the 
                         goal of concluding certain task, define the attended location. 
                         This Thesis investigates the use of visual attention algorithms to 
                         guide (and to accelerate) the search for objects in digital 
                         images. Inspired by the bottom-up mechanism, a saliency detector 
                         based on the estimative of the scenes background combined with the 
                         result of a Laplacian-based operator, referred as BLS (Background 
                         Laplacian Saliency), is proposed. Moreover, a modification in SURF 
                         (Speeded-Up Robust Features) local feature detector/descriptor, 
                         named as patch-based SURF, is designed so that the recognition 
                         occurs iteratively in each focused location of the scene, instead 
                         of performing the classical recognition (classic search), in which 
                         the whole scene is analysed at once. The search mode in which the 
                         patch-based SURF is applied and the order of the regions of the 
                         image to be analysed is defined by a saliency detection algorithm 
                         is called BGMS. The BLS and nine other state-of-the-art saliency 
                         detection algorithms are experimented in the BGMS. Results 
                         indicate, in average, a reduction to (i) 73\% of the classic search 
                         processing time just by applying patch-based SURF in a random 
                         search, (ii) and to 53\% of this time when the search is guided by 
                         BLS. When using other state-of-the-art saliency detection 
                         algorithms, between 55\% and 133\% of the processing time of the 
                         classic search is needed to perform recognition. Moreover, 
                         inspired by the top-down mechanism, it is proposed the BGCO, in 
                         which the visual search occurs by prioritizing scene descriptors 
                         according to its Hamming distance to the descriptors of a given 
                         target object. The BGCO uses Bloom filters to represent feature 
                         vectors that are similar to the descriptors of the searched object 
                         and it has constant space and time complexity in relation to the 
                         number of elements in the set of the descriptors of the target. 
                         Experiments showed a reduction in the processing time to 80\% of 
                         the required time when the classic search is performed. Finally, 
                         by using the BGMS and the BGCO in an integrated way, the 
                         processing time of the search was reduced to 44\% of the execution 
                         time required by the classic search.",
  conference-location = "Niter{\'o}i, RJ, Brazil",
      conference-year = "17-20 Oct. 2017",
             language = "en",
                  ibi = "8JMKD3MGPAW/3PJ97CE",
                  url = "http://urlib.net/ibi/8JMKD3MGPAW/3PJ97CE",
              urldate = "2025-07-12",
           targetfile = "MesquitaMello_final.pdf",
        urlaccessdate = "2025, July 12"
}