author = "Bento, Mariana and Souza, Roberto and Frayne, Richard",
          affiliation = "{University of Calgary} and {University of Calgary} and 
                         {University of Calgary}",
                title = "Multicenter Imaging Studies: Automated Approach to Evaluating Data 
                         Variability and the Role of Outliers",
            booktitle = "Proceedings...",
                 year = "2018",
               editor = "Ross, Arun and Gastal, Eduardo S. L. and Jorge, Joaquim A. and 
                         Queiroz, Ricardo L. de and Minetto, Rodrigo and Sarkar, Sudeep and 
                         Papa, Jo{\~a}o Paulo and Oliveira, Manuel M. and Arbel{\'a}ez, 
                         Pablo and Mery, Domingo and Oliveira, Maria Cristina Ferreira de 
                         and Spina, Thiago Vallin and Mendes, Caroline Mazetto and Costa, 
                         Henrique S{\'e}rgio Gutierrez and Mejail, Marta Estela and Geus, 
                         Klaus de and Scheer, Sergio",
         organization = "Conference on Graphics, Patterns and Images, 31. (SIBGRAPI)",
            publisher = "IEEE Computer Society",
              address = "Los Alamitos",
             keywords = "multicenter MR data, outlier detection, data variability",
             abstract = "Magnetic resonance (MR) as well as other imaging modalities have 
                         been used in a large number of clinical and research studies for 
                         the analysis and quantification of important structures and the 
                         detection of abnormalities. In this context, machine learning is 
                         playing an increasingly important role in the development of 
                         automated tools for aiding in image quantification, patient 
                         diagnosis and follow-up. Normally, these techniques require large, 
                         heterogeneous datasets to provide accurate and generalizable 
                         results. Large, multi-center studies, for example, can provide 
                         such data. Images acquired at different centers, however, can 
                         present varying characteristics due to differences in acquisition 
                         parameters, site procedures and scanners configuration. While 
                         variability in the dataset is required to develop robust, 
                         generalizable studies (i.e., independent of the acquisition 
                         parameters or center), like all studies there is also a need to 
                         ensure overall data quality by prospectively identifying and 
                         removing poor-quality data samples that should not be included, 
                         e.g., outliers. We wish to keep image samples that are 
                         representative of the underlying population (so called inliers), 
                         yet removing those samples that are not. We propose a framework to 
                         analyze data variability and identify samples that should be 
                         removed in order to have more representative, reliable and robust 
                         datasets. Our example case study is based on a public dataset 
                         containing T1-weighted volumetric head images data acquired at six 
                         different centers, using three different scanner vendors and at 
                         two commonly used magnetic fields strengths. We propose an 
                         algorithm for assessing data robustness and finding the optimal 
                         data for study occlusion (i.e., the data size that presents with 
                         lowest variability while maintaining generalizability (i.e., using 
                         samples from all sites)).",
  conference-location = "Foz do Igua{\c{c}}u, PR, Brazil",
      conference-year = "Oct. 29 - Nov. 1, 2018",
             language = "en",
           targetfile = "57_manuscript.pdf",
        urlaccessdate = "2020, Dec. 04"