@article{RDM,
  recid    = {9},
  author   = {Cabrera, Anthony M and Faber, Clayton and Cepeda, Kyle and Deber, Robert and Epstein, Cooper and Zheng, Jason and Cytron, Ron K and Chamberlain, Roger},
  title    = {Data Integration Benchmark Suite v1},
  date     = {2018-02-18},
  number   = {RDM},
  pages    = {179.4 MB},
  abstract = {Analyzing big data is a task encountered across disciplines. Addressing the challenges inherent in dealing with big data necessitates solutions that cover its three defining properties: volume, variety, and velocity. Less well understood, however, is the treatment the data must undergo even before any analysis can begin. Specifically, a non-trivial amount of time and resources is often spent retrieving and preprocessing big data. This problem is known collectively as data integration, a term frequently used for the general problem of taking data in some initial form and transforming it into a desired form. Examples include rearranging fields, changing the form of expression of one or more fields, altering the boundary notation of records and/or fields, encrypting or decrypting records and/or fields, and parsing non-record data and organizing it into a record-oriented form. In this work, we present our progress in creating a benchmarking suite that characterizes a diverse set of data integration applications.},
  url      = {http://data.library.wustl.edu/record/9},
  doi      = {10.7936/K7NZ8715},
}