% Conference paper by Hassan and Bansal in the IRI 2018 proceedings (pp. 364--371).
% Maintenance notes:
%   - Case-sensitive words in the title are brace-protected so that styles which
%     sentence-case titles keep "NoSQL" and "Apache Spark" intact.
%   - No series field: the export carried one that was an exact copy of booktitle,
%     which makes series-printing styles emit the proceedings name twice.
%   - month/day hold the export's publication date; the conference dates are only
%     recorded in the free-text note, exactly as exported.
@inproceedings{0ed0efa6c9bd4c21a2bafcd4b002b5bd,
  author    = {Hassan, Mahmudul and Bansal, Srividya},
  title     = {Semantic Data Querying over {NoSQL} Databases with {Apache} {Spark}},
  booktitle = {Proceedings - 2018 {IEEE} 19th International Conference on Information Reuse and Integration for Data Science, {IRI} 2018},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  pages     = {364--371},
  year      = {2018},
  month     = aug,
  day       = {2},
  doi       = {10.1109/IRI.2018.00061},
  isbn      = {9781538626597},
  language  = {English (US)},
  keywords  = {Apache Spark, Hadoop, In-memory RDF processing, Information reuse, NoSQL, SPARQL Querying, Semantic RDF data storage},
  abstract  = {The rapid growth of semantic data in the form of RDF triples demands a scalable distributed storage and efficient query processing engine for its management and reuse. To overcome the limitation of native RDF stores and traditional relational database management systems and scale adequately with the exponential increase in the size of RDF datasets, Big Data processing infrastructure like Hadoop with MapReduce have been used. NoSQL databases such as HBase and Cassandra for storing large-scale RDF data and in-memory data processing to execute SPARQL query as SQL query using Apache Spark is proposed in this paper. This paper presents techniques for distributed RDF data storage and querying schemes for HBase and Cassandra clusters. We also present a compiler that translates SPARQL queries into their Spark SQL equivalent for execution. An empirical comparison of HBase and Cassandra systems using datasets and queries from Berlin SPARQL Benchmark (BSBM) and SPARQL Performance Benchmark (SP2Bench) on Microsoft Azure cloud is presented.},
  note      = {Publisher Copyright: {\textcopyright} 2018 IEEE.; 19th IEEE International Conference on Information Reuse and Integration for Data Science, IRI 2018 ; Conference date: 07-07-2018 Through 09-07-2018},
}