% Conference paper in the ADMA 2017 proceedings. The citation key is kept
% unchanged so that existing \cite commands continue to resolve.
@inproceedings{44c8640190ea45d081f47df5f51795f6,
  author    = {{\v C}ech, P{\v r}emysl and Marou{\v s}ek, Jakub and Loko{\v c}, Jakub and Silva, Yasin and Starks, Jeremy},
  title     = {Comparing {MapReduce}-Based {k-NN} Similarity Joins on {Hadoop} for High-Dimensional Data},
  booktitle = {Advanced Data Mining and Applications - 13th International Conference, ADMA 2017, Proceedings},
  editor    = {Peng, Wen-Chih and Zhang, Wei Emma and Cong, Gao and Sun, Aixin and Li, Chengliang},
  series    = {Lecture Notes in Computer Science},
  publisher = {Springer Verlag},
  year      = {2017},
  pages     = {63--75},
  doi       = {10.1007/978-3-319-69179-4_5},
  isbn      = {9783319691787},
  language  = {English (US)},
  keywords  = {Approximate similarity join, HTTPS data, Hadoop, K-NN, MapReduce},
  abstract  = {Similarity joins represent a useful operator for data mining, data analysis and data exploration applications. With the exponential growth of data to be analyzed, distributed approaches like MapReduce are required. So far, the state-of-the-art similarity join approaches based on MapReduce mainly focused on the processing of vector data with less than one hundred dimensions. In this paper, we revisit and investigate the performance of different MapReduce-based approximate k-NN similarity join approaches on Apache Hadoop for large volumes of high-dimensional vector data.},
  note      = {Funding Information: This project was supported by the GA{\v C}R 15-08916S and GAUK. Publisher Copyright: {\textcopyright} Springer International Publishing AG 2017.; 13th International Conference on Advanced Data Mining and Applications, ADMA 2017; Conference date: 05-11-2017 Through 06-11-2017},
}