% Conference paper: "GEM: An Efficient Entity Matching Framework for Geospatial Data"
% (Shah, Meduri, Sarwat), 29th ACM SIGSPATIAL conference, 2021.
% Acronyms are brace-protected so sentence-casing styles keep their capitals.
% Names use the "Last, First" form. The percent sign in the abstract is escaped
% so the text is safe if a style typesets it. "day" and "language" are not
% standard BibTeX fields; they are kept from the original export.
@inproceedings{f069d0fa62e84055801272f93d89cf88,
  author    = {Shah, Setu and Meduri, Vamsi and Sarwat, Mohamed},
  title     = {{GEM}: An Efficient Entity Matching Framework for Geospatial Data},
  booktitle = {29th {ACM} {SIGSPATIAL} International Conference on Advances in Geographic Information Systems, {SIGSPATIAL} 2021},
  editor    = {Meng, Xiaofeng and Wang, Fusheng and Lu, Chang-Tien and Huang, Yan and Shekhar, Shashi and Xie, Xing},
  series    = {{GIS}: Proceedings of the {ACM} International Symposium on Advances in Geographic Information Systems},
  publisher = {Association for Computing Machinery},
  pages     = {346--349},
  year      = {2021},
  month     = nov,
  day       = {2},
  doi       = {10.1145/3474717.3483973},
  language  = {English (US)},
  keywords  = {Apache Sedona, geohash, spatial blocking, spatial entity matching},
  abstract  = {Identifying various mentions of the same real-world locations is known as spatial entity matching. GEM is an end-to-end Geospatial EM framework that matches polygon geometry entities in addition to point geometry type. Blocking, feature vector creation, and classification are the core steps of our system. GEM comprises of an efficient and lightweight blocking technique, GeoPrune, that uses the geohash encoding mechanism. We re-purpose the spatial proximality operators from Apache Sedona to create semantically rich spatial feature vectors. The classification step in GEM is a pluggable component, which consumes a unique feature vector and determines whether the geolocations match or not. We conduct experiments with three classifiers upon multiple large-scale geospatial datasets consisting of both spatial and relational attributes. GEM achieves an F-measure of 1.0 for a point x point dataset with 176k total pairs, which is 42\% higher than a state-of-the-art spatial EM baseline. It achieves F-measures of 0.966 and 0.993 for the point x polygon dataset with 302M total pairs, and the polygon x polygon dataset with 16M total pairs respectively.},
  note      = {Publisher Copyright: {\textcopyright} 2021 ACM.; 29th ACM SIGSPATIAL International Conference on Advances in Geographic Information Systems, SIGSPATIAL 2021 ; Conference date: 02-11-2021 Through 05-11-2021},
}