% Workshop paper in an edited proceedings volume of the series
% "Communications in Computer and Information Science".
% NOTE(review): `address` holds a country rather than the publisher's city, and
% no `volume` is recorded for the series -- confirm both against the publisher
% record before relying on them.
@inproceedings{1fda0d816d0c4a648af2909d60667b44,
  author    = {Wang, Zijie and Zhou, Lixi and Zou, Jia},
  title     = {Integration of fast-evolving data sources using a deep learning approach},
  booktitle = {Software Foundations for Data Interoperability and Large Scale Graph Data Analytics - 4th International Workshop, {SFDI} 2020, and 2nd International Workshop, {LSGDA} 2020, held in Conjunction with {VLDB} 2020, Proceedings},
  editor    = {Qin, Lu and Zhang, Wenjie and Zhang, Ying and Peng, You and Kato, Hiroyuki and Wang, Wei and Xiao, Chuan},
  series    = {Communications in Computer and Information Science},
  publisher = {Springer Science and Business Media Deutschland GmbH},
  address   = {Germany},
  year      = {2020},
  pages     = {172--186},
  doi       = {10.1007/978-3-030-61133-0_14},
  isbn      = {9783030611323},
  language  = {English (US)},
  keywords  = {Data integration, Deep learning, Schema evolution},
  abstract  = {Data scientists spent 80--90\% of their efforts in data integration and there is still no end-to-end automatic integration and wrangling pipeline working for a large number of data sources. This work proposes a data integration system that transforms fast-evolving raw data sources to user desired tables. Based on a set of pre-trained models, a user only needs to specify the schema of the outcome feature vector as well as a few examples of rows, the system will automatically generate the outcome table from the raw data sources. The training process is automatically injected with provisioned schema evolution so that the model is resistant to data source changes. Our experiments show that the proposed approach is particularly effective for the integration of data with fast evolving schemas.},
  note      = {Publisher Copyright: {\textcopyright} 2020, Springer Nature Switzerland AG.; 4th International Workshop on Software Foundations for Data Interoperability, SFDI 2020 and 2nd International Workshop on Large Scale Graph Data Analytics, LSGDA 2020, held in Conjunction with VLDB 2020 ; Conference date: 04-09-2020 Through 04-09-2020},
}