% Blanco, Bronzi, Crescenzi, Merialdo, Papotti (2010):
% "Exploiting information redundancy to wring out structured data from the web",
% a two-page paper (pp. 1063--1064) in the WWW '10 proceedings.
%
% The citation key looks like an auto-generated export identifier; it is kept
% as-is so that existing \cite commands continue to resolve.
% The series field carries only the short proceedings label, so styles that
% print both booktitle and series do not repeat the full proceedings title.
% NOTE(review): no publisher field is present; the DOI prefix 10.1145 suggests
% ACM -- confirm before adding.
@inproceedings{a317292952824fd8a3836fb3a5bb9084,
  author    = {Blanco, Lorenzo and Bronzi, Mirko and Crescenzi, Valter and Merialdo, Paolo and Papotti, Paolo},
  title     = {Exploiting information redundancy to wring out structured data from the web},
  booktitle = {Proceedings of the 19th International Conference on World Wide Web},
  series    = {WWW '10},
  pages     = {1063--1064},
  year      = {2010},
  isbn      = {9781605587998},
  doi       = {10.1145/1772690.1772805},
  language  = {american},
  keywords  = {data extraction, data integration, wrapper generation},
  abstract  = {A large number of web sites publish pages containing structured information about recognizable concepts, but these data are only partially used by current applications. Although such information is spread across a myriad of sources, the web scale implies a relevant redundancy. We present a domain independent system that exploits the redundancy of information to automatically extract and integrate data from the Web. Our solution concentrates on sources that provide structured data about multiple instances from the same conceptual domain, e.g. financial data, product information. Our proposal is based on an original approach that exploits the mutual dependency between the data extraction and the data integration tasks. Experiments confirmed the quality and the feasibility of the approach.},
  note      = {19th International World Wide Web Conference (WWW 2010), 26--30 April 2010},
}