% Conference paper (WWW 2004 Alternate Track, Papers and Posters).
% The proceedings title lives only in `booktitle`; `note` carries just the conference dates.
@inproceedings{6e2dbb30714f4fdf9eb5adbf24ae5200,
  author    = {Davulcu, Hasan and Vadrevu, Srinivas and Nagarajan, Saravanakumar},
  title     = {Onto miner: Bootstrapping ontologies from overlapping domain specific web sites},
  booktitle = {Proceedings of the 13th International World Wide Web Conference on Alternate Track, Papers and Posters, {WWW} Alt. 2004},
  publisher = {Association for Computing Machinery, Inc},
  pages     = {500--501},
  year      = {2004},
  month     = may,
  day       = {19},
  doi       = {10.1145/1013367.1013545},
  language  = {English (US)},
  keywords  = {Data mining, Ontology, Semantic web, Web mining},
  abstract  = {In this paper, we present automated techniques for bootstrapping and populating specialized domain ontologies by organizing and mining a set of relevant overlapping Web sites provided by the user. We develop algorithms that detect and utilize HTML regularities in the Web documents to turn them into hierarchical semantic structures encoded as XML. Next, we present tree-mining algorithms that identify key domain concepts and their taxonomical relationships. We also extract semi-structured concept instances annotated with their labels whenever they are available. Experimental evaluation for the News, Travel, and Shopping domains indicates that our algorithms can bootstrap and populate domain specific ontologies with high precision and recall.},
  note      = {Conference date: 19--21 May 2004},
}