@comment{
  Conference paper in the ICDM 2004 proceedings. The proceedings title is
  stored once, in booktitle; the note field carries only the conference date
  as given by the original export.
}
@inproceedings{f75eae833d0149cbb400d6507e6d4c7e,
  author    = {Kang, {Dae Ki} and Silvescu, Adrian and Zhang, Jun and Honavar, Vasant},
  title     = {Generation of attribute value taxonomies from data for data-driven construction of accurate and compact classifiers},
  booktitle = {Proceedings - Fourth {IEEE} International Conference on Data Mining, {ICDM} 2004},
  editor    = {Rastogi, R. and Morik, K. and Bramer, M. and Wu, X.},
  year      = {2004},
  pages     = {130--137},
  isbn      = {0769521428},
  language  = {english},
  note      = {Conference date: 01-11-2004 Through 04-11-2004},
  abstract  = {Attribute Value Taxonomies (AVT) have been shown to be useful in constructing compact, robust, and comprehensible classifiers. However, in many application domains, human-designed AVTs are unavailable. We introduce AVT-Learner, an algorithm for automated construction of attribute value taxonomies from data. AVT-Learner uses Hierarchical Agglomerative Clustering (HAC) to cluster attribute values based on the distribution of classes that cooccur with the values. We describe experiments on UCI data sets that compare the performance of AVT-NBL (an AVT-guided Naive Bayes Learner) with that of the standard Naive Bayes Learner (NBL) applied to the original data set. Our results show that the AVTs generated by AVT-Learner are competitive with human-generated AVTs (in cases where such AVTs are available). AVT-NBL using AVTs generated by AVT-Learner achieves classification accuracies that are comparable to or higher than those obtained by NBL; and the resulting classifiers are significantly more compact than those generated by NBL.},
}