@comment{Conference paper from the CBGIST 2001 proceedings. The proceedings title is stored in booktitle only; it is not a named book series, so no series field is used.}
@inproceedings{a3b61f92bc474f109299750bf73a4ec7,
  author    = {Wang, Dake and Wang, Xiangyun and Honavar, Vasant and Dobbs, Drena L.},
  title     = {Data-driven generation of decision trees for motif-based assignment of protein sequences to functional families},
  booktitle = {Proceedings of the Atlantic Symposium on Computational Biology and Genome Information Systems and Technology, CBGIST 2001},
  editor    = {Wu, C. H. and Wang, P. P. and Wang, J. T. L.},
  pages     = {53--58},
  year      = {2001},
  isbn      = {0970789009},
  language  = {English (US)},
  note      = {Proceedings of the Atlantic Symposium on Computational Biology and Genome Information Systems and Technology, CBGIST 2001; Conference date: 15-03-2001 Through 17-03-2001},
  abstract  = {This paper describes an approach to data-driven discovery of sequence motif-based models in the form of decision trees for assigning protein sequences to functional families. Unlike approaches that try to classify protein sequences based on presence of a single motif, this method is able to capture regularities that can be described in terms of presence or absence of arbitrary combinations of motifs. A training set of sequences with known functions is used to automatically construct decision trees that capture regularities that are sufficient to assign the sequences to their respective functional families. The accuracy of the resulting decision tree classifiers is then evaluated on an independent test set. Experimental results using several protein data sets indicate that the proposed approach matches or beats the technique of assigning protein sequences to functional families based on the presence of a single characteristic motif in terms of the accuracy of resulting classification.},
}