% Conference paper (Khabsa, Treeratpituk, Giles) in the IEEE Big Data 2014 proceedings.
% Conference name/dates are kept in eventtitle/eventdate and the copyright notice in
% its own field, so that they are not printed as a free-text note after the citation.
@inproceedings{78a33f59123c4873abcb60635f9d884f,
author = "Khabsa, Madian and Treeratpituk, Pucktada and Giles, {C. Lee}",
title = "Large scale author name disambiguation in digital libraries",
booktitle = "Proceedings - 2014 {IEEE} International Conference on Big Data, {IEEE} Big Data 2014",
editor = "Lin, Jimmy and Pei, Jian and Hu, {Xiaohua Tony} and Chang, Wo and Nambiar, Raghunath and Aggarwal, Charu and Cercone, Nick and Honavar, Vasant and Huan, Jun and Mobasher, Bamshad and Pyne, Saumyadipta",
publisher = "Institute of Electrical and Electronics Engineers Inc.",
address = "United States",
pages = "41--42",
year = "2014",
doi = "10.1109/BigData.2014.7004487",
language = "English (US)",
eventtitle = "2nd {IEEE} International Conference on Big Data, {IEEE} Big Data 2014",
eventdate = "2014-10-27/2014-10-30",
copyright = "{\textcopyright} 2014 IEEE",
abstract = "Person name disambiguation is essential to distinguish between persons that share the same name where unique identifiers are not present. In many domains this is a common problem including digital libraries where the same name can refer to multiple unique authors. Correctly attributing work and citations requires the digital library's database to be disambiguated. In this work we describe a large scale framework for disambiguating author names efficiently and effectively. The framework uses a density based clustering algorithm with a random forest based distance function to clusters unique authors. Effective use of blocking functions allows the clustering algorithm to be run in parallel. In our experiments we show that the framework disambiguates authors of more than 4 million papers in 24 hours.",
}