% Conference paper in the JCDL 2014 proceedings (pp. 141--144).
% NOTE(review): `address` holds a country rather than the publisher's city, and
% `note` carries export boilerplate (copyright line, conference dates) -- confirm
% both against the publisher record before relying on them in a bibliography.
@inproceedings{36c979a019c040b4b365446328d4fbe8,
  author    = {Wu, Zhaohui and Huang, Wenyi and Liang, Chen and Giles, {C. Lee}},
  title     = {Crowd-sourcing {Web} knowledge for metadata extraction},
  booktitle = {2014 {IEEE/ACM} Joint Conference on Digital Libraries, {JCDL} 2014},
  series    = {Proceedings of the {ACM/IEEE} Joint Conference on Digital Libraries},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  pages     = {141--144},
  year      = {2014},
  month     = dec,
  day       = {1},
  doi       = {10.1109/JCDL.2014.6970160},
  language  = {English (US)},
  note      = {Publisher Copyright: {\textcopyright} 2014 IEEE.; 2014 14th IEEE/ACM Joint Conference on Digital Libraries, JCDL 2014 ; Conference date: 08-09-2014 Through 12-09-2014},
  abstract  = {We explore a new metadata extraction framework without human annotators with the ground truth harvested from Web. A new training sample is selected based on not only the uncertainty and representativeness in the unlabeled pool, but also on its availability and credibility in Web knowledge bases. We construct a dataset of 4329 books with valid metadata and evaluate our approach using 5 Web book databases as oracles. Empirical results demonstrate its effectiveness and efficiency.},
}