% Conference paper from the Sixth ACM SIGKDD conference proceedings (2000).
% The citation key is an opaque hash-like ID; it is kept unchanged so that
% existing citations of this entry keep resolving.
@inproceedings{5ca58e3f491e4f4c867b65db7dca8459,
  author    = {Flake, Gary William and Lawrence, Steve and Giles, C. Lee},
  title     = {Efficient Identification of Web Communities},
  booktitle = {Proceedings of the Sixth {ACM} {SIGKDD} International Conference on Knowledge Discovery and Data Mining},
  editor    = {Ramakrishnan, R. and Stolfo, S. and Bayardo, R. and Parsa, I.},
  pages     = {150--160},
  publisher = {Association for Computing Machinery (ACM)},
  address   = {New York, NY, USA},
  year      = {2000},
  isbn      = {1581132336},
  doi       = {10.1145/347090.347121},
  language  = {English (US)},
  abstract  = {We define a community on the web as a set of sites that have more links (in either direction) to members of the community than to non-members. Members of such a community can be efficiently identified in a maximum flow / minimum cut framework, where the source is composed of known members, and the sink consists of well-known non-members. A focused crawler that crawls to a fixed depth can approximate community membership by augmenting the graph induced by the crawl with links to a virtual sink node. The effectiveness of the approximation algorithm is demonstrated with several crawl results that identify hubs, authorities, web rings, and other link topologies that are useful but not easily categorized. Applications of our approach include focused crawlers and search engines, automatic population of portal categories, and improved filtering.},
  note      = {Proceedings of the Sixth {ACM} {SIGKDD} International Conference on Knowledge Discovery and Data Mining ({KDD}-2000); Conference date: 20-08-2000 through 23-08-2000},
}