% Dash, Tan and Liu (2001): "Efficient yet accurate clustering", ICDM'01, pp. 99--106.
% Percent signs in the abstract are escaped so styles that print it do not hit a TeX comment.
@inproceedings{e1948ace04d946a4959167af46bc0990,
title = "Efficient yet accurate clustering",
abstract = "In this paper we show that most hierarchical agglomerative clustering (HAC) algorithms follow a 90-10 rule where roughly 90\% iterations from the beginning merge cluster pairs with dissimilarity less than 10\% of the maximum dissimilarity. We propose two algorithms - 2-phase and nested - based on partially overlapping partitioning (POP). To handle high-dimensional data efficiently, we propose a tree structure particularly suitable for POP. Extensive experiments show that the proposed algorithms reduce the time and memory requirement of existing HAC algorithms significantly without compromising in accuracy.",
author = "Dash, Manoranjan and Tan, Kian Lee and Liu, Huan",
year = "2001",
language = "English (US)",
isbn = "0769511198",
series = "Proceedings - {IEEE} International Conference on Data Mining, {ICDM}",
pages = "99--106",
booktitle = "Proceedings - 2001 {IEEE} International Conference on Data Mining, {ICDM}'01",
note = "1st IEEE International Conference on Data Mining, ICDM'01 ; Conference date: 29-11-2001 Through 02-12-2001",
}