% Yu and Liu, KDD-2004 conference paper on redundancy-based gene selection.
% The proceedings title is stored once, in booktitle; the note carries only
% the conference dates.
@inproceedings{76b842d7c9d2448886c844b0dc017926,
  author    = {Yu, Lei and Liu, Huan},
  title     = {Redundancy based feature selection for microarray data},
  booktitle = {{KDD-2004} - Proceedings of the Tenth {ACM} {SIGKDD} International Conference on Knowledge Discovery and Data Mining},
  year      = {2004},
  pages     = {737--742},
  publisher = {Association for Computing Machinery},
  doi       = {10.1145/1014052.1014149},
  isbn      = {1581138881},
  language  = {English (US)},
  keywords  = {Feature redundancy, Gene selection, Microarray data},
  abstract  = {In gene expression microarray data analysis, selecting a small number of discriminative genes from thousands of genes is an important problem for accurate classification of diseases or phenotypes. The problem becomes particularly challenging due to the large number of features (genes) and small sample size. Traditional gene selection methods often select the top-ranked genes according to their individual discriminative power without handling the high degree of redundancy among the genes. Latest research shows that removing redundant genes among selected ones can achieve a better representation of the characteristics of the targeted phenotypes and lead to improved classification accuracy. Hence, we study in this paper the relationship between feature relevance and redundancy and propose an efficient method that can effectively remove redundant genes. The efficiency and effectiveness of our method in comparison with representative methods has been demonstrated through an empirical study using public microarray data sets.},
  note      = {Conference date: 22-08-2004 Through 25-08-2004},
}