% Conference paper: Morstatter, Pfeffer, Mayer and Liu, "Text, topics, and
% turkers", in the proceedings of the 26th ACM Conference on Hypertext and
% Social Media (HT 2015), pages 123--131.
%
% Maintenance notes:
%  - The citation key is an opaque exporter-generated identifier; keep it
%    stable so existing citations continue to resolve.
%  - The funding sentence in "note" reads "Office of Naval Research". The
%    exported text had the "ffi" ligature garbled into a raw underscore, which
%    is also a LaTeX special character outside math mode.
%  - No "series" field is given on purpose: the exported value was an exact
%    copy of "booktitle" and would print the proceedings title twice under
%    styles that show both fields.
%  - month/day hold the publication date (24 Aug 2015); the conference dates
%    themselves are recorded in "note".
@inproceedings{e85c37b25aa649e28ef4d6ebfab61b26,
  author    = {Morstatter, Fred and Pfeffer, J{\"u}rgen and Mayer, Katja and Liu, Huan},
  title     = {Text, topics, and turkers: A consensus measure for statistical topics},
  booktitle = {HT 2015 - Proceedings of the 26th ACM Conference on Hypertext and Social Media},
  publisher = {Association for Computing Machinery, Inc},
  year      = {2015},
  month     = aug,
  day       = {24},
  pages     = {123--131},
  doi       = {10.1145/2700171.2791028},
  language  = {English (US)},
  keywords  = {Text analysis, Text mining, Topic analysis, Topic modeling},
  abstract  = {Topic modeling is an important tool in social media analysis, allowing researchers to quickly understand large text corpora by investigating the topics underlying them. One of the fundamental problems of topic models lies in how to assess the quality of the topics from the perspective of human interpretability. How well can humans understand the meaning of topics generated by statistical topic modeling algorithms? In this work we advance the study of this question by introducing Topic Consensus: a new measure that calculates the quality of a topic through investigating its consensus with some known topics underlying the data. We view the quality of the topics from three perspectives: 1) topic interpretability, 2) how documents relate to the underlying topics, and 3) how interpretable the topics are when the corpus has an underlying categorization. We provide insights into how well the results of Mechanical Turk match automated methods for calculating topic quality. The probability distribution of the words in the topic best fit the Topic Coherence measure, in terms of both correlation as well as finding the best topics.},
  note      = {Funding Information: This work is sponsored, in part, by Office of Naval Research grant N000141410095 as well as LexisNexis and HPCC Systems.; 26th ACM Conference on Hypertext and Social Media, HT 2015 ; Conference date: 01-09-2015 Through 04-09-2015},
}