% Journal article: Byrne and Jacobs (2021), Astronomy and Computing, vol. 34.
% The note field carries the funding acknowledgement (stored once) and the publisher copyright line.
@article{77842a518e50442eb5f7c7908979c324,
  author    = {Byrne, R. and Jacobs, D.},
  title     = {Development of a high throughput cloud-based data pipeline for 21 cm cosmology},
  journal   = {Astronomy and Computing},
  volume    = {34},
  year      = {2021},
  month     = jan,
  publisher = {Elsevier BV},
  issn      = {2213-1337},
  doi       = {10.1016/j.ascom.2021.100447},
  language  = {English (US)},
  keywords  = {Cloud computing, Cosmology, Data analysis, Reionization},
  abstract  = {We present a case study of a cloud-based computational workflow for processing large astronomical data sets from the Murchison Widefield Array (MWA) cosmology experiment. Cloud computing is well-suited to large-scale, episodic computation because it offers extreme scalability in a pay-for-use model. This facilitates fast turnaround times for testing computationally expensive analysis techniques. We describe how we have used the Amazon Web Services (AWS) cloud platform to efficiently and economically test and implement our data analysis pipeline. We discuss the challenges of working with the AWS spot market, which reduces costs at the expense of longer processing turnaround times, and we explore this tradeoff with a Monte Carlo simulation.},
  note      = {Funding Information: We thank Nichole Barry, Jon Ringuette, and Michael Wilensky for their contributions to the AWS cloud workflow. Computation on AWS was supported in part by the University of Washington student-led Research Computing Club with funding provided by the University of Washington, United States Student Technology Fee Committee. This project was made possible by computing credits from the Amazon/Square Kilometer Array Astrocompute initiative. We thank Lori Clithero, Aaron Bucher, and Sean Smith of AWS and Rob Fatland and Amanda Tan of the University of Washington e-Science Institute for their support and technical guidance. Finally, many thanks to Karen Jacobs for manually scraping AWS Instance Advisor data and Kyle Aitken for the workflow schematic in Fig. 2. This work was directly supported by National Science Foundation, United States grants AST-1613855, 1506024, 1643011, and 1835421. Publisher Copyright: {\textcopyright} 2021 Elsevier B.V.},
}