Each year vast international resources are wasted on irreproducible research. The scientific community has been slow to adopt standard software engineering practices, despite the increases in high-dimensional data, complexities of workflows, and computational environments. Here we show how scientific software applications can be created in a reproducible manner when simple design goals for reproducibility are met. We describe the implementation of a test server framework and 40 scientific benchmarks, covering numerous applications in Rosetta bio-macromolecular modeling. High performance computing cluster integration allows these benchmarks to run continuously and automatically. Detailed protocol captures are useful for developers and users of Rosetta and other macromolecular modeling tools. The framework and design concepts presented here are valuable for developers and users of any type of scientific software and for the scientific community to create reproducible methods. Specific examples highlight the utility of this framework, and the comprehensive documentation illustrates the ease of adding new tests in a matter of hours.
%0 Journal Article
%1 Koehler_Leman2021-zc
%A Koehler Leman, Julia
%A Lyskov, Sergey
%A Lewis, Steven M
%A Adolf-Bryfogle, Jared
%A Alford, Rebecca F
%A Barlow, Kyle
%A Ben-Aharon, Ziv
%A Farrell, Daniel
%A Fell, Jason
%A Hansen, William A
%A Harmalkar, Ameya
%A Jeliazkov, Jeliazko
%A Kuenze, Georg
%A Krys, Justyna D
%A Ljubetič, Ajasja
%A Loshbaugh, Amanda L
%A Maguire, Jack
%A Moretti, Rocco
%A Mulligan, Vikram Khipple
%A Nance, Morgan L
%A Nguyen, Phuong T
%A Ó Conchúir, Shane
%A Roy Burman, Shourya S
%A Samanta, Rituparna
%A Smith, Shannon T
%A Teets, Frank
%A Tiemann, Johanna K S
%A Watkins, Andrew
%A Woods, Hope
%A Yachnin, Brahm J
%A Bahl, Christopher D
%A Bailey-Kellogg, Chris
%A Baker, David
%A Das, Rhiju
%A DiMaio, Frank
%A Khare, Sagar D
%A Kortemme, Tanja
%A Labonte, Jason W
%A Lindorff-Larsen, Kresten
%A Meiler, Jens
%A Schief, William
%A Schueler-Furman, Ora
%A Siegel, Justin B
%A Stein, Amelie
%A Yarov-Yarovoy, Vladimir
%A Kuhlman, Brian
%A Leaver-Fay, Andrew
%A Gront, Dominik
%A Gray, Jeffrey J
%A Bonneau, Richard
%D 2021
%I Springer Science and Business Media LLC
%J Nat. Commun.
%K topic_lifescience
%N 1
%P 6947
%T Ensuring scientific reproducibility in bio-macromolecular modeling via extensive, automated benchmarks
%V 12
%X Each year vast international resources are wasted on irreproducible research. The scientific community has been slow to adopt standard software engineering practices, despite the increases in high-dimensional data, complexities of workflows, and computational environments. Here we show how scientific software applications can be created in a reproducible manner when simple design goals for reproducibility are met. We describe the implementation of a test server framework and 40 scientific benchmarks, covering numerous applications in Rosetta bio-macromolecular modeling. High performance computing cluster integration allows these benchmarks to run continuously and automatically. Detailed protocol captures are useful for developers and users of Rosetta and other macromolecular modeling tools. The framework and design concepts presented here are valuable for developers and users of any type of scientific software and for the scientific community to create reproducible methods. Specific examples highlight the utility of this framework, and the comprehensive documentation illustrates the ease of adding new tests in a matter of hours.
@comment{Journal article entry. Bibliographic fields come first; the trailing fields (added-at, timestamp, biburl, interhash, intrahash) are bookkeeping metadata that standard styles ignore.}
@article{Koehler_Leman2021-zc,
  author    = {Koehler Leman, Julia and Lyskov, Sergey and Lewis, Steven M and Adolf-Bryfogle, Jared and Alford, Rebecca F and Barlow, Kyle and Ben-Aharon, Ziv and Farrell, Daniel and Fell, Jason and Hansen, William A and Harmalkar, Ameya and Jeliazkov, Jeliazko and Kuenze, Georg and Krys, Justyna D and Ljubeti{\v c}, Ajasja and Loshbaugh, Amanda L and Maguire, Jack and Moretti, Rocco and Mulligan, Vikram Khipple and Nance, Morgan L and Nguyen, Phuong T and {\'O} Conch{\'u}ir, Shane and Roy Burman, Shourya S and Samanta, Rituparna and Smith, Shannon T and Teets, Frank and Tiemann, Johanna K S and Watkins, Andrew and Woods, Hope and Yachnin, Brahm J and Bahl, Christopher D and Bailey-Kellogg, Chris and Baker, David and Das, Rhiju and DiMaio, Frank and Khare, Sagar D and Kortemme, Tanja and Labonte, Jason W and Lindorff-Larsen, Kresten and Meiler, Jens and Schief, William and Schueler-Furman, Ora and Siegel, Justin B and Stein, Amelie and Yarov-Yarovoy, Vladimir and Kuhlman, Brian and Leaver-Fay, Andrew and Gront, Dominik and Gray, Jeffrey J and Bonneau, Richard},
  title     = {Ensuring scientific reproducibility in bio-macromolecular modeling via extensive, automated benchmarks},
  journal   = {Nat. Commun.},
  volume    = {12},
  number    = {1},
  pages     = {6947},
  year      = {2021},
  month     = nov,
  publisher = {Springer Science and Business Media LLC},
  language  = {en},
  copyright = {https://creativecommons.org/licenses/by/4.0},
  keywords  = {topic_lifescience},
  abstract  = {Each year vast international resources are wasted on irreproducible research. The scientific community has been slow to adopt standard software engineering practices, despite the increases in high-dimensional data, complexities of workflows, and computational environments. Here we show how scientific software applications can be created in a reproducible manner when simple design goals for reproducibility are met. We describe the implementation of a test server framework and 40 scientific benchmarks, covering numerous applications in Rosetta bio-macromolecular modeling. High performance computing cluster integration allows these benchmarks to run continuously and automatically. Detailed protocol captures are useful for developers and users of Rosetta and other macromolecular modeling tools. The framework and design concepts presented here are valuable for developers and users of any type of scientific software and for the scientific community to create reproducible methods. Specific examples highlight the utility of this framework, and the comprehensive documentation illustrates the ease of adding new tests in a matter of hours.},
  added-at  = {2024-09-10T11:56:37.000+0200},
  timestamp = {2024-09-10T14:02:01.000+0200},
  biburl    = {https://puma.scadsai.uni-leipzig.de/bibtex/2c4e6d4df562111e3a2c4ff3437e01763/scadsfct},
  interhash = {172d9abf3d46f12ce860ccf4dca5d3e7},
  intrahash = {c4e6d4df562111e3a2c4ff3437e01763}
}