Validation metrics are key for tracking scientific progress and
bridging the current chasm between artificial intelligence
research and its translation into practice. However, increasing
evidence shows that, particularly in image analysis, metrics are
often chosen inadequately. Although taking into account the
individual strengths, weaknesses and limitations of validation
metrics is a critical prerequisite to making educated choices,
the relevant knowledge is currently scattered and poorly
accessible to individual researchers. Based on a multistage
Delphi process conducted by a multidisciplinary expert
consortium as well as extensive community feedback, the present
work provides a reliable and comprehensive common point of
access to information on pitfalls related to validation metrics
in image analysis. Although focused on biomedical image
analysis, the addressed pitfalls generalize across application
domains and are categorized according to a newly created,
domain-agnostic taxonomy. The work serves to enhance global
comprehension of a key topic in image analysis validation.
%0 Journal Article
%1 Reinke2024-ms
%A Reinke, Annika
%A Tizabi, Minu D
%A Baumgartner, Michael
%A Eisenmann, Matthias
%A Heckmann-Nötzel, Doreen
%A Kavur, A Emre
%A Rädsch, Tim
%A Sudre, Carole H
%A Acion, Laura
%A Antonelli, Michela
%A Arbel, Tal
%A Bakas, Spyridon
%A Benis, Arriel
%A Buettner, Florian
%A Cardoso, M Jorge
%A Cheplygina, Veronika
%A Chen, Jianxu
%A Christodoulou, Evangelia
%A Cimini, Beth A
%A Farahani, Keyvan
%A Ferrer, Luciana
%A Galdran, Adrian
%A van Ginneken, Bram
%A Glocker, Ben
%A Godau, Patrick
%A Hashimoto, Daniel A
%A Hoffman, Michael M
%A Huisman, Merel
%A Isensee, Fabian
%A Jannin, Pierre
%A Kahn, Charles E
%A Kainmueller, Dagmar
%A Kainz, Bernhard
%A Karargyris, Alexandros
%A Kleesiek, Jens
%A Kofler, Florian
%A Kooi, Thijs
%A Kopp-Schneider, Annette
%A Kozubek, Michal
%A Kreshuk, Anna
%A Kurc, Tahsin
%A Landman, Bennett A
%A Litjens, Geert
%A Madani, Amin
%A Maier-Hein, Klaus
%A Martel, Anne L
%A Meijering, Erik
%A Menze, Bjoern
%A Moons, Karel G M
%A Müller, Henning
%A Nichyporuk, Brennan
%A Nickel, Felix
%A Petersen, Jens
%A Rafelski, Susanne M
%A Rajpoot, Nasir
%A Reyes, Mauricio
%A Riegler, Michael A
%A Rieke, Nicola
%A Saez-Rodriguez, Julio
%A Sánchez, Clara I
%A Shetty, Shravya
%A Summers, Ronald M
%A Taha, Abdel A
%A Tiulpin, Aleksei
%A Tsaftaris, Sotirios A
%A Van Calster, Ben
%A Varoquaux, Gaël
%A Yaniv, Ziv R
%A Jäger, Paul F
%A Maier-Hein, Lena
%D 2024
%I Springer Science and Business Media LLC
%J Nat. Methods
%K Zno analysis image pitfalls validation
%N 2
%P 182--194
%R https://doi.org/10.1038/s41592-023-02150-0
%T Understanding metric-related pitfalls in image analysis validation
%V 21
%X Validation metrics are key for tracking scientific progress and
bridging the current chasm between artificial intelligence
research and its translation into practice. However, increasing
evidence shows that, particularly in image analysis, metrics are
often chosen inadequately. Although taking into account the
individual strengths, weaknesses and limitations of validation
metrics is a critical prerequisite to making educated choices,
the relevant knowledge is currently scattered and poorly
accessible to individual researchers. Based on a multistage
Delphi process conducted by a multidisciplinary expert
consortium as well as extensive community feedback, the present
work provides a reliable and comprehensive common point of
access to information on pitfalls related to validation metrics
in image analysis. Although focused on biomedical image
analysis, the addressed pitfalls generalize across application
domains and are categorized according to a newly created,
domain-agnostic taxonomy. The work serves to enhance global
comprehension of a key topic in image analysis validation.
@comment{Reinke et al. (2024), Nature Methods 21(2), pp. 182--194.
  The doi field holds the bare DOI (no resolver prefix) and the journal field
  holds the full journal name; the bibliography style adds the link prefix
  and any abbreviation. The added-at, biburl, interhash, intrahash and
  timestamp fields are export metadata carried over unchanged.}
@article{Reinke2024-ms,
  abstract  = {Validation metrics are key for tracking scientific progress and
bridging the current chasm between artificial intelligence
research and its translation into practice. However, increasing
evidence shows that, particularly in image analysis, metrics are
often chosen inadequately. Although taking into account the
individual strengths, weaknesses and limitations of validation
metrics is a critical prerequisite to making educated choices,
the relevant knowledge is currently scattered and poorly
accessible to individual researchers. Based on a multistage
Delphi process conducted by a multidisciplinary expert
consortium as well as extensive community feedback, the present
work provides a reliable and comprehensive common point of
access to information on pitfalls related to validation metrics
in image analysis. Although focused on biomedical image
analysis, the addressed pitfalls generalize across application
domains and are categorized according to a newly created,
domain-agnostic taxonomy. The work serves to enhance global
comprehension of a key topic in image analysis validation.},
  added-at  = {2025-01-08T12:24:33.000+0100},
  author    = {Reinke, Annika and Tizabi, Minu D and Baumgartner, Michael and Eisenmann, Matthias and Heckmann-N{\"o}tzel, Doreen and Kavur, A Emre and R{\"a}dsch, Tim and Sudre, Carole H and Acion, Laura and Antonelli, Michela and Arbel, Tal and Bakas, Spyridon and Benis, Arriel and Buettner, Florian and Cardoso, M Jorge and Cheplygina, Veronika and Chen, Jianxu and Christodoulou, Evangelia and Cimini, Beth A and Farahani, Keyvan and Ferrer, Luciana and Galdran, Adrian and van Ginneken, Bram and Glocker, Ben and Godau, Patrick and Hashimoto, Daniel A and Hoffman, Michael M and Huisman, Merel and Isensee, Fabian and Jannin, Pierre and Kahn, Charles E and Kainmueller, Dagmar and Kainz, Bernhard and Karargyris, Alexandros and Kleesiek, Jens and Kofler, Florian and Kooi, Thijs and Kopp-Schneider, Annette and Kozubek, Michal and Kreshuk, Anna and Kurc, Tahsin and Landman, Bennett A and Litjens, Geert and Madani, Amin and Maier-Hein, Klaus and Martel, Anne L and Meijering, Erik and Menze, Bjoern and Moons, Karel G M and M{\"u}ller, Henning and Nichyporuk, Brennan and Nickel, Felix and Petersen, Jens and Rafelski, Susanne M and Rajpoot, Nasir and Reyes, Mauricio and Riegler, Michael A and Rieke, Nicola and Saez-Rodriguez, Julio and S{\'a}nchez, Clara I and Shetty, Shravya and Summers, Ronald M and Taha, Abdel A and Tiulpin, Aleksei and Tsaftaris, Sotirios A and Van Calster, Ben and Varoquaux, Ga{\"e}l and Yaniv, Ziv R and J{\"a}ger, Paul F and Maier-Hein, Lena},
  biburl    = {https://puma.scadsai.uni-leipzig.de/bibtex/259637724e36e5d9239fbb539b63412e5/scadsfct},
  copyright = {https://www.springernature.com/gp/researchers/text-and-data-mining},
  doi       = {10.1038/s41592-023-02150-0},
  interhash = {c7796f70ca3c1d87a0a14e41f764cec4},
  intrahash = {59637724e36e5d9239fbb539b63412e5},
  journal   = {Nature Methods},
  keywords  = {Zno analysis image pitfalls validation},
  language  = {en},
  month     = feb,
  number    = {2},
  pages     = {182--194},
  publisher = {Springer Science and Business Media LLC},
  timestamp = {2025-01-29T14:37:55.000+0100},
  title     = {Understanding metric-related pitfalls in image analysis validation},
  volume    = {21},
  year      = {2024}
}