This paper assesses and reports the experience of ten teams working to port, validate, and benchmark several High Performance Computing applications on a novel GPU-accelerated Arm testbed system. The testbed consists of eight NVIDIA Arm HPC Developer Kit systems, each one equipped with a server-class Arm CPU from Ampere Computing and two data center GPUs from NVIDIA Corp. The systems are connected together using InfiniBand interconnect. The selected applications and mini-apps are written using several programming languages and use multiple accelerator-based programming models for GPUs such as CUDA, OpenACC, and OpenMP offloading. Working on application porting requires a robust and easy-to-access programming environment, including a variety of compilers and optimized scientific libraries. The goal of this work is to evaluate platform readiness and assess the effort required from developers to deploy well-established scientific workloads on current and future generation Arm-based GPU-accelerated HPC systems. The reported case studies demonstrate that the current level of maturity and diversity of software and tools is already adequate for large-scale production deployments.
%0 Conference Paper
%1 10.1145/3581576.3581621
%A Elwasif, Wael
%A Godoy, William
%A Hagerty, Nick
%A Harris, J. Austin
%A Hernandez, Oscar
%A Joo, Balint
%A Kent, Paul
%A Lebrun-Grandie, Damien
%A Maccarthy, Elijah
%A Melesse Vergara, Veronica
%A Messer, Bronson
%A Miller, Ross
%A Oral, Sarp
%A Bastrakov, Sergei
%A Bussmann, Michael
%A Debus, Alexander
%A Steiniger, Klaus
%A Stephan, Jan
%A Widera, Rene
%A Bryngelson, Spencer
%A Le Berre, Henry
%A Radhakrishnan, Anand
%A Young, Jeffrey
%A Chandrasekaran, Sunita
%A Ciorba, Florina
%A Simsek, Osman
%A Clark, Kate
%A Spiga, Filippo
%A Hammond, Jeff
%A Stone, John
%A Hardy, David
%A Keller, Sebastian
%A Piccinali, Jean-Guillaume
%A Trott, Christian
%B Proceedings of the HPC Asia 2023 Workshops
%C New York, NY, USA
%D 2023
%I Association for Computing Machinery
%K Zno imported
%P 35–49
%R 10.1145/3581576.3581621
%T Application Experiences on a GPU-Accelerated Arm-based HPC Testbed
%U https://doi.org/10.1145/3581576.3581621
%X This paper assesses and reports the experience of ten teams working to port, validate, and benchmark several High Performance Computing applications on a novel GPU-accelerated Arm testbed system. The testbed consists of eight NVIDIA Arm HPC Developer Kit systems, each one equipped with a server-class Arm CPU from Ampere Computing and two data center GPUs from NVIDIA Corp. The systems are connected together using InfiniBand interconnect. The selected applications and mini-apps are written using several programming languages and use multiple accelerator-based programming models for GPUs such as CUDA, OpenACC, and OpenMP offloading. Working on application porting requires a robust and easy-to-access programming environment, including a variety of compilers and optimized scientific libraries. The goal of this work is to evaluate platform readiness and assess the effort required from developers to deploy well-established scientific workloads on current and future generation Arm-based GPU-accelerated HPC systems. The reported case studies demonstrate that the current level of maturity and diversity of software and tools is already adequate for large-scale production deployments.
%@ 9781450399890
@comment{Conference paper from the HPC Asia 2023 Workshops proceedings. `address` is the publisher's city; `venue` is where the conference took place. The added-at, biburl, interhash, intrahash and timestamp fields appear to be bookkeeping data from the exporting service; standard bibliography styles ignore unknown fields.}
@inproceedings{10.1145/3581576.3581621,
  author    = {Elwasif, Wael and Godoy, William and Hagerty, Nick and Harris, J. Austin and Hernandez, Oscar and Joo, Balint and Kent, Paul and Lebrun-Grandie, Damien and Maccarthy, Elijah and Melesse Vergara, Veronica and Messer, Bronson and Miller, Ross and Oral, Sarp and Bastrakov, Sergei and Bussmann, Michael and Debus, Alexander and Steiniger, Klaus and Stephan, Jan and Widera, Rene and Bryngelson, Spencer and Le Berre, Henry and Radhakrishnan, Anand and Young, Jeffrey and Chandrasekaran, Sunita and Ciorba, Florina and Simsek, Osman and Clark, Kate and Spiga, Filippo and Hammond, Jeff and Stone, John and Hardy, David and Keller, Sebastian and Piccinali, Jean-Guillaume and Trott, Christian},
  title     = {Application Experiences on a {GPU}-Accelerated {Arm}-based {HPC} Testbed},
  booktitle = {Proceedings of the {HPC} {Asia} 2023 Workshops},
  series    = {HPCAsia '23 Workshops},
  pages     = {35--49},
  numpages  = {15},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  venue     = {Raffles Blvd, Singapore},
  year      = {2023},
  isbn      = {9781450399890},
  doi       = {10.1145/3581576.3581621},
  url       = {https://doi.org/10.1145/3581576.3581621},
  keywords  = {Zno imported},
  abstract  = {This paper assesses and reports the experience of ten teams working to port, validate, and benchmark several High Performance Computing applications on a novel GPU-accelerated Arm testbed system. The testbed consists of eight NVIDIA Arm HPC Developer Kit systems, each one equipped with a server-class Arm CPU from Ampere Computing and two data center GPUs from NVIDIA Corp. The systems are connected together using InfiniBand interconnect. The selected applications and mini-apps are written using several programming languages and use multiple accelerator-based programming models for GPUs such as CUDA, OpenACC, and OpenMP offloading. Working on application porting requires a robust and easy-to-access programming environment, including a variety of compilers and optimized scientific libraries. The goal of this work is to evaluate platform readiness and assess the effort required from developers to deploy well-established scientific workloads on current and future generation Arm-based GPU-accelerated HPC systems. The reported case studies demonstrate that the current level of maturity and diversity of software and tools is already adequate for large-scale production deployments.},
  added-at  = {2025-01-15T10:42:22.000+0100},
  timestamp = {2025-01-29T12:30:10.000+0100},
  biburl    = {https://puma.scadsai.uni-leipzig.de/bibtex/25a33ece71192f8c214c69dce305acbac/scadsfct},
  interhash = {c837f8bb3a9179617bbcb20a6a41bc66},
  intrahash = {5a33ece71192f8c214c69dce305acbac},
}