Large-scale HPC simulations of plasma dynamics in fusion devices require efficient parallel I/O to avoid slowing down the simulation and to enable the post-processing of critical information. Such complex simulations lacking parallel I/O capabilities may encounter performance bottlenecks, hindering their effectiveness in data-intensive computing tasks. In this work, we focus on introducing and enhancing the efficiency of parallel I/O operations in Particle-in-Cell Monte Carlo simulations. We first evaluate the scalability of BIT1, a massively-parallel electrostatic PIC MC code, determining its initial write throughput capabilities and performance bottlenecks using an HPC I/O performance monitoring tool, Darshan. We design and develop an adaptor to the openPMD I/O interface that allows us to stream PIC particle and field information to I/O using the BP4 backend, aggressively optimized for I/O efficiency, including the highly efficient ADIOS2 interface. Next, we explore advanced optimization techniques such as data compression, aggregation, and Lustre file striping, achieving write throughput improvements while enhancing data storage efficiency. Finally, we analyze the enhanced high-throughput parallel I/O and storage capabilities achieved through the integration of openPMD with rapid metadata extraction in BP4 format. Our study demonstrates that the integration of openPMD and advanced I/O optimizations significantly enhances BIT1's I/O performance and storage capabilities, successfully introducing high throughput parallel I/O and surpassing the capabilities of traditional file I/O.
%0 Conference Paper
%1 10740837
%A Williams, Jeremy J.
%A Medeiros, Daniel
%A Costea, Stefan
%A Tskhakaya, David
%A Poeschel, Franz
%A Widera, René
%A Huebl, Axel
%A Klasky, Scott
%A Podhorszki, Norbert
%A Kos, Leon
%A Podolnik, Ales
%A Hromadka, Jakub
%A Narwal, Tapish
%A Steiniger, Klaus
%A Bussmann, Michael
%A Laure, Erwin
%A Markidis, Stefano
%B 2024 IEEE International Conference on Cluster Computing Workshops (CLUSTER Workshops)
%D 2024
%K Zno imported
%P 86-95
%R 10.1109/CLUSTERWorkshops61563.2024.00022
%T Enabling High-Throughput Parallel I/O in Particle-in-Cell Monte Carlo Simulations with openPMD and Darshan I/O Monitoring
%X Large-scale HPC simulations of plasma dynamics in fusion devices require efficient parallel I/O to avoid slowing down the simulation and to enable the post-processing of critical information. Such complex simulations lacking parallel I/O capabilities may encounter performance bottlenecks, hindering their effectiveness in data-intensive computing tasks. In this work, we focus on introducing and enhancing the efficiency of parallel I/O operations in Particle-in-Cell Monte Carlo simulations. We first evaluate the scalability of BIT1, a massively-parallel electrostatic PIC MC code, determining its initial write throughput capabilities and performance bottlenecks using an HPC I/O performance monitoring tool, Darshan. We design and develop an adaptor to the openPMD I/O interface that allows us to stream PIC particle and field information to I/O using the BP4 backend, aggressively optimized for I/O efficiency, including the highly efficient ADIOS2 interface. Next, we explore advanced optimization techniques such as data compression, aggregation, and Lustre file striping, achieving write throughput improvements while enhancing data storage efficiency. Finally, we analyze the enhanced high-throughput parallel I/O and storage capabilities achieved through the integration of openPMD with rapid metadata extraction in BP4 format. Our study demonstrates that the integration of openPMD and advanced I/O optimizations significantly enhances BIT1's I/O performance and storage capabilities, successfully introducing high throughput parallel I/O and surpassing the capabilities of traditional file I/O.
@comment{ Reviewed: fixed month macro (was {Sep.}), en-dash page range (was 86-95),
  title artifact "High- Throughput", braced acronyms/proper nouns against
  sentence-casing styles, de-hyphenated "simu-lations" in the abstract, and
  encoded "René" as a BibTeX special character for classic-BibTeX sorting.
  Citation key and field names unchanged so existing \cite{10740837} still works. }
@inproceedings{10740837,
  abstract  = {Large-scale HPC simulations of plasma dynamics in fusion devices require efficient parallel I/O to avoid slowing down the simulation and to enable the post-processing of critical information. Such complex simulations lacking parallel I/O capabilities may encounter performance bottlenecks, hindering their effectiveness in data-intensive computing tasks. In this work, we focus on introducing and enhancing the efficiency of parallel I/O operations in Particle-in-Cell Monte Carlo simulations. We first evaluate the scalability of BIT1, a massively-parallel electrostatic PIC MC code, determining its initial write throughput capabilities and performance bottlenecks using an HPC I/O performance monitoring tool, Darshan. We design and develop an adaptor to the openPMD I/O interface that allows us to stream PIC particle and field information to I/O using the BP4 backend, aggressively optimized for I/O efficiency, including the highly efficient ADIOS2 interface. Next, we explore advanced optimization techniques such as data compression, aggregation, and Lustre file striping, achieving write throughput improvements while enhancing data storage efficiency. Finally, we analyze the enhanced high-throughput parallel I/O and storage capabilities achieved through the integration of openPMD with rapid metadata extraction in BP4 format. Our study demonstrates that the integration of openPMD and advanced I/O optimizations significantly enhances BIT1's I/O performance and storage capabilities, successfully introducing high throughput parallel I/O and surpassing the capabilities of traditional file I/O.},
  added-at  = {2025-01-15T10:48:59.000+0100},
  author    = {Williams, Jeremy J. and Medeiros, Daniel and Costea, Stefan and Tskhakaya, David and Poeschel, Franz and Widera, Ren{\'e} and Huebl, Axel and Klasky, Scott and Podhorszki, Norbert and Kos, Leon and Podolnik, Ales and Hromadka, Jakub and Narwal, Tapish and Steiniger, Klaus and Bussmann, Michael and Laure, Erwin and Markidis, Stefano},
  biburl    = {https://puma.scadsai.uni-leipzig.de/bibtex/20321d0e13b1a8fa14a3e777e410e18c0/scadsfct},
  booktitle = {2024 {IEEE} International Conference on Cluster Computing Workshops ({CLUSTER} Workshops)},
  doi       = {10.1109/CLUSTERWorkshops61563.2024.00022},
  interhash = {34a6fc46de670bef9c45de63cdf5110b},
  intrahash = {0321d0e13b1a8fa14a3e777e410e18c0},
  keywords  = {Zno imported},
  month     = sep,
  pages     = {86--95},
  timestamp = {2025-01-29T12:19:47.000+0100},
  title     = {Enabling High-Throughput Parallel {I/O} in {Particle-in-Cell} {Monte Carlo} Simulations with {openPMD} and {Darshan} {I/O} Monitoring},
  year      = {2024}
}