In recent years, High Performance Computing (HPC) has become increasingly important for many industries and research areas besides ‘classic’ applications. As new domains emerge, applications, implementations and frameworks become more diverse. Generic performance analysis tools often cannot keep up with the development speed of new approaches for workload distribution, offloading, and communication. Some of the new approaches employ their own performance monitoring, which is difficult to integrate into generic tools designed for traditional HPC. Performance measurements often result in a collection of separate performance logs that logically form a unit but cannot intuitively be investigated together with established performance tools. In this paper, we present a tool library that can be used to combine separate performance logs and separately recorded metrics into one single performance log, enabling investigation of such performance data as a unit. Use cases from Big Data processing and AI show the broad applicability of our approach.
%0 Conference Paper
%1 2247db283d6f45fba7c99cd5cec7d80e
%A Frenzel, Jan
%A Kulkarni, Apurv Deepak
%A Döbel, Sebastian
%A Wesarg, Bert
%A Knespel, Maximilian
%A Brunst, Holger
%B Proceedings of 2023 SC Workshops of the International Conference on High Performance Computing, Network, Storage, and Analysis, SC Workshops 2023
%C United States of America
%D 2023
%I Association for Computing Machinery (ACM), New York
%K area_architectures FIS_scads domain language, logs, performance performance, software specific trace
%P 1403–1411
%R 10.1145/3624062.3624209
%T FROOM: A Framework of Operators for OTF2 Modification
%X In recent years, High Performance Computing (HPC) has become increasingly important for many industries and research areas besides ‘classic’ applications. As new domains emerge, applications, implementations and frameworks become more diverse. Generic performance analysis tools often cannot keep up with the development speed of new approaches for workload distribution, offloading, and communication. Some of the new approaches employ their own performance monitoring, which is difficult to integrate into generic tools designed for traditional HPC. Performance measurements often result in a collection of separate performance logs that logically form a unit but cannot intuitively be investigated together with established performance tools. In this paper, we present a tool library that can be used to combine separate performance logs and separately recorded metrics into one single performance log, enabling investigation of such performance data as a unit. Use cases from Big Data processing and AI show the broad applicability of our approach.
@comment{FROOM paper from the SC Workshops 2023 proceedings. Page range uses the BibTeX double-hyphen form, acronyms in the title are brace-protected against style recasing, and address holds the publisher city rather than a country.}
@inproceedings{2247db283d6f45fba7c99cd5cec7d80e,
  abstract  = {In recent years, High Performance Computing (HPC) has become increasingly important for many industries and research areas besides {\textquoteleft}classic{\textquoteright} applications. As new domains emerge, applications, implementations and frameworks become more diverse. Generic performance analysis tools often cannot keep up with the development speed of new approaches for workload distribution, offloading, and communication. Some of the new approaches employ their own performance monitoring, which is difficult to integrate into generic tools designed for traditional HPC. Performance measurements often result in a collection of separate performance logs that logically form a unit but cannot intuitively be investigated together with established performance tools. In this paper, we present a tool library that can be used to combine separate performance logs and separately recorded metrics into one single performance log, enabling investigation of such performance data as a unit. Use cases from Big Data processing and AI show the broad applicability of our approach.},
  added-at  = {2024-11-28T16:27:18.000+0100},
  address   = {New York, NY, USA},
  author    = {Frenzel, Jan and Kulkarni, {Apurv Deepak} and D{\"o}bel, Sebastian and Wesarg, Bert and Knespel, Maximilian and Brunst, Holger},
  biburl    = {https://puma.scadsai.uni-leipzig.de/bibtex/2abc740d760815791598413cfcffbba84/scadsfct},
  booktitle = {Proceedings of 2023 SC Workshops of the International Conference on High Performance Computing, Network, Storage, and Analysis, SC Workshops 2023},
  day       = 12,
  doi       = {10.1145/3624062.3624209},
  interhash = {239260dbff5b6628b149cf7186fc6827},
  intrahash = {abc740d760815791598413cfcffbba84},
  keywords  = {area_architectures FIS_scads domain language, logs, performance performance, software specific trace},
  language  = {English},
  month     = nov,
  note      = {Publisher Copyright: {\textcopyright} 2023 Owner/Author.},
  pages     = {1403--1411},
  publisher = {Association for Computing Machinery},
  series    = {SC: The International Conference for High Performance Computing, Networking, Storage, and Analysis},
  timestamp = {2024-12-06T14:36:50.000+0100},
  title     = {{FROOM}: A Framework of Operators for {OTF2} Modification},
  year      = 2023
}