Biological research increasingly depends on analyzing vast amounts of microscopy data. Technologies such as Fiji/ImageJ2 and KNIME support knowledge extraction from biological data by providing a large set of configurable algorithms and an intuitive interface for creating and executing pipelines. The increasing complexity of the required analysis pipelines and the growing amounts of data to be processed create a strong incentive to run existing pipelines on HPC (High Performance Computing) systems. Here, we propose a solution to this challenge by presenting a new HPC integration method for KNIME (Konstanz Information Miner) that uses the UNICORE middleware (Uniform Interface to Computing Resources) and its automated data processing feature. We designed the integration to process large data workloads efficiently on the server side, while on the client side it is seamless and lightweight, increasing complexity for users only minimally. We describe our novel approach and evaluate it using an image processing pipeline that previously could not be executed on an HPC system. The evaluation includes a performance study of the overhead induced by the submission process and of the integrated image processing pipeline running on a large amount of data. This demonstrates how our solution enables scientists to transparently benefit from vast HPC resources without having to migrate existing algorithms and pipelines.
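
To make the submission path concrete, the following is a minimal, hedged sketch of what handing a headless KNIME batch run to an HPC site through the UNICORE REST interface could look like. It is not the integration described in the paper: the gateway URL, site name, credentials, workflow path, and resource values are illustrative assumptions, and the job-description keys follow the generic UNICORE job format and should be checked against the UNICORE version actually deployed.

# A minimal sketch, not the authors' implementation: submitting a headless
# KNIME batch run to an HPC site via the UNICORE REST API. Endpoint, site
# name, credentials, paths, and resource values are illustrative assumptions.
import requests

BASE = "https://unicore-gateway.example.org:8080/HPC-SITE/rest/core"  # assumed endpoint
AUTH = ("demo-user", "demo-password")  # assumed basic-auth credentials

# Job description: run KNIME in batch mode on a workflow that is assumed to be
# already available on the cluster's shared file system.
job = {
    "Executable": "knime",
    "Arguments": [
        "-nosplash",
        "-application", "org.knime.product.KNIME_BATCH_APPLICATION",
        "-workflowDir=/scratch/demo-user/image-pipeline",  # assumed workflow location
        "-reset",
        "-nosave",
    ],
    "Resources": {"Nodes": "1", "Runtime": "3600"},  # assumed resource request
}

# Submission is a single POST; the server responds with the URL of the new job
# resource, which can then be polled for status and output staging.
resp = requests.post(f"{BASE}/jobs", json=job, auth=AUTH, verify=True)
resp.raise_for_status()
print("Submitted, job resource:", resp.headers.get("Location"))
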
@inproceedings{10.1007/978-3-319-58943-5_39,
address = {Cham},
author = {Grunzke, Richard and Jug, Florian and Schuller, Bernd and J{\"a}kel, Ren{\'e} and Myers, Gene and Nagel, Wolfgang E.},
booktitle = {Euro-Par 2016: Parallel Processing Workshops},
editor = {Desprez, Fr{\'e}d{\'e}ric and Dutot, Pierre-Fran{\c{c}}ois and Kaklamanis, Christos and Marchal, Loris and Molitorisz, Korbinian and Ricci, Laura and Scarano, Vittorio and Vega-Rodr{\'i}guez, Miguel A. and Varbanescu, Ana Lucia and Hunold, Sascha and Scott, Stephen L. and Lankes, Stefan and Weidendorfer, Josef},
isbn = {978-3-319-58943-5},
pages = {480--491},
publisher = {Springer International Publishing},
title = {Seamless HPC Integration of Data-Intensive KNIME Workflows via UNICORE},
year = 2017
}