Given a large graph, graph sampling determines a subgraph with similar characteristics for certain metrics of the original graph. The samples are much smaller thereby accelerating and simplifying the analysis and visualization of large graphs. We focus on the implementation of distributed graph sampling for Big Data frameworks and in-memory dataflow systems such as Apache Spark or Apache Flink and evaluate the scalability of the new implementations. The presented methods will be open source and be integrated into Gradoop, a system for distributed graph analytics.
%0 Conference Paper
%1 Gomez2021-tv
%A Gomez, Kevin
%A Täschner, Matthias
%A Rostami, M. Ali
%A Rost, Christopher
%A Rahm, Erhard
%D 2021
%I Gesellschaft für Informatik
%C Bonn
%K
%T Graph sampling with distributed in-memory dataflow systems
%X Given a large graph, graph sampling determines a subgraph with similar characteristics for certain metrics of the original graph. The samples are much smaller thereby accelerating and simplifying the analysis and visualization of large graphs. We focus on the implementation of distributed graph sampling for Big Data frameworks and in-memory dataflow systems such as Apache Spark or Apache Flink and evaluate the scalability of the new implementations. The presented methods will be open source and be integrated into Gradoop, a system for distributed graph analytics.
% Conference-paper record for "Graph sampling with distributed in-memory
% dataflow systems" (Gomez et al., 2021). The added-at, biburl, interhash,
% intrahash and timestamp fields are non-standard bookkeeping fields that
% standard bibliography styles ignore.
% NOTE(review): inproceedings entries require a booktitle field, which this
% record lacks; add the proceedings title (and pages, if known) after
% confirming them against the publisher's listing. TODO confirm.
@inproceedings{Gomez2021-tv,
  abstract  = {Given a large graph, graph sampling determines a subgraph with similar characteristics for certain metrics of the original graph. The samples are much smaller thereby accelerating and simplifying the analysis and visualization of large graphs. We focus on the implementation of distributed graph sampling for Big Data frameworks and in-memory dataflow systems such as Apache Spark or Apache Flink and evaluate the scalability of the new implementations. The presented methods will be open source and be integrated into Gradoop, a system for distributed graph analytics.},
  added-at  = {2024-09-10T11:56:37.000+0200},
  address   = {Bonn},
  author    = {Gomez, Kevin and T{\"a}schner, Matthias and Rostami, M. Ali and Rost, Christopher and Rahm, Erhard},
  biburl    = {https://puma.scadsai.uni-leipzig.de/bibtex/2baedfe322fbaad476d08ba49158501d9/scadsfct},
  interhash = {2b32abee971a3690ec2ef9de0d4f1c01},
  intrahash = {baedfe322fbaad476d08ba49158501d9},
  keywords  = {},
  publisher = {Gesellschaft f{\"u}r Informatik},
  timestamp = {2024-09-10T15:15:57.000+0200},
  title     = {Graph sampling with distributed in-memory dataflow systems},
  year      = {2021}
}