Superbubbles are a class of induced subgraphs in digraphs that play an essential role in assembly algorithms for high-throughput sequencing data. They are connected with the remainder of the host digraph by a single entrance and a single exit vertex. Linear-time algorithms for the enumeration superbubbles recently have become available. Current approaches require the decomposition of the input digraph into strongly-connected components, which are then analyzed separately. In principle, a single depth-first search could be used, provided one can guarantee that the root of the depth-first search (DFS)-tree is not itself located in the interior or the exit point of a superbubble. Here, we describe a linear-time algorithm to determine suitable roots for a DFS-forest that is guaranteed to identify the superbubbles in a digraph correctly. In addition to the advantages of a more straightforward implementation, we observe a nearly three-fold gain in performance on real-world datasets. We present a reference implementation of the new algorithm that accepts many commonly-used input formats for digraphs. It is available as open source from github.
%0 Journal Article
%1 Gartner2019-qi
%A Gärtner, Fabian
%A Stadler, Peter F
%D 2019
%I MDPI AG
%J Algorithms
%K
%N 4
%P 81
%T Direct superbubble detection
%V 12
%X Superbubbles are a class of induced subgraphs in digraphs that play an essential role in assembly algorithms for high-throughput sequencing data. They are connected with the remainder of the host digraph by a single entrance and a single exit vertex. Linear-time algorithms for the enumeration superbubbles recently have become available. Current approaches require the decomposition of the input digraph into strongly-connected components, which are then analyzed separately. In principle, a single depth-first search could be used, provided one can guarantee that the root of the depth-first search (DFS)-tree is not itself located in the interior or the exit point of a superbubble. Here, we describe a linear-time algorithm to determine suitable roots for a DFS-forest that is guaranteed to identify the superbubbles in a digraph correctly. In addition to the advantages of a more straightforward implementation, we observe a nearly three-fold gain in performance on real-world datasets. We present a reference implementation of the new algorithm that accepts many commonly-used input formats for digraphs. It is available as open source from github.
@article{Gartner2019-qi,
abstract = {Superbubbles are a class of induced subgraphs in digraphs that play an essential role in assembly algorithms for high-throughput sequencing data. They are connected with the remainder of the host digraph by a single entrance and a single exit vertex. Linear-time algorithms for the enumeration superbubbles recently have become available. Current approaches require the decomposition of the input digraph into strongly-connected components, which are then analyzed separately. In principle, a single depth-first search could be used, provided one can guarantee that the root of the depth-first search (DFS)-tree is not itself located in the interior or the exit point of a superbubble. Here, we describe a linear-time algorithm to determine suitable roots for a DFS-forest that is guaranteed to identify the superbubbles in a digraph correctly. In addition to the advantages of a more straightforward implementation, we observe a nearly three-fold gain in performance on real-world datasets. We present a reference implementation of the new algorithm that accepts many commonly-used input formats for digraphs. It is available as open source from github.},
added-at = {2024-09-10T11:56:37.000+0200},
author = {G{\"a}rtner, Fabian and Stadler, Peter F},
biburl = {https://puma.scadsai.uni-leipzig.de/bibtex/2bb1153b0854ecaed83341c0f778ce930/scadsfct},
interhash = {7507656fa580002e3299dd0dbd3a7e58},
intrahash = {bb1153b0854ecaed83341c0f778ce930},
journal = {Algorithms},
keywords = {},
language = {en},
month = apr,
number = 4,
pages = 81,
publisher = {MDPI AG},
timestamp = {2024-09-10T15:15:57.000+0200},
title = {Direct superbubble detection},
volume = 12,
year = 2019
}