Long non-coding RNAs (lncRNAs) are widely recognized as important regulators of gene expression. Their molecular functions range from miRNA sponging to chromatin-associated mechanisms, leading to effects in disease progression and establishing them as diagnostic and therapeutic targets. Still, only a few representatives of this diverse class of RNAs are well studied, while the vast majority is poorly described beyond the existence of their transcripts. In this review we survey common in silico approaches for lncRNA annotation. We focus on the well-established sets of features used for classification and discuss their specific advantages and weaknesses. While the available tools perform very well for the task of distinguishing coding sequence from other RNAs, we find that current methods are not well suited to distinguish lncRNAs or parts thereof from other non-protein-coding input sequences. We conclude that the distinction of lncRNAs from intronic sequences and untranslated regions of coding mRNAs remains a pressing research gap.
%0 Journal Article
%1 Klapproth2021-tj
%A Klapproth, Christopher
%A Sen, Rituparno
%A Stadler, Peter F
%A Findeiß, Sven
%A Fallmann, Jörg
%D 2021
%I MDPI AG
%J Noncoding RNA
%K classification coding extraction; feature learning lncRNA; machine problems; sequence;
%N 4
%P 77
%T Common features in lncRNA annotation and classification: A survey
%V 7
%X Long non-coding RNAs (lncRNAs) are widely recognized as important regulators of gene expression. Their molecular functions range from miRNA sponging to chromatin-associated mechanisms, leading to effects in disease progression and establishing them as diagnostic and therapeutic targets. Still, only a few representatives of this diverse class of RNAs are well studied, while the vast majority is poorly described beyond the existence of their transcripts. In this review we survey common in silico approaches for lncRNA annotation. We focus on the well-established sets of features used for classification and discuss their specific advantages and weaknesses. While the available tools perform very well for the task of distinguishing coding sequence from other RNAs, we find that current methods are not well suited to distinguish lncRNAs or parts thereof from other non-protein-coding input sequences. We conclude that the distinction of lncRNAs from intronic sequences and untranslated regions of coding mRNAs remains a pressing research gap.
@article{Klapproth2021-tj,
  author     = {Klapproth, Christopher and Sen, Rituparno and Stadler, Peter F and Findei{\ss}, Sven and Fallmann, J{\"o}rg},
  title      = {Common features in {lncRNA} annotation and classification: A survey},
  journal    = {Noncoding RNA},
  volume     = {7},
  number     = {4},
  pages      = {77},
  year       = {2021},
  month      = dec,
  publisher  = {MDPI AG},
  language   = {en},
  copyright  = {https://creativecommons.org/licenses/by/4.0/},
  keywords   = {classification coding extraction; feature learning lncRNA; machine problems; sequence;},
  abstract   = {Long non-coding RNAs (lncRNAs) are widely recognized as important regulators of gene expression. Their molecular functions range from miRNA sponging to chromatin-associated mechanisms, leading to effects in disease progression and establishing them as diagnostic and therapeutic targets. Still, only a few representatives of this diverse class of RNAs are well studied, while the vast majority is poorly described beyond the existence of their transcripts. In this review we survey common in silico approaches for lncRNA annotation. We focus on the well-established sets of features used for classification and discuss their specific advantages and weaknesses. While the available tools perform very well for the task of distinguishing coding sequence from other RNAs, we find that current methods are not well suited to distinguish lncRNAs or parts thereof from other non-protein-coding input sequences. We conclude that the distinction of lncRNAs from intronic sequences and untranslated regions of coding mRNAs remains a pressing research gap.},
  added-at   = {2024-09-10T11:54:51.000+0200},
  timestamp  = {2024-09-10T11:54:51.000+0200},
  biburl     = {https://puma.scadsai.uni-leipzig.de/bibtex/24624548668b258913a901d77cc8fea97/scadsfct},
  interhash  = {8843b7dc012ddaed6f9a5976475db6e7},
  intrahash  = {4624548668b258913a901d77cc8fea97},
}