Abstract: The in silico prediction of non-coding and protein-coding genetic loci is an area of research that has gathered large attention in the field of comparative genomics. In the last decade, much effort has been made to investigate numerous properties of nucleotide sequences that hint at their biological role in the cell. We present here a software framework for the alignment-based training, evaluation and application of machine learning models with user-defined parameters. Instead of focusing on the one-size-fits-all approach of pervasive in silico annotation pipelines, we offer a framework for the structured generation and evaluation of models based on arbitrary features and input data, focusing on stable and explainable results. Furthermore, we showcase the usage of our software package in a full-genome screen of Drosophila melanogaster and evaluate our results against the well-known but much less flexible program RNAz.
%0 Unpublished Work
%1 Klapproth2022-no
%A Klapproth, Christopher
%A Zötzsche, Siegfried
%A Kühnl, Felix
%A Fallmann, Jörg
%A Stadler, Peter F
%A Findeiß, Sven
%D 2022
%J bioRxiv
%K topic_mathfoundation topic_lifescience
%T Tailored machine learning models for functional RNA detection in genome-wide screens
%X The in silico prediction of non-coding and protein-coding genetic loci is an area of research that has gathered large attention in the field of comparative genomics. In the last decade, much effort has been made to investigate numerous properties of nucleotide sequences that hint at their biological role in the cell. We present here a software framework for the alignment-based training, evaluation and application of machine learning models with user-defined parameters. Instead of focusing on the one-size-fits-all approach of pervasive in silico annotation pipelines, we offer a framework for the structured generation and evaluation of models based on arbitrary features and input data, focusing on stable and explainable results. Furthermore, we showcase the usage of our software package in a full-genome screen of Drosophila melanogaster and evaluate our results against the well-known but much less flexible program RNAz.
@unpublished{Klapproth2022-no,
  author     = {Klapproth, Christopher and Z{\"o}tzsche, Siegfried and K{\"u}hnl, Felix and Fallmann, J{\"o}rg and Stadler, Peter F and Findei{\ss}, Sven},
  title      = {Tailored machine learning models for functional {RNA} detection in genome-wide screens},
  journal    = {bioRxiv},
  note       = {Preprint, bioRxiv},
  month      = sep,
  year       = 2022,
  keywords   = {topic_mathfoundation topic_lifescience},
  abstract   = {The in silico prediction of non-coding and protein-coding genetic loci is an area of research that has gathered large attention in the field of comparative genomics. In the last decade, much effort has been made to investigate numerous properties of nucleotide sequences that hint at their biological role in the cell. We present here a software framework for the alignment-based training, evaluation and application of machine learning models with user-defined parameters. Instead of focusing on the one-size-fits-all approach of pervasive in silico annotation pipelines, we offer a framework for the structured generation and evaluation of models based on arbitrary features and input data, focusing on stable and explainable results. Furthermore, we showcase the usage of our software package in a full-genome screen of Drosophila melanogaster and evaluate our results against the well-known but much less flexible program RNAz.},
  added-at   = {2024-09-10T11:56:37.000+0200},
  timestamp  = {2024-11-22T15:49:25.000+0100},
  biburl     = {https://puma.scadsai.uni-leipzig.de/bibtex/2d7e3d2ccbbe1aa3e60b4cc4e36df2d6f/scadsfct},
  interhash  = {dda83ca2475ac86718f31a76701452da},
  intrahash  = {d7e3d2ccbbe1aa3e60b4cc4e36df2d6f},
}