In oncology, Deep Learning has shown great potential to personalise tasks such as tumour type classification, based on per-patient omics data-sets. Being high dimensional, incorporation of such data in one model is a challenge, often leading to one-dimensional studies and, therefore, information loss. Instead, we first propose relying on non-fixed sets of mutated genome sequences, which can be used for supervised learning of oncology-relevant tasks by our Transformer-based Deep Neural Network, SETQUENCE. Second, we extend the model to incorporate these representations as well as multiple sources of omics data in a flexible way with SETOMIC. Evaluation, using these representations, shows improved robustness and reduced information loss compared to previous approaches, while still being computationally tractable. By means of Explainable Artificial Intelligence methods, our models are shown to be able to recapitulate the biological contribution of several features in cancer, such as individual expression loci. This validation opens the door to novel directions in multi-faceted genome-wide biomarker discovery and personalised treatment among other presently clinically relevant tasks.
%0 Conference Paper
%1 9863058
%A Jurenaite, Neringa
%A León-Periñán, Daniel
%A Donath, Veronika
%A Torge, Sunna
%A Jäkel, René
%B 2022 IEEE Conference on Computational Intelligence in Bioinformatics and Computational Biology (CIBCB)
%D 2022
%K livinglab topic_federatedlearn topic_lifescience
%P 1-9
%R 10.1109/CIBCB55180.2022.9863058
%T SetQuence & SetOmic: Deep Set Transformer-based Representations of Cancer Multi-Omics
%X In oncology, Deep Learning has shown great potential to personalise tasks such as tumour type classification, based on per-patient omics data-sets. Being high dimensional, incorporation of such data in one model is a challenge, often leading to one-dimensional studies and, therefore, information loss. Instead, we first propose relying on non-fixed sets of mutated genome sequences, which can be used for supervised learning of oncology-relevant tasks by our Transformer-based Deep Neural Network, SETQUENCE. Second, we extend the model to incorporate these representations as well as multiple sources of omics data in a flexible way with SETOMIC. Evaluation, using these representations, shows improved robustness and reduced information loss compared to previous approaches, while still being computationally tractable. By means of Explainable Artificial Intelligence methods, our models are shown to be able to recapitulate the biological contribution of several features in cancer, such as individual expression loci. This validation opens the door to novel directions in multi-faceted genome-wide biomarker discovery and personalised treatment among other presently clinically relevant tasks.
% Conference-paper record for "SetQuence & SetOmic" (CIBCB 2022).
% The citation key equals the trailing component of the DOI.
% NOTE(review): author names are stored as raw UTF-8; this is fine with
% biblatex/Biber, but classic 8-bit BibTeX would need {\'o}-style escapes - confirm toolchain.
@inproceedings{9863058,
  abstract  = {In oncology, Deep Learning has shown great potential to personalise tasks such as tumour type classification, based on per-patient omics data-sets. Being high dimensional, incorporation of such data in one model is a challenge, often leading to one-dimensional studies and, therefore, information loss. Instead, we first propose relying on non-fixed sets of mutated genome sequences, which can be used for supervised learning of oncology-relevant tasks by our Transformer-based Deep Neural Network, SETQUENCE. Second, we extend the model to incorporate these representations as well as multiple sources of omics data in a flexible way with SETOMIC. Evaluation, using these representations, shows improved robustness and reduced information loss compared to previous approaches, while still being computationally tractable. By means of Explainable Artificial Intelligence methods, our models are shown to be able to recapitulate the biological contribution of several features in cancer, such as individual expression loci. This validation opens the door to novel directions in multi-faceted genome-wide biomarker discovery and personalised treatment among other presently clinically relevant tasks.},
  added-at  = {2024-10-24T09:33:29.000+0200},
  author    = {Jurenaite, Neringa and León-Periñán, Daniel and Donath, Veronika and Torge, Sunna and Jäkel, René},
  biburl    = {https://puma.scadsai.uni-leipzig.de/bibtex/2472c8e729b5b9e6311041529a34e3e29/scadsfct},
  booktitle = {2022 {IEEE} Conference on Computational Intelligence in Bioinformatics and Computational Biology ({CIBCB})},
  doi       = {10.1109/CIBCB55180.2022.9863058},
  interhash = {3e591f7f1df956f278d790013cb05a63},
  intrahash = {472c8e729b5b9e6311041529a34e3e29},
  keywords  = {livinglab topic_federatedlearn topic_lifescience},
  month     = aug,
  pages     = {1--9},
  timestamp = {2024-11-22T15:56:47.000+0100},
  title     = {{SetQuence} \& {SetOmic}: Deep Set Transformer-based Representations of Cancer Multi-Omics},
  year      = 2022,
}