This paper evaluates the viability of using fixed language models for training text classification networks on low-end hardware. We combine language models with a CNN architecture and compile a comprehensive benchmark of 8 datasets covering single-label and multi-label classification of topic, sentiment, and genre. Our observations are distilled into a list of trade-offs, concluding that there are scenarios where not fine-tuning a language model yields competitive effectiveness with faster training, requiring only a quarter of the memory compared to fine-tuning.
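The setup described in the abstract, a pre-trained language model kept fixed with a CNN classifier trained on top of its token embeddings, can be sketched roughly as follows. This is a minimal illustration and not the authors' implementation; the model name (bert-base-uncased), filter sizes, and class count are assumptions chosen for demonstration.

import torch
import torch.nn as nn
from transformers import AutoModel, AutoTokenizer

class FrozenLMCNNClassifier(nn.Module):
    """CNN text classifier on top of a fixed (non-fine-tuned) language model."""
    def __init__(self, lm_name="bert-base-uncased", num_classes=4,
                 num_filters=100, kernel_sizes=(3, 4, 5)):
        super().__init__()
        self.lm = AutoModel.from_pretrained(lm_name)
        for p in self.lm.parameters():       # keep the language model fixed
            p.requires_grad = False
        hidden = self.lm.config.hidden_size
        self.convs = nn.ModuleList(
            nn.Conv1d(hidden, num_filters, k) for k in kernel_sizes)
        self.fc = nn.Linear(num_filters * len(kernel_sizes), num_classes)

    def forward(self, input_ids, attention_mask):
        with torch.no_grad():                # no gradients flow into the LM
            emb = self.lm(input_ids=input_ids,
                          attention_mask=attention_mask).last_hidden_state
        x = emb.transpose(1, 2)              # (batch, hidden, seq_len) for Conv1d
        pooled = [torch.relu(c(x)).max(dim=2).values for c in self.convs]
        return self.fc(torch.cat(pooled, dim=1))  # logits; use sigmoid + BCE for multi-label

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
model = FrozenLMCNNClassifier()
batch = tokenizer(["an example document"], return_tensors="pt",
                  padding=True, truncation=True)
logits = model(batch["input_ids"], batch["attention_mask"])

Because only the CNN and the output layer receive gradients, no optimizer state or activation gradients are kept for the language model, which is where the memory savings over full fine-tuning come from.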
%0 Journal Article
%1 Ziegner2023-xl
%A Ziegner, Fabian
%A Borst, Janos
%A Niekler, Andreas
%A Potthast, Martin
%D 2023
%I arXiv
%K topic_language
%T Using language models on low-end hardware
%X This paper evaluates the viability of using fixed language models for training text classification networks on low-end hardware. We combine language models with a CNN architecture and compile a comprehensive benchmark of 8 datasets covering single-label and multi-label classification of topic, sentiment, and genre. Our observations are distilled into a list of trade-offs, concluding that there are scenarios where not fine-tuning a language model yields competitive effectiveness with faster training, requiring only a quarter of the memory compared to fine-tuning.
@article{Ziegner2023-xl,
abstract = {This paper evaluates the viability of using fixed language models for training text classification networks on low-end hardware. We combine language models with a CNN architecture and compile a comprehensive benchmark of 8 datasets covering single-label and multi-label classification of topic, sentiment, and genre. Our observations are distilled into a list of trade-offs, concluding that there are scenarios where not fine-tuning a language model yields competitive effectiveness with faster training, requiring only a quarter of the memory compared to fine-tuning.},
author = {Ziegner, Fabian and Borst, Janos and Niekler, Andreas and Potthast, Martin},
biburl = {https://puma.scadsai.uni-leipzig.de/bibtex/25b910fec09ee44c4a9e51eb127790ff7/scadsfct},
keywords = {topic_language},
publisher = {arXiv},
title = {Using language models on low-end hardware},
year = 2023
}