YouTube videos are a popular medium for online product reviews. They are not only informative and entertaining, but may also be perceived as quite credible under the viewer’s impression of a personal product demonstration by an expert. As the world’s largest online video platform, YouTube’s content is included prominently in the results of most general-purpose web search engines. Consequently, online marketeers are using classic Search Engine Optimization (SEO) techniques also for placing their video content in search engines. Over the years, we have noticed an ever increasing noise floor of low-quality SEO content in product search results and in this study, we show that this trend has spilled over into videos as well. We examine YouTube video reviews for several thousand products retrieved from three commercial search engines and conduct spam detection experiments based directly on the videos’ subtitle transcripts rather than relying on metadata and comments. We find that at least a third of the retrieved videos can be regarded as spam or low-quality productions. We are further able to distinguish these spam product reviews accurately from higher-quality videos with a semi-supervised n-gram classification approach.
%0 Conference Paper
%1 10.1145/3627508.3638303
%A Bevendorff, Janek
%A Wiegmann, Matti
%A Potthast, Martin
%A Stein, Benno
%B Proceedings of the 2024 Conference on Human Information Interaction and Retrieval
%C New York, NY, USA
%D 2024
%I Association for Computing Machinery
%K topic_language Content Quality, SEO, Spam, Web YouTube
%P 358–363
%R 10.1145/3627508.3638303
%T Product Spam on YouTube: A Case Study
%U https://doi.org/10.1145/3627508.3638303
%X YouTube videos are a popular medium for online product reviews. They are not only informative and entertaining, but may also be perceived as quite credible under the viewer’s impression of a personal product demonstration by an expert. As the world’s largest online video platform, YouTube’s content is included prominently in the results of most general-purpose web search engines. Consequently, online marketeers are using classic Search Engine Optimization (SEO) techniques also for placing their video content in search engines. Over the years, we have noticed an ever increasing noise floor of low-quality SEO content in product search results and in this study, we show that this trend has spilled over into videos as well. We examine YouTube video reviews for several thousand products retrieved from three commercial search engines and conduct spam detection experiments based directly on the videos’ subtitle transcripts rather than relying on metadata and comments. We find that at least a third of the retrieved videos can be regarded as spam or low-quality productions. We are further able to distinguish these spam product reviews accurately from higher-quality videos with a semi-supervised n-gram classification approach.
%@ 9798400704345
@comment{
  Conference paper from the CHIIR '24 proceedings, exported from a PUMA instance (see biburl).
  The page range is written with a double hyphen and YouTube in the title is
  brace-protected so that sentence-casing styles keep its capitalisation.
  address holds the publisher city; location holds the conference venue, as in the original export.
  NOTE(review): the keywords value looks mangled by the tagging system
  (topic_language prefix, Web YouTube) -- confirm against the publisher record.
}
@inproceedings{10.1145/3627508.3638303,
  abstract  = {YouTube videos are a popular medium for online product reviews. They are not only informative and entertaining, but may also be perceived as quite credible under the viewer’s impression of a personal product demonstration by an expert. As the world’s largest online video platform, YouTube’s content is included prominently in the results of most general-purpose web search engines. Consequently, online marketeers are using classic Search Engine Optimization (SEO) techniques also for placing their video content in search engines. Over the years, we have noticed an ever increasing noise floor of low-quality SEO content in product search results and in this study, we show that this trend has spilled over into videos as well. We examine YouTube video reviews for several thousand products retrieved from three commercial search engines and conduct spam detection experiments based directly on the videos’ subtitle transcripts rather than relying on metadata and comments. We find that at least a third of the retrieved videos can be regarded as spam or low-quality productions. We are further able to distinguish these spam product reviews accurately from higher-quality videos with a semi-supervised n-gram classification approach.},
  added-at  = {2024-09-10T10:41:24.000+0200},
  address   = {New York, NY, USA},
  author    = {Bevendorff, Janek and Wiegmann, Matti and Potthast, Martin and Stein, Benno},
  biburl    = {https://puma.scadsai.uni-leipzig.de/bibtex/2c68e8fd37bc166757c682a9f69707223/scadsfct},
  booktitle = {Proceedings of the 2024 Conference on Human Information Interaction and Retrieval},
  doi       = {10.1145/3627508.3638303},
  interhash = {d2189910de993ad55b28485234ae543f},
  intrahash = {c68e8fd37bc166757c682a9f69707223},
  isbn      = {9798400704345},
  keywords  = {topic_language Content Quality, SEO, Spam, Web YouTube},
  location  = {Sheffield, United Kingdom},
  numpages  = {6},
  pages     = {358--363},
  publisher = {Association for Computing Machinery},
  series    = {CHIIR '24},
  timestamp = {2024-11-22T15:47:32.000+0100},
  title     = {Product Spam on {YouTube}: A Case Study},
  url       = {https://doi.org/10.1145/3627508.3638303},
  year      = {2024},
}