At least 5% of questions submitted to search engines ask about cause-effect relationships in some way. To support the development of tailored approaches that can answer such questions, we construct Webis-CausalQA-22, a benchmark corpus of 1.1 million causal questions with answers. We distinguish different types of causal questions using a novel typology derived from a data-driven, manual analysis of questions from ten large question answering (QA) datasets. Using high-precision lexical rules, we extract causal questions of each type from these datasets to create our corpus. As an initial baseline, the state-of-the-art QA model UnifiedQA achieves a ROUGE-L F1 score of 0.48 on our new benchmark.
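The paper's actual extraction rules and evaluation script are not part of this record. As a rough Python sketch of the two techniques the abstract names, the snippet below pairs a handful of illustrative lexical cue patterns for spotting causal questions (assumed examples, not the actual Webis-CausalQA-22 rules) with the standard LCS-based formulation of ROUGE-L F1; the paper's exact evaluation setup may differ.

import re

# Illustrative cue patterns for causal questions. These are assumed
# examples for demonstration, not the high-precision rules used to
# build Webis-CausalQA-22.
CAUSAL_PATTERNS = [
    re.compile(r"^\s*why\b", re.IGNORECASE),                       # "Why does ice float?"
    re.compile(r"\bwhat\s+cause[sd]?\b", re.IGNORECASE),           # "What causes earthquakes?"
    re.compile(r"\bwhat\s+happens\s+(?:if|when)\b", re.IGNORECASE),
    re.compile(r"\b(?:effects?|consequences?)\s+of\b", re.IGNORECASE),
]

def is_causal_question(question: str) -> bool:
    """Return True if any causal cue pattern matches the question."""
    return any(p.search(question) for p in CAUSAL_PATTERNS)

def rouge_l_f1(prediction: str, reference: str) -> float:
    """ROUGE-L F1: harmonic mean of precision and recall based on the
    longest common subsequence (LCS) of the two token sequences."""
    pred, ref = prediction.split(), reference.split()
    if not pred or not ref:
        return 0.0
    # Dynamic-programming table for LCS length.
    dp = [[0] * (len(ref) + 1) for _ in range(len(pred) + 1)]
    for i, p_tok in enumerate(pred, 1):
        for j, r_tok in enumerate(ref, 1):
            dp[i][j] = dp[i - 1][j - 1] + 1 if p_tok == r_tok \
                else max(dp[i - 1][j], dp[i][j - 1])
    lcs = dp[-1][-1]
    if lcs == 0:
        return 0.0
    precision, recall = lcs / len(pred), lcs / len(ref)
    return 2 * precision * recall / (precision + recall)

if __name__ == "__main__":
    print(is_causal_question("What causes earthquakes?"))  # True
    print(is_causal_question("Who wrote Hamlet?"))         # False
    print(round(rouge_l_f1("movement of tectonic plates",
                           "the movement of tectonic plates"), 2))  # 0.89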
@inproceedings{bondarenko-etal-2022-causalqa,
address = {Gyeongju, Republic of Korea},
author = {Bondarenko, Alexander and Wolska, Magdalena and Heindorf, Stefan and Bl{\"u}baum, Lukas and Ngonga Ngomo, Axel-Cyrille and Stein, Benno and Braslavski, Pavel and Hagen, Matthias and Potthast, Martin},
booktitle = {Proceedings of the 29th International Conference on Computational Linguistics},
month = oct,
pages = {3296--3308},
publisher = {International Committee on Computational Linguistics},
title = {{C}ausal{QA}: A Benchmark for Causal Question Answering},
url = {https://aclanthology.org/2022.coling-1.291},
year = 2022
}