Question answering (QA) over knowledge graphs has gained significant momentum over the past five years due to the increasing availability of large knowledge graphs and the rising importance of question answering for user interaction. Existing QA systems have been extensively evaluated as black boxes, and their performance has been characterised in terms of average results over all the questions of benchmarking datasets (i.e. macro evaluation). Although informative, macro evaluation studies do not provide evidence about the concrete strengths and weaknesses of QA components. Therefore, the objective of this article is to analyse and micro evaluate available QA components in order to understand which question characteristics impact their performance. To this end, we measure the accuracy of 29 components reused in QA frameworks for the DBpedia knowledge graph at the question level and with respect to different question features, using state-of-the-art benchmarks. As a result, we provide a perspective on collective failure cases, study the similarities and synergies among QA components of different component types, and identify the characteristics that prevent them from effectively solving the corresponding QA tasks. Finally, based on these extensive results, we present conclusive insights into future challenges and research directions in the field of question answering over knowledge graphs.
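To make the macro vs. micro evaluation distinction concrete, the sketch below (not taken from the article; the component names, question features, and data are hypothetical) contrasts a single accuracy figure averaged over all benchmark questions with a per-feature breakdown of per-question correctness, which is the kind of fine-grained view the study performs for the 29 evaluated components.

```python
# Minimal sketch: macro evaluation (one average over all questions) vs.
# a per-feature ("micro") breakdown of per-question results.
# All records below are illustrative placeholders, not data from the paper.
from collections import defaultdict

# Hypothetical per-question records: (component, question feature, correct?)
results = [
    ("EntityLinker-A",      "simple",   True),
    ("EntityLinker-A",      "compound", False),
    ("EntityLinker-A",      "simple",   True),
    ("RelationExtractor-B", "simple",   True),
    ("RelationExtractor-B", "compound", False),
]

def macro_accuracy(records):
    """Average accuracy over all questions, ignoring question features."""
    return sum(r[2] for r in records) / len(records)

def per_feature_accuracy(records):
    """Accuracy grouped by question feature (the fine-grained view)."""
    buckets = defaultdict(list)
    for _, feature, correct in records:
        buckets[feature].append(correct)
    return {feature: sum(v) / len(v) for feature, v in buckets.items()}

print(macro_accuracy(results))        # single aggregate number
print(per_feature_accuracy(results))  # reveals which question types fail
```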
%0 Journal Article
%1 SINGH2020100594
%A Singh, Kuldeep
%A Lytra, Ioanna
%A Radhakrishna, Arun Sethupat
%A Shekarpour, Saeedeh
%A Vidal, Maria-Esther
%A Lehmann, Jens
%D 2020
%J Journal of Web Semantics
%K Question answering, Knowledge graph, Entity linking, Relation extraction, Experiment and analysis
%P 100594
%R 10.1016/j.websem.2020.100594
%T No one is perfect: Analysing the performance of question answering components over the DBpedia knowledge graph
%U https://www.sciencedirect.com/science/article/pii/S1570826820300342
%V 65
%X Question answering (QA) over knowledge graphs has gained significant momentum over the past five years due to the increasing availability of large knowledge graphs and the rising importance of Question Answering for user interaction. Existing QA systems have been extensively evaluated as black boxes and their performance has been characterised in terms of average results over all the questions of benchmarking datasets (i.e. macro evaluation). Albeit informative, macro evaluation studies do not provide evidence about QA components’ strengths and concrete weaknesses. Therefore, the objective of this article is to analyse and micro evaluate available QA components in order to comprehend which question characteristics impact on their performance. For this, we measure at question level and with respect to different question features the accuracy of 29 components reused in QA frameworks for the DBpedia knowledge graph using state-of-the-art benchmarks. As a result, we provide a perspective on collective failure cases, study the similarities and synergies among QA components for different component types and suggest their characteristics preventing them from effectively solving the corresponding QA tasks. Finally, based on these extensive results, we present conclusive insights for future challenges and research directions in the field of Question Answering over knowledge graphs.
@article{SINGH2020100594,
abstract = {Question answering (QA) over knowledge graphs has gained significant momentum over the past five years due to the increasing availability of large knowledge graphs and the rising importance of Question Answering for user interaction. Existing QA systems have been extensively evaluated as black boxes and their performance has been characterised in terms of average results over all the questions of benchmarking datasets (i.e. macro evaluation). Albeit informative, macro evaluation studies do not provide evidence about QA components’ strengths and concrete weaknesses. Therefore, the objective of this article is to analyse and micro evaluate available QA components in order to comprehend which question characteristics impact on their performance. For this, we measure at question level and with respect to different question features the accuracy of 29 components reused in QA frameworks for the DBpedia knowledge graph using state-of-the-art benchmarks. As a result, we provide a perspective on collective failure cases, study the similarities and synergies among QA components for different component types and suggest their characteristics preventing them from effectively solving the corresponding QA tasks. Finally, based on these extensive results, we present conclusive insights for future challenges and research directions in the field of Question Answering over knowledge graphs.},
added-at = {2024-10-02T10:38:17.000+0200},
author = {Singh, Kuldeep and Lytra, Ioanna and Radhakrishna, Arun Sethupat and Shekarpour, Saeedeh and Vidal, Maria-Esther and Lehmann, Jens},
biburl = {https://puma.scadsai.uni-leipzig.de/bibtex/2fc2efaab51485aa8f24107da2482106b/scadsfct},
doi = {10.1016/j.websem.2020.100594},
interhash = {90d2ad6c473fad1a35538ff502fb572a},
intrahash = {fc2efaab51485aa8f24107da2482106b},
issn = {1570-8268},
journal = {Journal of Web Semantics},
keywords = {Question answering, Knowledge graph, Entity linking, Relation extraction, Experiment and analysis},
pages = 100594,
timestamp = {2024-10-02T10:38:17.000+0200},
title = {No one is perfect: Analysing the performance of question answering components over the DBpedia knowledge graph},
url = {https://www.sciencedirect.com/science/article/pii/S1570826820300342},
volume = 65,
year = 2020
}