@inproceedings{fomicheva-etal-2021-eval4nlp,
title = "The {E}val4{NLP} Shared Task on Explainable Quality Estimation: Overview and Results",
author = "Fomicheva, Marina and
Lertvittayakumjorn, Piyawat and
Zhao, Wei and
Eger, Steffen and
Gao, Yang",
editor = "Gao, Yang and
Eger, Steffen and
Zhao, Wei and
Lertvittayakumjorn, Piyawat and
Fomicheva, Marina",
booktitle = "Proceedings of the 2nd Workshop on Evaluation and Comparison of NLP Systems",
month = nov,
year = "2021",
address = "Punta Cana, Dominican Republic",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.eval4nlp-1.17",
doi = "10.18653/v1/2021.eval4nlp-1.17",
pages = "165--178",
abstract = "In this paper, we introduce the Eval4NLP-2021 shared task on explainable quality estimation. Given a source-translation pair, this shared task requires not only to provide a sentence-level score indicating the overall quality of the translation, but also to explain this score by identifying the words that negatively impact translation quality. We present the data, annotation guidelines and evaluation setup of the shared task, describe the six participating systems, and analyze the results. To the best of our knowledge, this is the first shared task on explainable NLP evaluation metrics. Datasets and results are available at \url{https://github.com/eval4nlp/SharedTask2021}.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="fomicheva-etal-2021-eval4nlp">
<titleInfo>
<title>The Eval4NLP Shared Task on Explainable Quality Estimation: Overview and Results</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marina</namePart>
<namePart type="family">Fomicheva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Piyawat</namePart>
<namePart type="family">Lertvittayakumjorn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wei</namePart>
<namePart type="family">Zhao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Steffen</namePart>
<namePart type="family">Eger</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yang</namePart>
<namePart type="family">Gao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2nd Workshop on Evaluation and Comparison of NLP Systems</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yang</namePart>
<namePart type="family">Gao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Steffen</namePart>
<namePart type="family">Eger</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wei</namePart>
<namePart type="family">Zhao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Piyawat</namePart>
<namePart type="family">Lertvittayakumjorn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marina</namePart>
<namePart type="family">Fomicheva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Punta Cana, Dominican Republic</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper, we introduce the Eval4NLP-2021 shared task on explainable quality estimation. Given a source-translation pair, this shared task requires not only to provide a sentence-level score indicating the overall quality of the translation, but also to explain this score by identifying the words that negatively impact translation quality. We present the data, annotation guidelines and evaluation setup of the shared task, describe the six participating systems, and analyze the results. To the best of our knowledge, this is the first shared task on explainable NLP evaluation metrics. Datasets and results are available at https://github.com/eval4nlp/SharedTask2021.</abstract>
<identifier type="citekey">fomicheva-etal-2021-eval4nlp</identifier>
<identifier type="doi">10.18653/v1/2021.eval4nlp-1.17</identifier>
<location>
<url>https://aclanthology.org/2021.eval4nlp-1.17</url>
</location>
<part>
<date>2021-11</date>
<extent unit="page">
<start>165</start>
<end>178</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T The Eval4NLP Shared Task on Explainable Quality Estimation: Overview and Results
%A Fomicheva, Marina
%A Lertvittayakumjorn, Piyawat
%A Zhao, Wei
%A Eger, Steffen
%A Gao, Yang
%Y Gao, Yang
%Y Eger, Steffen
%Y Zhao, Wei
%Y Lertvittayakumjorn, Piyawat
%Y Fomicheva, Marina
%S Proceedings of the 2nd Workshop on Evaluation and Comparison of NLP Systems
%D 2021
%8 November
%I Association for Computational Linguistics
%C Punta Cana, Dominican Republic
%F fomicheva-etal-2021-eval4nlp
%X In this paper, we introduce the Eval4NLP-2021 shared task on explainable quality estimation. Given a source-translation pair, this shared task requires not only to provide a sentence-level score indicating the overall quality of the translation, but also to explain this score by identifying the words that negatively impact translation quality. We present the data, annotation guidelines and evaluation setup of the shared task, describe the six participating systems, and analyze the results. To the best of our knowledge, this is the first shared task on explainable NLP evaluation metrics. Datasets and results are available at https://github.com/eval4nlp/SharedTask2021.
%R 10.18653/v1/2021.eval4nlp-1.17
%U https://aclanthology.org/2021.eval4nlp-1.17
%U https://doi.org/10.18653/v1/2021.eval4nlp-1.17
%P 165-178
Markdown (Informal)
[The Eval4NLP Shared Task on Explainable Quality Estimation: Overview and Results](https://aclanthology.org/2021.eval4nlp-1.17) (Fomicheva et al., Eval4NLP 2021)
ACL