@inproceedings{schwartz-stanovsky-2022-limitations,
title = "On the Limitations of Dataset Balancing: The Lost Battle Against Spurious Correlations",
author = "Schwartz, Roy and
Stanovsky, Gabriel",
editor = "Carpuat, Marine and
de Marneffe, Marie-Catherine and
Meza Ruiz, Ivan Vladimir",
booktitle = "Findings of the Association for Computational Linguistics: NAACL 2022",
month = jul,
year = "2022",
address = "Seattle, United States",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.findings-naacl.168",
doi = "10.18653/v1/2022.findings-naacl.168",
pages = "2182--2194",
abstract = "Recent work has shown that deep learning models in NLP are highly sensitive to low-level correlations between simple features and specific output labels, leading to over-fitting and lack of generalization. To mitigate this problem, a common practice is to balance datasets by adding new instances or by filtering out {``}easy{''} instances (Sakaguchi et al., 2020), culminating in a recent proposal to eliminate single-word correlations altogether (Gardner et al., 2021). In this opinion paper, we identify that despite these efforts, increasingly-powerful models keep exploiting ever-smaller spurious correlations, and as a result even balancing all single-word features is insufficient for mitigating all of these correlations. In parallel, a truly balanced dataset may be bound to {``}throw the baby out with the bathwater{''} and miss important signal encoding common sense and world knowledge. We highlight several alternatives to dataset balancing, focusing on enhancing datasets with richer contexts, allowing models to abstain and interact with users, and turning from large-scale fine-tuning to zero- or few-shot setups.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="schwartz-stanovsky-2022-limitations">
<titleInfo>
<title>On the Limitations of Dataset Balancing: The Lost Battle Against Spurious Correlations</title>
</titleInfo>
<name type="personal">
<namePart type="given">Roy</namePart>
<namePart type="family">Schwartz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gabriel</namePart>
<namePart type="family">Stanovsky</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: NAACL 2022</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marine</namePart>
<namePart type="family">Carpuat</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marie-Catherine</namePart>
<namePart type="family">de Marneffe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ivan</namePart>
<namePart type="given">Vladimir</namePart>
<namePart type="family">Meza Ruiz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Seattle, United States</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Recent work has shown that deep learning models in NLP are highly sensitive to low-level correlations between simple features and specific output labels, leading to over-fitting and lack of generalization. To mitigate this problem, a common practice is to balance datasets by adding new instances or by filtering out “easy” instances (Sakaguchi et al., 2020), culminating in a recent proposal to eliminate single-word correlations altogether (Gardner et al., 2021). In this opinion paper, we identify that despite these efforts, increasingly-powerful models keep exploiting ever-smaller spurious correlations, and as a result even balancing all single-word features is insufficient for mitigating all of these correlations. In parallel, a truly balanced dataset may be bound to “throw the baby out with the bathwater” and miss important signal encoding common sense and world knowledge. We highlight several alternatives to dataset balancing, focusing on enhancing datasets with richer contexts, allowing models to abstain and interact with users, and turning from large-scale fine-tuning to zero- or few-shot setups.</abstract>
<identifier type="citekey">schwartz-stanovsky-2022-limitations</identifier>
<identifier type="doi">10.18653/v1/2022.findings-naacl.168</identifier>
<location>
<url>https://aclanthology.org/2022.findings-naacl.168</url>
</location>
<part>
<date>2022-07</date>
<extent unit="page">
<start>2182</start>
<end>2194</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T On the Limitations of Dataset Balancing: The Lost Battle Against Spurious Correlations
%A Schwartz, Roy
%A Stanovsky, Gabriel
%Y Carpuat, Marine
%Y de Marneffe, Marie-Catherine
%Y Meza Ruiz, Ivan Vladimir
%S Findings of the Association for Computational Linguistics: NAACL 2022
%D 2022
%8 July
%I Association for Computational Linguistics
%C Seattle, United States
%F schwartz-stanovsky-2022-limitations
%X Recent work has shown that deep learning models in NLP are highly sensitive to low-level correlations between simple features and specific output labels, leading to over-fitting and lack of generalization. To mitigate this problem, a common practice is to balance datasets by adding new instances or by filtering out “easy” instances (Sakaguchi et al., 2020), culminating in a recent proposal to eliminate single-word correlations altogether (Gardner et al., 2021). In this opinion paper, we identify that despite these efforts, increasingly-powerful models keep exploiting ever-smaller spurious correlations, and as a result even balancing all single-word features is insufficient for mitigating all of these correlations. In parallel, a truly balanced dataset may be bound to “throw the baby out with the bathwater” and miss important signal encoding common sense and world knowledge. We highlight several alternatives to dataset balancing, focusing on enhancing datasets with richer contexts, allowing models to abstain and interact with users, and turning from large-scale fine-tuning to zero- or few-shot setups.
%R 10.18653/v1/2022.findings-naacl.168
%U https://aclanthology.org/2022.findings-naacl.168
%U https://doi.org/10.18653/v1/2022.findings-naacl.168
%P 2182-2194
Markdown (Informal)
[On the Limitations of Dataset Balancing: The Lost Battle Against Spurious Correlations](https://aclanthology.org/2022.findings-naacl.168) (Schwartz & Stanovsky, Findings 2022)
ACL