@inproceedings{gouravajhala-etal-2023-chat,
title = "Chat Disentanglement: Data for New Domains and Methods for More Accurate Annotation",
author = "Gouravajhala, Sai R. and
Vernier, Andrew M. and
Shi, Yiming and
Li, Zihan and
Ackerman, Mark S. and
Kummerfeld, Jonathan K.",
editor = "Muresan, Smaranda and
Chen, Vivian and
Casey, Kennington and
David, Vandyke and
Nina, Dethlefs and
Koji, Inoue and
Erik, Ekstedt and
Stefan, Ultes",
booktitle = "Proceedings of the 21st Annual Workshop of the Australasian Language Technology Association",
month = nov,
year = "2023",
address = "Melbourne, Australia",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.alta-1.12",
pages = "112--117",
abstract = "Conversation disentanglement is the task of taking a log of intertwined conversations from a shared channel and breaking the log into individual conversations. The standard datasets for disentanglement are in a single domain and were annotated by linguistics experts with careful training for the task. In this paper, we introduce the first multi-domain dataset and a study of annotation by people without linguistics expertise or extensive training. We experiment with several variations in interfaces, conducting user studies with domain experts and crowd workers. We also test a hypothesis from prior work that link-based annotation is more accurate, finding that it actually has comparable accuracy to set-based annotation. Our new dataset will support the development of more useful systems for this task, and our experimental findings suggest that users are capable of improving the usefulness of these systems by accurately annotating their own data.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="gouravajhala-etal-2023-chat">
<titleInfo>
<title>Chat Disentanglement: Data for New Domains and Methods for More Accurate Annotation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sai</namePart>
<namePart type="given">R</namePart>
<namePart type="family">Gouravajhala</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andrew</namePart>
<namePart type="given">M</namePart>
<namePart type="family">Vernier</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yiming</namePart>
<namePart type="family">Shi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zihan</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mark</namePart>
<namePart type="given">S</namePart>
<namePart type="family">Ackerman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jonathan</namePart>
<namePart type="given">K</namePart>
<namePart type="family">Kummerfeld</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 21st Annual Workshop of the Australasian Language Technology Association</title>
</titleInfo>
<name type="personal">
<namePart type="given">Smaranda</namePart>
<namePart type="family">Muresan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vivian</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kennington</namePart>
<namePart type="family">Casey</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vandyke</namePart>
<namePart type="family">David</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dethlefs</namePart>
<namePart type="family">Nina</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Inoue</namePart>
<namePart type="family">Koji</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekstedt</namePart>
<namePart type="family">Erik</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ultes</namePart>
<namePart type="family">Stefan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Melbourne, Australia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Conversation disentanglement is the task of taking a log of intertwined conversations from a shared channel and breaking the log into individual conversations. The standard datasets for disentanglement are in a single domain and were annotated by linguistics experts with careful training for the task. In this paper, we introduce the first multi-domain dataset and a study of annotation by people without linguistics expertise or extensive training. We experiment with several variations in interfaces, conducting user studies with domain experts and crowd workers. We also test a hypothesis from prior work that link-based annotation is more accurate, finding that it actually has comparable accuracy to set-based annotation. Our new dataset will support the development of more useful systems for this task, and our experimental findings suggest that users are capable of improving the usefulness of these systems by accurately annotating their own data.</abstract>
<identifier type="citekey">gouravajhala-etal-2023-chat</identifier>
<location>
<url>https://aclanthology.org/2023.alta-1.12</url>
</location>
<part>
<date>2023-11</date>
<extent unit="page">
<start>112</start>
<end>117</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Chat Disentanglement: Data for New Domains and Methods for More Accurate Annotation
%A Gouravajhala, Sai R.
%A Vernier, Andrew M.
%A Shi, Yiming
%A Li, Zihan
%A Ackerman, Mark S.
%A Kummerfeld, Jonathan K.
%Y Muresan, Smaranda
%Y Chen, Vivian
%Y Casey, Kennington
%Y David, Vandyke
%Y Nina, Dethlefs
%Y Koji, Inoue
%Y Erik, Ekstedt
%Y Stefan, Ultes
%S Proceedings of the 21st Annual Workshop of the Australasian Language Technology Association
%D 2023
%8 November
%I Association for Computational Linguistics
%C Melbourne, Australia
%F gouravajhala-etal-2023-chat
%X Conversation disentanglement is the task of taking a log of intertwined conversations from a shared channel and breaking the log into individual conversations. The standard datasets for disentanglement are in a single domain and were annotated by linguistics experts with careful training for the task. In this paper, we introduce the first multi-domain dataset and a study of annotation by people without linguistics expertise or extensive training. We experiment with several variations in interfaces, conducting user studies with domain experts and crowd workers. We also test a hypothesis from prior work that link-based annotation is more accurate, finding that it actually has comparable accuracy to set-based annotation. Our new dataset will support the development of more useful systems for this task, and our experimental findings suggest that users are capable of improving the usefulness of these systems by accurately annotating their own data.
%U https://aclanthology.org/2023.alta-1.12
%P 112-117
Markdown (Informal)
[Chat Disentanglement: Data for New Domains and Methods for More Accurate Annotation](https://aclanthology.org/2023.alta-1.12) (Gouravajhala et al., ALTA 2023)
ACL