BibTeX
@inproceedings{ansell-etal-2021-polylm,
title = "{P}oly{LM}: Learning about Polysemy through Language Modeling",
author = "Ansell, Alan and
Bravo-Marquez, Felipe and
Pfahringer, Bernhard",
editor = "Merlo, Paola and
Tiedemann, J{\"o}rg and
Tsarfaty, Reut",
booktitle = "Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: Main Volume",
month = apr,
year = "2021",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.eacl-main.45",
doi = "10.18653/v1/2021.eacl-main.45",
pages = "563--574",
abstract = "To avoid the {``}meaning conflation deficiency{''} of word embeddings, a number of models have aimed to embed individual word senses. These methods at one time performed well on tasks such as word sense induction (WSI), but they have since been overtaken by task-specific techniques which exploit contextualized embeddings. However, sense embeddings and contextualization need not be mutually exclusive. We introduce PolyLM, a method which formulates the task of learning sense embeddings as a language modeling problem, allowing contextualization techniques to be applied. PolyLM is based on two underlying assumptions about word senses: firstly, that the probability of a word occurring in a given context is equal to the sum of the probabilities of its individual senses occurring; and secondly, that for a given occurrence of a word, one of its senses tends to be much more plausible in the context than the others. We evaluate PolyLM on WSI, showing that it performs considerably better than previous sense embedding techniques, and matches the current state-of-the-art specialized WSI method despite having six times fewer parameters. Code and pre-trained models are available at \url{https://github.com/AlanAnsell/PolyLM}.",
}

MODS XML
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ansell-etal-2021-polylm">
<titleInfo>
<title>PolyLM: Learning about Polysemy through Language Modeling</title>
</titleInfo>
<name type="personal">
<namePart type="given">Alan</namePart>
<namePart type="family">Ansell</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Felipe</namePart>
<namePart type="family">Bravo-Marquez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bernhard</namePart>
<namePart type="family">Pfahringer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-04</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: Main Volume</title>
</titleInfo>
<name type="personal">
<namePart type="given">Paola</namePart>
<namePart type="family">Merlo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jorg</namePart>
<namePart type="family">Tiedemann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Reut</namePart>
<namePart type="family">Tsarfaty</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>To avoid the “meaning conflation deficiency” of word embeddings, a number of models have aimed to embed individual word senses. These methods at one time performed well on tasks such as word sense induction (WSI), but they have since been overtaken by task-specific techniques which exploit contextualized embeddings. However, sense embeddings and contextualization need not be mutually exclusive. We introduce PolyLM, a method which formulates the task of learning sense embeddings as a language modeling problem, allowing contextualization techniques to be applied. PolyLM is based on two underlying assumptions about word senses: firstly, that the probability of a word occurring in a given context is equal to the sum of the probabilities of its individual senses occurring; and secondly, that for a given occurrence of a word, one of its senses tends to be much more plausible in the context than the others. We evaluate PolyLM on WSI, showing that it performs considerably better than previous sense embedding techniques, and matches the current state-of-the-art specialized WSI method despite having six times fewer parameters. Code and pre-trained models are available at https://github.com/AlanAnsell/PolyLM.</abstract>
<identifier type="citekey">ansell-etal-2021-polylm</identifier>
<identifier type="doi">10.18653/v1/2021.eacl-main.45</identifier>
<location>
<url>https://aclanthology.org/2021.eacl-main.45</url>
</location>
<part>
<date>2021-04</date>
<extent unit="page">
<start>563</start>
<end>574</end>
</extent>
</part>
</mods>
</modsCollection>

Endnote
%0 Conference Proceedings
%T PolyLM: Learning about Polysemy through Language Modeling
%A Ansell, Alan
%A Bravo-Marquez, Felipe
%A Pfahringer, Bernhard
%Y Merlo, Paola
%Y Tiedemann, Jörg
%Y Tsarfaty, Reut
%S Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: Main Volume
%D 2021
%8 April
%I Association for Computational Linguistics
%C Online
%F ansell-etal-2021-polylm
%X To avoid the “meaning conflation deficiency” of word embeddings, a number of models have aimed to embed individual word senses. These methods at one time performed well on tasks such as word sense induction (WSI), but they have since been overtaken by task-specific techniques which exploit contextualized embeddings. However, sense embeddings and contextualization need not be mutually exclusive. We introduce PolyLM, a method which formulates the task of learning sense embeddings as a language modeling problem, allowing contextualization techniques to be applied. PolyLM is based on two underlying assumptions about word senses: firstly, that the probability of a word occurring in a given context is equal to the sum of the probabilities of its individual senses occurring; and secondly, that for a given occurrence of a word, one of its senses tends to be much more plausible in the context than the others. We evaluate PolyLM on WSI, showing that it performs considerably better than previous sense embedding techniques, and matches the current state-of-the-art specialized WSI method despite having six times fewer parameters. Code and pre-trained models are available at https://github.com/AlanAnsell/PolyLM.
%R 10.18653/v1/2021.eacl-main.45
%U https://aclanthology.org/2021.eacl-main.45
%U https://doi.org/10.18653/v1/2021.eacl-main.45
%P 563-574
Markdown (Informal)
[PolyLM: Learning about Polysemy through Language Modeling](https://aclanthology.org/2021.eacl-main.45) (Ansell et al., EACL 2021)
ACL
- Alan Ansell, Felipe Bravo-Marquez, and Bernhard Pfahringer. 2021. PolyLM: Learning about Polysemy through Language Modeling. In Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: Main Volume, pages 563–574, Online. Association for Computational Linguistics.
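
The two assumptions stated in the abstract can be written compactly as equations. The following is a sketch in notation of our own choosing (the symbols w, c, and S(w) do not come from the paper): let w be a word, c its context, and S(w) its set of senses.

% Assumption 1: the probability of word w occurring in context c equals
% the sum of the probabilities of its individual senses occurring.
\[
  P(w \mid c) = \sum_{s \in S(w)} P(s \mid c)
\]
% Assumption 2: for a given occurrence of w, one sense tends to be much
% more plausible in the context than the others, i.e. the distribution
% over senses is close to one-hot.
\[
  \max_{s \in S(w)} P(s \mid c, w) \approx 1
\]

Read this way, the first assumption is what lets sense learning be cast as a language modeling problem (the sense probabilities must sum to the word's probability), while the second pushes the model to commit to a single sense per occurrence.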