@inproceedings{mubarak-etal-2017-abusive,
title = "Abusive Language Detection on {A}rabic Social Media",
author = "Mubarak, Hamdy and
Darwish, Kareem and
Magdy, Walid",
editor = "Waseem, Zeerak and
Chung, Wendy Hui Kyong and
Hovy, Dirk and
Tetreault, Joel",
booktitle = "Proceedings of the First Workshop on Abusive Language Online",
month = aug,
year = "2017",
address = "Vancouver, BC, Canada",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W17-3008",
doi = "10.18653/v1/W17-3008",
pages = "52--56",
abstract = "In this paper, we present our work on detecting abusive language on Arabic social media. We extract a list of obscene words and hashtags using common patterns used in offensive and rude communications. We also classify Twitter users according to whether they use any of these words or not in their tweets. We expand the list of obscene words using this classification, and we report results on a newly created dataset of classified Arabic tweets (obscene, offensive, and clean). We make this dataset freely available for research, in addition to the list of obscene words and hashtags. We are also publicly releasing a large corpus of classified user comments that were deleted from a popular Arabic news site due to violations of the site{'}s rules and guidelines.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="mubarak-etal-2017-abusive">
<titleInfo>
<title>Abusive Language Detection on Arabic Social Media</title>
</titleInfo>
<name type="personal">
<namePart type="given">Hamdy</namePart>
<namePart type="family">Mubarak</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kareem</namePart>
<namePart type="family">Darwish</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Walid</namePart>
<namePart type="family">Magdy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2017-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the First Workshop on Abusive Language Online</title>
</titleInfo>
<name type="personal">
<namePart type="given">Zeerak</namePart>
<namePart type="family">Waseem</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wendy</namePart>
<namePart type="given">Hui</namePart>
<namePart type="given">Kyong</namePart>
<namePart type="family">Chung</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dirk</namePart>
<namePart type="family">Hovy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joel</namePart>
<namePart type="family">Tetreault</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vancouver, BC, Canada</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper, we present our work on detecting abusive language on Arabic social media. We extract a list of obscene words and hashtags using common patterns used in offensive and rude communications. We also classify Twitter users according to whether they use any of these words or not in their tweets. We expand the list of obscene words using this classification, and we report results on a newly created dataset of classified Arabic tweets (obscene, offensive, and clean). We make this dataset freely available for research, in addition to the list of obscene words and hashtags. We are also publicly releasing a large corpus of classified user comments that were deleted from a popular Arabic news site due to violations of the site’s rules and guidelines.</abstract>
<identifier type="citekey">mubarak-etal-2017-abusive</identifier>
<identifier type="doi">10.18653/v1/W17-3008</identifier>
<location>
<url>https://aclanthology.org/W17-3008</url>
</location>
<part>
<date>2017-08</date>
<extent unit="page">
<start>52</start>
<end>56</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Abusive Language Detection on Arabic Social Media
%A Mubarak, Hamdy
%A Darwish, Kareem
%A Magdy, Walid
%Y Waseem, Zeerak
%Y Chung, Wendy Hui Kyong
%Y Hovy, Dirk
%Y Tetreault, Joel
%S Proceedings of the First Workshop on Abusive Language Online
%D 2017
%8 August
%I Association for Computational Linguistics
%C Vancouver, BC, Canada
%F mubarak-etal-2017-abusive
%X In this paper, we present our work on detecting abusive language on Arabic social media. We extract a list of obscene words and hashtags using common patterns used in offensive and rude communications. We also classify Twitter users according to whether they use any of these words or not in their tweets. We expand the list of obscene words using this classification, and we report results on a newly created dataset of classified Arabic tweets (obscene, offensive, and clean). We make this dataset freely available for research, in addition to the list of obscene words and hashtags. We are also publicly releasing a large corpus of classified user comments that were deleted from a popular Arabic news site due to violations of the site’s rules and guidelines.
%R 10.18653/v1/W17-3008
%U https://aclanthology.org/W17-3008
%U https://doi.org/10.18653/v1/W17-3008
%P 52-56
Markdown (Informal)
[Abusive Language Detection on Arabic Social Media](https://aclanthology.org/W17-3008) (Mubarak et al., ALW 2017)
ACL
- Hamdy Mubarak, Kareem Darwish, and Walid Magdy. 2017. Abusive Language Detection on Arabic Social Media. In Proceedings of the First Workshop on Abusive Language Online, pages 52–56, Vancouver, BC, Canada. Association for Computational Linguistics.