@inproceedings{kumar-etal-2024-subtle,
    title = "Subtle Biases Need Subtler Measures: Dual Metrics for Evaluating Representative and Affinity Bias in Large Language Models",
    author = "Kumar, Abhishek and
      Yunusov, Sarfaroz and
      Emami, Ali",
    editor = "Ku, Lun-Wei and
      Martins, Andre and
      Srikumar, Vivek",
    booktitle = "Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)",
    month = aug,
    year = "2024",
    address = "Bangkok, Thailand",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2024.acl-long.23",
    doi = "10.18653/v1/2024.acl-long.23",
    pages = "375--392",
    abstract = "Research on Large Language Models (LLMs) has often neglected subtle biases that, although less apparent, can significantly influence the models{'} outputs toward particular social narratives. This study addresses two such biases within LLMs: representative bias, which denotes a tendency of LLMs to generate outputs that mirror the experiences of certain identity groups, and affinity bias, reflecting the models{'} evaluative preferences for specific narratives or viewpoints. We introduce two novel metrics to measure these biases: the Representative Bias Score (RBS) and the Affinity Bias Score (ABS), and present the Creativity-Oriented Generation Suite (CoGS), a collection of open-ended tasks such as short story writing and poetry composition, designed with customized rubrics to detect these subtle biases. Our analysis uncovers marked representative biases in prominent LLMs, with a preference for identities associated with being white, straight, and men. Furthermore, our investigation of affinity bias reveals distinctive evaluative patterns within each model, akin to {`}bias fingerprints{'}. This trend is also seen in human evaluators, highlighting a complex interplay between human and machine bias perceptions.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kumar-etal-2024-subtle">
    <titleInfo>
        <title>Subtle Biases Need Subtler Measures: Dual Metrics for Evaluating Representative and Affinity Bias in Large Language Models</title>
    </titleInfo>
    <name type="personal">
        <namePart type="given">Abhishek</namePart>
        <namePart type="family">Kumar</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Sarfaroz</namePart>
        <namePart type="family">Yunusov</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Ali</namePart>
        <namePart type="family">Emami</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <originInfo>
        <dateIssued>2024-08</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
        <titleInfo>
            <title>Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
        </titleInfo>
        <name type="personal">
            <namePart type="given">Lun-Wei</namePart>
            <namePart type="family">Ku</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Andre</namePart>
            <namePart type="family">Martins</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Vivek</namePart>
            <namePart type="family">Srikumar</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <originInfo>
            <publisher>Association for Computational Linguistics</publisher>
            <place>
                <placeTerm type="text">Bangkok, Thailand</placeTerm>
            </place>
        </originInfo>
        <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Research on Large Language Models (LLMs) has often neglected subtle biases that, although less apparent, can significantly influence the models’ outputs toward particular social narratives. This study addresses two such biases within LLMs: representative bias, which denotes a tendency of LLMs to generate outputs that mirror the experiences of certain identity groups, and affinity bias, reflecting the models’ evaluative preferences for specific narratives or viewpoints. We introduce two novel metrics to measure these biases: the Representative Bias Score (RBS) and the Affinity Bias Score (ABS), and present the Creativity-Oriented Generation Suite (CoGS), a collection of open-ended tasks such as short story writing and poetry composition, designed with customized rubrics to detect these subtle biases. Our analysis uncovers marked representative biases in prominent LLMs, with a preference for identities associated with being white, straight, and men. Furthermore, our investigation of affinity bias reveals distinctive evaluative patterns within each model, akin to ‘bias fingerprints’. This trend is also seen in human evaluators, highlighting a complex interplay between human and machine bias perceptions.</abstract>
    <identifier type="citekey">kumar-etal-2024-subtle</identifier>
    <identifier type="doi">10.18653/v1/2024.acl-long.23</identifier>
    <location>
        <url>https://aclanthology.org/2024.acl-long.23</url>
    </location>
    <part>
        <date>2024-08</date>
        <extent unit="page">
            <start>375</start>
            <end>392</end>
        </extent>
    </part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Subtle Biases Need Subtler Measures: Dual Metrics for Evaluating Representative and Affinity Bias in Large Language Models
%A Kumar, Abhishek
%A Yunusov, Sarfaroz
%A Emami, Ali
%Y Ku, Lun-Wei
%Y Martins, Andre
%Y Srikumar, Vivek
%S Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2024
%8 August
%I Association for Computational Linguistics
%C Bangkok, Thailand
%F kumar-etal-2024-subtle
%X Research on Large Language Models (LLMs) has often neglected subtle biases that, although less apparent, can significantly influence the models’ outputs toward particular social narratives. This study addresses two such biases within LLMs: representative bias, which denotes a tendency of LLMs to generate outputs that mirror the experiences of certain identity groups, and affinity bias, reflecting the models’ evaluative preferences for specific narratives or viewpoints. We introduce two novel metrics to measure these biases: the Representative Bias Score (RBS) and the Affinity Bias Score (ABS), and present the Creativity-Oriented Generation Suite (CoGS), a collection of open-ended tasks such as short story writing and poetry composition, designed with customized rubrics to detect these subtle biases. Our analysis uncovers marked representative biases in prominent LLMs, with a preference for identities associated with being white, straight, and men. Furthermore, our investigation of affinity bias reveals distinctive evaluative patterns within each model, akin to ‘bias fingerprints’. This trend is also seen in human evaluators, highlighting a complex interplay between human and machine bias perceptions.
%R 10.18653/v1/2024.acl-long.23
%U https://aclanthology.org/2024.acl-long.23
%U https://doi.org/10.18653/v1/2024.acl-long.23
%P 375-392
Markdown (Informal)
[Subtle Biases Need Subtler Measures: Dual Metrics for Evaluating Representative and Affinity Bias in Large Language Models](https://aclanthology.org/2024.acl-long.23) (Kumar et al., ACL 2024)