% Conference-paper record (Findings of ACL 2024). Acronyms in the title and
% booktitle are brace-protected so that styles which recase these fields keep
% their capitals.
@inproceedings{das-srihari-2024-uniwiz,
  title     = {{UNIWIZ}: A Unified Large Language Model Orchestrated Wizard for Safe Knowledge Grounded Conversations},
  author    = {Das, Souvik and Srihari, Rohini},
  editor    = {Ku, Lun-Wei and Martins, Andre and Srikumar, Vivek},
  booktitle = {Findings of the Association for Computational Linguistics: {ACL} 2024},
  month     = aug,
  year      = {2024},
  address   = {Bangkok, Thailand},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2024.findings-acl.102},
  doi       = {10.18653/v1/2024.findings-acl.102},
  pages     = {1749--1762},
  abstract  = {Large Language Models (LLMs) have made significant progress in integrating safety and knowledge alignment. However, adversarial actors can manipulate these models into generating unsafe responses, and excessive safety alignment can lead to unintended hallucinations. To address these challenges, we introduce UniWiz, a novel 2-step data orchestration framework that unifies safety and knowledge data generation. We propose a {``}safety-priming{''} method to generate synthetic safety data and overcome safety bottlenecks. We also inject relevant knowledge into conversations by retrieving factual information from curated sources. UniWiz dataset consists of 17,638 quality-controlled conversations and 10,000 augmented preference data. Pretrained models fine-tuned on UniWiz show improvements across various metrics and outperform state-of-the-art instruction-tuned models trained on much larger datasets.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for citekey das-srihari-2024-uniwiz: a paper whose host item is a conference publication. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="das-srihari-2024-uniwiz">

    <!-- Paper title and authors -->
    <titleInfo>
      <title>UNIWIZ: A Unified Large Language Model Orchestrated Wizard for Safe Knowledge Grounded Conversations</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Souvik</namePart>
      <namePart type="family">Das</namePart>
      <role><roleTerm authority="marcrelator" type="text">author</roleTerm></role>
    </name>
    <name type="personal">
      <namePart type="given">Rohini</namePart>
      <namePart type="family">Srihari</namePart>
      <role><roleTerm authority="marcrelator" type="text">author</roleTerm></role>
    </name>
    <originInfo>
      <dateIssued>2024-08</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>

    <!-- Host volume: title, editors, publisher and place -->
    <relatedItem type="host">
      <titleInfo>
        <title>Findings of the Association for Computational Linguistics: ACL 2024</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Lun-Wei</namePart>
        <namePart type="family">Ku</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Andre</namePart>
        <namePart type="family">Martins</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Vivek</namePart>
        <namePart type="family">Srikumar</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Bangkok, Thailand</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>

    <abstract>Large Language Models (LLMs) have made significant progress in integrating safety and knowledge alignment. However, adversarial actors can manipulate these models into generating unsafe responses, and excessive safety alignment can lead to unintended hallucinations. To address these challenges, we introduce UniWiz, a novel 2-step data orchestration framework that unifies safety and knowledge data generation. We propose a “safety-priming” method to generate synthetic safety data and overcome safety bottlenecks. We also inject relevant knowledge into conversations by retrieving factual information from curated sources. UniWiz dataset consists of 17,638 quality-controlled conversations and 10,000 augmented preference data. Pretrained models fine-tuned on UniWiz show improvements across various metrics and outperform state-of-the-art instruction-tuned models trained on much larger datasets.</abstract>

    <!-- Identifiers and landing page -->
    <identifier type="citekey">das-srihari-2024-uniwiz</identifier>
    <identifier type="doi">10.18653/v1/2024.findings-acl.102</identifier>
    <location>
      <url>https://aclanthology.org/2024.findings-acl.102</url>
    </location>

    <!-- Date and page extent within the host volume -->
    <part>
      <date>2024-08</date>
      <extent unit="page">
        <start>1749</start>
        <end>1762</end>
      </extent>
    </part>

  </mods>
</modsCollection>
%0 Conference Proceedings
%T UNIWIZ: A Unified Large Language Model Orchestrated Wizard for Safe Knowledge Grounded Conversations
%A Das, Souvik
%A Srihari, Rohini
%Y Ku, Lun-Wei
%Y Martins, Andre
%Y Srikumar, Vivek
%S Findings of the Association for Computational Linguistics: ACL 2024
%D 2024
%8 August
%I Association for Computational Linguistics
%C Bangkok, Thailand
%F das-srihari-2024-uniwiz
%X Large Language Models (LLMs) have made significant progress in integrating safety and knowledge alignment. However, adversarial actors can manipulate these models into generating unsafe responses, and excessive safety alignment can lead to unintended hallucinations. To address these challenges, we introduce UniWiz, a novel 2-step data orchestration framework that unifies safety and knowledge data generation. We propose a “safety-priming” method to generate synthetic safety data and overcome safety bottlenecks. We also inject relevant knowledge into conversations by retrieving factual information from curated sources. UniWiz dataset consists of 17,638 quality-controlled conversations and 10,000 augmented preference data. Pretrained models fine-tuned on UniWiz show improvements across various metrics and outperform state-of-the-art instruction-tuned models trained on much larger datasets.
%R 10.18653/v1/2024.findings-acl.102
%U https://aclanthology.org/2024.findings-acl.102
%U https://doi.org/10.18653/v1/2024.findings-acl.102
%P 1749-1762
Markdown (Informal)
[UNIWIZ: A Unified Large Language Model Orchestrated Wizard for Safe Knowledge Grounded Conversations](https://aclanthology.org/2024.findings-acl.102) (Das & Srihari, Findings 2024)
ACL