Publications
Search
Bonial, Claire; Lukin, Stephanie M.; Abrams, Mitchell; Baker, Anthony; Donatelli, Lucia; Foots, Ashley; Hayes, Cory J.; Henry, Cassidy; Hudson, Taylor; Marge, Matthew; Pollard, Kimberly A.; Artstein, Ron; Traum, David; Voss, Clare R.
Human–robot dialogue annotation for multi-modal common ground Journal Article
In: Lang Resources & Evaluation, 2024, ISSN: 1574-020X, 1574-0218.
% Journal article (Language Resources and Evaluation, 2024). The journal name is stored in full so that no bare ampersand reaches LaTeX.
@article{bonial_humanrobot_2024,
  title = {Human–robot dialogue annotation for multi-modal common ground},
  author = {Claire Bonial and Stephanie M. Lukin and Mitchell Abrams and Anthony Baker and Lucia Donatelli and Ashley Foots and Cory J. Hayes and Cassidy Henry and Taylor Hudson and Matthew Marge and Kimberly A. Pollard and Ron Artstein and David Traum and Clare R. Voss},
  url = {https://link.springer.com/10.1007/s10579-024-09784-2},
  doi = {10.1007/s10579-024-09784-2},
  issn = {1574-020X, 1574-0218},
  year = {2024},
  date = {2024-11-01},
  urldate = {2024-12-05},
  journal = {Language Resources and Evaluation},
  keywords = {},
  pubstate = {published},
  tppubtype = {article}
}
Georgila, Kallirroi
Comparing Pre-Trained Embeddings and Domain-Independent Features for Regression-Based Evaluation of Task-Oriented Dialogue Systems Proceedings Article
In: Proceedings of the 25th Annual Meeting of the Special Interest Group on Discourse and Dialogue, pp. 610–623, Association for Computational Linguistics, Kyoto, Japan, 2024.
% Conference paper, SIGDIAL 2024 (25th meeting). The page range uses the BibTeX double hyphen.
@inproceedings{georgila_comparing_2024,
  title = {Comparing Pre-Trained Embeddings and Domain-Independent Features for Regression-Based Evaluation of Task-Oriented Dialogue Systems},
  author = {Kallirroi Georgila},
  url = {https://aclanthology.org/2024.sigdial-1.52},
  doi = {10.18653/v1/2024.sigdial-1.52},
  year = {2024},
  date = {2024-09-01},
  urldate = {2024-10-15},
  booktitle = {Proceedings of the 25th Annual Meeting of the Special Interest Group on Discourse and Dialogue},
  pages = {610--623},
  publisher = {Association for Computational Linguistics},
  address = {Kyoto, Japan},
  keywords = {},
  pubstate = {published},
  tppubtype = {inproceedings}
}
Gunasekara, Chulaka; Kim, Seokhwan; D'Haro, Luis Fernando; Rastogi, Abhinav; Chen, Yun-Nung; Eric, Mihail; Hedayatnia, Behnam; Gopalakrishnan, Karthik; Liu, Yang; Huang, Chao-Wei; Hakkani-Tür, Dilek; Li, Jinchao; Zhu, Qi; Luo, Lingxiao; Liden, Lars; Huang, Kaili; Shayandeh, Shahin; Liang, Runze; Peng, Baolin; Zhang, Zheng; Shukla, Swadheen; Huang, Minlie; Gao, Jianfeng; Mehri, Shikib; Feng, Yulan; Gordon, Carla; Alavi, Seyed Hossein; Traum, David; Eskenazi, Maxine; Beirami, Ahmad; Cho, Eunjoon; Crook, Paul A.; De, Ankita; Geramifard, Alborz; Kottur, Satwik; Moon, Seungwhan; Poddar, Shivani; Subba, Rajen
Overview of the Ninth Dialog System Technology Challenge: DSTC9 Journal Article
In: IEEE/ACM Trans. Audio Speech Lang. Process., pp. 1–10, 2024, ISSN: 2329-9290, 2329-9304.
% Journal article giving the DSTC9 challenge overview. The journal name is stored in full; the style decides whether to abbreviate it.
@article{gunasekara_overview_2024,
  title = {Overview of the Ninth Dialog System Technology Challenge: DSTC9},
  author = {Chulaka Gunasekara and Seokhwan Kim and Luis Fernando D'Haro and Abhinav Rastogi and Yun-Nung Chen and Mihail Eric and Behnam Hedayatnia and Karthik Gopalakrishnan and Yang Liu and Chao-Wei Huang and Dilek Hakkani-Tür and Jinchao Li and Qi Zhu and Lingxiao Luo and Lars Liden and Kaili Huang and Shahin Shayandeh and Runze Liang and Baolin Peng and Zheng Zhang and Swadheen Shukla and Minlie Huang and Jianfeng Gao and Shikib Mehri and Yulan Feng and Carla Gordon and Seyed Hossein Alavi and David Traum and Maxine Eskenazi and Ahmad Beirami and Eunjoon Cho and Paul A. Crook and Ankita De and Alborz Geramifard and Satwik Kottur and Seungwhan Moon and Shivani Poddar and Rajen Subba},
  url = {https://ieeexplore.ieee.org/document/10595468/},
  doi = {10.1109/TASLP.2024.3426331},
  issn = {2329-9290, 2329-9304},
  year = {2024},
  date = {2024-07-01},
  urldate = {2024-08-15},
  journal = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
  pages = {1--10},
  keywords = {},
  pubstate = {published},
  tppubtype = {article}
}
Artstein, Ron; Chen, Elizabeth
Augmenting Training Data for a Virtual Character Using GPT-3.5 Proceedings Article
In: The Florida Artificial Intelligence Research Society, 2024.
% Conference paper in volume 37 of the FLAIRS proceedings. The citation key is kept unchanged so existing citations still resolve.
% The abstract has been cleaned of PDF line-break hyphenation and fused words.
@inproceedings{artstein_augmenting_nodate,
  title = {Augmenting Training Data for a Virtual Character Using GPT-3.5},
  author = {Ron Artstein and Elizabeth Chen},
  url = {https://journals.flvc.org/FLAIRS/article/view/135552},
  year = {2024},
  date = {2024-05-13},
  booktitle = {The International FLAIRS Conference Proceedings},
  volume = {37},
  publisher = {The Florida Artificial Intelligence Research Society},
  abstract = {This paper compares different methods of using a large language model (GPT-3.5) for creating synthetic training data for a retrieval-based conversational character. The training data are in the form of linked questions and answers, which allow a classifier to retrieve a pre-recorded answer to an unseen question; the intuition is that a large language model could predict what human users might ask, thus saving the effort of collecting real user questions as training data. Results show small improvements in test performance for all synthetic datasets. However, a classifier trained on only small amounts of collected user data resulted in a higher F-score than the classifiers trained on much larger amounts of synthetic data generated using GPT-3.5. Based on these results, we see a potential in using large language models for generating training data, but at this point it is not as valuable as collecting actual user data for training.},
  keywords = {},
  pubstate = {published},
  tppubtype = {inproceedings}
}
Georgila, Kallirroi; Traum, David
Evaluation of Off-the-shelf Whisper Models for Speech Recognition Across Diverse Dialogue Domains Proceedings Article
In: Proceedings of the 14th International Workshop on Spoken Dialogue Systems Technology, Sapporo, Japan, 2024.
% Workshop paper, IWSDS 2024. The URL is the plain https address of the PDF, with no browser-extension viewer prefix.
@inproceedings{georgila_evaluation_2024,
  title = {Evaluation of Off-the-shelf Whisper Models for Speech Recognition Across Diverse Dialogue Domains},
  author = {Kallirroi Georgila and David Traum},
  url = {https://kgeorgila.github.io/publications/georgila_iwsds24.pdf},
  year = {2024},
  date = {2024-03-01},
  booktitle = {Proceedings of the 14th International Workshop on Spoken Dialogue Systems Technology},
  address = {Sapporo, Japan},
  keywords = {},
  pubstate = {published},
  tppubtype = {inproceedings}
}
Harris, Vera; Braggs, Robert; Traum, David
I’m not sure I heard you right, but I think I know what you mean – investigations into the impact of speech recognition errors on response selection for a virtual human. Proceedings Article
In: Sapporo, Japan, 2024.
% Workshop paper; the PDF filename in the URL identifies the venue as IWSDS 2024 (Sapporo, March 2024).
% NOTE(review): booktitle is copied from the sibling IWSDS 2024 entry; confirm against the published proceedings.
@inproceedings{harris_im_2024,
  title = {I’m not sure I heard you right, but I think I know what you mean – investigations into the impact of speech recognition errors on response selection for a virtual human},
  author = {Vera Harris and Robert Braggs and David Traum},
  url = {https://people.ict.usc.edu/~traum/Papers/23-harris-iwsds2024.pdf},
  year = {2024},
  date = {2024-03-01},
  booktitle = {Proceedings of the 14th International Workshop on Spoken Dialogue Systems Technology},
  address = {Sapporo, Japan},
  keywords = {},
  pubstate = {published},
  tppubtype = {inproceedings}
}
Brixey, Jacqueline; Traum, David
Why should a dialogue system speak more than one language? Proceedings Article
In: Sapporo, Japan, 2024.
% Workshop paper presented in Sapporo, March 2024. The URL is the plain https address of the PDF.
% NOTE(review): booktitle is inferred from the matching date and location of the IWSDS 2024 entries; confirm.
@inproceedings{brixey_why_2024,
  title = {Why should a dialogue system speak more than one language?},
  author = {Jacqueline Brixey and David Traum},
  url = {https://people.ict.usc.edu/~traum/Papers/24-Why%20should%20a%20dialogue%20system%20speak%20more%20than%20one%20language.pdf},
  year = {2024},
  date = {2024-03-01},
  booktitle = {Proceedings of the 14th International Workshop on Spoken Dialogue Systems Technology},
  address = {Sapporo, Japan},
  keywords = {},
  pubstate = {published},
  tppubtype = {inproceedings}
}
Singh, Ishika; Traum, David; Thomason, Jesse
TwoStep: Multi-agent Task Planning using Classical Planners and Large Language Models Miscellaneous
2024, (arXiv:2403.17246 [cs]).
% arXiv preprint. The identifier is recorded in the structured eprint fields; the original free-text note is kept for renderers that display it.
@misc{singh_twostep_2024,
  title = {TwoStep: Multi-agent Task Planning using Classical Planners and Large Language Models},
  author = {Ishika Singh and David Traum and Jesse Thomason},
  url = {http://arxiv.org/abs/2403.17246},
  eprint = {2403.17246},
  eprinttype = {arXiv},
  eprintclass = {cs},
  year = {2024},
  date = {2024-03-01},
  urldate = {2024-08-15},
  publisher = {arXiv},
  abstract = {Classical planning formulations like the Planning Domain Definition Language (PDDL) admit action sequences guaranteed to achieve a goal state given an initial state if any are possible. However, reasoning problems defined in PDDL do not capture temporal aspects of action taking, for example that two agents in the domain can execute an action simultaneously if postconditions of each do not interfere with preconditions of the other. A human expert can decompose a goal into largely independent constituent parts and assign each agent to one of these subgoals to take advantage of simultaneous actions for faster execution of plan steps, each using only single agent planning. By contrast, large language models (LLMs) used for directly inferring plan steps do not guarantee execution success, but do leverage commonsense reasoning to assemble action sequences. We combine the strengths of classical planning and LLMs by approximating human intuitions for two-agent planning goal decomposition. We demonstrate that LLM-based goal decomposition leads to faster planning times than solving multi-agent PDDL problems directly while simultaneously achieving fewer plan execution steps than a single agent plan alone and preserving execution success. Additionally, we find that LLM-based approximations of subgoals can achieve similar multi-agent execution steps than those specified by human experts. Website and resources at https://glamor-usc.github.io/twostep},
  note = {arXiv:2403.17246 [cs]},
  keywords = {},
  pubstate = {published},
  tppubtype = {misc}
}
Frummet, Alexander; Speggiorin, Alessandro; Elsweiler, David; Leuski, Anton; Dalton, Jeff
Cooking with Conversation: Enhancing User Engagement and Learning with a Knowledge-Enhancing Assistant Journal Article
In: ACM Trans. Inf. Syst., pp. 3649500, 2024, ISSN: 1046-8188, 1558-2868.
% Journal article (ACM Transactions on Information Systems). The pages field holds the ACM article number, not a page range.
% The abstract is stored as one paragraph; it had been split around words that were emphasised in the source page.
@article{frummet_cooking_2024,
  title = {Cooking with Conversation: Enhancing User Engagement and Learning with a Knowledge-Enhancing Assistant},
  author = {Alexander Frummet and Alessandro Speggiorin and David Elsweiler and Anton Leuski and Jeff Dalton},
  url = {https://dl.acm.org/doi/10.1145/3649500},
  doi = {10.1145/3649500},
  issn = {1046-8188, 1558-2868},
  year = {2024},
  date = {2024-03-01},
  urldate = {2024-04-16},
  journal = {ACM Transactions on Information Systems},
  pages = {3649500},
  abstract = {We present two empirical studies to investigate users’ expectations and behaviours when using digital assistants, such as Alexa and Google Home, in a kitchen context: First, a survey (N=200) queries participants on their expectations for the kinds of information that such systems should be able to provide. While consensus exists on expecting information about cooking steps and processes, younger participants who enjoy cooking express a higher likelihood of expecting details on food history or the science of cooking. In a follow-up Wizard-of-Oz study (N = 48), users were guided through the steps of a recipe either by an active wizard that alerted participants to information it could provide or a passive wizard who only answered questions that were provided by the user. The active policy led to almost double the number of conversational utterances and 1.5 times more knowledge-related user questions compared to the passive policy. Also, it resulted in 1.7 times more knowledge communicated than the passive policy. We discuss the findings in the context of related work and reveal implications for the design and use of such assistants for cooking and other purposes such as DIY and craft tasks, as well as the lessons we learned for evaluating such systems.},
  keywords = {},
  pubstate = {published},
  tppubtype = {article}
}
active wizard that alerted participants to information it could provide or a passive wizard who only answered questions that were provided by the user. The active policy led to almost double the number of conversational utterances and 1.5 times more knowledge-related user questions compared to the passive policy. Also, it resulted in 1.7 times more knowledge communicated than the passive policy. We discuss the findings in the context of related work and reveal implications for the design and use of such assistants for cooking and other purposes such as DIY and craft tasks, as well as the lessons we learned for evaluating such systems.
Gilani, Setareh Nasihati; Pollard, Kimberly; Traum, David
Multimodal Prediction of User's Performance in High-Stress Dialogue Interactions Proceedings Article
In: International Conference on Multimodal Interaction, pp. 71–75, ACM, Paris, France, 2023, ISBN: 979-8-4007-0321-8.
% Conference paper, ICMI 2023. Authors are given in "Last, First" form so the two-word surname "Nasihati Gilani"
% (the form used in the citation key) is not split into a middle name plus surname.
@inproceedings{nasihati_gilani_multimodal_2023,
  title = {Multimodal Prediction of User's Performance in High-Stress Dialogue Interactions},
  author = {Nasihati Gilani, Setareh and Pollard, Kimberly and Traum, David},
  url = {https://dl.acm.org/doi/10.1145/3610661.3617166},
  doi = {10.1145/3610661.3617166},
  isbn = {979-8-4007-0321-8},
  year = {2023},
  date = {2023-10-01},
  urldate = {2023-12-07},
  booktitle = {International Conference on Multimodal Interaction},
  pages = {71--75},
  publisher = {ACM},
  address = {Paris, France},
  keywords = {},
  pubstate = {published},
  tppubtype = {inproceedings}
}
Lukin, Stephanie M.; Pollard, Kimberly A.; Bonial, Claire; Hudson, Taylor; Artstein, Ron; Voss, Clare; Traum, David
Navigating to Success in Multi-Modal Human-Robot Collaboration: Analysis and Corpus Release Miscellaneous
2023, (arXiv:2310.17568 [cs]).
% arXiv preprint announcing the SCOUT corpus. The co-author surname is spelled "Artstein", as in the other entries of this file.
% The arXiv identifier is also recorded in the structured eprint fields.
@misc{lukin_navigating_2023,
  title = {Navigating to Success in Multi-Modal Human-Robot Collaboration: Analysis and Corpus Release},
  author = {Stephanie M. Lukin and Kimberly A. Pollard and Claire Bonial and Taylor Hudson and Ron Artstein and Clare Voss and David Traum},
  url = {http://arxiv.org/abs/2310.17568},
  eprint = {2310.17568},
  eprinttype = {arXiv},
  eprintclass = {cs},
  year = {2023},
  date = {2023-10-01},
  urldate = {2023-12-07},
  publisher = {arXiv},
  abstract = {Human-guided robotic exploration is a useful approach to gathering information at remote locations, especially those that might be too risky, inhospitable, or inaccessible for humans. Maintaining common ground between the remotely-located partners is a challenge, one that can be facilitated by multi-modal communication. In this paper, we explore how participants utilized multiple modalities to investigate a remote location with the help of a robotic partner. Participants issued spoken natural language instructions and received from the robot: text-based feedback, continuous 2D LIDAR mapping, and upon-request static photographs. We noticed that different strategies were adopted in terms of use of the modalities, and hypothesize that these differences may be correlated with success at several exploration sub-tasks. We found that requesting photos may have improved the identification and counting of some key entities (doorways in particular) and that this strategy did not hinder the amount of overall area exploration. Future work with larger samples may reveal the effects of more nuanced photo and dialogue strategies, which can inform the training of robotic agents. Additionally, we announce the release of our unique multi-modal corpus of human-robot communication in an exploration context: SCOUT, the Situated Corpus on Understanding Transactions.},
  note = {arXiv:2310.17568 [cs]},
  keywords = {},
  pubstate = {published},
  tppubtype = {misc}
}
Gainer, Alesia; Aptaker, Allison; Artstein, Ron; Cobbins, David; Core, Mark; Gordon, Carla; Leuski, Anton; Li, Zongjian; Merchant, Chirag; Nelson, David; Soleymani, Mohammad; Traum, David
DIVIS: Digital Interactive Victim Intake Simulator Proceedings Article
In: Proceedings of the 23rd ACM International Conference on Intelligent Virtual Agents, pp. 1–2, ACM, Würzburg, Germany, 2023, ISBN: 978-1-4503-9994-4.
% Conference paper, IVA 2023 (two-page demo-length entry). City and country are comma-separated and the page range uses a double hyphen.
@inproceedings{gainer_divis_2023,
  title = {DIVIS: Digital Interactive Victim Intake Simulator},
  author = {Alesia Gainer and Allison Aptaker and Ron Artstein and David Cobbins and Mark Core and Carla Gordon and Anton Leuski and Zongjian Li and Chirag Merchant and David Nelson and Mohammad Soleymani and David Traum},
  url = {https://dl.acm.org/doi/10.1145/3570945.3607328},
  doi = {10.1145/3570945.3607328},
  isbn = {978-1-4503-9994-4},
  year = {2023},
  date = {2023-09-01},
  urldate = {2024-02-20},
  booktitle = {Proceedings of the 23rd ACM International Conference on Intelligent Virtual Agents},
  pages = {1--2},
  publisher = {ACM},
  address = {Würzburg, Germany},
  keywords = {},
  pubstate = {published},
  tppubtype = {inproceedings}
}
Pal, Debaditya; Leuski, Anton; Traum, David
Comparing Statistical Models for Retrieval based Question-answering Dialogue: BERT vs Relevance Models Journal Article
In: FLAIRS, vol. 36, 2023, ISSN: 2334-0762.
% Paper in volume 36 of the FLAIRS proceedings, which are catalogued as a serial with an ISSN.
% The tilde before "10,000" in the abstract is written as a proper LaTeX macro; the backslash had been lost.
@article{pal_comparing_2023,
  title = {Comparing Statistical Models for Retrieval based Question-answering Dialogue: BERT vs Relevance Models},
  author = {Debaditya Pal and Anton Leuski and David Traum},
  url = {https://journals.flvc.org/FLAIRS/article/view/133386},
  doi = {10.32473/flairs.36.133386},
  issn = {2334-0762},
  year = {2023},
  date = {2023-05-01},
  urldate = {2023-08-23},
  journal = {The International FLAIRS Conference Proceedings},
  volume = {36},
  abstract = {In this paper, we compare the performance of four models in a retrieval based question answering dialogue task on two moderately sized corpora ({\textasciitilde}10,000 utterances). One model is a statistical model and uses cross language relevance while the others are deep neural networks utilizing the BERT architecture along with different retrieval methods. The statistical model has previously outperformed LSTM based neural networks in a similar task whereas BERT has been proven to perform well on a variety of NLP tasks, achieving state-of-the-art results in many of them. Results show that the statistical cross language relevance model outperforms the BERT based architectures in learning question-answer mappings. BERT achieves better results by mapping new questions to existing questions.},
  keywords = {},
  pubstate = {published},
  tppubtype = {article}
}
Georgila, Kallirroi
Considerations for Child Speech Synthesis for Dialogue Systems Proceedings Article
In: Los Angeles, CA, 2023.
% Short position paper on child speech synthesis for dialogue systems (Los Angeles, 2023).
% NOTE(review): no booktitle is recorded; the venue cannot be determined from this entry alone and should be confirmed.
@inproceedings{georgila_considerations_2023,
  author = {Kallirroi Georgila},
  title = {Considerations for Child Speech Synthesis for Dialogue Systems},
  address = {Los Angeles, CA},
  year = {2023},
  date = {2023-03-01},
  url = {https://kgeorgila.github.io/publications/georgila_aiaic23.pdf},
  abstract = {We present a number of important issues for consideration with regard to child speech synthesis for dialogue systems. We specifically discuss challenges in building child synthetic voices compared to adult synthetic voices, synthesizing expressive conversational speech, and evaluating speech synthesis quality.},
  keywords = {},
  pubstate = {published},
  tppubtype = {inproceedings}
}
Traum, David
Socially Interactive Agent Dialogue Book Section
In: The Handbook on Socially Interactive Agents: 20 years of Research on Embodied Conversational Agents, Intelligent Virtual Agents, and Social Robotics Volume 2: Interactivity, Platforms, Application, vol. 48, pp. 45–76, Association for Computing Machinery, New York, NY, USA, 2022, ISBN: 978-1-4503-9896-1.
% Book chapter in volume 2 of the Handbook on Socially Interactive Agents (ACM).
% The DOI embedded in the URL is also stored in its own field so DOI-aware styles can use it.
@incollection{traum_socially_2022,
  title = {Socially Interactive Agent Dialogue},
  author = {David Traum},
  url = {https://doi.org/10.1145/3563659.3563663},
  doi = {10.1145/3563659.3563663},
  isbn = {978-1-4503-9896-1},
  year = {2022},
  date = {2022-11-01},
  urldate = {2023-03-31},
  booktitle = {The Handbook on Socially Interactive Agents: 20 years of Research on Embodied Conversational Agents, Intelligent Virtual Agents, and Social Robotics Volume 2: Interactivity, Platforms, Application},
  volume = {48},
  pages = {45--76},
  publisher = {Association for Computing Machinery},
  address = {New York, NY, USA},
  edition = {1},
  keywords = {},
  pubstate = {published},
  tppubtype = {incollection}
}
Brixey, Jacqueline; Traum, David
Towards an Automatic Speech Recognizer for the Choctaw language Proceedings Article
In: 1st Workshop on Speech for Social Good (S4SG), pp. 6–9, ISCA, 2022.
% Workshop paper, 1st Workshop on Speech for Social Good (ISCA, 2022). The page range uses the BibTeX double hyphen.
@inproceedings{brixey_towards_2022,
  title = {Towards an Automatic Speech Recognizer for the Choctaw language},
  author = {Jacqueline Brixey and David Traum},
  url = {https://www.isca-speech.org/archive/s4sg_2022/brixey22_s4sg.html},
  doi = {10.21437/S4SG.2022-2},
  year = {2022},
  date = {2022-09-01},
  urldate = {2023-03-31},
  booktitle = {1st Workshop on Speech for Social Good (S4SG)},
  pages = {6--9},
  publisher = {ISCA},
  keywords = {},
  pubstate = {published},
  tppubtype = {inproceedings}
}
Georgila, Kallirroi
Comparing Regression Methods for Dialogue System Evaluation on a Richly Annotated Corpus Proceedings Article
In: Proceedings of the 26th Workshop on the Semantics and Pragmatics of Dialogue - Full Papers, 2022.
% Workshop paper, SemDial 2022 (26th workshop). The abstract's fused words from PDF extraction have been separated.
@inproceedings{georgila_comparing_2022,
  title = {Comparing Regression Methods for Dialogue System Evaluation on a Richly Annotated Corpus},
  author = {Kallirroi Georgila},
  url = {http://semdial.org/anthology/papers/Z/Z22/Z22-3011/},
  year = {2022},
  date = {2022-08-01},
  urldate = {2023-03-31},
  booktitle = {Proceedings of the 26th Workshop on the Semantics and Pragmatics of Dialogue - Full Papers},
  abstract = {We compare various state-of-the-art regression methods for predicting user ratings of their interaction with a dialogue system using a richly annotated corpus. We vary the size of the training data and, in particular for kernel-based methods, we vary the type of kernel used. Furthermore, we experiment with various domain-independent features, including feature combinations that do not rely on complex annotations. We present detailed results in terms of root mean square error, and Pearson’s r and Spearman’s ρ correlations. Our results show that in many cases Gaussian Process Regression leads to modest but statistically significant gains compared to Support Vector Regression (a strong baseline), and that the type of kernel used matters. The gains are even larger when compared to linear regression. The larger the training data set the higher the gains but for some cases more data may result in over-fitting. Finally, some feature combinations work better than others but overall the best results are obtained when all features are used.},
  keywords = {},
  pubstate = {published},
  tppubtype = {inproceedings}
}
Speggiorin, Alessandro; Dalton, Jeffrey; Leuski, Anton
TaskMAD: A Platform for Multimodal Task-Centric Knowledge-Grounded Conversational Experimentation Proceedings Article
In: Proceedings of the 45th International ACM SIGIR Conference on Research and Development in Information Retrieval, pp. 3240–3244, ACM, Madrid, Spain, 2022, ISBN: 978-1-4503-8732-3.
% Conference paper, SIGIR 2022. City and country are comma-separated and the page range uses a double hyphen.
@inproceedings{speggiorin_taskmad_2022,
  title = {TaskMAD: A Platform for Multimodal Task-Centric Knowledge-Grounded Conversational Experimentation},
  author = {Alessandro Speggiorin and Jeffrey Dalton and Anton Leuski},
  url = {https://dl.acm.org/doi/10.1145/3477495.3531679},
  doi = {10.1145/3477495.3531679},
  isbn = {978-1-4503-8732-3},
  year = {2022},
  date = {2022-07-01},
  urldate = {2022-09-22},
  booktitle = {Proceedings of the 45th International ACM SIGIR Conference on Research and Development in Information Retrieval},
  pages = {3240--3244},
  publisher = {ACM},
  address = {Madrid, Spain},
  keywords = {},
  pubstate = {published},
  tppubtype = {inproceedings}
}
Tur, Ada; Traum, David
Comparing Approaches to Language Understanding for Human-Robot Dialogue: An Error Taxonomy and Analysis Proceedings Article
In: Proceedings of the Thirteenth Language Resources and Evaluation Conference, pp. 5813–5820, European Language Resources Association, Marseille, France, 2022.
% Conference paper, LREC 2022. Percent signs in the abstract are escaped so the field can be typeset without starting a TeX comment.
@inproceedings{tur_comparing_2022,
  title = {Comparing Approaches to Language Understanding for Human-Robot Dialogue: An Error Taxonomy and Analysis},
  author = {Ada Tur and David Traum},
  url = {https://aclanthology.org/2022.lrec-1.625},
  year = {2022},
  date = {2022-06-01},
  urldate = {2023-02-10},
  booktitle = {Proceedings of the Thirteenth Language Resources and Evaluation Conference},
  pages = {5813--5820},
  publisher = {European Language Resources Association},
  address = {Marseille, France},
  abstract = {In this paper, we compare two different approaches to language understanding for a human-robot interaction domain in which a human commander gives navigation instructions to a robot. We contrast a relevance-based classifier with a GPT-2 model, using about 2000 input-output examples as training data. With this level of training data, the relevance-based model outperforms the GPT-2 based model 79\% to 8\%. We also present a taxonomy of types of errors made by each model, indicating that they have somewhat different strengths and weaknesses, so we also examine the potential for a combined model.},
  keywords = {},
  pubstate = {published},
  tppubtype = {inproceedings}
}
Tadimeti, Divya; Georgila, Kallirroi; Traum, David
Evaluation of Off-the-shelf Speech Recognizers on Different Accents in a Dialogue Domain Proceedings Article
In: Proceedings of the Language Resources and Evaluation Conference, pp. 6001–6008, European Language Resources Association, Marseille, France, 2022.
% Conference paper, LREC 2022. The booktitle matches the other entry from the same proceedings (same 2022.lrec-1 anthology volume).
% Percent signs in the abstract are escaped so the field can be typeset without starting a TeX comment.
@inproceedings{tadimeti_evaluation_2022,
  title = {Evaluation of Off-the-shelf Speech Recognizers on Different Accents in a Dialogue Domain},
  author = {Divya Tadimeti and Kallirroi Georgila and David Traum},
  url = {https://aclanthology.org/2022.lrec-1.645},
  year = {2022},
  date = {2022-06-01},
  booktitle = {Proceedings of the Thirteenth Language Resources and Evaluation Conference},
  pages = {6001--6008},
  publisher = {European Language Resources Association},
  address = {Marseille, France},
  abstract = {We evaluate several publicly available off-the-shelf (commercial and research) automatic speech recognition (ASR) systems on dialogue agent-directed English speech from speakers with General American vs. non-American accents. Our results show that the performance of the ASR systems for non-American accents is considerably worse than for General American accents. Depending on the recognizer, the absolute difference in performance between General American accents and all non-American accents combined can vary approximately from 2\% to 12\%, with relative differences varying approximately between 16\% and 49\%. This drop in performance becomes even larger when we consider specific categories of non-American accents indicating a need for more diligent collection of and training on non-native English speaker data in order to narrow this performance gap. There are performance differences across ASR systems, and while the same general pattern holds, with more errors for non-American accents, there are some accents for which the best recognizer is different than in the overall case. We expect these results to be useful for dialogue system designers in developing more robust inclusive dialogue systems, and for ASR providers in taking into account performance requirements for different accents.},
  keywords = {},
  pubstate = {published},
  tppubtype = {inproceedings}
}
Filter
2024
Bonial, Claire; Lukin, Stephanie M.; Abrams, Mitchell; Baker, Anthony; Donatelli, Lucia; Foots, Ashley; Hayes, Cory J.; Henry, Cassidy; Hudson, Taylor; Marge, Matthew; Pollard, Kimberly A.; Artstein, Ron; Traum, David; Voss, Clare R.
Human–robot dialogue annotation for multi-modal common ground Journal Article
In: Lang Resources & Evaluation, 2024, ISSN: 1574-020X, 1574-0218.
% Journal article (Language Resources and Evaluation, 2024). The journal name is stored in full so that no bare ampersand reaches LaTeX.
@article{bonial_humanrobot_2024,
  title = {Human–robot dialogue annotation for multi-modal common ground},
  author = {Claire Bonial and Stephanie M. Lukin and Mitchell Abrams and Anthony Baker and Lucia Donatelli and Ashley Foots and Cory J. Hayes and Cassidy Henry and Taylor Hudson and Matthew Marge and Kimberly A. Pollard and Ron Artstein and David Traum and Clare R. Voss},
  url = {https://link.springer.com/10.1007/s10579-024-09784-2},
  doi = {10.1007/s10579-024-09784-2},
  issn = {1574-020X, 1574-0218},
  year = {2024},
  date = {2024-11-01},
  urldate = {2024-12-05},
  journal = {Language Resources and Evaluation},
  keywords = {},
  pubstate = {published},
  tppubtype = {article}
}
Georgila, Kallirroi
Comparing Pre-Trained Embeddings and Domain-Independent Features for Regression-Based Evaluation of Task-Oriented Dialogue Systems Proceedings Article
In: Proceedings of the 25th Annual Meeting of the Special Interest Group on Discourse and Dialogue, pp. 610–623, Association for Computational Linguistics, Kyoto, Japan, 2024.
% Conference paper, SIGDIAL 2024 (25th meeting). The page range uses the BibTeX double hyphen.
@inproceedings{georgila_comparing_2024,
  title = {Comparing Pre-Trained Embeddings and Domain-Independent Features for Regression-Based Evaluation of Task-Oriented Dialogue Systems},
  author = {Kallirroi Georgila},
  url = {https://aclanthology.org/2024.sigdial-1.52},
  doi = {10.18653/v1/2024.sigdial-1.52},
  year = {2024},
  date = {2024-09-01},
  urldate = {2024-10-15},
  booktitle = {Proceedings of the 25th Annual Meeting of the Special Interest Group on Discourse and Dialogue},
  pages = {610--623},
  publisher = {Association for Computational Linguistics},
  address = {Kyoto, Japan},
  keywords = {},
  pubstate = {published},
  tppubtype = {inproceedings}
}
Gunasekara, Chulaka; Kim, Seokhwan; D'Haro, Luis Fernando; Rastogi, Abhinav; Chen, Yun-Nung; Eric, Mihail; Hedayatnia, Behnam; Gopalakrishnan, Karthik; Liu, Yang; Huang, Chao-Wei; Hakkani-Tür, Dilek; Li, Jinchao; Zhu, Qi; Luo, Lingxiao; Liden, Lars; Huang, Kaili; Shayandeh, Shahin; Liang, Runze; Peng, Baolin; Zhang, Zheng; Shukla, Swadheen; Huang, Minlie; Gao, Jianfeng; Mehri, Shikib; Feng, Yulan; Gordon, Carla; Alavi, Seyed Hossein; Traum, David; Eskenazi, Maxine; Beirami, Ahmad; Cho, Eunjoon; Crook, Paul A.; De, Ankita; Geramifard, Alborz; Kottur, Satwik; Moon, Seungwhan; Poddar, Shivani; Subba, Rajen
Overview of the Ninth Dialog System Technology Challenge: DSTC9 Journal Article
In: IEEE/ACM Trans. Audio Speech Lang. Process., pp. 1–10, 2024, ISSN: 2329-9290, 2329-9304.
% Journal article giving the DSTC9 challenge overview. The journal name is stored in full; the style decides whether to abbreviate it.
@article{gunasekara_overview_2024,
  title = {Overview of the Ninth Dialog System Technology Challenge: DSTC9},
  author = {Chulaka Gunasekara and Seokhwan Kim and Luis Fernando D'Haro and Abhinav Rastogi and Yun-Nung Chen and Mihail Eric and Behnam Hedayatnia and Karthik Gopalakrishnan and Yang Liu and Chao-Wei Huang and Dilek Hakkani-Tür and Jinchao Li and Qi Zhu and Lingxiao Luo and Lars Liden and Kaili Huang and Shahin Shayandeh and Runze Liang and Baolin Peng and Zheng Zhang and Swadheen Shukla and Minlie Huang and Jianfeng Gao and Shikib Mehri and Yulan Feng and Carla Gordon and Seyed Hossein Alavi and David Traum and Maxine Eskenazi and Ahmad Beirami and Eunjoon Cho and Paul A. Crook and Ankita De and Alborz Geramifard and Satwik Kottur and Seungwhan Moon and Shivani Poddar and Rajen Subba},
  url = {https://ieeexplore.ieee.org/document/10595468/},
  doi = {10.1109/TASLP.2024.3426331},
  issn = {2329-9290, 2329-9304},
  year = {2024},
  date = {2024-07-01},
  urldate = {2024-08-15},
  journal = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
  pages = {1--10},
  keywords = {},
  pubstate = {published},
  tppubtype = {article}
}
Artstein, Ron; Chen, Elizabeth
Augmenting Training Data for a Virtual Character Using GPT-3.5 Proceedings Article
In: The Florida Artificial Intelligence Research Society, 2024.
Abstract | Links | BibTeX | Tags:
% Conference paper in volume 37 of the FLAIRS proceedings. The citation key is kept unchanged so existing citations still resolve.
% The abstract has been cleaned of PDF line-break hyphenation and fused words.
@inproceedings{artstein_augmenting_nodate,
  title = {Augmenting Training Data for a Virtual Character Using GPT-3.5},
  author = {Ron Artstein and Elizabeth Chen},
  url = {https://journals.flvc.org/FLAIRS/article/view/135552},
  year = {2024},
  date = {2024-05-13},
  booktitle = {The International FLAIRS Conference Proceedings},
  volume = {37},
  publisher = {The Florida Artificial Intelligence Research Society},
  abstract = {This paper compares different methods of using a large language model (GPT-3.5) for creating synthetic training data for a retrieval-based conversational character. The training data are in the form of linked questions and answers, which allow a classifier to retrieve a pre-recorded answer to an unseen question; the intuition is that a large language model could predict what human users might ask, thus saving the effort of collecting real user questions as training data. Results show small improvements in test performance for all synthetic datasets. However, a classifier trained on only small amounts of collected user data resulted in a higher F-score than the classifiers trained on much larger amounts of synthetic data generated using GPT-3.5. Based on these results, we see a potential in using large language models for generating training data, but at this point it is not as valuable as collecting actual user data for training.},
  keywords = {},
  pubstate = {published},
  tppubtype = {inproceedings}
}
Georgila, Kallirroi; Traum, David
Evaluation of Off-the-shelf Whisper Models for Speech Recognition Across Diverse Dialogue Domains Proceedings Article
In: Proceedings of the 14th International Workshop on Spoken Dialogue Systems Technology, Sapporo, Japan, 2024.
@inproceedings{georgila_evaluation_2024,
title = {Evaluation of Off-the-shelf Whisper Models for Speech Recognition Across Diverse Dialogue Domains},
author = {Kallirroi Georgila and David Traum},
url = {https://kgeorgila.github.io/publications/georgila_iwsds24.pdf},
year = {2024},
date = {2024-03-01},
booktitle = {Proceedings of the 14th International Workshop on Spoken Dialogue Systems Technology},
address = {Sapporo, Japan},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Harris, Vera; Braggs, Robert; Traum, David
I’m not sure I heard you right, but I think I know what you mean – investigations into the impact of speech recognition errors on response selection for a virtual human. Proceedings Article
In: Sapporo, Japan, 2024.
@inproceedings{harris_im_2024,
title = {I’m not sure I heard you right, but I think I know what you mean – investigations into the impact of speech recognition errors on response selection for a virtual human},
author = {Vera Harris and Robert Braggs and David Traum},
url = {https://people.ict.usc.edu/~traum/Papers/23-harris-iwsds2024.pdf},
year = {2024},
date = {2024-03-01},
booktitle = {Proceedings of the 14th International Workshop on Spoken Dialogue Systems Technology},
address = {Sapporo, Japan},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Brixey, Jacqueline; Traum, David
Why should a dialogue system speak more than one language? Proceedings Article
In: Sapporo, Japan, 2024.
@inproceedings{brixey_why_2024,
title = {Why should a dialogue system speak more than one language?},
author = {Jacqueline Brixey and David Traum},
url = {https://people.ict.usc.edu/~traum/Papers/24-Why%20should%20a%20dialogue%20system%20speak%20more%20than%20one%20language.pdf},
year = {2024},
date = {2024-03-01},
address = {Sapporo, Japan},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Singh, Ishika; Traum, David; Thomason, Jesse
TwoStep: Multi-agent Task Planning using Classical Planners and Large Language Models Miscellaneous
2024, (arXiv:2403.17246 [cs]).
Abstract | Links | BibTeX | Tags:
@misc{singh_twostep_2024,
title = {TwoStep: Multi-agent Task Planning using Classical Planners and Large Language Models},
author = {Ishika Singh and David Traum and Jesse Thomason},
url = {http://arxiv.org/abs/2403.17246},
year = {2024},
date = {2024-03-01},
urldate = {2024-08-15},
publisher = {arXiv},
abstract = {Classical planning formulations like the Planning Domain Definition Language (PDDL) admit action sequences guaranteed to achieve a goal state given an initial state if any are possible. However, reasoning problems defined in PDDL do not capture temporal aspects of action taking, for example that two agents in the domain can execute an action simultaneously if postconditions of each do not interfere with preconditions of the other. A human expert can decompose a goal into largely independent constituent parts and assign each agent to one of these subgoals to take advantage of simultaneous actions for faster execution of plan steps, each using only single agent planning. By contrast, large language models (LLMs) used for directly inferring plan steps do not guarantee execution success, but do leverage commonsense reasoning to assemble action sequences. We combine the strengths of classical planning and LLMs by approximating human intuitions for two-agent planning goal decomposition. We demonstrate that LLM-based goal decomposition leads to faster planning times than solving multi-agent PDDL problems directly while simultaneously achieving fewer plan execution steps than a single agent plan alone and preserving execution success. Additionally, we find that LLM-based approximations of subgoals can achieve similar multi-agent execution steps than those specified by human experts. Website and resources at https://glamor-usc.github.io/twostep},
note = {arXiv:2403.17246 [cs]},
keywords = {},
pubstate = {published},
tppubtype = {misc}
}
Frummet, Alexander; Speggiorin, Alessandro; Elsweiler, David; Leuski, Anton; Dalton, Jeff
Cooking with Conversation: Enhancing User Engagement and Learning with a Knowledge-Enhancing Assistant Journal Article
In: ACM Trans. Inf. Syst., pp. 3649500, 2024, ISSN: 1046-8188, 1558-2868.
Abstract | Links | BibTeX | Tags:
@article{frummet_cooking_2024,
title = {Cooking with Conversation: Enhancing User Engagement and Learning with a Knowledge-Enhancing Assistant},
author = {Alexander Frummet and Alessandro Speggiorin and David Elsweiler and Anton Leuski and Jeff Dalton},
url = {https://dl.acm.org/doi/10.1145/3649500},
doi = {10.1145/3649500},
issn = {1046-8188, 1558-2868},
year = {2024},
date = {2024-03-01},
urldate = {2024-04-16},
journal = {ACM Trans. Inf. Syst.},
pages = {3649500},
abstract = {We present two empirical studies to investigate users’ expectations and behaviours when using digital assistants, such as Alexa and Google Home, in a kitchen context: First, a survey (N=200) queries participants on their expectations for the kinds of information that such systems should be able to provide. While consensus exists on expecting information about cooking steps and processes, younger participants who enjoy cooking express a higher likelihood of expecting details on food history or the science of cooking. In a follow-up Wizard-of-Oz study (N = 48), users were guided through the steps of a recipe either by an active wizard that alerted participants to information it could provide or a passive wizard who only answered questions that were provided by the user. The active policy led to almost double the number of conversational utterances and 1.5 times more knowledge-related user questions compared to the passive policy. Also, it resulted in 1.7 times more knowledge communicated than the passive policy. We discuss the findings in the context of related work and reveal implications for the design and use of such assistants for cooking and other purposes such as DIY and craft tasks, as well as the lessons we learned for evaluating such systems.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
active wizard that alerted participants to information it could provide or a passive wizard who only answered questions that were provided by the user. The active policy led to almost double the number of conversational utterances and 1.5 times more knowledge-related user questions compared to the passive policy. Also, it resulted in 1.7 times more knowledge communicated than the passive policy. We discuss the findings in the context of related work and reveal implications for the design and use of such assistants for cooking and other purposes such as DIY and craft tasks, as well as the lessons we learned for evaluating such systems.
2023
Nasihati Gilani, Setareh; Pollard, Kimberly; Traum, David
Multimodal Prediction of User's Performance in High-Stress Dialogue Interactions Proceedings Article
In: International Conference on Multimodal Interaction, pp. 71–75, ACM, Paris France, 2023, ISBN: 979-8-4007-0321-8.
@inproceedings{nasihati_gilani_multimodal_2023,
title = {Multimodal Prediction of User's Performance in High-Stress Dialogue Interactions},
author = {{Nasihati Gilani}, Setareh and Kimberly Pollard and David Traum},
url = {https://dl.acm.org/doi/10.1145/3610661.3617166},
doi = {10.1145/3610661.3617166},
isbn = {979-8-4007-0321-8},
year = {2023},
date = {2023-10-01},
urldate = {2023-12-07},
booktitle = {International Conference on Multimodal Interaction},
pages = {71–75},
publisher = {ACM},
address = {Paris France},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Lukin, Stephanie M.; Pollard, Kimberly A.; Bonial, Claire; Hudson, Taylor; Artstein, Ron; Voss, Clare; Traum, David
Navigating to Success in Multi-Modal Human-Robot Collaboration: Analysis and Corpus Release Miscellaneous
2023, (arXiv:2310.17568 [cs]).
Abstract | Links | BibTeX | Tags:
@misc{lukin_navigating_2023,
title = {Navigating to Success in Multi-Modal Human-Robot Collaboration: Analysis and Corpus Release},
author = {Stephanie M. Lukin and Kimberly A. Pollard and Claire Bonial and Taylor Hudson and Ron Artstein and Clare Voss and David Traum},
url = {http://arxiv.org/abs/2310.17568},
year = {2023},
date = {2023-10-01},
urldate = {2023-12-07},
publisher = {arXiv},
abstract = {Human-guided robotic exploration is a useful approach to gathering information at remote locations, especially those that might be too risky, inhospitable, or inaccessible for humans. Maintaining common ground between the remotely-located partners is a challenge, one that can be facilitated by multi-modal communication. In this paper, we explore how participants utilized multiple modalities to investigate a remote location with the help of a robotic partner. Participants issued spoken natural language instructions and received from the robot: text-based feedback, continuous 2D LIDAR mapping, and upon-request static photographs. We noticed that different strategies were adopted in terms of use of the modalities, and hypothesize that these differences may be correlated with success at several exploration sub-tasks. We found that requesting photos may have improved the identification and counting of some key entities (doorways in particular) and that this strategy did not hinder the amount of overall area exploration. Future work with larger samples may reveal the effects of more nuanced photo and dialogue strategies, which can inform the training of robotic agents. Additionally, we announce the release of our unique multi-modal corpus of human-robot communication in an exploration context: SCOUT, the Situated Corpus on Understanding Transactions.},
note = {arXiv:2310.17568 [cs]},
keywords = {},
pubstate = {published},
tppubtype = {misc}
}
Gainer, Alesia; Aptaker, Allison; Artstein, Ron; Cobbins, David; Core, Mark; Gordon, Carla; Leuski, Anton; Li, Zongjian; Merchant, Chirag; Nelson, David; Soleymani, Mohammad; Traum, David
DIVIS: Digital Interactive Victim Intake Simulator Proceedings Article
In: Proceedings of the 23rd ACM International Conference on Intelligent Virtual Agents, pp. 1–2, ACM, Würzburg Germany, 2023, ISBN: 978-1-4503-9994-4.
@inproceedings{gainer_divis_2023,
title = {DIVIS: Digital Interactive Victim Intake Simulator},
author = {Alesia Gainer and Allison Aptaker and Ron Artstein and David Cobbins and Mark Core and Carla Gordon and Anton Leuski and Zongjian Li and Chirag Merchant and David Nelson and Mohammad Soleymani and David Traum},
url = {https://dl.acm.org/doi/10.1145/3570945.3607328},
doi = {10.1145/3570945.3607328},
isbn = {978-1-4503-9994-4},
year = {2023},
date = {2023-09-01},
urldate = {2024-02-20},
booktitle = {Proceedings of the 23rd ACM International Conference on Intelligent Virtual Agents},
pages = {1–2},
publisher = {ACM},
address = {Würzburg Germany},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Pal, Debaditya; Leuski, Anton; Traum, David
Comparing Statistical Models for Retrieval based Question-answering Dialogue: BERT vs Relevance Models Journal Article
In: FLAIRS, vol. 36, 2023, ISSN: 2334-0762.
Abstract | Links | BibTeX | Tags:
@article{pal_comparing_2023,
title = {Comparing Statistical Models for Retrieval based Question-answering Dialogue: BERT vs Relevance Models},
author = {Debaditya Pal and Anton Leuski and David Traum},
url = {https://journals.flvc.org/FLAIRS/article/view/133386},
doi = {10.32473/flairs.36.133386},
issn = {2334-0762},
year = {2023},
date = {2023-05-01},
urldate = {2023-08-23},
journal = {FLAIRS},
volume = {36},
abstract = {In this paper, we compare the performance of four models in a retrieval based question answering dialogue task on two moderately sized corpora (\textasciitilde{}10,000 utterances). One model is a statistical model and uses cross language relevance while the others are deep neural networks utilizing the BERT architecture along with different retrieval methods. The statistical model has previously outperformed LSTM based neural networks in a similar task whereas BERT has been proven to perform well on a variety of NLP tasks, achieving state-of-the-art results in many of them. Results show that the statistical cross language relevance model outperforms the BERT based architectures in learning question-answer mappings. BERT achieves better results by mapping new questions to existing questions.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Georgila, Kallirroi
Considerations for Child Speech Synthesis for Dialogue Systems Proceedings Article
In: Los Angeles, CA, 2023.
Abstract | Links | BibTeX | Tags:
@inproceedings{georgila_considerations_2023,
title = {Considerations for Child Speech Synthesis for Dialogue Systems},
author = {Kallirroi Georgila},
url = {https://kgeorgila.github.io/publications/georgila_aiaic23.pdf},
year = {2023},
date = {2023-03-01},
address = {Los Angeles, CA},
abstract = {We present a number of important issues for consideration with regard to child speech synthesis for dialogue systems. We specifically discuss challenges in building child synthetic voices compared to adult synthetic voices, synthesizing expressive conversational speech, and evaluating speech synthesis quality.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
2022
Traum, David
Socially Interactive Agent Dialogue Book Section
In: The Handbook on Socially Interactive Agents: 20 years of Research on Embodied Conversational Agents, Intelligent Virtual Agents, and Social Robotics Volume 2: Interactivity, Platforms, Application, vol. 48, pp. 45–76, Association for Computing Machinery, New York, NY, USA, 2022, ISBN: 978-1-4503-9896-1.
@incollection{traum_socially_2022,
title = {Socially Interactive Agent Dialogue},
author = {David Traum},
url = {https://doi.org/10.1145/3563659.3563663},
isbn = {978-1-4503-9896-1},
year = {2022},
date = {2022-11-01},
urldate = {2023-03-31},
booktitle = {The Handbook on Socially Interactive Agents: 20 years of Research on Embodied Conversational Agents, Intelligent Virtual Agents, and Social Robotics Volume 2: Interactivity, Platforms, Application},
volume = {48},
pages = {45–76},
publisher = {Association for Computing Machinery},
address = {New York, NY, USA},
edition = {1},
keywords = {},
pubstate = {published},
tppubtype = {incollection}
}
Brixey, Jacqueline; Traum, David
Towards an Automatic Speech Recognizer for the Choctaw language Proceedings Article
In: 1st Workshop on Speech for Social Good (S4SG), pp. 6–9, ISCA, 2022.
@inproceedings{brixey_towards_2022,
title = {Towards an Automatic Speech Recognizer for the Choctaw language},
author = {Jacqueline Brixey and David Traum},
url = {https://www.isca-speech.org/archive/s4sg_2022/brixey22_s4sg.html},
doi = {10.21437/S4SG.2022-2},
year = {2022},
date = {2022-09-01},
urldate = {2023-03-31},
booktitle = {1st Workshop on Speech for Social Good (S4SG)},
pages = {6–9},
publisher = {ISCA},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Georgila, Kallirroi
Comparing Regression Methods for Dialogue System Evaluation on a Richly Annotated Corpus Proceedings Article
In: Proceedings of the 26th Workshop on the Semantics and Pragmatics of Dialogue - Full Papers, 2022.
Abstract | Links | BibTeX | Tags:
@inproceedings{georgila_comparing_2022,
title = {Comparing Regression Methods for Dialogue System Evaluation on a Richly Annotated Corpus},
author = {Kallirroi Georgila},
url = {http://semdial.org/anthology/papers/Z/Z22/Z22-3011/},
year = {2022},
date = {2022-08-01},
urldate = {2023-03-31},
booktitle = {Proceedings of the 26th Workshop on the Semantics and Pragmatics of Dialogue - Full Papers},
abstract = {We compare various state-of-the-art regression methods for predicting user ratings of their interaction with a dialogue system using a richly annotated corpus. We vary the size of the training data and, in particular for kernel-based methods, we vary the type of kernel used. Furthermore, we experiment with various domain-independent features, including feature combinations that do not rely on complex annotations. We present detailed results in terms of root mean square error, and Pearson’s r and Spearman’s ρ correlations. Our results show that in many cases Gaussian Process Regression leads to modest but statistically significant gains compared to Support Vector Regression (a strong baseline), and that the type of kernel used matters. The gains are even larger when compared to linear regression. The larger the training data set the higher the gains but for some cases more data may result in over-fitting. Finally, some feature combinations work better than others but overall the best results are obtained when all features are used.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Speggiorin, Alessandro; Dalton, Jeffrey; Leuski, Anton
TaskMAD: A Platform for Multimodal Task-Centric Knowledge-Grounded Conversational Experimentation Proceedings Article
In: Proceedings of the 45th International ACM SIGIR Conference on Research and Development in Information Retrieval, pp. 3240–3244, ACM, Madrid Spain, 2022, ISBN: 978-1-4503-8732-3.
@inproceedings{speggiorin_taskmad_2022,
title = {TaskMAD: A Platform for Multimodal Task-Centric Knowledge-Grounded Conversational Experimentation},
author = {Alessandro Speggiorin and Jeffrey Dalton and Anton Leuski},
url = {https://dl.acm.org/doi/10.1145/3477495.3531679},
doi = {10.1145/3477495.3531679},
isbn = {978-1-4503-8732-3},
year = {2022},
date = {2022-07-01},
urldate = {2022-09-22},
booktitle = {Proceedings of the 45th International ACM SIGIR Conference on Research and Development in Information Retrieval},
pages = {3240–3244},
publisher = {ACM},
address = {Madrid Spain},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Tur, Ada; Traum, David
Comparing Approaches to Language Understanding for Human-Robot Dialogue: An Error Taxonomy and Analysis Proceedings Article
In: Proceedings of the Thirteenth Language Resources and Evaluation Conference, pp. 5813–5820, European Language Resources Association, Marseille, France, 2022.
Abstract | Links | BibTeX | Tags:
@inproceedings{tur_comparing_2022,
title = {Comparing Approaches to Language Understanding for Human-Robot Dialogue: An Error Taxonomy and Analysis},
author = {Ada Tur and David Traum},
url = {https://aclanthology.org/2022.lrec-1.625},
year = {2022},
date = {2022-06-01},
urldate = {2023-02-10},
booktitle = {Proceedings of the Thirteenth Language Resources and Evaluation Conference},
pages = {5813–5820},
publisher = {European Language Resources Association},
address = {Marseille, France},
abstract = {In this paper, we compare two different approaches to language understanding for a human-robot interaction domain in which a human commander gives navigation instructions to a robot. We contrast a relevance-based classifier with a GPT-2 model, using about 2000 input-output examples as training data. With this level of training data, the relevance-based model outperforms the GPT-2 based model 79% to 8%. We also present a taxonomy of types of errors made by each model, indicating that they have somewhat different strengths and weaknesses, so we also examine the potential for a combined model.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Tadimeti, Divya; Georgila, Kallirroi; Traum, David
Evaluation of Off-the-shelf Speech Recognizers on Different Accents in a Dialogue Domain Proceedings Article
In: Proceedings of the Language Resources and Evaluation Conference, pp. 6001–6008, European Language Resources Association, Marseille, France, 2022.
Abstract | Links | BibTeX | Tags:
@inproceedings{tadimeti_evaluation_2022,
title = {Evaluation of Off-the-shelf Speech Recognizers on Different Accents in a Dialogue Domain},
author = {Divya Tadimeti and Kallirroi Georgila and David Traum},
url = {https://aclanthology.org/2022.lrec-1.645},
year = {2022},
date = {2022-06-01},
booktitle = {Proceedings of the Language Resources and Evaluation Conference},
pages = {6001–6008},
publisher = {European Language Resources Association},
address = {Marseille, France},
abstract = {We evaluate several publicly available off-the-shelf (commercial and research) automatic speech recognition (ASR) systems on dialogue agent-directed English speech from speakers with General American vs. non-American accents. Our results show that the performance of the ASR systems for non-American accents is considerably worse than for General American accents. Depending on the recognizer, the absolute difference in performance between General American accents and all non-American accents combined can vary approximately from 2% to 12%, with relative differences varying approximately between 16% and 49%. This drop in performance becomes even larger when we consider specific categories of non-American accents indicating a need for more diligent collection of and training on non-native English speaker data in order to narrow this performance gap. There are performance differences across ASR systems, and while the same general pattern holds, with more errors for non-American accents, there are some accents for which the best recognizer is different than in the overall case. We expect these results to be useful for dialogue system designers in developing more robust inclusive dialogue systems, and for ASR providers in taking into account performance requirements for different accents.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Karkada, Deepthi; Manuvinakurike, Ramesh; Paetzel-Prüsmann, Maike; Georgila, Kallirroi
Strategy-level Entrainment of Dialogue System Users in a Creative Visual Reference Resolution Task Proceedings Article
In: Proceedings of the Thirteenth Language Resources and Evaluation Conference, pp. 5768–5777, European Language Resources Association, Marseille, France, 2022.
Abstract | Links | BibTeX | Tags:
@inproceedings{karkada_strategy-level_2022,
title = {Strategy-level Entrainment of Dialogue System Users in a Creative Visual Reference Resolution Task},
author = {Deepthi Karkada and Ramesh Manuvinakurike and Maike Paetzel-Prüsmann and Kallirroi Georgila},
url = {https://aclanthology.org/2022.lrec-1.620},
year = {2022},
date = {2022-06-01},
urldate = {2023-03-31},
booktitle = {Proceedings of the Thirteenth Language Resources and Evaluation Conference},
pages = {5768–5777},
publisher = {European Language Resources Association},
address = {Marseille, France},
abstract = {In this work, we study entrainment of users playing a creative reference resolution game with an autonomous dialogue system. The language understanding module in our dialogue system leverages annotated human-wizard conversational data, openly available knowledge graphs, and crowd-augmented data. Unlike previous entrainment work, our dialogue system does not attempt to make the human conversation partner adopt lexical items in their dialogue, but rather to adapt their descriptive strategy to one that is simpler to parse for our natural language understanding unit. By deploying this dialogue system through a crowd-sourced study, we show that users indeed entrain on a “strategy-level” without the change of strategy impinging on their creativity. Our work thus presents a promising future research direction for developing dialogue management systems that can strategically influence people's descriptive strategy to ease the system's language understanding in creative tasks.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Paun, Silviu; Artstein, Ron; Poesio, Massimo
Probabilistic Models of Annotation Book Section
In: Paun, Silviu; Artstein, Ron; Poesio, Massimo (Ed.): Statistical Methods for Annotation Analysis, pp. 105–145, Springer International Publishing, Cham, 2022, ISBN: 978-3-031-03763-4.
@incollection{paun_probabilistic_2022-1,
title = {Probabilistic Models of Annotation},
author = {Silviu Paun and Ron Artstein and Massimo Poesio},
editor = {Silviu Paun and Ron Artstein and Massimo Poesio},
url = {https://doi.org/10.1007/978-3-031-03763-4_5},
doi = {10.1007/978-3-031-03763-4_5},
isbn = {978-3-031-03763-4},
year = {2022},
date = {2022-01-01},
urldate = {2023-03-31},
booktitle = {Statistical Methods for Annotation Analysis},
pages = {105–145},
publisher = {Springer International Publishing},
address = {Cham},
series = {Synthesis Lectures on Human Language Technologies},
keywords = {},
pubstate = {published},
tppubtype = {incollection}
}
Paun, Silviu; Artstein, Ron; Poesio, Massimo
Using Agreement Measures for CL Annotation Tasks Book Section
In: Paun, Silviu; Artstein, Ron; Poesio, Massimo (Ed.): Statistical Methods for Annotation Analysis, pp. 47–78, Springer International Publishing, Cham, 2022, ISBN: 978-3-031-03763-4.
Abstract | Links | BibTeX | Tags:
@incollection{paun_using_2022,
title = {Using Agreement Measures for CL Annotation Tasks},
author = {Silviu Paun and Ron Artstein and Massimo Poesio},
editor = {Silviu Paun and Ron Artstein and Massimo Poesio},
url = {https://doi.org/10.1007/978-3-031-03763-4_3},
doi = {10.1007/978-3-031-03763-4_3},
isbn = {978-3-031-03763-4},
year = {2022},
date = {2022-01-01},
urldate = {2023-03-31},
booktitle = {Statistical Methods for Annotation Analysis},
pages = {47–78},
publisher = {Springer International Publishing},
address = {Cham},
series = {Synthesis Lectures on Human Language Technologies},
abstract = {We will now review the use of intercoder agreement measures in CL since Carletta’s original paper in the light of the discussion in the previous sections. We begin with a summary of Krippendorff’s recommendations about measuring reliability (Krippendorff, 2004a, Chapter 11), then discuss how coefficients of agreement have been used in CL to measure the reliability of annotation, focusing in particular on the types of annotation where there has been some debate concerning the most appropriate measures of agreement.},
keywords = {},
pubstate = {published},
tppubtype = {incollection}
}
Paun, Silviu; Artstein, Ron; Poesio, Massimo
Probabilistic Models of Agreement Book Section
In: Paun, Silviu; Artstein, Ron; Poesio, Massimo (Ed.): Statistical Methods for Annotation Analysis, pp. 79–101, Springer International Publishing, Cham, 2022, ISBN: 978-3-031-03763-4.
@incollection{paun_probabilistic_2022,
title = {Probabilistic Models of Agreement},
author = {Silviu Paun and Ron Artstein and Massimo Poesio},
editor = {Silviu Paun and Ron Artstein and Massimo Poesio},
url = {https://doi.org/10.1007/978-3-031-03763-4_4},
doi = {10.1007/978-3-031-03763-4_4},
isbn = {978-3-031-03763-4},
year = {2022},
date = {2022-01-01},
urldate = {2023-03-31},
booktitle = {Statistical Methods for Annotation Analysis},
pages = {79–101},
publisher = {Springer International Publishing},
address = {Cham},
series = {Synthesis Lectures on Human Language Technologies},
keywords = {},
pubstate = {published},
tppubtype = {incollection}
}
Paun, Silviu; Artstein, Ron; Poesio, Massimo
Learning from Multi-Annotated Corpora Book Section
In: Paun, Silviu; Artstein, Ron; Poesio, Massimo (Ed.): Statistical Methods for Annotation Analysis, pp. 147–165, Springer International Publishing, Cham, 2022, ISBN: 978-3-031-03763-4.
@incollection{paun_learning_2022,
title = {Learning from Multi-Annotated Corpora},
author = {Silviu Paun and Ron Artstein and Massimo Poesio},
editor = {Silviu Paun and Ron Artstein and Massimo Poesio},
url = {https://doi.org/10.1007/978-3-031-03763-4_6},
doi = {10.1007/978-3-031-03763-4_6},
isbn = {978-3-031-03763-4},
year = {2022},
date = {2022-01-01},
urldate = {2023-03-31},
booktitle = {Statistical Methods for Annotation Analysis},
pages = {147–165},
publisher = {Springer International Publishing},
address = {Cham},
series = {Synthesis Lectures on Human Language Technologies},
keywords = {},
pubstate = {published},
tppubtype = {incollection}
}
Hoegen, Jessie; DeVault, David; Gratch, Jonathan
Exploring the Function of Expressions in Negotiation: the DyNego-WOZ Corpus Journal Article
In: IEEE Transactions on Affective Computing, pp. 1–12, 2022, ISSN: 1949-3045, (Conference Name: IEEE Transactions on Affective Computing).
Abstract | Links | BibTeX | Tags:
@article{hoegen_exploring_2022,
title = {Exploring the Function of Expressions in Negotiation: the DyNego-WOZ Corpus},
author = {Jessie Hoegen and David DeVault and Jonathan Gratch},
doi = {10.1109/TAFFC.2022.3223030},
issn = {1949-3045},
year = {2022},
date = {2022-01-01},
journal = {IEEE Transactions on Affective Computing},
pages = {1–12},
abstract = {For affective computing to have an impact outside the laboratory, facial expressions must be studied in rich naturalistic situations. We argue negotiations are one such situation as they are ubiquitous in daily life, often evoke strong emotions, and perceived emotion shapes decisions and outcomes. Negotiations are a growing focus in AI research and applications, including agents that negotiate directly with people and attempt to use affective information. We introduce the DyNego-WOZ Corpus, which includes dyadic negotiation between participants and wizard-controlled virtual humans. We demonstrate the value of this corpus to the affective computing community by examining participants' facial expressions in response to a virtual human negotiation partner. We show that people's facial expressions typically co-occur with the end of their partner's speech (suggesting they reflect a reaction to the content of this speech), that these reactions do not correspond to prototypical emotional expressions, and that these reactions can help predict the expresser's subsequent action. We highlight challenges in working with such naturalistic data, including difficulties of expression recognition during speech, and the extreme variability of expressions, both across participants and within a negotiation. Our findings reinforce arguments that facial expressions convey more than emotional state but serve important communicative functions.},
note = {Conference Name: IEEE Transactions on Affective Computing},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Paun, Silviu; Artstein, Ron; Poesio, Massimo
Statistical Methods for Annotation Analysis Book
Springer International Publishing, Cham, 2022, ISBN: 978-3-031-03753-5 978-3-031-03763-4.
@book{paun_statistical_2022,
title = {Statistical Methods for Annotation Analysis},
author = {Silviu Paun and Ron Artstein and Massimo Poesio},
url = {https://link.springer.com/10.1007/978-3-031-03763-4},
doi = {10.1007/978-3-031-03763-4},
isbn = {978-3-031-03753-5 978-3-031-03763-4},
year = {2022},
date = {2022-01-01},
urldate = {2022-09-28},
publisher = {Springer International Publishing},
address = {Cham},
keywords = {},
pubstate = {published},
tppubtype = {book}
}
Marge, Matthew; Espy-Wilson, Carol; Ward, Nigel G.; Alwan, Abeer; Artzi, Yoav; Bansal, Mohit; Blankenship, Gil; Chai, Joyce; Daumé, Hal; Dey, Debadeepta; Harper, Mary; Howard, Thomas; Kennington, Casey; Kruijff-Korbayová, Ivana; Manocha, Dinesh; Matuszek, Cynthia; Mead, Ross; Mooney, Raymond; Moore, Roger K.; Ostendorf, Mari; Pon-Barry, Heather; Rudnicky, Alexander I.; Scheutz, Matthias; St. Amant, Robert; Sun, Tong; Tellex, Stefanie; Traum, David; Yu, Zhou
Spoken language interaction with robots: Recommendations for future research Journal Article
In: Computer Speech & Language, vol. 71, pp. 101255, 2022, ISSN: 0885-2308.
@article{marge_spoken_2022,
title = {Spoken language interaction with robots: Recommendations for future research},
author = {Matthew Marge and Carol Espy-Wilson and Nigel G. Ward and Abeer Alwan and Yoav Artzi and Mohit Bansal and Gil Blankenship and Joyce Chai and Hal Daumé and Debadeepta Dey and Mary Harper and Thomas Howard and Casey Kennington and Ivana Kruijff-Korbayová and Dinesh Manocha and Cynthia Matuszek and Ross Mead and Raymond Mooney and Roger K. Moore and Mari Ostendorf and Heather Pon-Barry and Alexander I. Rudnicky and Matthias Scheutz and {St. Amant}, Robert and Tong Sun and Stefanie Tellex and David Traum and Zhou Yu},
url = {https://linkinghub.elsevier.com/retrieve/pii/S0885230821000620},
doi = {10.1016/j.csl.2021.101255},
issn = {0885-2308},
year = {2022},
date = {2022-01-01},
urldate = {2022-09-23},
journal = {Computer Speech \& Language},
volume = {71},
pages = {101255},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
2021
Hernandez, Stephanie; Artstein, Ron
Annotating low-confidence questions improves classifier performance Journal Article
In: Proceedings of the 25th Workshop on the Semantics and Pragmatics of Dialogue - Poster Abstracts, 2021.
Abstract | Links | BibTeX | Tags:
@inproceedings{hernandez_annotating_2021,
title = {Annotating low-confidence questions improves classifier performance},
author = {Stephanie Hernandez and Ron Artstein},
url = {https://par.nsf.gov/biblio/10313591-annotating-low-confidence-questions-improves-classifier-performance},
year = {2021},
date = {2021-09-01},
urldate = {2023-03-31},
booktitle = {Proceedings of the 25th Workshop on the Semantics and Pragmatics of Dialogue - Poster Abstracts},
abstract = {This paper compares methods to select data for annotation in order to improve a classifier used in a question-answering dialogue system. With a classifier trained on 1,500 questions, adding 300 training questions on which the classifier is least confident results in consistently improved performance, whereas adding 300 arbitrarily selected training questions does not yield consistent improvement, and sometimes even degrades performance. The paper uses a new method for comparative evaluation of classifiers for dialogue, which scores each classifier based on the number of appropriate responses retrieved.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Johnson, Emmanuel; Gratch, Jonathan; Boberg, Jill; DeVault, David; Kim, Peter; Lucas, Gale
Using Intelligent Agents to Examine Gender in Negotiations Proceedings Article
In: Proceedings of the 21th ACM International Conference on Intelligent Virtual Agents, pp. 90–97, ACM, Virtual Event Japan, 2021, ISBN: 978-1-4503-8619-7.
@inproceedings{johnson_using_2021,
title = {Using Intelligent Agents to Examine Gender in Negotiations},
author = {Emmanuel Johnson and Jonathan Gratch and Jill Boberg and David DeVault and Peter Kim and Gale Lucas},
url = {https://dl.acm.org/doi/10.1145/3472306.3478348},
doi = {10.1145/3472306.3478348},
isbn = {978-1-4503-8619-7},
year = {2021},
date = {2021-09-01},
urldate = {2022-09-28},
booktitle = {Proceedings of the 21th ACM International Conference on Intelligent Virtual Agents},
pages = {90--97},
publisher = {ACM},
address = {Virtual Event Japan},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Gratch, Jonathan; Lucas, Gale
Rapport Between Humans and Socially Interactive Agents Book Section
In: Lugrin, Birgit; Pelachaud, Catherine; Traum, David (Ed.): The Handbook on Socially Interactive Agents, pp. 433–462, ACM, New York, NY, USA, 2021, ISBN: 978-1-4503-8720-0.
@incollection{gratch_rapport_2021,
title = {Rapport Between Humans and Socially Interactive Agents},
author = {Jonathan Gratch and Gale Lucas},
editor = {Birgit Lugrin and Catherine Pelachaud and David Traum},
url = {https://dl.acm.org/doi/10.1145/3477322.3477335},
doi = {10.1145/3477322.3477335},
isbn = {978-1-4503-8720-0},
year = {2021},
date = {2021-09-01},
urldate = {2022-09-28},
booktitle = {The Handbook on Socially Interactive Agents},
pages = {433--462},
publisher = {ACM},
address = {New York, NY, USA},
edition = {1},
keywords = {},
pubstate = {published},
tppubtype = {incollection}
}
Bonial, Claire; Abrams, Mitchell; Baker, Anthony L.; Hudson, Taylor; Lukin, Stephanie; Traum, David; Voss, Clare
Context is key: Annotating situated dialogue relations in multi-floor dialogue Proceedings Article
In: 2021.
@inproceedings{bonial_context_2021,
title = {Context is key: Annotating situated dialogue relations in multi-floor dialogue},
author = {Claire Bonial and Mitchell Abrams and Anthony L. Baker and Taylor Hudson and Stephanie Lukin and David Traum and Clare Voss},
url = {http://semdial.org/anthology/papers/Z/Z21/Z21-3006/},
year = {2021},
date = {2021-09-01},
urldate = {2022-09-23},
booktitle = {Proceedings of the 25th Workshop on the Semantics and Pragmatics of Dialogue},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Chaffey, Patricia; Traum, David
Identity models for role-play dialogue characters Proceedings Article
In: 2021.
@inproceedings{chaffey_identity_2021,
title = {Identity models for role-play dialogue characters},
author = {Patricia Chaffey and David Traum},
url = {http://semdial.org/anthology/papers/Z/Z21/Z21-4022/},
year = {2021},
date = {2021-09-01},
urldate = {2022-09-23},
booktitle = {Proceedings of the 25th Workshop on the Semantics and Pragmatics of Dialogue},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Lugrin, Birgit; Pelachaud, Catherine; Traum, David (Ed.)
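The Handbook on Socially Interactive Agents: 20 years of Research on Embodied Conversational Agents, Intelligent Virtual Agents, and Social Robotics Volume 1: Methods, Behavior, Cognition Book
1, ACM, New York, NY, USA, 2021, ISBN: 978-1-4503-8720-0.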
@book{lugrin_handbook_2021,
  editor    = {Birgit Lugrin and Catherine Pelachaud and David Traum},
  title     = {The Handbook on Socially Interactive Agents: 20 years of Research on Embodied Conversational Agents, Intelligent Virtual Agents, and Social Robotics Volume 1: Methods, Behavior, Cognition},
  edition   = {1},
  publisher = {ACM},
  address   = {New York, NY, USA},
  year      = {2021},
  date      = {2021-09-01},
  isbn      = {978-1-4503-8720-0},
  doi       = {10.1145/3477322},
  url       = {https://dl.acm.org/doi/book/10.1145/3477322},
  urldate   = {2022-09-23},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {book}
}
Bonial, Claire; Abrams, Mitchell; Traum, David; Voss, Clare
Builder, we have done it: Evaluating & Extending Dialogue-AMR NLU Pipeline for Two Collaborative Domains Proceedings Article
In: Proceedings of the 14th International Conference on Computational Semantics (IWCS), pp. 173–183, Association for Computational Linguistics, Groningen, The Netherlands (online), 2021.
Abstract | Links | BibTeX | Tags:
@inproceedings{bonial_builder_2021,
title = {Builder, we have done it: Evaluating \& Extending Dialogue-AMR NLU Pipeline for Two Collaborative Domains},
author = {Claire Bonial and Mitchell Abrams and David Traum and Clare Voss},
url = {https://aclanthology.org/2021.iwcs-1.17},
year = {2021},
date = {2021-06-01},
urldate = {2022-09-23},
booktitle = {Proceedings of the 14th International Conference on Computational Semantics (IWCS)},
pages = {173--183},
publisher = {Association for Computational Linguistics},
address = {Groningen, The Netherlands (online)},
abstract = {We adopt, evaluate, and improve upon a two-step natural language understanding (NLU) pipeline that incrementally tames the variation of unconstrained natural language input and maps to executable robot behaviors. The pipeline first leverages Abstract Meaning Representation (AMR) parsing to capture the propositional content of the utterance, and second converts this into “Dialogue-AMR,” which augments standard AMR with information on tense, aspect, and speech acts. Several alternative approaches and training datasets are evaluated for both steps and corresponding components of the pipeline, some of which outperform the original. We extend the Dialogue-AMR annotation schema to cover a different collaborative instruction domain and evaluate on both domains. With very little training data, we achieve promising performance in the new domain, demonstrating the scalability of this approach.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Gervits, Felix; Leuski, Anton; Bonial, Claire; Gordon, Carla; Traum, David
A Classification-Based Approach to Automating Human-Robot Dialogue Journal Article
In: pp. 13, 2021.
Abstract | Links | BibTeX | Tags:
@article{gervits_classication-based_2021,
title = {A Classification-Based Approach to Automating Human-Robot Dialogue},
author = {Felix Gervits and Anton Leuski and Claire Bonial and Carla Gordon and David Traum},
url = {https://link.springer.com/chapter/10.1007/978-981-15-9323-9_10},
doi = {10.1007/978-981-15-9323-9_10},
year = {2021},
date = {2021-03-01},
pages = {13},
abstract = {We present a dialogue system based on statistical classification which was used to automate human-robot dialogue in a collaborative navigation domain. The classifier was trained on a small corpus of multi-floor Wizard-of-Oz dialogue including two wizards: one standing in for dialogue capabilities and another for navigation. Below, we describe the implementation details of the classifier and show how it was used to automate the dialogue wizard. We evaluate our system on several sets of source data from the corpus and find that response accuracy is generally high, even with very limited training data. Another contribution of this work is the novel demonstration of a dialogue manager that uses the classifier to engage in multifloor dialogue with two different human roles. Overall, this approach is useful for enabling spoken dialogue systems to produce robust and accurate responses to natural language input, and for robots that need to interact with humans in a team setting.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Kawano, Seiya; Yoshino, Koichiro; Traum, David; Nakamura, Satoshi
Dialogue Structure Parsing on Multi-Floor Dialogue Based on Multi-Task Learning Proceedings Article
In: 1st RobotDial Workshop on Dialogue Models for Human-Robot Interaction, pp. 21–29, ISCA, 2021.
Abstract | Links | BibTeX | Tags:
@inproceedings{kawano_dialogue_2021,
title = {Dialogue Structure Parsing on Multi-Floor Dialogue Based on Multi-Task Learning},
author = {Seiya Kawano and Koichiro Yoshino and David Traum and Satoshi Nakamura},
url = {http://www.isca-speech.org/archive/RobotDial_2021/abstracts/4.html},
doi = {10.21437/RobotDial.2021-4},
year = {2021},
date = {2021-01-01},
urldate = {2021-04-15},
booktitle = {1st RobotDial Workshop on Dialogue Models for Human-Robot Interaction},
pages = {21--29},
publisher = {ISCA},
abstract = {A multi-floor dialogue consists of multiple sets of dialogue participants, each conversing within their own floor, but also at least one multicommunicating member who is a participant of multiple floors and coordinating each to achieve a shared dialogue goal. The structure of such dialogues can be complex, involving intentional structure and relations that are within or across floors. In this study, we propose a neural dialogue structure parser based on multi-task learning and an attention mechanism on multi-floor dialogues in a collaborative robot navigation domain. Our experimental results show that our proposed model improved the dialogue structure parsing performance more than those of single models, which are trained on each dialogue structure parsing task in multi-floor dialogues.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Gervits, Felix; Leuski, Anton; Bonial, Claire; Gordon, Carla; Traum, David
A Classification-Based Approach to Automating Human-Robot Dialogue Book Section
In: Marchi, Erik; Siniscalchi, Sabato Marco; Cumani, Sandro; Salerno, Valerio Mario; Li, Haizhou (Ed.): Increasing Naturalness and Flexibility in Spoken Dialogue Interaction: 10th International Workshop on Spoken Dialogue Systems, pp. 115–127, Springer, Singapore, 2021, ISBN: 978-981-15-9323-9.
Abstract | Links | BibTeX | Tags:
@incollection{gervits_classification-based_2021,
title = {A Classification-Based Approach to Automating Human-Robot Dialogue},
author = {Felix Gervits and Anton Leuski and Claire Bonial and Carla Gordon and David Traum},
editor = {Erik Marchi and Sabato Marco Siniscalchi and Sandro Cumani and Valerio Mario Salerno and Haizhou Li},
url = {https://doi.org/10.1007/978-981-15-9323-9_10},
doi = {10.1007/978-981-15-9323-9_10},
isbn = {978-981-15-9323-9},
year = {2021},
date = {2021-01-01},
urldate = {2022-09-23},
booktitle = {Increasing Naturalness and Flexibility in Spoken Dialogue Interaction: 10th International Workshop on Spoken Dialogue Systems},
pages = {115--127},
publisher = {Springer},
address = {Singapore},
series = {Lecture Notes in Electrical Engineering},
abstract = {We present a dialogue system based on statistical classification which was used to automate human-robot dialogue in a collaborative navigation domain. The classifier was trained on a small corpus of multi-floor Wizard-of-Oz dialogue including two wizards: one standing in for dialogue capabilities and another for navigation. Below, we describe the implementation details of the classifier and show how it was used to automate the dialogue wizard. We evaluate our system on several sets of source data from the corpus and find that response accuracy is generally high, even with very limited training data. Another contribution of this work is the novel demonstration of a dialogue manager that uses the classifier to engage in multi-floor dialogue with two different human roles. Overall, this approach is useful for enabling spoken dialogue systems to produce robust and accurate responses to natural language input, and for robots that need to interact with humans in a team setting.},
keywords = {},
pubstate = {published},
tppubtype = {incollection}
}
2020
Brixey, Jacqueline; Traum, David
Masheli: A Choctaw-English bilingual chatbot Book Section
In: Conversational Dialogue Systems for the Next Decade, pp. 41–50, Springer, Switzerland, 2020.
Abstract | Links | BibTeX | Tags:
@incollection{brixey_masheli_2020,
title = {Masheli: A Choctaw-English bilingual chatbot},
author = {Jacqueline Brixey and David Traum},
url = {https://link.springer.com/chapter/10.1007/978-981-15-8395-7_4},
year = {2020},
date = {2020-10-01},
booktitle = {Conversational Dialogue Systems for the Next Decade},
pages = {41--50},
publisher = {Springer},
address = {Switzerland},
abstract = {We present the implementation of an autonomous Choctaw-English bilingual chatbot. Choctaw is an American indigenous language. The intended use of the chatbot is for Choctaw language learners to practice conversational skills. The system’s backend is NPCEditor, a response selection program that is trained on linked questions and answers. The chatbot’s answers are stories and conversational utterances in both languages. We experiment with the ability of NPCEditor to appropriately respond to language mixed utterances, and describe a pilot study with Choctaw-English speakers.},
keywords = {},
pubstate = {published},
tppubtype = {incollection}
}
Gordon, Carla; Georgila, Kallirroi; Yanov, Volodymyr; Traum, David
Towards Personalization of Spoken Dialogue System Communication Strategies Book Section
In: Conversational Dialogue Systems for the Next Decade, vol. 704, pp. 145–160, Springer Singapore, Singapore, 2020, ISBN: 978-981-15-8394-0 978-981-15-8395-7.
Abstract | Links | BibTeX | Tags:
@incollection{gordon_towards_2020,
title = {Towards Personalization of Spoken Dialogue System Communication Strategies},
author = {Carla Gordon and Kallirroi Georgila and Volodymyr Yanov and David Traum},
url = {http://link.springer.com/10.1007/978-981-15-8395-7_11},
isbn = {978-981-15-8394-0 978-981-15-8395-7},
year = {2020},
date = {2020-09-01},
booktitle = {Conversational Dialogue Systems for the Next Decade},
volume = {704},
pages = {145--160},
publisher = {Springer Singapore},
address = {Singapore},
abstract = {This study examines the effects of 3 conversational traits – Register, Explicitness, and Misunderstandings – on user satisfaction and the perception of specific subjective features for Virtual Home Assistant spoken dialogue systems. Eight different system profiles were created, each representing a different combination of these 3 traits. We then utilized a novel Wizard of Oz data collection tool and recruited participants who interacted with the 8 different system profiles, and then rated the systems on 7 subjective features. Surprisingly, we found that systems which made errors were preferred overall, with the statistical analysis revealing error-prone systems were rated higher than systems which made no errors for all 7 of the subjective features rated. There were also some interesting interaction effects between the 3 conversational traits, such as implicit confirmations being preferred for systems employing a “conversational” Register, while explicit confirmations were preferred for systems employing a “formal” Register, even though there was no overall main effect for Explicitness. This experimental framework offers a fine-grained approach to the evaluation of user satisfaction which looks towards the personalization of communication strategies for spoken dialogue systems.},
keywords = {},
pubstate = {published},
tppubtype = {incollection}
}
Czyzewski, Adam; Dalton, Jeffrey; Leuski, Anton
Agent Dialogue: A Platform for Conversational Information Seeking Experimentation Proceedings Article
In: Proceedings of the 43rd International ACM SIGIR Conference on Research and Development in Information Retrieval, pp. 2121–2124, ACM, Virtual Event China, 2020, ISBN: 978-1-4503-8016-4.
Abstract | Links | BibTeX | Tags:
@inproceedings{czyzewski_agent_2020,
title = {Agent Dialogue: A Platform for Conversational Information Seeking Experimentation},
author = {Adam Czyzewski and Jeffrey Dalton and Anton Leuski},
url = {https://dl.acm.org/doi/10.1145/3397271.3401397},
doi = {10.1145/3397271.3401397},
isbn = {978-1-4503-8016-4},
year = {2020},
date = {2020-07-01},
booktitle = {Proceedings of the 43rd International ACM SIGIR Conference on Research and Development in Information Retrieval},
pages = {2121--2124},
publisher = {ACM},
address = {Virtual Event China},
abstract = {Conversational Information Seeking (CIS) is an emerging area of Information Retrieval focused on interactive search systems. As a result there is a need for new benchmark datasets and tools to enable their creation. In this demo we present the Agent Dialogue (AD) platform, an open-source system developed for researchers to perform Wizard-of-Oz CIS experiments. AD is a scalable cloud-native platform developed with Docker and Kubernetes with a flexible and modular micro-service architecture built on production-grade state-of-the-art open-source tools (Kubernetes, gRPC streaming, React, and Firebase). It supports varied front-ends and has the ability to interface with multiple existing agent systems, including Google Assistant and open-source search libraries. It includes support for centralized structure logging as well as offline relevance annotation.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Brixey, Jacqueline; Artstein, Ron
ChoCo: a multimodal corpus of the Choctaw language Journal Article
In: Language Resources and Evaluation, 2020, ISSN: 1574-020X, 1574-0218.
Abstract | Links | BibTeX | Tags:
@article{brixey_choco_2020,
  author    = {Jacqueline Brixey and Ron Artstein},
  title     = {ChoCo: a multimodal corpus of the Choctaw language},
  journal   = {Language Resources and Evaluation},
  year      = {2020},
  date      = {2020-07-01},
  issn      = {1574-020X, 1574-0218},
  doi       = {10.1007/s10579-020-09494-5},
  url       = {http://link.springer.com/10.1007/s10579-020-09494-5},
  abstract  = {This article presents a general use corpus for Choctaw, an American indigenous language (ISO 639-2: cho, endonym: Chahta). The corpus contains audio, video, and text resources, with many texts also translated in English. The Oklahoma Choctaw and the Mississippi Choctaw variants of the language are represented in the corpus. The data set provides documentation support for this threatened language, and allows researchers and language teachers access to a diverse collection of resources.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Georgila, Kallirroi; Gordon, Carla; Yanov, Volodymyr; Traum, David
Predicting Ratings of Real Dialogue Participants from Artificial Data and Ratings of Human Dialogue Observers Proceedings Article
In: Proceedings of the Twelfth Language Resources and Evaluation Conference, pp. 726–734, European Language Resources Association, Marseille, France, 2020, ISBN: 979-10-95546-34-4.
Abstract | Links | BibTeX | Tags:
@inproceedings{georgila_predicting_2020,
title = {Predicting Ratings of Real Dialogue Participants from Artificial Data and Ratings of Human Dialogue Observers},
author = {Kallirroi Georgila and Carla Gordon and Volodymyr Yanov and David Traum},
url = {https://aclanthology.org/2020.lrec-1.91},
isbn = {979-10-95546-34-4},
year = {2020},
date = {2020-05-01},
urldate = {2023-03-31},
booktitle = {Proceedings of the Twelfth Language Resources and Evaluation Conference},
pages = {726--734},
publisher = {European Language Resources Association},
address = {Marseille, France},
abstract = {We collected a corpus of dialogues in a Wizard of Oz (WOz) setting in the Internet of Things (IoT) domain. We asked users participating in these dialogues to rate the system on a number of aspects, namely, intelligence, naturalness, personality, friendliness, their enjoyment, overall quality, and whether they would recommend the system to others. Then we asked dialogue observers, i.e., Amazon Mechanical Turkers (MTurkers), to rate these dialogues on the same aspects. We also generated simulated dialogues between dialogue policies and simulated users and asked MTurkers to rate them again on the same aspects. Using linear regression, we developed dialogue evaluation functions based on features from the simulated dialogues and the MTurkers' ratings, the WOz dialogues and the MTurkers' ratings, and the WOz dialogues and the WOz participants' ratings. We applied all these dialogue evaluation functions to a held-out portion of our WOz dialogues, and we report results on the predictive power of these different types of dialogue evaluation functions. Our results suggest that for three conversational aspects (intelligence, naturalness, overall quality) just training evaluation functions on simulated data could be sufficient.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Bonial, Claire; Donatelli, Lucia; Abrams, Mitchell; Lukin, Stephanie M; Tratz, Stephen; Marge, Matthew; Artstein, Ron; Traum, David; Voss, Clare R
Dialogue-AMR: Abstract Meaning Representation for Dialogue Proceedings Article
In: Proceedings of the 12th Language Resources and Evaluation Conference, pp. 12, European Language Resources Association, Marseille, France, 2020.
Abstract | Links | BibTeX | Tags:
@inproceedings{bonial_dialogue-amr_2020,
  author    = {Claire Bonial and Lucia Donatelli and Mitchell Abrams and Stephanie M Lukin and Stephen Tratz and Matthew Marge and Ron Artstein and David Traum and Clare R Voss},
  title     = {Dialogue-AMR: Abstract Meaning Representation for Dialogue},
  booktitle = {Proceedings of the 12th Language Resources and Evaluation Conference},
  pages     = {12},
  publisher = {European Language Resources Association},
  address   = {Marseille, France},
  year      = {2020},
  date      = {2020-05-01},
  url       = {https://www.aclweb.org/anthology/2020.lrec-1.86/},
  abstract  = {This paper describes a schema that enriches Abstract Meaning Representation (AMR) in order to provide a semantic representation for facilitating Natural Language Understanding (NLU) in dialogue systems. AMR offers a valuable level of abstraction of the propositional content of an utterance; however, it does not capture the illocutionary force or speaker’s intended contribution in the broader dialogue context (e.g., make a request or ask a question), nor does it capture tense or aspect. We explore dialogue in the domain of human-robot interaction, where a conversational robot is engaged in search and navigation tasks with a human partner. To address the limitations of standard AMR, we develop an inventory of speech acts suitable for our domain, and present “Dialogue-AMR”, an enhanced AMR that represents not only the content of an utterance, but the illocutionary force behind it, as well as tense and aspect. To showcase the coverage of the schema, we use both manual and automatic methods to construct the “DialAMR” corpus—a corpus of human-robot dialogue annotated with standard AMR and our enriched Dialogue-AMR schema. Our automated methods can be used to incorporate AMR into a larger NLU pipeline supporting human-robot dialogue.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Alavi, Seyed Hossein; Leuski, Anton; Traum, David
Which Model Should We Use for a Real-World Conversational Dialogue System? a Cross-Language Relevance Model or a Deep Neural Net? Proceedings Article
In: Proceedings of the 12th Language Resources and Evaluation Conference, pp. 735–742, European Language Resources Association, Marseille, France, 2020.
Abstract | Links | BibTeX | Tags:
@inproceedings{alavi_which_2020,
title = {Which Model Should We Use for a Real-World Conversational Dialogue System? a Cross-Language Relevance Model or a Deep Neural Net?},
author = {Seyed Hossein Alavi and Anton Leuski and David Traum},
url = {https://www.aclweb.org/anthology/2020.lrec-1.92/},
year = {2020},
date = {2020-05-01},
booktitle = {Proceedings of the 12th Language Resources and Evaluation Conference},
pages = {735--742},
publisher = {European Language Resources Association},
address = {Marseille, France},
abstract = {We compare two models for corpus-based selection of dialogue responses: one based on cross-language relevance with a cross-language LSTM model. Each model is tested on multiple corpora, collected from two different types of dialogue source material. Results show that while the LSTM model performs adequately on a very large corpus (millions of utterances), its performance is dominated by the cross-language relevance model for a more moderate-sized corpus (ten thousands of utterances).},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Chaffey, Patricia; Artstein, Ron; Georgila, Kallirroi; Pollard, Kimberly A.; Gilani, Setareh Nasihati; Krum, David M.; Nelson, David; Huynh, Kevin; Gainer, Alesia; Alavi, Seyed Hossein; Yahata, Rhys; Leuski, Anton; Yanov, Volodymyr; Traum, David
Human swarm interaction using plays, audibles, and a virtual spokesperson Proceedings Article
In: Proceedings of Artificial Intelligence and Machine Learning for Multi-Domain Operations Applications II, pp. 40, SPIE, Online Only, United States, 2020, ISBN: 978-1-5106-3603-3 978-1-5106-3604-0.
Abstract | Links | BibTeX | Tags:
@inproceedings{chaffey_human_2020,
  author    = {Patricia Chaffey and Ron Artstein and Kallirroi Georgila and Kimberly A. Pollard and Setareh Nasihati Gilani and David M. Krum and David Nelson and Kevin Huynh and Alesia Gainer and Seyed Hossein Alavi and Rhys Yahata and Anton Leuski and Volodymyr Yanov and David Traum},
  title     = {Human swarm interaction using plays, audibles, and a virtual spokesperson},
  booktitle = {Proceedings of Artificial Intelligence and Machine Learning for Multi-Domain Operations Applications II},
  pages     = {40},
  publisher = {SPIE},
  address   = {Online Only, United States},
  year      = {2020},
  date      = {2020-04-01},
  isbn      = {978-1-5106-3603-3 978-1-5106-3604-0},
  doi       = {10.1117/12.2557573},
  url       = {https://www.spiedigitallibrary.org/conference-proceedings-of-spie/11413/2557573/Human-swarm-interaction-using-plays-audibles-and-a-virtual-spokesperson/10.1117/12.2557573.full},
  abstract  = {This study explores two hypotheses about human-agent teaming: 1. Real-time coordination among a large set of autonomous robots can be achieved using predefined “plays” which define how to execute a task, and “audibles” which modify the play on the fly; 2. A spokesperson agent can serve as a representative for a group of robots, relaying information between the robots and human teammates. These hypotheses are tested in a simulated game environment: a human participant leads a search-and-rescue operation to evacuate a town threatened by an approaching wildfire, with the object of saving as many lives as possible. The participant communicates verbally with a virtual agent controlling a team of ten aerial robots and one ground vehicle, while observing a live map display with real-time location of the fire and identified survivors. Since full automation is not currently possible, two human controllers control the agent’s speech and actions, and input parameters to the robots, which then operate autonomously until the parameters are changed. Designated plays include monitoring the spread of fire, searching for survivors, broadcasting warnings, guiding residents to safety, and sending the rescue vehicle. A successful evacuation of all the residents requires personal intervention in some cases (e.g., stubborn residents) while delegating other responsibilities to the spokesperson agent and robots, all in a rapidly changing scene. The study records the participants’ verbal and nonverbal behavior in order to identify strategies people use when communicating with robotic swarms, and to collect data for eventual automation.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Shmueli-Scheuer, Michal; Artstein, Ron; Khazaeni, Yasaman; Fang, Hao; Liao, Q. Vera
user2agent: 2nd Workshop on User-Aware Conversational Agents Proceedings Article
In: Proceedings of the 25th International Conference on Intelligent User Interfaces Companion, pp. 9–10, Association for Computing Machinery, New York, NY, USA, 2020, ISBN: 978-1-4503-7513-9.
Abstract | Links | BibTeX | Tags:
@inproceedings{shmueli-scheuer_user2agent_2020,
title = {user2agent: 2nd Workshop on User-Aware Conversational Agents},
author = {Michal Shmueli-Scheuer and Ron Artstein and Yasaman Khazaeni and Hao Fang and Q. Vera Liao},
url = {https://doi.org/10.1145/3379336.3379356},
doi = {10.1145/3379336.3379356},
isbn = {978-1-4503-7513-9},
year = {2020},
date = {2020-03-01},
urldate = {2023-03-31},
booktitle = {Proceedings of the 25th International Conference on Intelligent User Interfaces Companion},
pages = {9--10},
publisher = {Association for Computing Machinery},
address = {New York, NY, USA},
series = {IUI '20},
abstract = {Conversational agents are becoming increasingly popular. These systems present an extremely rich and challenging research space for addressing many aspects of user awareness and adaptation, such as user profiles, contexts, personalities, emotions, social dynamics, conversational styles, etc. Adaptive interfaces are of long-standing interest for the HCI community. Meanwhile, new machine learning approaches are introduced in the current generation of conversational agents, such as deep learning, reinforcement learning, and active learning. It is imperative to consider how various aspects of user-awareness should be handled by these new techniques. The goal of this workshop is to bring together researchers in HCI, user modeling, and the AI and NLP communities from both industry and academia, who are interested in advancing the state-of-the-art on the topic of user-aware conversational agents. Through a focused and open exchange of ideas and discussions, we will work to identify central research topics in user-aware conversational agents and develop a strong interdisciplinary foundation to address them.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Georgila, Kallirroi; Leuski, Anton; Yanov, Volodymyr; Traum, David
Evaluation of Off-the-shelf Speech Recognizers Across Diverse Dialogue Domains Proceedings Article
In: Proceedings of the Twelfth Language Resources and Evaluation Conference, pp. 6469–6476, European Language Resources Association, Marseille, France, 2020.
Abstract | Links | BibTeX | Tags:
@inproceedings{georgila_evaluation_2020,
  title     = {Evaluation of Off-the-shelf Speech Recognizers Across Diverse Dialogue Domains},
  author    = {Georgila, Kallirroi and Leuski, Anton and Yanov, Volodymyr and Traum, David},
  url       = {https://aclanthology.org/2020.lrec-1.797.pdf},
  year      = {2020},
  date      = {2020-01-01},
  booktitle = {Proceedings of the Twelfth Language Resources and Evaluation Conference},
  pages     = {6469--6476},
  publisher = {European Language Resources Association},
  address   = {Marseille, France},
  abstract  = {We evaluate several publicly available off-the-shelf (commercial and research) automatic speech recognition (ASR) systems across diverse dialogue domains (in US-English). Our evaluation is aimed at non-experts with limited experience in speech recognition. Our goal is not only to compare a variety of ASR systems on several diverse data sets but also to measure how much ASR technology has advanced since our previous large-scale evaluations on the same data sets. Our results show that the performance of each speech recognizer can vary significantly depending on the domain. Furthermore, despite major recent progress in ASR technology, current state-of-the-art speech recognizers perform poorly in domains that require special vocabulary and language models, and under noisy conditions. We expect that our evaluation will prove useful to ASR consumers and dialogue system designers.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Uryupina, Olga; Artstein, Ron; Bristot, Antonella; Cavicchio, Federica; Delogu, Francesca; Rodriguez, Kepa J.; Poesio, Massimo
Annotating a broad range of anaphoric phenomena, in a variety of genres: the ARRAU Corpus Journal Article
In: Natural Language Engineering, vol. 26, no. 1, pp. 95–128, 2020, ISSN: 1351-3249, 1469-8110, (Publisher: Cambridge University Press).
Abstract | Links | BibTeX | Tags:
@article{uryupina_annotating_2020,
  title     = {Annotating a broad range of anaphoric phenomena, in a variety of genres: the {ARRAU} Corpus},
  author    = {Uryupina, Olga and Artstein, Ron and Bristot, Antonella and Cavicchio, Federica and Delogu, Francesca and Rodriguez, Kepa J. and Poesio, Massimo},
  url       = {https://www.cambridge.org/core/journals/natural-language-engineering/article/abs/annotating-a-broad-range-of-anaphoric-phenomena-in-a-variety-of-genres-the-arrau-corpus/17E7FA2CB2E36C213E2649479593B6B0},
  doi       = {10.1017/S1351324919000056},
  issn      = {1351-3249, 1469-8110},
  year      = {2020},
  date      = {2020-01-01},
  urldate   = {2023-03-31},
  journal   = {Natural Language Engineering},
  volume    = {26},
  number    = {1},
  pages     = {95--128},
  publisher = {Cambridge University Press},
  abstract  = {This paper presents the second release of arrau, a multigenre corpus of anaphoric information created over 10 years to provide data for the next generation of coreference/anaphora resolution systems combining different types of linguistic and world knowledge with advanced discourse modeling supporting rich linguistic annotations. The distinguishing features of arrau include the following: treating all NPs as markables, including non-referring NPs, and annotating their (non-) referentiality status; distinguishing between several categories of non-referentiality and annotating non-anaphoric mentions; thorough annotation of markable boundaries (minimal/maximal spans, discontinuous markables); annotating a variety of mention attributes, ranging from morphosyntactic parameters to semantic category; annotating the genericity status of mentions; annotating a wide range of anaphoric relations, including bridging relations and discourse deixis; and, finally, annotating anaphoric ambiguity. The current version of the dataset contains 350K tokens and is publicly available from LDC. In this paper, we discuss in detail all the distinguishing features of the corpus, so far only partially presented in a number of conference and workshop papers, and we also discuss the development between the first release of arrau in 2008 and this second one.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Shinagawa, Seitaro; Yoshino, Koichiro; Alavi, Seyed Hossein; Georgila, Kallirroi; Traum, David; Sakti, Sakriani; Nakamura, Satoshi
An Interactive Image Editing System Using an Uncertainty-Based Confirmation Strategy Journal Article
In: IEEE Access, vol. 8, pp. 98471–98480, 2020, ISSN: 2169-3536.
Abstract | Links | BibTeX | Tags:
@article{shinagawa_interactive_2020,
  title     = {An Interactive Image Editing System Using an Uncertainty-Based Confirmation Strategy},
  author    = {Shinagawa, Seitaro and Yoshino, Koichiro and Alavi, Seyed Hossein and Georgila, Kallirroi and Traum, David and Sakti, Sakriani and Nakamura, Satoshi},
  url       = {https://ieeexplore.ieee.org/abstract/document/9099288},
  doi       = {10.1109/ACCESS.2020.2997012},
  issn      = {2169-3536},
  year      = {2020},
  date      = {2020-01-01},
  journal   = {IEEE Access},
  volume    = {8},
  pages     = {98471--98480},
  abstract  = {We propose an interactive image editing system that has a confirmation dialogue strategy using an entropy-based uncertainty calculation on its generated images with Deep Convolutional Generative Adversarial Networks (DCGAN). DCGAN is an image generative model that learns an image manifold of a given dataset and enables continuous change of an image. Our proposed image editing system combines DCGAN with a natural language interface that accepts image editing requests in natural language. Although such a system is helpful for human users, it often faces uncertain requests to generate acceptable images. A promising approach to solve this problem is introducing a dialogue process that shows multiple candidates and confirms the user's intention. However, confirming every editing request creates redundant dialogues. To achieve more efficient dialogues, we propose an entropy-based dialogue strategy that decides when the system should confirm, and enables effective image editing through a dialogue that reduces redundant confirmations. We conducted image editing dialogue experiments using an avatar face illustration dataset for editing by natural language requests. Through quantitative and qualitative analysis, our results show that our entropy-based confirmation strategy achieved an effective dialogue by generating images desired by users.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}