My research focuses on making machine learning more useful, more
interpretable, and able to learn and interact from humans. This helps
users sift through decades of documents; discover when individuals lie,
reframe, or change the topic in a conversation; or to compete against
humans in games that are based in natural language.
% Conference paper on pragmatic awareness in maternal-health question answering.
@inproceedings{Srikanth:Sarkar:Y.:M.:C.:Rudinger:Boyd-Graber-2024,
  author    = {Neha Srikanth and Rupak Sarkar and Mane, Heran Y. and Aparicio, Elizabeth M. and Nguyen, Quynh C. and Rachel Rudinger and Jordan Boyd-Graber},
  title     = {Pregnant Questions: The Importance of Pragmatic Awareness in Maternal Health Question Answering},
  booktitle = {North American Association for Computational Linguistics},
  year      = {2024},
  url       = {http://umiacs.umd.edu/~jbg//docs/2024_naacl_pregnant.pdf},
}
% The sixth author is written in "Last, First" form so that the generational
% suffix stays attached to the surname instead of being parsed as the surname
% on its own; the accent is braced as a BibTeX special character for sorting.
@inproceedings{Si:Goyal:Wu:Zhao:Feng:III:Boyd-Graber-2024,
  author    = {Chenglei Si and Navita Goyal and Tongshuang Wu and Chen Zhao and Shi Feng and Daum{\'e} III, Hal and Jordan Boyd-Graber},
  title     = {Large Language Models Help Humans Verify Truthfulness---Except When They Are Convincingly Wrong},
  booktitle = {North American Association for Computational Linguistics},
  year      = {2024},
  url       = {http://umiacs.umd.edu/~jbg//docs/2024_naacl_convincingly.pdf},
}
% Conference paper on piloting real-time assistance for simultaneous interpreters.
@inproceedings{Grissom-II:Shoemaker:Goldman:Shi:Stewart:Rytting:Findlater:Boyd-Graber:Li:Grissom-II:Boyd-Graber-2024,
  author    = {Alvin {Grissom II} and Jo Shoemaker and Benjamin Goldman and Ruikang Shi and Craig Stewart and C. Anton Rytting and Leah Findlater and Jordan Boyd-Graber},
  title     = {Rapidly Piloting Real-time Linguistic Assistance for Simultaneous Interpreters with Untrained Bilingual Surrogates},
  booktitle = {Linguistic Resources and Evaluation Conference},
  year      = {2024},
  location  = {Torino, Italy},
  url       = {http://umiacs.umd.edu/~jbg//docs/2024_lrec_siminthelp.pdf},
}
Quynh C. Nguyen, Elizabeth M. Aparicio, Michelle Jasczynski, Amara Channell Doig, Xiaohe Yue, Heran Mane, Neha Punklik Srikanth, Francia Ximena Marin Gutierrez, Nataly Delcid, Xin He, and Jordan Boyd-Graber. Randomized Pilot of Rosie, a Health Education Question-and-Answer Chatbot for New Mothers. Journal of Medical Internet Research: Journal of Formative Research, 2024. [Bibtex]
% The chatbot name is braced so sentence-casing styles keep its capital letter.
@article{Nguyen:Aparicio:Jasczynski:Doig:Yue:Mane:Srikanth:Gutierrez:Delcid:He:Boyd-Graber-2024,
  author  = {Quynh C. Nguyen and Elizabeth M. Aparicio and Michelle Jasczynski and Amara Channell Doig and Xiaohe Yue and Heran Mane and Neha Srikanth and Francia Ximena Marin Gutierrez and Nataly Delcid and Xin He and Jordan Boyd-Graber},
  title   = {Randomized Pilot of {Rosie}, a Health Education Question-and-Answer Chatbot for New Mothers},
  journal = {Journal of Medical Internet Research: Journal of Formative Research},
  year    = {2024},
  url     = {https://formative.jmir.org/2024/1/e51361},
}
% The system name is braced so sentence-casing styles do not lowercase it.
@article{Mondal:Li:Hou:Natarajan:Garimella:Bandyopadhyay:Boyd-Graber-2024,
  author  = {Ishani Mondal and Zongxia Li and Yufang Hou and Anandhavelu Natarajan and Aparna Garimella and Sambaran Bandyopadhyay and Jordan Boyd-Graber},
  title   = {{SciDoc2Diagrammer-MAF}: Towards Generation of Scientific Diagrams from Documents guided by Multi-Aspect Feedback Refinement},
  journal = {Findings of the Empirical Methods in Natural Language Processing},
  year    = {2024},
  url     = {http://umiacs.umd.edu/~jbg//docs/2024_emnlp_diagramgen.pdf},
}
% The acronym is braced so sentence-casing styles do not lowercase it.
@article{Li:Mondal:Nghiem:Liang:Boyd-Graber-2024,
  author   = {Zongxia Li and Ishani Mondal and Huy Nghiem and Yijun Liang and Jordan Boyd-Graber},
  title    = {{PEDANTS} (Precise Evaluations of Diverse Answer Nominee Text for Skinflints): Use Evaluation Metrics Wisely---Efficient Evaluation Analysis and Benchmarking for Open-Domain Question Answering},
  journal  = {Findings of the Empirical Methods in Natural Language Processing},
  year     = {2024},
  location = {Miami},
  url      = {https://arxiv.org/abs/2402.11161},
}
% The benchmark name is braced so sentence-casing styles do not lowercase it.
@article{Wu:Guan:Li:Huang:Liu:Wang:Xian:Shrivastava:Huang:Boyd-Graber:Zhou:Manocha-2024,
  author   = {Xiyang Wu and Tianrui Guan and Dianqi Li and Shuaiyi Huang and Xiaoyu Liu and Xijun Wang and Ruiqi Xian and Abhinav Shrivastava and Furong Huang and Jordan Boyd-Graber and Tianyi Zhou and Dinesh Manocha},
  title    = {{AUTOHALLUSION}: Automatic Generation of Hallucination Benchmarks for Vision-Language Models},
  journal  = {Findings of the Empirical Methods in Natural Language Processing},
  year     = {2024},
  location = {Miami},
  url      = {https://arxiv.org/abs/2406.10900},
}
% The acronym is braced so sentence-casing styles do not lowercase it.
@inproceedings{Li:Mao:Stephens:Goel:Walpole:Fung:Dima:Boyd-Graber-2024,
  author    = {Zongxia Li and Andrew Mao and Daniel Kofi Stephens and Pranav Goel and Emily Walpole and Juan Francisco Fung and Alden Dima and Jordan Lee Boyd-Graber},
  title     = {{TENOR}: Topic Enabled Neural Organization and Recommendation: Evaluating Topic Models in Task Based Settings},
  booktitle = {European Association for Computational Linguistics},
  year      = {2024},
  url       = {http://umiacs.umd.edu/~jbg//docs/2024_eacl_tenor.pdf},
}
% The acronym is braced so sentence-casing styles do not lowercase it.
@inproceedings{Mondal:S:Natarajan:Garimella:Bandyopadhyay:Boyd-Graber-2024,
  author    = {Ishani Mondal and Shwetha S and Anandhavelu Natarajan and Aparna Garimella and Sambaran Bandyopadhyay and Jordan Boyd-Graber},
  title     = {Presentations by the People, for the People: Harnessing {LLMs} for Generating Persona-Aware Slides from Documents},
  booktitle = {European Association for Computational Linguistics},
  year      = {2024},
  url       = {http://umiacs.umd.edu/~jbg//docs/2024_eacl_slides.pdf},
}
% The acronym is braced so sentence-casing styles do not lowercase it.
@inproceedings{Kabir:Sung:Bandyopadhyay:Zou:Chandra:Boyd-Graber-2024,
  author    = {Tasnim Kabir and Yoo Yeon Sung and Saptarashmi Bandyopadhyay and Hao Zou and Abhranil Chandra and Jordan Lee Boyd-Graber},
  title     = {You Make me Feel like a Natural Question: Training {QA} Systems on Transformed Trivia Questions},
  booktitle = {Empirical Methods in Natural Language Processing},
  year      = {2024},
  location  = {Miami},
  url       = {http://umiacs.umd.edu/~jbg//docs/2024_emnlp_natural.pdf},
}
Accessible Abstract: Many of the questions for training AIs how to answer questions come from the queries users type into search engines (like Google's Natural Questions). Is there a cheaper---perhaps even better---way? We propose a "naturalization" technique to turn high-quality, rigorously edited trivia questions into examples that resembles Natural Questions. Training on our naturalized questions and testing on natural questions comes close to the results with using Natural Questions, and we can improve results on MMLU (a standard modern evaluation set) by using our data.
% The system name is braced so sentence-casing styles do not lowercase it.
@inproceedings{Shu:Balepur:Feng:Boyd-Graber-2024,
  author    = {Matthew Shu and Nishant Balepur and Shi Feng and Jordan Boyd-Graber},
  title     = {{KARL}: Knowledge-Aware Retrieval and Representations aid Retention and Learning in Students},
  booktitle = {Empirical Methods in Natural Language Processing},
  year      = {2024},
  location  = {Miami},
  url       = {http://umiacs.umd.edu/~jbg//docs/2024_emnlp_karl.pdf},
}
Accessible Abstract: Flashcard help students study by figuring out which flashcards to show students and when. However, current systems do not pay attention to what information (the actual text of the flashcards) to make these predictions. This paper introduces KARL, a new flashcard scheduler that uses language models to encode the text of flashcards. We host KARL in our own flashcard app for 500+ learners and show that students using KARL learn more efficiently than when they use other traditional systems that only know, for example, that a student has studied Flashcard \#24601 on Monday and got it wrong.
% Each author is separated by " and "; without the separator two people are
% parsed as a single name. The second author is given in "Last, First" form so
% the generational suffix stays with the surname, and the acronyms in the title
% are braced to survive sentence-casing styles.
@inproceedings{Gor:Daume-III:Boyd-Graber-2024,
  author    = {Maharshi Gor and Daum{\'e} III, Hal and Tianyi Zhou and Jordan Boyd-Graber},
  title     = {Do great minds think alike? Investigating Human-{AI} Complementarity in Question Answering with {CAIMIRA}},
  booktitle = {Empirical Methods in Natural Language Processing},
  year      = {2024},
  location  = {Miami},
  url       = {http://umiacs.umd.edu/~jbg//docs/2024_emnlp_caimira.pdf},
}
Accessible Abstract: CAIMIRA discovers the skills that humans and AIs use to answer questions. By scraping websites where trivia nerds answer really difficult questions and posing those questions to AI models like GPT-4 and LLaMA-3-70B, while humans excel in knowledge-based abductive reasoning, AI outperforms on fact-based historical recall. This research suggests future challenges should focus on more complex reasoning and nuanced language tasks to better align AI development with human cognitive strengths.
% The title uses TeX-style opening and closing quotes, since a straight double
% quote typesets as a closing quote on both sides; the acronyms are braced to
% survive sentence-casing styles.
@inproceedings{Balepur:Shu:Hoyle:Robey:Feng:Goldfarb-Tarrant:Boyd-Graber-2024,
  author    = {Nishant Balepur and Matthew Shu and Alexander Hoyle and Alison Robey and Shi Feng and Seraphina Goldfarb-Tarrant and Jordan Boyd-Graber},
  title     = {A {SMART} Mnemonic Sounds like ``Glue Tonic'': Mixing {LLMs} with Student Feedback to Make Mnemonic Learning Stick},
  booktitle = {Empirical Methods in Natural Language Processing},
  year      = {2024},
  location  = {Miami},
  url       = {http://umiacs.umd.edu/~jbg//docs/2024_emnlp_mnemonic.pdf},
}
Accessible Abstract: Learning vocabulary (e.g., benevolent) can be tedious, but using mnemonics (e.g., benevolent sounds like "benefits," and a kind boss gives benefits) makes it more engaging and effective. This paper introduces SMART, a large language model trained to produce mnemonics based on feedback from flashcard learners. Students struggle to predict which mnemonics will help them most. Still, by training SMART on both student preferences and learning outcomes, we can generate mnemonics as effectively as GPT-4, but at a much lower cost.
Wichayaporn Wongkamjan and Feng Gu and Yanze Wang and Ulf Hermjakob and Jonathan May and Brandon M. Stewart and Jonathan K. Kummerfeld and Denis Peskoff and Jordan Lee Boyd-Graber. More Victories, Less Cooperation: Assessing Cicero’s Diplomacy Play. Association for Computational Linguistics, 2024. [Bibtex]
% The author field is required for this entry type. The title uses an ASCII
% apostrophe, and the names of the system and the game are braced so
% sentence-casing styles keep their capitals.
@inproceedings{Boyd-Graber-2024,
  author    = {Wichayaporn Wongkamjan and Feng Gu and Yanze Wang and Ulf Hermjakob and Jonathan May and Brandon M. Stewart and Jonathan K. Kummerfeld and Denis Peskoff and Jordan Lee Boyd-Graber},
  title     = {More Victories, Less Cooperation: Assessing {Cicero}'s {Diplomacy} Play},
  booktitle = {Association for Computational Linguistics},
  year      = {2024},
  location  = {Bangkok, Thailand},
  url       = {http://umiacs.umd.edu/~jbg//docs/2024_acl_cicero.pdf},
}
Accessible Abstract: Meta's recent AI, Cicero, grabbed headlines by its ability to beat humans at the game of Diplomacy: notable because players of the game not just need to make the right moves but also need to negotiate with each other in natural language. This paper investigates why it wins so many games, measuring its ability to persuade and trick other players. While Cicero wins just about every game, this is because of superhuman strategy, not superhuman communication, suggesting there is still further room for developing Diplomacy-playing AIs.
% The year field holds a four-digit year, taken here from the arXiv identifier
% (2406 = June 2024); the preprint status is recorded in the note field and the
% arXiv identifier in the eprint fields. The metric name is braced to survive
% sentence-casing styles.
@article{Sung:Fleisig:Mondal:Boyd-Graber-Preprint,
  author        = {Yoo Yeon Sung and Eve Fleisig and Ishani Mondal and Jordan Lee Boyd-Graber},
  title         = {{ADVSCORE}: A Metric for the Evaluation and Creation of Adversarial Benchmarks},
  journal       = {ArXiv},
  year          = {2024},
  note          = {Preprint},
  eprint        = {2406.16342},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2406.16342},
}
Benjamin Börschinger, Jordan Boyd-Graber, Christian Buck, Jannis Bulian, Massimiliano Ciaramita, Michelle Chen Huebscher, Wojciech Gajewski, Yannic Kilcher, Rodrigo Nogueira, and Lierni Sestorain Saralegu. Meta Answering for Machine Reading. ArXiv, Preprint. [Preprint] [Bibtex]
% The year field holds a four-digit year, taken here from the arXiv identifier
% (1911 = November 2019); the preprint status is recorded in the note field and
% the arXiv identifier in the eprint fields. The umlaut in the first author's
% surname is braced as a BibTeX special character so it sorts as one letter.
% NOTE(review): the citation key contains an accent command, which is unsafe in
% cite commands; it is kept unchanged so existing references still resolve.
@article{B\"orschinger:Boyd-Graber:Buck:Bulian:Ciaramita:Huebscher:Gajewski:Kilcher:Nogueira:Saralegu-Preprint,
  author        = {Benjamin B{\"o}rschinger and Jordan Boyd-Graber and Christian Buck and Jannis Bulian and Massimiliano Ciaramita and Michelle Chen Huebscher and Wojciech Gajewski and Yannic Kilcher and Rodrigo Nogueira and Lierni Sestorain Saralegu},
  title         = {Meta Answering for Machine Reading},
  journal       = {ArXiv},
  year          = {2019},
  note          = {Preprint},
  eprint        = {1911.04156},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/1911.04156},
}
% The year field holds a four-digit year, taken here from the arXiv identifier
% (1904 = April 2019); the preprint status is recorded in the note field and
% the arXiv identifier in the eprint fields.
@article{Rodriguez:Feng:Iyyer:He:Boyd-Graber-Preprint,
  author        = {Pedro Rodriguez and Shi Feng and Mohit Iyyer and He He and Jordan Boyd-Graber},
  title         = {Quizbowl: The Case for Incremental Question Answering},
  journal       = {ArXiv},
  year          = {2019},
  note          = {Preprint},
  eprint        = {1904.04792},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/1904.04792},
}