% System-description paper by the NYCU-NLP team for SemEval-2024 Task 2
% (natural language inference for clinical trials), published in the
% SemEval 2024 workshop proceedings.
%
% Maintenance notes (these lines sit outside the entry, so BibTeX ignores them):
%  - The citation key is the original export hash; it is kept unchanged so
%    existing citations keep resolving.
%  - title: acronyms are brace-protected so sentence-casing styles keep them.
%  - author/editor: every name uses the "Last, First" form; given names are
%    no longer wrapped in braces so styles can abbreviate each part.
%  - editor: diacritics are written as LaTeX escapes; the compound surnames
%    "Tayyar Madabushi" and "Da San Martino" are braced as single units.
%    NOTE(review): editor spellings follow the published proceedings front
%    matter as best known -- confirm against the official record.
%  - series: removed because it only repeated the booktitle verbatim.
%  - month: taken from the conference date given in the note field.
%  - address: the export carried the Chinese word for "United States";
%    replaced with ASCII text. TODO confirm the publisher city, since this
%    field conventionally holds the publisher's location.
@inproceedings{b83c864989ed4e24867c5df30f392bca,
title = "{NYCU-NLP} at {SemEval-2024} Task 2: Aggregating Large Language Models in Biomedical Natural Language Inference for Clinical Trials",
abstract = "This study describes the model design of the NYCU-NLP system for the SemEval-2024 Task 2 that focuses on natural language inference for clinical trials. We aggregate several large language models to determine the inference relation (i.e., entailment or contradiction) between clinical trial reports and statements that may be manipulated with designed interventions to investigate the faithfulness and consistency of the developed models. First, we use ChatGPT v3.5 to augment original statements in training data and then fine-tune the SOLAR model with all augmented data. During the testing inference phase, we fine-tune the OpenChat model to reduce the influence of interventions and fed a cleaned statement into the fine-tuned SOLAR model for label prediction. Our submission produced a faithfulness score of 0.9236, ranking second of 32 participating teams, and ranked first for consistency with a score of 0.8092.",
author = "Lee, Lung Hao and Chiou, Chen Ya and Lin, Tzu Mi",
note = "Publisher Copyright: {\textcopyright} 2024 Association for Computational Linguistics.; 18th International Workshop on Semantic Evaluation, SemEval 2024, co-located with the 2024 Annual Conference of the North American Chapter of the Association for Computational Linguistics, NAACL 2024 ; Conference date: 20-06-2024 Through 21-06-2024",
year = "2024",
month = jun,
language = "English",
publisher = "Association for Computational Linguistics (ACL)",
pages = "1455--1462",
editor = "Ojha, Atul Kr. and Do{\u{g}}ru{\"o}z, A. Seza and {Tayyar Madabushi}, Harish and {Da San Martino}, Giovanni and Rosenthal, Sara and Ros{\'a}, Aiala",
booktitle = "SemEval 2024 - 18th International Workshop on Semantic Evaluation, Proceedings of the Workshop",
address = "United States",
}