@comment{
  Koneru, Wei, Wu and Rajtmajer (2023): ICDMW 2023 workshop paper on predicting
  the publication outcomes of COVID-19 preprints.
  - copyright is an informational, non-standard field; the publisher copyright
    line used to sit in the note field, where styles print it verbatim.
  - eventdate (ISO 8601) and month record the conference dates that the note
    field gave as 01-12-2023 through 04-12-2023 (read as day-month-year).
  - NOTE(review): address holds a country rather than the publisher's city;
    confirm the city and refine.
}
@inproceedings{374bb4110e8440e29a9a47c1929a20c6,
  author    = {Koneru, Sai and Wei, Xin and Wu, Jian and Rajtmajer, Sarah},
  title     = {Can machine learning algorithms predict publication outcomes? {A} case study of {COVID-19} preprints},
  booktitle = {Proceedings -- 23rd {IEEE} International Conference on Data Mining Workshops, {ICDMW} 2023},
  editor    = {Wang, Jihe and He, Yi and Dinh, Thang N. and Grant, Christan and Qiu, Meikang and Pedrycz, Witold},
  series    = {{IEEE} International Conference on Data Mining Workshops, {ICDMW}},
  pages     = {1050--1057},
  publisher = {IEEE Computer Society},
  address   = {United States},
  year      = {2023},
  month     = dec,
  eventdate = {2023-12-01/2023-12-04},
  doi       = {10.1109/ICDMW60847.2023.00138},
  langid    = {american},
  copyright = {{\textcopyright} 2023 IEEE},
  abstract  = {The COVID-19 pandemic catalyzed a large body of scientific work, much of which was completed and disseminated with groundbreaking speed. A significant portion of COVID-related work was posted to preprint servers and COVID-related preprints were more widely cited than their counterparts. This work leverages information retrieval, natural language processing, and supervised learning to predict the subsequent publication, within a year, of COVID-related papers posted to preprint servers in peer-reviewed venues. Our work is inspired by prior work surveying human experts for the same task. We compare the performance of ML and human predictions and discuss the implications of our findings for scientific publishing. The findings demonstrate that the Multi-Layer Perceptron yielded the highest performance, achieving a macro F1 score of 0.674 on the held-out set. This underscores the challenge of accurately predicting the outcomes of the human peer review process. The data and code are available at https://github.com/Sai90000/preprint_prediction.git.},
}