@inproceedings{e225a925615e4c7aa80f3a4708145c25,
  title         = {{SciBERTSUM}: Extractive Summarization for Scientific Documents},
  author        = {Sefid, Athar and Giles, C. Lee},
  editor        = {Uchida, Seiichi and Barney, Elisa and Eglin, V{\'e}ronique},
  booktitle     = {Document Analysis Systems - 15th IAPR International Workshop, DAS 2022, Proceedings},
  series        = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)},
  publisher     = {Springer Science and Business Media Deutschland GmbH},
  address       = {Germany},
  internal-note = {NOTE(review): address should be the publisher's city, not a country -- confirm (likely Cham); consider adding the LNCS volume number},
  pages         = {688--701},
  year          = {2022},
  doi           = {10.1007/978-3-031-06555-2_46},
  isbn          = {9783031065545},
  language      = {English (US)},
  note          = {Publisher Copyright: {\textcopyright} 2022, Springer Nature Switzerland AG.; 15th IAPR International Workshop on Document Analysis Systems, DAS 2022 ; Conference date: 22-05-2022 Through 25-05-2022},
  abstract      = {The summarization literature focuses on the summarization of news articles. The news articles in the CNN-DailyMail are relatively short documents with about 30 sentences per document on average. We introduce SciBERTSUM, our summarization framework designed for the summarization of long documents like scientific papers with more than 500 sentences. SciBERTSUM extends BERTSUM to long documents by 1) adding a section embedding layer to include section information in the sentence vector and 2) applying a sparse attention mechanism where each sentences will attend locally to nearby sentences and only a small number of sentences attend globally to all other sentences. We used slides generated by the authors of scientific papers as reference summaries since they contain the technical details from the paper. The results show the superiority of our model in terms of ROUGE scores. (The code is available at https://github.com/atharsefid/SciBERTSUM ).},
}