% Journal article: RELISH consortium benchmark database paper (Database, 2019).
% Given names are deliberately left unbraced, with a space between initials,
% so that abbreviating bibliography styles can see every initial.
% Only the corporate author and particle surnames keep protective braces.
@article{ba1db5ae99904c80b2d44b6cfb8ad64b,
  author    = {Peter Brown and {RELISH Consortium} and Yaoqi Zhou and
               Tan, Aik Choon and El-Esawi, Mohamed A. and Thomas Liehr and
               Oliver Blanck and Gladue, Douglas P. and Almeida, Gabriel M. F. and
               Tomislav Cernava and Sorzano, Carlos O. and Yeung, Andy W. K. and
               Engel, Michael S. and Chandrasekaran, Arun R. and Thilo Muth and
               Staege, Martin S. and Daulatabad, Swapna V. and Darius Widera and
               Junpeng Zhang and Adrian Meule and Ken Honjo and
               Olivier Pourret and Yin, Cong Cong and Zhongheng Zhang and
               Marco Cascella and Flegel, Willy A. and Goodyear, Carl S. and
               {van Raaij}, Mark J. and Zuzanna Bukowy-Bieryllo and Campana, Luca G. and
               Kurniawan, Nicholas A. and David Lalaouna and H{\"u}ttner, Felix J. and
               Ammerman, Brooke A. and Felix Ehret and Cobine, Paul A. and
               Tan, Ene Choo and Hyemin Han and Wenfeng Xia and
               Christopher McCrum and Dings, Ruud P. M. and Francesco Marinello and
               Henrik Nilsson and Brett Nixon and Konstantinos Voskarides and
               Long Yang and Costa, Vincent D. and Johan Bengtsson-Palme and
               William Bradshaw and Grimm, Dominik G. and Nitin Kumar and
               Elvis Martis and Daniel Prieto and Sabnis, Sandeep C. and
               Amer, Said E. D. R. and Liew, Alan W. C. and Paul Perco and
               Farid Rahimi and Giuseppe Riva and Chongxing Zhang and
               Devkota, Hari P. and Koichi Ogami and Zarrin Basharat and
               Walter Fierz and Robert Siebers and Tan, Kok H. and
               Boehme, Karen A. and Peter Brenneisen and Brown, James A. L. and
               Dalrymple, Brian P. and Harvey, David J. and Grace Ng and
               Sebastiaan Werten and Mark Bleackley and Zhanwu Dai and
               Raman Dhariwal and Yael Gelfer and Hartmann, Marcus D. and
               Pawel Miotla and Radu Tamaian and Pragashnie Govender and
               Gurney-Champion, Oliver J. and Kauppila, Joonas H. and Xiaolei Zhang and
               Natalia Echeverr{\'i}a and Santhilal Subhash and Hannes Sallmon and
               Marco Tofani and Taeok Bae and Oliver Bosch and
               {\'O} Cu{\'i}v, P{\'a}raic and Antoine Danchin and Barthelemy Diouf and
               Tuomas Eerola and Evangelos Evangelou and Fabian Filipp and
               Hannes Klump and Lukasz Kurgan and Smith, Simon S. and
               Olivier Terrier and Neil Tuttle and Ascher, David B. and
               Janga, Sarath C. and Schulte, Leon N. and Daniel Becker and
               Christopher Browngardt and Bush, Stephen J. and Guillaume Gaullier and
               Kazuki Ide and Clement Meseko and Werner, Gijsbert D. A. and
               Jan Zaucha and Al-Farha, Abd A. and Greenwald, Noah F. and
               Popoola, Segun I. and Shaifur Rahman and Jialin Xu and
               Yang, Sunny Y. and Noboru Hiroi and Alper, Ozgul M. and
               Baker, Chris I. and Michael Bitzer and George Chacko and
               Birgit Debrabant and Ray Dixon and Evelyne Forano and
               Matthew Gilliham and Sarah Kelly and Klempnauer, Karl Heinz and
               Lidbury, Brett A. and Lin, Michael Z. and Iseult Lynch and
               Wujun Ma and Maibach, Edward W. and Mather, Diane E. and
               Nandakumar, Kutty S. and Ohgami, Robert S. and Piero Parchi and
               Patrizio Tressoldi and Yu Xue and Charles Armitage and
               Pierre Barraud and Stella Chatzitheochari and Coelho, Luis P. and
               Jiajie Diao and Doxey, Andrew C. and Ang{\'e}lique Gobet and
               Pingzhao Hu and Stefan Kaiser and Mitchell, Kate M. and
               Salama, Mohamed F. and Shabalin, Ivan G. and Haijun Song and
               Dejan Stevanovic and Ali Yadollahpour and Erliang Zeng and
               Katharina Zinke and Alimba, C. G. and Beyene, Tariku J. and
               Zehong Cao and Chan, Sherwin S. and Michael Gatchell and
               Andreas Kleppe and Marcin Piotrowski and Gonzalo Torga and
               Woldesemayat, Adugna A. and Cosacak, Mehmet I. and Scott Haston and
               Ross, Stephanie A. and Richard Williams and Alvin Wong and
               Abramowitz, Matthew K. and Andem Effiong and Senhong Lee and
               Abid, Muhammad B. and Cyrus Agarabi and Cedric Alaux and
               Albrecht, Dirk R. and Atkins, Gerald J. and Beck, Charles R. and
               Bonvin, A. M. J. J. and Emer Bourke and Thomas Brand and
               Braun, Ralf J. and Bull, James A. and Pedro Cardoso and
               Dee Carter and Delahay, Robin M. and Bernard Ducommun and
               Duijf, Pascal H. G. and Trevor Epp and Eskelinen, Eeva Liisa and
               Mazyar Fallah and Farber, Debora B. and Jose Fernandez-Triana and
               Frank Feyerabend and Tullio Florio and Michael Friebe and
               Saori Furuta and Mads Gabrielsen and Jens Gruber and
               Malgorzata Grybos and Qian Han and Michael Heinrich and
               Heikki Helanter{\"a} and Michael Huber and Albert Jeltsch and
               Fan Jiang and Claire Josse and Giuseppe Jurman and
               Haruyuki Kamiya and {de Keersmaecker}, Kim and Erik Kristiansson and
               {de Leeuw}, Frank Erik and Jiuyong Li and Shide Liang and
               Lopez-Escamez, Jose A. and Lopez-Ruiz, Francisco J. and Marchbank, Kevin J. and
               Rolf Marschalek and Mart{\'i}n, Carmen S. and Miele, Adriana E. and
               Xavier Montagutelli and Esteban Morcillo and Rosario Nicoletti and
               Monika Niehof and Ronan O'Toole and Toshihiko Ohtomo and
               Henrik Oster and Jun Shen and Olga Sukocheva and
               Enwu Liu and Day, David A. and Mangoni, Arduino A.},
  title     = {Large expert-curated database for benchmarking document similarity detection in biomedical literature search},
  journal   = {Database: The Journal of Biological Databases and Curation},
  year      = {2019},
  volume    = {2019},
  pages     = {1--67},
  doi       = {10.1093/database/baz085},
  issn      = {1758-0463},
  publisher = {Oxford University Press},
  language  = {English},
  keywords  = {Relevant Literature Search Consortium (RELISH), Literature searches, PubMed, Search engines, Biomedical research, Document recommendation systems},
  abstract  = {Document recommendation systems for locating relevant literature have mostly relied on methods developed a decade ago. This is largely due to the lack of a large offline gold-standard benchmark of relevant documents that cover a variety of research fields such that newly developed literature search techniques can be compared, improved and translated into practice. To overcome this bottleneck, we have established the RElevant LIterature SearcH consortium consisting of more than 1500 scientists from 84 countries, who have collectively annotated the relevance of over 180 000 PubMed-listed articles with regard to their respective seed (input) article/s. The majority of annotations were contributed by highly experienced, original authors of the seed articles. The collected data cover 76% of all unique PubMed Medical Subject Headings descriptors. No systematic biases were observed across different experience levels, research fields or time spent on annotations. More importantly, annotations of the same document pairs contributed by different scientists were highly concordant. We further show that the three representative baseline methods used to generate recommended articles for evaluation (Okapi Best Matching 25, Term Frequency-Inverse Document Frequency and PubMed Related Articles) had similar overall performances. Additionally, we found that these methods each tend to produce distinct collections of recommended articles, suggesting that a hybrid method may be required to completely capture all relevant articles. The established database server located at https://relishdb.ict.griffith.edu.au is freely available for the downloading of annotation data and the blind testing of new methods. We expect that this benchmark will be useful for stimulating the development of new powerful techniques for title and title/abstract-based search engines for relevant articles in biomedical science.},
  note      = {This is an Open Access article distributed under the terms of the Creative Commons Attribution License CC-BY (http://creativecommons.org/licenses/by/4.0/), which permits unrestricted reuse, distribution, and reproduction in any medium, provided the original work is properly cited. {\textcopyright} The Author(s) 2019. Published by Oxford University Press.},
}