% Journal article record (Scientific Reports, vol. 9, 2019) with a long consortium author list.
% The proper noun in the title is brace-protected so sentence-casing styles keep its capital.
% Multi-word surnames are written in braced "Last, First" form so BibTeX does not split them.
@article{595001d71a024032804c1ddb6e682191,
title = "Comparative performances of machine learning methods for classifying {Crohn} Disease patients using genome-wide genotyping data",
abstract = "Crohn Disease (CD) is a complex genetic disorder for which more than 140 genes have been identified using genome wide association studies (GWAS). However, the genetic architecture of the trait remains largely unknown. The recent development of machine learning (ML) approaches incited us to apply them to classify healthy and diseased people according to their genomic information. The Immunochip dataset containing 18,227 CD patients and 34,050 healthy controls enrolled and genotyped by the international Inflammatory Bowel Disease genetic consortium (IIBDGC) has been re-analyzed using a set of ML methods: penalized logistic regression (LR), gradient boosted trees (GBT) and artificial neural networks (NN). The main score used to compare the methods was the Area Under the ROC Curve (AUC) statistics. The impact of quality control (QC), imputing and coding methods on LR results showed that QC methods and imputation of missing genotypes may artificially increase the scores. At the opposite, neither the patient/control ratio nor marker preselection or coding strategies significantly affected the results. LR methods, including Lasso, Ridge and ElasticNet provided similar results with a maximum AUC of 0.80. GBT methods like XGBoost, LightGBM and CatBoost, together with dense NN with one or more hidden layers, provided similar AUC values, suggesting limited epistatic effects in the genetic architecture of the trait. ML methods detected near all the genetic variants previously identified by GWAS among the best predictors plus additional predictors with lower effects. The robustness and complementarity of the different methods are also studied. Compared to LR, non-linear models such as GBT or NN may provide robust complementary approaches to identify and classify genetic markers.",
keywords = "Crohn's disease, genetics research",
author = "Alberto Romagnoni and Simon J{\'e}gou and {Van Steen}, Kristel and Gilles Wainrib and Hugot, {Jean Pierre} and {International Inflammatory Bowel Disease Genetics Consortium (IIBDGC)} and Laurent Peyrin-Biroulet and Mathias Chamaillard and Colombel, {Jean Frederick} and Mario Cottone and Mauro D{\textquoteright}Amato and Renata D{\textquoteright}Inc{\`a} and Jonas Halfvarson and Paul Henderson and Amir Karban and Kennedy, {Nicholas A.} and Khan, {Mohammed Azam} and Marc L{\'e}mann and Arie Levine and Dunecan Massey and Monica Milla and Ng, {Sok Meng Evelyn} and Ioannis Oikonomou and Harald Peeters and Proctor, {Deborah D.} and Jean-Francois Rahier and Paul Rutgeerts and Frank Seibold and Laura Stronati and Taylor, {Kirstin M.} and Leif T{\"o}rkvist and Kullak Ublick and {Van Limbergen}, Johan and {Van Gossum}, Andre and Vatn, {Morten H.} and Hu Zhang and Wei Zhang and Andrews, {Jane M.} and Bampton, {Peter A.} and Murray Barclay and Florin, {Timothy H.} and Richard Gearry and Krupa Krishnaprasad and Lawrance, {Ian C.} and Gillian Mahy and Montgomery, {Grant W.} and Graham Radford-Smith and Roberts, {Rebecca L.} and Simms, {Lisa A.} and Katherine Hanigan and Anthony Croft and Leila Amininijad and Isabelle Cleynen and Olivier Dewit and Denis Franchimont and Michel Georges and Debby Laukens and Harald Peeters and Jean-Francois Rahier and Paul Rutgeerts and Emilie Theatre and {Van Gossum}, Andr{\'e} and Severine Vermeire and Guy Aumais and Leonard Baidoo and Barrie, {Arthur M.} and Karen Beck and Edmond-Jean Bernard and Binion, {David G.} and Alain Bitton and Brant, {Steve R.} and Cho, {Judy H.} and Albert Cohen and Kenneth Croitoru and Daly, {Mark J.} and Datta, {Lisa W.} and Colette Deslandres and Duerr, {Richard H.} and Debra Dutridge and John Ferguson and Joann Fultz and Philippe Goyette and Greenberg, {Gordon R.} and Talin Haritunians and Gilles Jobin and Seymour Katz and Lahaie, {Raymond G.} and McGovern, {Dermot P.} and Linda Nelson and Ng, {Sok Meng} and Kaida Ning and Ioannis Oikonomou and Pierre Par{\'e} and Proctor, {Deborah D.} and Regueiro, {Miguel D.} and Rioux, {John D.} and Elizabeth Ruggiero and Schumm, {L. Philip} and Marc Schwartz and Regan Scott and Yashoda Sharma and Silverberg, {Mark S.} and Denise Spears and Steinhart, {A. Hillary} and Stempak, {Joanne M.} and Swoger, {Jason M.} and Constantina Tsagarelis and Wei Zhang and Clarence Zhang and Hongyu Zhao and Jan Aerts and Tariq Ahmad and Hazel Arbury and Anthony Attwood and Adam Auton and Ball, {Stephen G.} and Balmforth, {Anthony J.} and Chris Barnes and Barrett, {Jeffrey C.} and In{\^e}s Barroso and Anne Barton and Bennett, {Amanda J.} and Sanjeev Bhaskar and Katarzyna Blaszczyk and John Bowes and Brand, {Oliver J.} and Braund, {Peter S.} and Francesca Bredin and Gerome Breen and Brown, {Morris J.} and Bruce, {Ian N.} and Jaswinder Bull and Burren, {Oliver S.} and John Burton and Jake Byrnes and Sian Caesar and Niall Cardin and Clee, {Chris M.} and Coffey, {Alison J.} and {MC Connell}, John and Conrad, {Donald F.} and Cooper, {Jason D.} and Dominiczak, {Anna F.} and Kate Downes and Drummond, {Hazel E.} and Darshna Dudakia and Andrew Dunham and Bernadette Ebbs and Diana Eccles and Sarah Edkins and Cathryn Edwards and Anna Elliot and Paul Emery and Evans, {David M.} and Gareth Evans and Steve Eyre and Anne Farmer and Ferrier, {I. Nicol} and Edward Flynn and Alistair Forbes and Liz Forty and Franklyn, {Jayne A.} and Frayling, {Timothy M.} and Freathy, {Rachel M.} and Eleni Giannoulatou and Polly Gibbs and Paul Gilbert and Katherine Gordon-Smith and Emma Gray and Elaine Green and Groves, {Chris J.} and Detelina Grozeva and Rhian Gwilliam and Anita Hall and Naomi Hammond and Matt Hardy and Pile Harrison and Neelam Hassanali and Husam Hebaishi and Sarah Hines and Anne Hinks and Hitman, {Graham A.} and Lynne Hocking and Chris Holmes and Eleanor Howard and Philip Howard and Howson, {Joanna M.M.} and Debbie Hughes and Sarah Hunt and Isaacs, {John D.} and Mahim Jain and Jewell, {Derek P.} and Toby Johnson and Jolley, {Jennifer D.} and Jones, {Ian R.} and Jones, {Lisa A.} and George Kirov and Langford, {Cordelia F.} and Hana Lango-Allen and Lathrop, {G. Mark} and James Lee and Lee, {Kate L.} and Charlie Lees and Kevin Lewis and Lindgren, {Cecilia M.} and Meeta Maisuria-Armer and Julian Maller and John Mansfield and Marchini, {Jonathan L.} and Paul Martin and Massey, {Dunecan C. O.} and McArdle, {Wendy L.} and Peter McGuffin and McLay, {Kirsten E.} and Gil McVean and Alex Mentzer and Mimmack, {Michael L.} and Morgan, {Ann E.} and Morris, {Andrew P.} and Craig Mowat and Munroe, {Patricia B.} and Simon Myers and William Newman and Nimmo, {Elaine R.} and O{\textquoteright}Donovan, {Michael C.} and Abiodun Onipinla and Ovington, {Nigel R.} and Owen, {Michael J.} and Kimmo Palin and Aarno Palotie and Kirstie Parnell and Richard Pearson and David Pernet and Perry, {John R. B.} and Anne Phillips and Vincent Plagnol and Prescott, {Natalie J.} and Inga Prokopenko and Quail, {Michael A.} and Suzanne Rafelt and Rayner, {Nigel W.} and Reid, {David M.} and Anthony Renwick and Ring, {Susan M.} and Neil Robertson and Samuel Robson and Ellie Russell and {St Clair}, David and Sambrook, {Jennifer G.} and Sanderson, {Jeremy D.} and Stevens, {Helen E.}",
note = "Open Access This article is licensed under a Creative Commons Attribution 4.0 International License, which permits use, sharing, adaptation, distribution and reproduction in any medium or format, as long as you give appropriate credit to the original author(s) and the source, provide a link to the Creative Commons license, and indicate if changes were made. The images or other third party material in this article are included in the article{\textquoteright}s Creative Commons license, unless indicated otherwise in a credit line to the material. If material is not included in the article{\textquoteright}s Creative Commons license and your intended use is not permitted by statutory regulation or exceeds the permitted use, you will need to obtain permission directly from the copyright holder. To view a copy of this license, visit http://creativecommons.org/licenses/by/4.0/.",
year = "2019",
month = jul,
day = "17",
doi = "10.1038/s41598-019-46649-z",
language = "English",
volume = "9",
journal = "Scientific Reports",
issn = "2045-2322",
publisher = "Nature Publishing Group",
}