% Journal article: Chiorean et al., Human Mutation 43(9), 2022.
% - Proper nouns in the title are brace-protected so sentence-casing styles keep their capitals.
% - Funding and copyright text is stored in `annote` (not printed by the standard styles),
%   so it is not appended to every rendered reference.
% - Dashes are written as ASCII `-` / `--` so the entry is safe under classic 8-bit BibTeX.
@article{bc851b270aa04094a1ff583bc86d3380,
  author   = "Chiorean, Andreea and Farncombe, {Kirsten M.} and Delong, Sean and Andric, Veronica and Ansar, Safa and Chan, Clarissa and Clark, Kaitlin and Danos, {Arpad M.} and Gao, Yizhuo and Giles, {Rachel H.} and Goldenberg, Anna and Jani, Payal and Krysiak, Kilannin and Kujan, Lynzey and Macpherson, Samantha and Maher, {Eamonn R.} and McCoy, {Liam G.} and Salama, Yasser and Saliba, Jason and Sheta, Lana and Griffith, Malachi and Griffith, {Obi L.} and Erdman, Lauren and Ramani, Arun and Kim, {Raymond H.}",
  title    = "Large scale genotype- and phenotype-driven machine learning in {Von Hippel-Lindau} disease",
  journal  = "Human Mutation",
  year     = "2022",
  month    = sep,
  volume   = "43",
  number   = "9",
  pages    = "1268--1285",
  doi      = "10.1002/humu.24392",
  issn     = "1059-7794",
  language = "English",
  keywords = "CIViC, Von Hippel-Lindau, genotype--phenotype, machine learning, spectral clustering",
  abstract = "Von Hippel-Lindau (VHL) disease is a hereditary cancer syndrome where individuals are predisposed to tumor development in the brain, adrenal gland, kidney, and other organs. It is caused by pathogenic variants in the VHL tumor suppressor gene. Standardized disease information has been difficult to collect due to the rarity and diversity of VHL patients. Over 4100 unique articles published until October 2019 were screened for germline genotype--phenotype data. Patient data were translated into standardized descriptions using Human Genome Variation Society gene variant nomenclature and Human Phenotype Ontology terms and has been manually curated into an open-access knowledgebase called Clinical Interpretation of Variants in Cancer. In total, 634 unique VHL variants, 2882 patients, and 1991 families from 427 papers were captured. We identified relationship trends between phenotype and genotype data using classic statistical methods and spectral clustering unsupervised learning. Our analyses reveal earlier onset of pheochromocytoma/paraganglioma and retinal angiomas, phenotype co-occurrences and genotype--phenotype correlations including hotspots. It confirms existing VHL associations and can be used to identify new patterns and associations in VHL disease. Our database serves as an aggregate knowledge translation tool to facilitate sharing information about the pathogenicity of VHL variants.",
  annote   = "Funding Information: We would like to acknowledge Katherine Nathanson for her valuable feedback, Chansonette Badduke and Clare Sheen for their assistance in variant interpretation and Ani Orchanian-Cheff for generating the publication search. We would also like to thank the CIViC community for their contributions to the curation and improvement of CIViC data for VHL and beyond. This study is funded by VHL Alliance Research Grant, Starbucks Clinical Genetics/Genomics Research Studentship Award (A. C.), The Bhalwani Family Charitable Foundation (R. H. K.), National Human Genome Research Institute of the National Institutes of Health Award Number R00HG007940 (M. G.) and K22CA188163 (O. L. G.), NCI Award Number U01CA209936 (CIViC project), NCI Award Number U24CA237719 (CIViC project), Cancer Moonshot and Childhood Cancer Data Initiative (CIViC project), Washington University Institute of Clinical and Translational Sciences grant UL1TR002345 from the National Center for Advancing Translational Sciences of the National Institutes of Health (CIViC project), and Children's Discovery Institute of the St. Louis Children's Hospital and Washington University School of Medicine (CIViC project). Publisher Copyright: {\textcopyright} 2022 The Authors. Human Mutation published by Wiley Periodicals LLC.",
}