% Ainscough et al. (2018), Nature Genetics 50(12), pp. 1735--1743:
% deep-learning model that automates manual refinement of somatic variant calls.
% The funding/acknowledgement and publisher-copyright text is stored in `annote`
% rather than `note`, because standard styles print `note` verbatim at the end of
% the reference; the text itself is unchanged.
% All authors use the "Last, First" form so name parsing is unambiguous.
@article{ae265cc2ba73420dba9b8395f3262197,
  author   = {Ainscough, Benjamin J. and Barnell, Erica K. and Ronning, Peter and Campbell, Katie M. and Wagner, Alex H. and Fehniger, Todd A. and Dunn, Gavin P. and Uppaluri, Ravindra and Govindan, Ramaswamy and Rohan, Thomas E. and Griffith, Malachi and Mardis, Elaine R. and Swamidass, S. Joshua and Griffith, Obi L.},
  title    = {A deep learning approach to automate refinement of somatic variant calling from cancer sequencing data},
  journal  = {Nature Genetics},
  year     = {2018},
  month    = dec,
  day      = {1},
  volume   = {50},
  number   = {12},
  pages    = {1735--1743},
  doi      = {10.1038/s41588-018-0257-y},
  issn     = {1061-4036},
  language = {English},
  abstract = {Cancer genomic analysis requires accurate identification of somatic variants in sequencing data. Manual review to refine somatic variant calls is required as a final step after automated processing. However, manual variant refinement is time-consuming, costly, poorly standardized, and non-reproducible. Here, we systematized and standardized somatic variant refinement using a machine learning approach. The final model incorporates 41,000 variants from 440 sequencing cases. This model accurately recapitulated manual refinement labels for three independent testing sets (13,579 variants) and accurately predicted somatic variants confirmed by orthogonal validation sequencing data (212,158 variants). The model improves on manual somatic refinement by reducing bias on calls otherwise subject to high inter-reviewer variability.},
  annote   = {Funding Information: The authors thank A. Petti, G. Chang, T. Li, C. Miller, L. Trani, R. Lesurf, Z. Skidmore, K. Krysiak, A. Ramu, and F. Gomez for assisting in data assembly. We also acknowledge L. Trani for performing manual review and for valuable discussion on the project. We gratefully acknowledge L. Wartman, J. DiPersio, M. Jacoby, B. Van Tine, R. Fields, B. Tan, S. Chi, D. Gutmann, and T. Ley for sharing genomic data that made this project possible. The authors also thank the patients and their families for their selfless contribution to the advancement of science. Part of this work was performed as part of the Washington University School of Medicine Genomics Tumor Board, which was funded with private research support from the Division of Oncology and the McDonnell Genome Institute. E.K.B. was supported by the National Cancer Institute (T32GM007200 and U01CA209936). T.E.R. received support from the National Institutes of Health/ National Cancer Institute (NIH/NCI) (R01CA142942) and the Breast Cancer Research Foundation. Select sample data was funded by the Genomics of AML PPG (T. Ley, PI, P01 CA101937). A.H.W. was supported by the NCI (NIH NCI F32CA206247). B.J.A. was supported by the Siteman Cancer Center. S. Swamidass is funded by the National Library of Medicine (NIH NLM R01LM012222 and NIH NLM R01LM012482) and acknowledges support from the Institute for Informatics at Washington University School of Medicine. M.G. is funded by the National Human Genome Research Institute (NIH NHGRI R00HG007940). O.L.G. is funded by the National Cancer Institute (NIH NCI K22CA188163 and NIH NCI U01CA209936). Publisher Copyright: {\textcopyright} 2018, The Author(s), under exclusive licence to Springer Nature America, Inc.},
}