% She et al. (2004), Nature 430(7002):857--864.
% The acknowledgement/funding text lives in the non-standard `funding` field,
% which bibliography styles ignore, so it is not printed in the reference list.
@article{30e9083f943a46b89fb1d13228fe89ac,
  author   = {She, Xinwei and Horvath, Julie E. and Jiang, Zhaoshi and Liu, Ge and Furey, Terrence S. and Christ, Laurie and Clark, Royden and Graves, Tina and Gulden, Cassy L. and Alkan, Can and Bailey, Jeff A. and Sahinalp, Cenk and Rocchi, Mariano and Haussler, David and Wilson, Richard K. and Miller, Webb and Schwartz, Stuart and Eichler, Evan E.},
  title    = {The structure and evolution of centromeric transition regions within the human genome},
  journal  = {Nature},
  volume   = {430},
  number   = {7002},
  pages    = {857--864},
  year     = {2004},
  month    = aug,
  day      = {19},
  doi      = {10.1038/nature02806},
  issn     = {0028-0836},
  language = {English},
  abstract = {An understanding of how centromeric transition regions are organized is a critical aspect of chromosome structure and function; however, the sequence context of these regions has been difficult to resolve on the basis of the draft genome sequence. We present a detailed analysis of the structure and assembly of all human pericentromeric regions (5 megabases). Most chromosome arms (35 out of 43) show a gradient of dwindling transcriptional diversity accompanied by an increasing number of interchromosomal duplications in proximity to the centromere. At least 30\% of the centromeric transition region structure originates from euchromatic gene-containing segments of DNA that were duplicatively transposed towards pericentromeric regions at a rate of six--seven events per million years during primate evolution. This process has led to the formation of a minimum of 28 new transcripts by exon exaptation and exon shuffling, many of which are primarily expressed in the testis. The distribution of these duplicated segments is nonrandom among pericentromeric regions, suggesting that some regions have served as preferential acceptors of euchromatic DNA.},
  funding  = {Funding Information: Acknowledgements We are grateful to the large-scale sequencing centres (Baylor College of Medicine, Cold Spring Harbor Laboratory, Genome Therapeutics Corporation, Harvard Partners Genome Center, Joint Genome Institute, The NIH Intramural Sequencing Center, The UK-MRC Sequencing Consortium, The University of Oklahoma Advanced Center for Genome Technology, The University of Texas Southwest, The Whitehead Institute for Biomedical Research, The Washington University Genome Sequencing Center and the Wellcome Trust Sanger Institute) for access to all large-scale finished sequence, genome assembly and trace sequence data from the human genome before publication. This work was supported by grants from NIH and DOE to E.E.E. and grants from P.R.I.N.C.E., MURST and Telethon to M.R.},
}