Mappability

This package provides whole-genome mappability tracks for the human hg19 and hg38 assemblies. We used the 100-mer mappability track from the ENCODE Project and computed the weighted average of the mappability scores when multiple ENCODE regions overlap the same bin.

# Attach WGSmapp so that its datasets can be loaded with data()
library(WGSmapp)
# Load the hg19 mappability track into the workspace
data("mapp_hg19")
# Print the object: a GRanges whose `score` metadata column holds the
# mappability value of each range
mapp_hg19
## GRanges object with 21591667 ranges and 1 metadata column:
##              seqnames            ranges strand |     score
##                 <Rle>         <IRanges>  <Rle> | <numeric>
##          [1]     chr1       10001-10014      * |    0.0028
##          [2]     chr1             10015      * |    0.3333
##          [3]     chr1       10016-10026      * |       0.5
##          [4]     chr1       10027-10031      * |         1
##          [5]     chr1       10032-10036      * |       0.5
##          ...      ...               ...    ... .       ...
##   [21591663]     chrY 59363020-59363314      * |    0.0028
##   [21591664]     chrY 59363315-59363317      * |    0.3333
##   [21591665]     chrY          59363318      * |      0.25
##   [21591666]     chrY 59363319-59363320      * |    0.3333
##   [21591667]     chrY 59363321-59363517      * |       0.5
##   -------
##   seqinfo: 25 sequences from an unspecified genome

Blacklist regions

hg19

For the hg19 reference genome, “blacklist” bins, including segmental duplication regions and gaps in the reference assembly from telomere, centromere, and/or heterochromatin regions, are included in the package.

library(WGSmapp)
# Get segmental duplication regions. The file is read with a header row;
# `header` is spelled out in full rather than relying on partial matching of
# `head` against read.table()'s `header` argument.
seg.dup <- read.table(
  system.file("extdata", "GRCh37GenomicSuperDup.tab", package = "WGSmapp"),
  header = TRUE
)
# Get hg19 gaps (also read with a header row)
gaps <- read.table(
  system.file("extdata", "hg19gaps.txt", package = "WGSmapp"),
  header = TRUE
)

# Preview the first rows of the segmental duplication table
head(seg.dup)
##   chrom chromStart chromEnd        name score strand otherChrom otherStart
## 1  chr1      85326    87112 chr1:398212     0      _       chr1     398212
## 2  chr1     398212   400000  chr1:85326     0      _       chr1      85326
## 3  chr1      88000   121417 chr1:235525     0      +       chr1     235525
## 4  chr1     235525   267707  chr1:88000     0      +       chr1      88000
## 5  chr1      91256    92392 chr1:521369     0      +       chr1     521369
## 6  chr1     521369   522487  chr1:91256     0      +       chr1      91256
##   otherEnd otherSize uid posBasesHit testResult verdict chits ccov
## 1   400000      1788   1           0        N/A     N/A   N/A  N/A
## 2    87112      1786   1           0        N/A     N/A   N/A  N/A
## 3   267707     32182   2           0        N/A     N/A   N/A  N/A
## 4   121417     33417   2           0        N/A     N/A   N/A  N/A
## 5   522487      1118   3           0        N/A     N/A   N/A  N/A
## 6    92392      1136   3           0        N/A     N/A   N/A  N/A
##                    alignfile alignL indelN indelS alignB matchB mismatchB
## 1 align_both/0012/both060568   1788      2      2   1786   1757        29
## 2 align_both/0012/both060568   1788      2      2   1786   1757        29
## 3 align_both/0012/both060569  33449     25   1299  32150  31941       209
## 4 align_both/0012/both060569  33449     25   1299  32150  31941       209
## 5 align_both/0012/both060581   1137      4     20   1117   1092        25
## 6 align_both/0012/both060581   1137      4     20   1117   1092        25
##   transitionsB transversionsB fracMatch fracMatchIndel        jcK
## 1           15             14  0.983763       0.982662 0.01641570
## 2           15             14  0.983763       0.982662 0.01641570
## 3          133             76  0.993499       0.992727 0.00652911
## 4          133             76  0.993499       0.992727 0.00652911
## 5           18              7  0.977619       0.974130 0.02272210
## 6           18              7  0.977619       0.974130 0.02272210
##          k2K
## 1 0.01642270
## 2 0.01642270
## 3 0.00653207
## 4 0.00653207
## 5 0.02278150
## 6 0.02278150
# Preview the first rows of the hg19 gap table
head(gaps)
##   bin chrom chromStart  chromEnd   ix n     size            type bridge
## 1   0  chr1  124535434 142535434 1271 N 18000000 heterochromatin     no
## 2  23  chr1  121535434 124535434 1270 N  3000000      centromere     no
## 3  76  chr1    3845268   3995268   47 N   150000          contig     no
## 4  85  chr1   13219912  13319912  154 N   100000          contig     no
## 5  89  chr1   17125658  17175658  196 N    50000           clone    yes
## 6 101  chr1   29878082  30028082  337 N   150000          contig     no

hg38

For the hg38 reference genome, “blacklist” bins, including segmental duplication regions and gaps in the reference assembly from telomere, centromere, and/or heterochromatin regions, are also incorporated in the package.

library(WGSmapp)

# Segmental duplication regions for hg38. No `header` argument is passed;
# as the preview below shows, the columns carry the default names V1, V2, ...
seg.dup.hg38 <- read.table(
  file = system.file("extdata", "GRCh38GenomicSuperDup.tab", package = "WGSmapp")
)

# Assembly gaps for hg38, read the same way
gaps.hg38 <- read.table(
  file = system.file("extdata", "hg38gaps.txt", package = "WGSmapp")
)

# Show the leading rows of the hg38 segmental duplication table
head(seg.dup.hg38)
##     V1     V2     V3          V4 V5 V6   V7     V8     V9   V10 V11  V12
## 1 chr1  10169  37148 chr1:180723  0  + chr1 180723 207666 26943   1 1000
## 2 chr1 180723 207666  chr1:10169  0  + chr1  10169  37148 26979   1 1000
## 3 chr1  88000 121417 chr1:265774  0  + chr1 265774 297956 32182   2 1000
## 4 chr1 265774 297956  chr1:88000  0  + chr1  88000 121417 33417   2 1000
## 5 chr1  88000  92392 chr1:355156  0  + chr1 355156 358335  3179   3 1000
## 6 chr1 355156 358335  chr1:88000  0  + chr1  88000  92392  4392   3 1000
##   V13 V14 V15 V16                         V17   V18 V19  V20   V21   V22
## 1 N/A N/A N/A N/A align_both/0014/both0071547 27025  30  128 26897 26628
## 2 N/A N/A N/A N/A align_both/0014/both0071547 27025  30  128 26897 26628
## 3 N/A N/A N/A N/A align_both/0014/both0071548 33449  25 1299 32150 31941
## 4 N/A N/A N/A N/A align_both/0014/both0071548 33449  25 1299 32150 31941
## 5 N/A N/A N/A N/A align_both/0014/both0071549  4398   8 1225  3173  3104
## 6 N/A N/A N/A N/A align_both/0014/both0071549  4398   8 1225  3173  3104
##   V23 V24 V25       V26       V27         V28         V29
## 1 269 164 105 0.9899989 0.9888959 0.010068396 0.010074269
## 2 269 164 105 0.9899989 0.9888959 0.010068396 0.010074269
## 3 209 133  76 0.9934992 0.9927273 0.006529115 0.006532073
## 4 209 133  76 0.9934992 0.9927273 0.006529115 0.006532073
## 5  69  46  23 0.9782540 0.9757938 0.022067470 0.022109061
## 6  69  46  23 0.9782540 0.9757938 0.022067470 0.022109061
# Preview the first rows of the hg38 gap table
head(gaps.hg38)
##    V1   V2       V3       V4  V5 V6    V7       V8  V9
## 1 585 chr1        0    10000   1  N 10000 telomere  no
## 2 586 chr1   207666   257666   5  N 50000   contig  no
## 3 587 chr1   297968   347968   7  N 50000   contig  no
## 4 589 chr1   535988   585988  10  N 50000   contig  no
## 5 605 chr1  2702781  2746290  48  N 43509 scaffold yes
## 6  85 chr1 12954384 13004384 224  N 50000 scaffold yes

BAM files

The dataset consists of three assembled .bam files of single-cell whole-genome sequencing data from the 10X Genomics Single-Cell CNV Solution, provided for illustration purposes. The three cells are from section E of five adjacent tumor dissections from a breast cancer patient. The corresponding cellular barcode tags are “AAAGCAATCTGACGCG”, “GCAGTTACACTGTATG”, and “CTCGTCACAGGTTAAA”.