Created
November 30, 2017 04:54
-
-
Save AliciaSchep/625282c146fabfafc787ce8aaab2259c to your computer and use it in GitHub Desktop.
Compute aggregate footprint around position
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(GenomicRanges) | |
library(S4Vectors) | |
# Footprint does not include +/- 4 correction for ATAC-seq | |
# Modified tabulate funcion ---------------------------------------------------- | |
tabulate2 <- function(x, min_val, max_val) { | |
if (max_val <= min_val) { | |
stop("max_val must be greater than min_val") | |
} | |
if (min_val < 0 && max_val > 0) { | |
n <- rev(tabulate(-1 * (x))[1:(-min_val)]) | |
p <- tabulate(x)[1:max_val] | |
z <- length(which(x == 0)) | |
out <- c(n, z, p) | |
out[which(is.na(out))] <- 0 | |
names(out) <- min_val:max_val | |
return(out) | |
} else if (min_val == 0 && max_val > 0) { | |
p <- tabulate(x)[1:max_val] | |
z <- length(which(x == 0)) | |
out <- c(z, p) | |
out[which(is.na(out))] <- 0 | |
names(out) <- min_val:max_val | |
return(out) | |
} else if (min_val > 0 && max_val > 0) { | |
out <- tabulate(x)[min_val:max_val] | |
out[which(is.na(out))] <- 0 | |
names(out) <- min_val:max_val | |
return(out) | |
} else if (min_val < 0 && max_val == 0) { | |
n <- rev(tabulate(-1 * (x))[1:(-min_val)]) | |
z <- length(which(x == 0)) | |
out <- c(n, z) | |
out[which(is.na(out))] <- 0 | |
names(out) <- min_val:max_val | |
return(out) | |
} else if (min_val < 0 && max_val < 0) { | |
n <- rev(tabulate(-1 * (x))[1:(-min_val)]) | |
out <- n | |
out[which(is.na(out))] <- 0 | |
names(out) <- min_val:max_val | |
return(out) | |
} else { | |
stop("something may be amiss with min_val or max_val") | |
} | |
} | |
# motif_pos should be GenomicRanges of positions of motifs. | |
# Motif positions can be found using motifmatchr | |
# Internal chromVAR function, chromVAR:::bamToFragments, can be used to read in | |
# 'ends' argument (which should be genomic ranges with fragment ends) | |
get_footprint <- function(motif_pos, ends, flank = 250){ | |
tmp = resize(motif_pos, width = 1, fix = "center", ignore.strand = FALSE) | |
o = findOverlaps(motif_pos, ends, maxgap = flank, ignore.strand = TRUE) | |
d = ifelse(as.character(strand(tmp[queryHits(o)])) == "-", | |
start(ends[subjectHits(o)]) - start(tmp[queryHits(o)]), | |
start(tmp[queryHits(o)]) - start(ends[subjectHits(o)])) | |
out = tabulate2(d, min_val = -flank, max_val = flank) | |
return(out) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment