Created
September 19, 2022 23:39
-
-
Save mdozmorov/33a1fa3234b2942dae238e1fcb39c996 to your computer and use it in GitHub Desktop.
mm39 excluderanges download
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Download a list of problematic regions (aka blacklist) for the GRCm39/mm39 | |
# mouse genome assembly. Defined by the Boyle-Lab/Blacklist | |
# software, High Signal and Low Mappability regions. | |
# See https://github.com/dozmorovlab/excluderanges for more information. | |
suppressMessages(library(httr)) # https://CRAN.R-project.org/package=httr | |
suppressMessages(library(GenomicRanges)) # https://bioconductor.org/packages/GenomicRanges/ | |
# bedbase_id | |
bedbase_id <- "edc716833d4b5ee75c34a0692fc353d5" | |
# Construct output file name | |
fileNameOut <- "mm39.excluderanges.bed.gz" | |
# API token for BED data | |
token2 <- paste0("http://bedbase.org/api/bed/", bedbase_id, "/file/bed") | |
# Download file | |
GET(url = token2, write_disk(fileNameOut, overwrite = TRUE)) | |
# Read the data in | |
mm39.excluderanges <- readr::read_tsv(fileNameOut, | |
col_names = FALSE, | |
col_types = c("cddcdc")) | |
# Assign column names depending on the number of columns | |
all_columns <- c("chr", "start", "end", "name", "score", "strand", | |
"signalValue", "pValue", "qValue", "peak") | |
colnames(mm39.excluderanges) <- all_columns[1:ncol(mm39.excluderanges)] | |
# Convert to GRanges object | |
mm39.excluderanges <- makeGRangesFromDataFrame(mm39.excluderanges, | |
keep.extra.columns = TRUE) | |
# Seqinfo for mm39 genome | |
chrom_data <- GenomeInfoDb::getChromInfoFromUCSC(genome = "mm39", | |
assembled.molecules.only = TRUE) | |
# Subset and match to chromosomes in the mm39.excluderanges object | |
# Common chromosomes | |
chromosomes_common <- intersect(chrom_data$chrom, seqlevels(mm39.excluderanges)) | |
# Subset mm39.excluderanges | |
mm39.excluderanges <- keepSeqlevels(mm39.excluderanges, chromosomes_common, | |
pruning.mode = "tidy") | |
# Subset chrom_data | |
chrom_data <- chrom_data[chrom_data$chrom %in% chromosomes_common, ] | |
# Match objects | |
chrom_data <- chrom_data[match(seqlevels(mm39.excluderanges), chrom_data$chrom), ] | |
# Assign seqinfo data | |
seqlengths(mm39.excluderanges) <- chrom_data$size | |
isCircular(mm39.excluderanges) <- ifelse(is.na(chrom_data$circular), FALSE, TRUE) | |
genome(mm39.excluderanges) <- "mm39" | |
mm39.excluderanges |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Opes my bad. Here is the edited code.
`suppressMessages(library(httr)) # https://CRAN.R-project.org/package=httr
suppressMessages(library(GenomicRanges)) # https://bioconductor.org/packages/GenomicRanges/
library(readr)
bedbase_id
bedbase_id <- "edc716833d4b5ee75c34a0692fc353d5"
Construct output file name
fileNameOut <- "mm39.excluderanges.bed"
API token for BED data
#token2 <- paste0("http://bedbase.org/api/bed/", bedbase_id, "/file/bed")
Download file
#GET(url = token2, write_disk(fileNameOut, overwrite = TRUE))
Read the data in
mm39.excluderanges <- readr::read_tsv(fileNameOut,
col_names = FALSE)
Assign column names depending on the number of columns
all_columns <- c("chr", "start", "end", "name", "strand","score",
"signalValue", "pValue", "qValue", "peak")
colnames(mm39.excluderanges) <- all_columns[1:ncol(mm39.excluderanges)]
Convert to GRanges object
mm39.excluderanges <- makeGRangesFromDataFrame(mm39.excluderanges,
keep.extra.columns = TRUE)
Seqinfo for mm39 genome
chrom_data <- GenomeInfoDb::getChromInfoFromUCSC(genome = "mm39",
assembled.molecules.only = TRUE)
Subset and match to chromosomes in the mm39.excluderanges object
Common chromosomes
chromosomes_common <- intersect(chrom_data$chrom, seqlevels(mm39.excluderanges))
Subset mm39.excluderanges
mm39.excluderanges <- keepSeqlevels(mm39.excluderanges, chromosomes_common,
pruning.mode = "tidy")
Subset chrom_data
chrom_data <- chrom_data[chrom_data$chrom %in% chromosomes_common, ]
Match objects
chrom_data <- chrom_data[match(seqlevels(mm39.excluderanges), chrom_data$chrom), ]
Assign seqinfo data
seqlengths(mm39.excluderanges) <- chrom_data$size
isCircular(mm39.excluderanges) <- ifelse(is.na(chrom_data$circular), FALSE, TRUE)
genome(mm39.excluderanges) <- "mm39"
mm39.excluderanges
bed<- as.data.frame(mm39.excluderanges)
Rename columns
colnames(bed) <- c("chrom", "chromStart", "chromEnd", "width", "strand", "score", "name")
Reorder columns
bed_2 <- bed[, c("chrom", "chromStart", "chromEnd", "name", "score", "strand")]
write.table( bed, file = "mm39.excluderanges_2.bed",
sep = "\t", row.names = FALSE, col.names = FALSE, quote = FALSE)`