Skip to content

Instantly share code, notes, and snippets.

@pontikos
Created June 9, 2016 18:02
Show Gist options
  • Save pontikos/4bdef5ad9ed4d300d95b42c62b6a30b3 to your computer and use it in GitHub Desktop.
Save pontikos/4bdef5ad9ed4d300d95b42c62b6a30b3 to your computer and use it in GitHub Desktop.
Add kaviar annotation to annotation csv file.
#!/usr/bin/env Rscript
library(Rsamtools)
# '/cluster/scratch3/vyp-scratch2/reference_datasets/Kaviar/Kaviar-160204-Public/vcfs/Kaviar-160204-Public-hg38.vcf.gz'
#f <- '/cluster/scratch3/vyp-scratch2/reference_datasets/Kaviar/Kaviar-160204-Public/vcfs/Kaviar-160204-Public-hg19.vcf.gz'
read('rare_shared_2006_2006A.csv')->d
x <- do.call('rbind', strsplit(d$VARIANT_ID, '_'))
d$chr <- x[,1]
d$pos <- as.numeric(x[,2])
d$ref <- x[,3]
d$alt <- x[,4]
f <- '/cluster/scratch3/vyp-scratch2/reference_datasets/Kaviar/Kaviar-160204-Public/vcfs/Kaviar-160204-Public-hg19.vcf.gz'
tbx <- TabixFile(f)
tbx <- open.TabixFile(tbx)
d[,'kaviar'] <- NA
for (i in 1:nrow(d)) {
chr <- d[i,'chr']
pos <- d[i,'pos']
ref <- d[i,'ref']
alt <- d[i,'alt']
res <- scanTabix(tbx,param=GRanges(chr,IRanges(c(pos,pos),width=1)))
X <- do.call('rbind', strsplit(res[[1]],'\t'))
cat('query', chr, pos, ref, alt, '\n')
if (is.null(X)) {
d[i,'kaviar'] <- NA
next
}
(REF <- X[,4])
(ALT <- strsplit(X[,5],','))
j <- intersect(which(REF==ref),which(sapply( ALT, function(x) alt %in% x )))
if (length(j)) {
j <- j[[1]]
X <- X[j,]
REF <- REF[[j]]
ALT <- ALT[[j]]
index <- which(ALT==alt)
cat('match', REF, ALT, index, '\n')
info <- strsplit( strsplit(X[8],'\\|')[[1]], ';' )[[1]]
info2 <- as.character(lapply( strsplit(info,'='), '[[', 2))
names(info2) <- as.character(lapply( strsplit(info,'='), '[[', 1))
cat(info2, '\n')
d[i,'kaviar'] <- as.numeric(strsplit(info2,',')$AF[which(alt==as.character(ALT))])
} else {
d[i,'kaviar'] <- NA
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment