Skip to content

Instantly share code, notes, and snippets.

@kate-crosby
Created November 18, 2014 16:07
Show Gist options
  • Save kate-crosby/63d368dc62e7f367d0cf to your computer and use it in GitHub Desktop.
Save kate-crosby/63d368dc62e7f367d0cf to your computer and use it in GitHub Desktop.
Bigger files: unzip the .gz archive first, then read it with data.table's fread()
library(data.table)
# Read the tab-separated HapMap genotype file (already unzipped) with
# data.table's fread().
fulldata <- fread(
  "AmesUSInbreds_AllZeaGBSv1.0_imputed_20130508_chr1.hmp.txt",
  header = TRUE, sep = "\t"
)
dim(fulldata)
# Convert to a plain data.frame. data.frame() checks names by default, so
# syntactically invalid column names are rewritten (e.g. "rs#" -> "rs.",
# "assembly#" -> "assembly."). The rest of the script refers to the rewritten
# names, so keep data.frame() here rather than as.data.frame().
fulldata <- data.frame(fulldata)
# ---- Build the PLINK .ped genotype table --------------------------------
# Eliminate the HapMap metadata columns prior to conversion, keeping only the
# SNP identifier column and the per-sample genotype columns.
# The assembly column is listed under both spellings: the raw header is
# "assembly#", but data.frame() renames it to "assembly." when `fulldata` is
# created, so matching only "assembly#" would silently keep the column and
# emit it as a bogus sample row in the output.
drops <- c("alleles", "chrom", "pos", "strand", "assembly#", "assembly.",
           "center", "protLSID", "assayLSID", "panelLSID", "QCcode")
new.df <- fulldata[, !(names(fulldata) %in% drops), drop = FALSE]
dim(new.df)

# Transpose so that each row is one sample and each column is one SNP.
# t() on a data.frame already returns a matrix.
transpose_it <- t(new.df)
# Remove the first row, which holds the SNP identifiers (the first remaining
# column of new.df). drop = FALSE keeps a matrix even with a single SNP.
transpose_it <- transpose_it[-1, , drop = FALSE]

# Replace single-letter HapMap/IUPAC codes with space-separated allele pairs.
# "N" and "X" both become "N N"; any value not listed here (including NA) is
# left untouched.
iupac_to_alleles <- c(
  N = "N N", X = "N N",
  A = "A A", C = "C C", G = "G G", T = "T T",
  K = "G T", M = "A C", R = "A G", S = "C G", W = "A T", Y = "C T"
)
# One pass over the matrix instead of one full scan per code.
code_idx <- match(transpose_it, names(iupac_to_alleles))
is_known <- !is.na(code_idx)
transpose_it[is_known] <- unname(iupac_to_alleles[code_idx[is_known]])

# Write out as an example .ped file. Row names (the sample IDs) are written as
# the first field; no header line is written.
# NOTE(review): only the sample ID precedes the genotypes, so PLINK presumably
# needs --no-fid --no-parents --no-sex --no-pheno (and a matching
# --missing-genotype for "N") to read this file -- confirm against the PLINK
# version in use.
ped.plink.test <- data.frame(transpose_it)
write.table(ped.plink.test, "Ames_chr1.ped", sep = "\t", col.names = FALSE,
            quote = FALSE)
##################################################
# To make the map file - take subset of data again
##################################################
dim(fulldata)
# Column names here are the ones produced by data.frame() when `fulldata` was
# created ("assembly#" -> "assembly.", "rs#" -> "rs.").
drops <- c("alleles", "cm", "assembly.", "center", "protLSID",
           "assayLSID", "panelLSID", "QCcode")
new.df <- fulldata[, !(names(fulldata) %in% drops), drop = FALSE]
# Now just reorder: chromosome first, SNP id second, position third.
# [[ ]] is used instead of $ so that column names are matched exactly.
new.map <- data.frame(
  chrom = new.df[["chrom"]],
  snp = new.df[["rs."]],
  pos = new.df[["pos"]]
)
# Write out to see if plink will accept it.
# row.names = FALSE is required: by default write.table() prepends the row
# index (1..n) as an extra first field, which would shift every column and
# make the index look like the chromosome.
# NOTE(review): this is a 3-column map (no genetic-distance column); PLINK may
# need --map3 to read it -- confirm against the PLINK version in use.
write.table(new.map, "Ames_chr1.map", sep = "\t", col.names = FALSE,
            row.names = FALSE, quote = FALSE)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment