Created
November 18, 2014 16:07
-
-
Save kate-crosby/63d368dc62e7f367d0cf to your computer and use it in GitHub Desktop.
Bigger files - unzip the gz and use fread
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load the chromosome 1 HapMap genotype table. fread() is used because the
# file is large; unzip the .gz before running this.
library(data.table)

fulldata <- fread(
  "AmesUSInbreds_AllZeaGBSv1.0_imputed_20130508_chr1.hmp.txt",
  header = TRUE,
  sep = "\t"
)
dim(fulldata)

# Convert to a plain data.frame. data.frame() sanitises the column names
# (check.names = TRUE), so "rs#" is stored as "rs." and "assembly#" as
# "assembly."; columns must be referred to by the sanitised names afterwards.
fulldata <- data.frame(fulldata)
# Eliminate unwanted columns prior to conversion. What remains is expected to
# be the SNP id column followed by one genotype column per sample.
# fulldata went through data.frame(), which rewrites the "assembly#" header as
# "assembly."; matching only on "assembly#" would silently keep that column
# and emit it as a bogus sample row. Both spellings are listed so the drop
# works whether or not the names were sanitised.
drops <- c(
  "alleles", "chrom", "pos", "strand", "assembly#", "assembly.", "center",
  "protLSID", "assayLSID", "panelLSID", "QCcode"
)
new.df <- fulldata[, !(names(fulldata) %in% drops), drop = FALSE]
dim(new.df)

# Transpose so that the former columns become rows.
transpose_it <- t(new.df)

# remove the header - or first row (expected to hold the SNP ids).
# drop = FALSE keeps a matrix even if only a single row is left.
transpose_it <- transpose_it[-1, , drop = FALSE]
### Replace hapmap code with nucleotides OR anything like numbers
# Each single-letter call is swapped for a space-separated allele pair using
# one lookup table; edit the right-hand sides to emit a different coding.
transpose_it <- local({
  allele_pairs <- c(
    N = "N N", X = "N N",
    A = "A A", C = "C C", G = "G G", T = "T T",
    K = "G T", M = "A C", R = "A G", S = "C G", W = "A T", Y = "C T"
  )

  calls <- as.matrix(transpose_it)
  code_index <- match(calls, names(allele_pairs))
  recognised <- !is.na(code_index)

  # Cells holding anything other than the codes above (including NA) are
  # left exactly as they are.
  calls[recognised] <- allele_pairs[code_index[recognised]]
  calls
})
# Write the recoded genotypes out as an example .ped-style file: the row names
# go out as the leading column, followed by the tab-separated allele pairs.
# NOTE(review): the original author warns that plink will not understand this
# file as a plain .ped - presumably because the standard leading columns are
# not all present; confirm which plink options are needed to read it.
ped.plink.test <- data.frame(transpose_it)
write.table(
  x = ped.plink.test,
  file = "Ames_chr1.ped",
  quote = FALSE,
  sep = "\t",
  col.names = FALSE
)
##################################################
# To make the map file - take subset of data again
##################################################
dim(fulldata)
drops <- c(
  "alleles", "cm", "assembly.", "center", "protLSID",
  "assayLSID", "panelLSID", "QCcode"
)
new.df <- fulldata[, !(names(fulldata) %in% drops), drop = FALSE]

# now just reorder chrom first, snp second, position third.
# Columns are selected by exact name (no `$` partial matching), so a missing
# column fails loudly instead of silently producing a shorter table.
new.map <- new.df[, c("chrom", "rs.", "pos")]

# write out to see if plink will accept.
# row.names = FALSE is required: write.table() otherwise emits the row index
# as an extra first column, shifting every real field one place to the right.
# NOTE(review): this is a three-column map (no genetic-distance column);
# PLINK 1.x is expected to need --map3 to read it - confirm.
write.table(
  new.map,
  "Ames_chr1.map",
  sep = "\t",
  row.names = FALSE,
  col.names = FALSE,
  quote = FALSE
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment