Last active
December 7, 2023 02:52
-
-
Save kate-crosby/33c0a359c644e8f92c0d to your computer and use it in GitHub Desktop.
take hmp.txt or hmp.txt.gz from PANZEA and convert to PED & MAP file for plink
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Let's make some fake plink map and ped files - ped files contain the genotype and map the chrom and position | |
############################################################################ | |
# Let's start with the PED file - reading in zip file downloaded from PANZEA | |
############################################################################ | |
fulldata <- read.table("NAM_phasedImputed_1cM_AllZeaGBSv2.3_allChrs.hmp.txt", | |
header=T,comment.char="",stringsAsFactors=F) | |
dim(fulldata) | |
# Eliminate unwanted columns prior to conversion - there's probably a faster | |
# way of doing this | |
drops <- c("alleles","chrom", "pos", "cm", "assembly.", "center", "protLSID", | |
"assayLSID", "panelLSID", "QCcode") | |
new.df <- fulldata[,!(names(fulldata) %in% drops)] | |
dim(new.df) | |
transpose_it <- t(new.df) | |
# remove the header - or first row - not sure if this will be acceptable to PLINK | |
transpose_it <- transpose_it[-1,] | |
### Replace hapmap code with nucleotides OR anything like numbers | |
transpose_it <- as.matrix(transpose_it) | |
transpose_it[which(transpose_it=="N")] <- "N N" | |
transpose_it[which(transpose_it=="A")] <- "A A" | |
transpose_it[which(transpose_it=="C")] <- "C C" | |
transpose_it[which(transpose_it=="M")] <- "A C" | |
# Write out as example .ped file - but don't give it that extension, as plink will not understand it | |
ped.plink.test <- data.frame(transpose_it) | |
write.table(ped.plink.test, "fullNAM.ped", sep = "\t", col.names = FALSE, quote = FALSE) | |
################################################## | |
# To make the map file - take subset of data again | |
################################################## | |
dim(fulldata) | |
drops <- c("alleles", "cm", "assembly.", "center", "protLSID", | |
"assayLSID", "panelLSID", "QCcode") | |
new.df <- fulldata[,!(names(fulldata) %in% drops)] | |
# now just reorder chrom first, snp second, position third | |
new.map <- data.frame(new.df$chrom, new.df$rs., new.df$pos) | |
# write out to see if plink will accept | |
write.table(new.map, "fullNAM.map", sep = "\t", col.names = FALSE, quote = FALSE) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment