Skip to content

Instantly share code, notes, and snippets.

@kate-crosby
Last active December 7, 2023 02:52
Show Gist options
  • Save kate-crosby/33c0a359c644e8f92c0d to your computer and use it in GitHub Desktop.
Save kate-crosby/33c0a359c644e8f92c0d to your computer and use it in GitHub Desktop.
take hmp.txt or hmp.txt.gz from PANZEA and convert to PED & MAP file for plink
# Let's make some fake plink map and ped files - ped files contain the genotype and map the chrom and position
############################################################################
# Let's start with the PED file - reading in zip file downloaded from PANZEA
############################################################################
fulldata <- read.table("NAM_phasedImputed_1cM_AllZeaGBSv2.3_allChrs.hmp.txt",
header=T,comment.char="",stringsAsFactors=F)
dim(fulldata)
# Eliminate unwanted columns prior to conversion - there's probably a faster
# way of doing this
drops <- c("alleles","chrom", "pos", "cm", "assembly.", "center", "protLSID",
"assayLSID", "panelLSID", "QCcode")
new.df <- fulldata[,!(names(fulldata) %in% drops)]
dim(new.df)
transpose_it <- t(new.df)
# remove the header - or first row - not sure if this will be acceptable to PLINK
transpose_it <- transpose_it[-1,]
### Replace hapmap code with nucleotides OR anything like numbers
transpose_it <- as.matrix(transpose_it)
transpose_it[which(transpose_it=="N")] <- "N N"
transpose_it[which(transpose_it=="A")] <- "A A"
transpose_it[which(transpose_it=="C")] <- "C C"
transpose_it[which(transpose_it=="M")] <- "A C"
# Write out as example .ped file - but don't give it that extension, as plink will not understand it
ped.plink.test <- data.frame(transpose_it)
write.table(ped.plink.test, "fullNAM.ped", sep = "\t", col.names = FALSE, quote = FALSE)
##################################################
# To make the map file - take subset of data again
##################################################
dim(fulldata)
drops <- c("alleles", "cm", "assembly.", "center", "protLSID",
"assayLSID", "panelLSID", "QCcode")
new.df <- fulldata[,!(names(fulldata) %in% drops)]
# now just reorder chrom first, snp second, position third
new.map <- data.frame(new.df$chrom, new.df$rs., new.df$pos)
# write out to see if plink will accept
write.table(new.map, "fullNAM.map", sep = "\t", col.names = FALSE, quote = FALSE)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment