kate-crosby · December 7, 2023 02:52
diff --git a/hmp2ped.R b/hmp2ped.R
 # Let's make some fake plink map and ped files - ped files contain the genotype and map the chrom and position

 ############################################################################
 # Let's start with the PED file - reading in zip file downloaded from PANZEA
 ############################################################################

 fulldata <- read.table("NAM_phasedImputed_1cM_AllZeaGBSv2.3_allChrs.hmp.txt",
  header=T,comment.char="",stringsAsFactors=F) 

 dim(fulldata)


 # Eliminate unwanted columns prior to conversion - there's probably a faster
 # way of doing this

 drops <- c("alleles","chrom", "pos", "cm", "assembly.", "center", "protLSID",
  "assayLSID", "panelLSID", "QCcode")
 new.df <- fulldata[,!(names(fulldata) %in% drops)]

 dim(new.df)


 transpose_it <- t(new.df)

 # remove the header - or first row - not sure if this will be acceptable to PLINK
 transpose_it <- transpose_it[-1,]

 ### Replace hapmap code with nucleotides OR anything like numbers
 transpose_it <- as.matrix(transpose_it)
 transpose_it[which(transpose_it=="N")] <- "N N"
 transpose_it[which(transpose_it=="A")] <- "A A"
 transpose_it[which(transpose_it=="C")] <- "C C"
 transpose_it[which(transpose_it=="M")] <- "A C"

 # Write out as example .ped file - but don't give it that extension, as plink will not understand it
 ped.plink.test <- data.frame(transpose_it)
 write.table(ped.plink.test, "fullNAM.ped", sep = "\t", col.names = FALSE, quote = FALSE)

 ##################################################
 # To make the map file - take subset of data again
 ##################################################
 dim(fulldata)

 drops <- c("alleles", "cm", "assembly.", "center", "protLSID",
           "assayLSID", "panelLSID", "QCcode")
 new.df <- fulldata[,!(names(fulldata) %in% drops)]

 # now just reorder chrom first, snp second, position third
 new.map <- data.frame(new.df$chrom, new.df$rs., new.df$pos)

 # write out to see if plink will accept
 write.table(new.map, "fullNAM.map", sep = "\t", col.names = FALSE, quote = FALSE)
	# Let's make some fake plink map and ped files - ped files contain the genotype and map the chrom and position

	############################################################################
	# Let's start with the PED file - reading in zip file downloaded from PANZEA
	############################################################################

	fulldata <- read.table("NAM_phasedImputed_1cM_AllZeaGBSv2.3_allChrs.hmp.txt",
	header=T,comment.char="",stringsAsFactors=F)

	dim(fulldata)


	# Eliminate unwanted columns prior to conversion - there's probably a faster
	# way of doing this

	drops <- c("alleles","chrom", "pos", "cm", "assembly.", "center", "protLSID",
	"assayLSID", "panelLSID", "QCcode")
	new.df <- fulldata[,!(names(fulldata) %in% drops)]

	dim(new.df)


	transpose_it <- t(new.df)

	# remove the header - or first row - not sure if this will be acceptable to PLINK
	transpose_it <- transpose_it[-1,]

	### Replace hapmap code with nucleotides OR anything like numbers
	transpose_it <- as.matrix(transpose_it)
	transpose_it[which(transpose_it=="N")] <- "N N"
	transpose_it[which(transpose_it=="A")] <- "A A"
	transpose_it[which(transpose_it=="C")] <- "C C"
	transpose_it[which(transpose_it=="M")] <- "A C"

	# Write out as example .ped file - but don't give it that extension, as plink will not understand it
	ped.plink.test <- data.frame(transpose_it)
	write.table(ped.plink.test, "fullNAM.ped", sep = "\t", col.names = FALSE, quote = FALSE)

	##################################################
	# To make the map file - take subset of data again
	##################################################
	dim(fulldata)

	drops <- c("alleles", "cm", "assembly.", "center", "protLSID",
	"assayLSID", "panelLSID", "QCcode")
	new.df <- fulldata[,!(names(fulldata) %in% drops)]

	# now just reorder chrom first, snp second, position third
	new.map <- data.frame(new.df$chrom, new.df$rs., new.df$pos)

	# write out to see if plink will accept
	write.table(new.map, "fullNAM.map", sep = "\t", col.names = FALSE, quote = FALSE)