Skip to content

Instantly share code, notes, and snippets.

@pontikos
Last active August 29, 2015 14:15
Show Gist options
  • Save pontikos/2decf553be46911074ce to your computer and use it in GitHub Desktop.
Save pontikos/2decf553be46911074ce to your computer and use it in GitHub Desktop.
Populations pca of onekg and aj samples using snpstats. Samples have been LD trimmed using plink.
ibrary(snpStats)
d <- read.plink('all.bed','all.bim','all.fam')
print(dim(X <- d$genotypes))
# snps were everyone is the same thing are boring
#snp.qc <- col.summary(X)
#X <- X[,snp.qc$MAF > 0]
# also should remove singleton variants i.e only present in a single person
onekg <- X[grep('OneKG',rownames(X)),]
# onkg samples were call rate is poor are dropped
#sample.qc <- row.summary(onekg)
#onekg <- onekg[sample.qc$Call.rate > .99,]
message('CEU')
print(dim(ceu <- X[ceu.samples <- grep('OneKG_CEU',rownames(X)),]))
message('FIN')
print(dim(fin <- X[fin.samples <- grep('OneKG_FIN',rownames(X)),]))
message('GBR')
print(dim(gbr <- X[gbr.samples <- grep('OneKG_GBR',rownames(X)),]))
message('IBS')
print(dim(ibs <- X[ibs.samples <- grep('OneKG_IBS',rownames(X)),]))
message('TSI')
print(dim(tsi <- X[tsi.samples <- grep('OneKG_TSI',rownames(X)),]))
message('nonAJ')
print(dim(nonaj <- X[nonaj.samples <- as.character(read.table('/cluster/project8/IBDAJE/batches_from_uclex/nonAJ/samples.txt')[,1]),]))
message('ctlAJ')
print(dim(controls <- X[control.samples <- as.character(read.table('/cluster/project8/IBDAJE/batches_from_uclex/controls/samples.txt')[,1]),]))
message('AJ')
print(dim(unrelated <- X[unrelated.samples <- as.character(read.table('/cluster/project8/IBDAJE/batches_from_uclex/unrelated/samples.txt')[,1]),]))
message('OFG')
print(dim(ofg <- X[ofg.samples <- as.character(read.table('/cluster/project8/IBDAJE/batches_from_uclex/OFG/samples.txt')[,1]),]))
message('ICE')
print(dim(ice <- X[ice.samples <- as.character(read.table('/cluster/project8/IBDAJE/batches_from_uclex/ICE/samples.txt')[,1]),]))
pop.names <- c( 'ICE', 'FIN', 'GBR', 'CEU', 'IBS', 'TSI', 'OFG', 'nonAJ', 'ctlAJ', 'AJ' )
fill <- c( 'darkgreen', 'green', 'blue', 'red', 'pink', 'purple', 'lightblue', 'red', 'black', 'black')
pch <- c( rep(20,7), 25, 20, 24)
print(dim(col.pch <- data.frame(col=rep('black',nrow(X)),pch=rep(20,nrow(X)),row.names=rownames(X))))
#col
col.pch[ ice.samples, 'col' ] <- 'darkgreen'
col.pch[ fin.samples, 'col' ] <- 'green'
col.pch[ gbr.samples, 'col' ] <- 'blue'
col.pch[ ceu.samples, 'col' ] <- 'red'
col.pch[ ibs.samples, 'col' ] <- 'pink'
col.pch[ tsi.samples, 'col' ] <- 'purple'
col.pch[ ofg.samples, 'col' ] <- 'lightblue'
col.pch[ nonaj.samples, 'col' ] <- 'red'
#pch
col.pch[ nonaj.samples, 'pch' ] <- 25
col.pch[ control.samples, 'pch' ] <- 20
col.pch[ unrelated.samples, 'pch' ] <- 24
plot.pca <- function(x, main='', outliers=c(.01,.99), extra=NULL) {
message('xxt')
xxmat <- xxt(x)
evv <- eigen(xxmat, symmetric=TRUE)
message('eigen')
pcs <- evv$vectors[,1:5]
evals <- evv$values[1:5]
#if you want to plot extra samples using the existing pcs
if (!is.null(extra)) {
btr <- snp.pre.multiply(x, diag(1/sqrt(evals)) %*% t(pcs))
x <- rbind(x,extra)
pcs <- snp.post.multiply(x, t(btr))
}
plot(pcs[,1],pcs[,2],main=main,pch=col.pch[rownames(x),'pch'],col=col.pch[rownames(x),'col'],xlim=quantile(pcs[,1],outliers),ylim=quantile(pcs[,2],outliers))
}
pdf('pca.pdf')
r <- c(1,1,2,2)
layout(rbind(r,2+r,rep(5,length(r))),heights=c(4,4,1))
par(mai=rep(0.5, 4))
plot.pca(rbind(onekg,ice,controls),main='ONEKG+ICE+ctlAJ')
plot.pca(rbind(onekg,ofg,ice,nonaj,controls),main='ONEKG+ICE+ctlAJ+nonAJ+OFG')
plot.pca(rbind(onekg,ofg,ice,controls,nonaj,unrelated),main='all')
plot.pca(rbind(onekg,controls),extra=unrelated,main='ONKEG+ICE+ctlAJ AJ')
par(mai=c(0,0,0,0))
plot.new()
legend('center', legend=pop.names, col=fill, pch=pch, bty='n', ncol=length(pop.names) )
dev.off()
@pontikos
Copy link
Author

Ok this works now the bugs were:

  • using more samples than plotting characters,1200 instead of 667.
  • not including the samples on which the PCs were calculated in snp.post.multiply

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment