Forked from stephenturner/deseq2-analysis-template.R
Last active
March 6, 2016 21:09
-
-
Save alexcritschristoph/543b37bcd11d3a55c59b to your computer and use it in GitHub Desktop.
Template for analysis with DESeq2
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## DESeq2 made as easy as it should have always been, but for some reason isn't. | |
## Code based on: https://gist.github.com/stephenturner/f60c1934405c127f09a6 | |
library('DESeq2') | |
''' | |
Our starting CSV/TSV looks like: | |
Sample Pairing1 Pairing2 Pairing3 Status KXB65094.1 KXB65950.1 KXB67202.1 .... | |
1 a Active Active Positive Active 0 0 1 | |
2 b Active Active Positive Active 0 1 1 | |
3 c Inactive Positive Inactive 0 0 0 | |
4 d Control Control Control 0 0 7 | |
5 e Control Control Control 2 0 2 | |
6 f Control Control Control 3 1 3 | |
... | |
''' | |
#Read in data like a normal person | |
m = read.csv('./mucosal.csv') | |
m$X = NULL | |
#Set blank values to NA | |
m[m == ''] <- NA | |
#Remove all rows which have an NA value for that pairing | |
## EDIT THIS COLUMN NAME | |
m1 <- m[which(! is.na(m$Pairing3)),] | |
#Select data / feature columns - starts at column 6 for this example | |
## EDIT THIS NUMBER | |
data = t(as.matrix(m1[,6:149])) | |
#Select control / active grouping column | |
## EDIT THIS COLUMN NAME | |
meta = relevel(m1$Pairing3, ref="Control") | |
#Prep for DESeq | |
countdata <- data | |
condition <- meta | |
coldata <- data.frame(row.names=colnames(countdata), condition) | |
#Pseudocounts to avoid 0 error (try commenting out) | |
countdata[which(countdata == 0)] = 1 | |
#Run DESeq2 | |
dds <- DESeqDataSetFromMatrix(countData=countdata, colData=coldata, design=~condition) | |
dds <- DESeq(dds) | |
#PCA | |
rld <- rlogTransformation(dds) | |
DESeq2::plotPCA(rld, intgroup="condition") | |
#Results | |
res <- results(dds) | |
resdata <- merge(as.data.frame(res), as.data.frame(counts(dds, normalized=TRUE)), by="row.names", sort=FALSE) | |
print(which(res$padj<0.05)) | |
volcanoplot <- function (res, lfcthresh=2, sigthresh=0.05, main="Volcano Plot", legendpos="bottomright", labelsig=TRUE, textcx=1, ...) { | |
with(res, plot(log2FoldChange, -log10(pvalue), pch=20, main=main, ...)) | |
with(subset(res, padj<sigthresh ), points(log2FoldChange, -log10(pvalue), pch=20, col="red", ...)) | |
with(subset(res, abs(log2FoldChange)>lfcthresh), points(log2FoldChange, -log10(pvalue), pch=20, col="orange", ...)) | |
with(subset(res, padj<sigthresh & abs(log2FoldChange)>lfcthresh), points(log2FoldChange, -log10(pvalue), pch=20, col="green", ...)) | |
if (labelsig) { | |
require(calibrate) | |
with(subset(res, padj<sigthresh & abs(log2FoldChange)>lfcthresh), textxy(log2FoldChange, -log10(pvalue), labs=Gene, cex=textcx, ...)) | |
} | |
legend(legendpos, xjust=1, yjust=1, legend=c(paste("FDR<",sigthresh,sep=""), paste("|LogFC|>",lfcthresh,sep=""), "both"), pch=20, col=c("red","orange","green")) | |
} | |
#Volcano Plot | |
volcanoplot(resdata, lfcthresh=1, sigthresh=0.05, textcx=.8, xlim=c(-2.3, 2)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment