Created
February 9, 2021 13:11
-
-
Save yulijia/0ffd031af93e2d3cbd2ed42bb6cf54b3 to your computer and use it in GitHub Desktop.
Bioinformatics Analysis on expression data
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Category Term Count PValue | |
GO_MF GO:0030165~PDZ domain binding 5 0.003597602 | |
GO_CC GO:0005576~extracellular region 23 0.003966517 | |
GO_CC GO:0016021~integral component of membrane 53 0.007762346 | |
GO_BP GO:2000820~negative regulation of transcription from RNA polymerase II promoter involved in smooth muscle cell differentiation 2 0.015069497 | |
GO_BP GO:0036304~umbilical cord morphogenesis 2 0.015069497 | |
GO_BP GO:0035987~endodermal cell differentiation 3 0.017605976 | |
GO_BP GO:0060412~ventricular septum morphogenesis 3 0.020166114 | |
GO_BP GO:0048008~platelet-derived growth factor receptor signaling pathway 3 0.020166114 | |
GO_CC GO:0009986~cell surface 10 0.020464569 | |
GO_MF GO:0035939~microsatellite binding 2 0.021701287 | |
GO_BP GO:2001212~regulation of vasculogenesis 2 0.022519537 | |
GO_BP GO:0071673~positive regulation of smooth muscle cell chemotaxis 2 0.022519537 | |
GO_CC GO:0031012~extracellular matrix 7 0.0237569 | |
GO_BP GO:0002027~regulation of heart rate 3 0.025717694 | |
GO_CC GO:0042383~sarcolemma 4 0.025758908 | |
GO_BP GO:0009968~negative regulation of signal transduction 3 0.027191909 | |
GO_BP GO:0001525~angiogenesis 6 0.02727028 | |
GO_BP GO:0035910~ascending aorta morphogenesis 2 0.029913665 | |
GO_BP GO:0048619~embryonic hindgut morphogenesis 2 0.029913665 | |
GO_BP GO:0060842~arterial endothelial cell differentiation 2 0.029913665 | |
GO_BP GO:0030177~positive regulation of Wnt signaling pathway 3 0.030239789 | |
GO_BP GO:0048813~dendrite morphogenesis 3 0.031812301 | |
GO_CC GO:0005796~Golgi lumen 4 0.035143139 | |
GO_MF GO:0000988~transcription factor activity, protein binding 2 0.035908636 | |
GO_BP GO:0006167~AMP biosynthetic process 2 0.037252298 | |
GO_CC GO:0005886~plasma membrane 41 0.038858292 | |
GO_CC GO:0005615~extracellular space 17 0.043340256 | |
GO_CC GO:0005783~endoplasmic reticulum 12 0.045756018 | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(GEOquery) | |
library(limma) | |
library(pheatmap) | |
library(clusterProfiler) | |
library(org.Hs.eg.db) | |
library(tidyverse) | |
library(enrichplot) | |
# load series and platform data from GEO | |
gset <- getGEO("GSE122220", GSEMatrix =TRUE, AnnotGPL=TRUE) | |
if (length(gset) > 1) idx <- grep("GPL10558", attr(gset, "names")) else idx <- 1 | |
gset <- gset[[idx]] | |
# make proper column names to match toptable | |
fvarLabels(gset) <- make.names(fvarLabels(gset)) | |
gset.bk=gset | |
# # group membership for all samples | |
# gsms <- "0111110100" | |
# sml <- strsplit(gsms, split="")[[1]] | |
# group membership for all samples | |
gsms <- "011X110100" | |
sml <- strsplit(gsms, split="")[[1]] | |
# filter out excluded samples (marked as "X") | |
sel <- which(sml != "X") | |
sml <- sml[sel] | |
gset <- gset[ ,sel] | |
# log2 transformation | |
ex <- exprs(gset) | |
meta <- fData(gset) | |
meta=meta[!is.na(meta$Gene.ID),] | |
exn=(ex[row.names(ex)%in%meta$ID,]) | |
extab=data.frame(exn,ID=row.names(exn)) | |
exprSet <- merge(meta[,c("ID","Gene.ID")],extab,by='ID') | |
exprSet.final <- extab %>% | |
inner_join(meta[,c("ID","Gene.ID")],by="ID") %>% | |
# select(-ID) %>% | |
select(Gene.ID, everything()) %>% | |
mutate(rowMean =rowMeans(.[grep("GSM", names(.))])) %>% | |
arrange(desc(rowMean)) %>% | |
distinct(Gene.ID,.keep_all = T) %>% | |
select(-rowMean) | |
exn.table=as.matrix(exprSet.final[,grepl("GSM",names(exprSet.final))]) | |
row.names(exn.table)=exprSet.final$ID | |
qx <- as.numeric(quantile(exn.table, c(0., 0.25, 0.5, 0.75, 0.99, 1.0), na.rm=T)) | |
LogC <- (qx[5] > 100) || | |
(qx[6]-qx[1] > 50 && qx[2] > 0) | |
## find NA value | |
if (LogC) { exn[which(exn.table <= 0)] <- NaN; | |
dat=log2(exn.table) } | |
## re-create eSet | |
gset=ExpressionSet(assayData=dat[meta[meta$ID%in%exprSet.final$ID,]$ID,],featureData=AnnotatedDataFrame(meta[meta$ID%in%exprSet.final$ID,]),phenoData=phenoData(gset)) | |
# dat = exprs(gset) | |
dat %>% as.data.frame()%>% filter_all(any_vars(is.na(.))) | |
# assign samples to groups and set up design matrix | |
gs <- factor(sml) | |
groups <- make.names(c("response","non-response")) | |
levels(gs) <- groups | |
gset$group <- gs | |
# Value 0 corresponds to the intercept (if any), and positive values to terms in the order given by the term | |
design <- model.matrix(~group + 0,gset) | |
colnames(design) <- levels(gs) | |
fit <- limma::lmFit(gset, design) # fit linear model | |
# set up contrasts of interest and recalculate model coefficients | |
cts <- c(paste(groups[1],"-",groups[2],sep="")) | |
cont.matrix <- makeContrasts(contrasts=cts, levels=design) | |
fit2 <- contrasts.fit(fit, cont.matrix) | |
# compute statistics and table of top significant genes | |
fit2 <- eBayes(fit2, 0.01) | |
tT=topTable(fit2,sort="none",n=Inf) | |
write.table(tT, file="bio_GSE122220.tsv", row.names=F, sep="\t") | |
# summarize test results as "up", "down" or "not expressed" | |
dT = decideTests(fit2, adjust.method="none", p.value=0.05,lfc=1.1) | |
df = cbind(tT,dT) | |
ggplot(df) + | |
geom_point(aes(x=logFC, y=-log10(P.Value), colour=as.factor(`response-non.response`))) + | |
ggtitle("Differential expressed gene") + | |
xlab("log2 fold change") + | |
ylab("-log10 p-value") + | |
theme(legend.position = "none", | |
plot.title = element_text(size = rel(1.5), hjust = 0.5), | |
axis.title = element_text(size = rel(1.25))) + | |
geom_hline(yintercept=-1*log10(0.05),color = "black")+ | |
geom_vline(xintercept = -1.1,color = "black")+ | |
geom_vline(xintercept = 1.1,color = "black") | |
degene=df[df$`response-non.response`!=0,]$ID | |
annotation_col=data.frame(groups = gs) | |
row.names(annotation_col)=gset$geo_accession | |
pheatmap(dat[row.names(dat)%in%df[df$`response-non.response`!=0,]$ID,], | |
cluster_cols = T,cluster_rows = T,show_rownames = F,annotation_col = annotation_col) | |
select=df[df$`response-non.response`!=0,] | |
write.table(df[df$`response-non.response`!=0,],"./DE_gene.tsv",sep="\t",row.names = F) | |
length(select$Gene.ID) | |
ego.bp <- enrichGO(gene = select[select$`response-non.response`==1,]$Gene.ID, | |
universe = as.character(df$Gene.ID), | |
OrgDb = org.Hs.eg.db, | |
ont = "BP", | |
pAdjustMethod = "BH", | |
pvalueCutoff = 0.05, | |
qvalueCutoff = 1, | |
readable = TRUE) | |
ego.mf <- enrichGO(gene = select[select$`response-non.response`==1,]$Gene.ID, | |
universe = as.character(df$Gene.ID), | |
OrgDb = org.Hs.eg.db, | |
ont = "MF", | |
pAdjustMethod = "BH", | |
pvalueCutoff = 0.05, | |
qvalueCutoff = 1, | |
readable = TRUE) | |
ego.cc <- enrichGO(gene = select[select$`response-non.response`==1,]$Gene.ID, | |
universe = as.character(df$Gene.ID), | |
OrgDb = org.Hs.eg.db, | |
ont = "CC", | |
pAdjustMethod = "BH", | |
pvalueCutoff = 0.05, | |
qvalueCutoff = 1, | |
readable = TRUE) | |
ego.bp <- enrichGO(gene = select[select$`response-non.response`==-1,]$Gene.ID, | |
universe = as.character(df$Gene.ID), | |
OrgDb = org.Hs.eg.db, | |
ont = "BP", | |
pAdjustMethod = "BH", | |
pvalueCutoff = 0.05, | |
qvalueCutoff = 1, | |
readable = TRUE) | |
ego.mf <- enrichGO(gene = select[select$`response-non.response`==-1,]$Gene.ID, | |
universe = as.character(df$Gene.ID), | |
OrgDb = org.Hs.eg.db, | |
ont = "MF", | |
pAdjustMethod = "BH", | |
pvalueCutoff = 0.05, | |
qvalueCutoff = 1, | |
readable = TRUE) | |
ego.cc <- enrichGO(gene = select[select$`response-non.response`==-1,]$Gene.ID, | |
universe = as.character(df$Gene.ID), | |
OrgDb = org.Hs.eg.db, | |
ont = "CC", | |
pAdjustMethod = "BH", | |
pvalueCutoff = 0.05, | |
qvalueCutoff = 1, | |
readable = TRUE) | |
ego.BP <- enrichGO(gene = select$Gene.ID, | |
OrgDb = org.Hs.eg.db, | |
ont = "BP", | |
pAdjustMethod = "BH", | |
pvalueCutoff = 1, | |
qvalueCutoff = 0.05, | |
readable = TRUE) | |
dim(ego.BP@result[ego.BP@result$pvalue<=0.05,]) | |
dim(ego.BP@result[ego.BP@result$qvalue<=0.05,]) | |
# barplot(ego.BP, showCategory=50) + ggtitle("barplot of enriched BP terms") | |
# dotplot(ego.BP) | |
ego.MF <- enrichGO(gene = select$Gene.ID, | |
OrgDb = org.Hs.eg.db, | |
ont = "MF", | |
pAdjustMethod = "BH", | |
pvalueCutoff = 1, | |
qvalueCutoff = 0.05, | |
readable = TRUE) | |
dim(ego.MF@result[ego.MF@result$pvalue<=0.05,]) | |
dim(ego.MF@result[ego.MF@result$qvalue<=0.05,]) | |
barplot(ego.MF, showCategory=50) + ggtitle("barplot of enriched MF terms") | |
ego.CC <- enrichGO(gene = select$Gene.ID, | |
OrgDb = org.Hs.eg.db, | |
ont = "CC", | |
pAdjustMethod = "BH", | |
pvalueCutoff = 1, | |
qvalueCutoff = 0.05, | |
readable = TRUE) | |
dim(ego.CC@result[ego.CC@result$pvalue<=0.05,]) | |
dim(ego.CC@result[ego.CC@result$qvalue<=0.05,]) | |
barplot(ego.CC, showCategory=50) + ggtitle("barplot of enriched CC terms") | |
genelist=select[, "P.Value"] | |
names(genelist)=select[, "Gene.ID"] | |
genelist = sort(genelist, decreasing = TRUE) | |
# gse.re=gseGO(geneList = genelist,ont = "ALL",OrgDb=org.Hs.eg.db,pvalueCutoff = 0.1) | |
# ridgeplot(gse.re) | |
gse.BP=gseGO(geneList = genelist,ont = "BP",OrgDb= org.Hs.eg.db,pvalueCutoff = 0.1) | |
ridgeplot(gse.BP) | |
# p1 <- gseaplot(gse.BP, geneSetID = 1, by = "runningScore", title = gse.BP$Description[1]) | |
# p2 <- gseaplot(gse.BP, geneSetID = 1, by = "preranked", title = gse.BP$Description[1]) | |
# p3 <- gseaplot(gse.BP, geneSetID = 1, title = gse.BP$Description[1]) | |
gseaplot2(gse.BP, geneSetID = 1, title = gse.BP$Description[1]) | |
gse.CC=gseGO(geneList = genelist,ont = "CC",OrgDb= org.Hs.eg.db,pvalueCutoff = 0.1) | |
# ridgeplot(gse.CC) | |
gse.MF=gseGO(geneList = genelist,ont = "MF",OrgDb= org.Hs.eg.db,pvalueCutoff = 0.1) | |
ridgeplot(gse.MF) | |
gseaplot2(gse.MF, geneSetID = 1, title = gse.MF$Description[1]) | |
gseaplot2(gse.MF, geneSetID = 2, title = gse.MF$Description[2]) | |
kk <- enrichKEGG(gene = select$Gene.ID, | |
organism = 'hsa', | |
pvalueCutoff = 0.1) | |
barplot(kk)+ ggtitle("barplot of enriched KEGG pathway") | |
kk2 <- gseKEGG(geneList = genelist, | |
organism = 'hsa', | |
pvalueCutoff = 0.1, | |
verbose = FALSE) | |
gseaplot2(kk2, geneSetID = 1, title = kk2$Description[1]) | |
mkk <- enrichMKEGG(gene = select$Gene.ID, | |
organism = 'hsa', | |
pvalueCutoff = 0.1) | |
mkk2 <- gseMKEGG(geneList = genelist, | |
organism = 'hsa', pvalueCutoff = 0.1) | |
a=read.table("./function_annotation.txt",sep="\t",header = T) | |
ggplot(data=a, aes(x=Count, y=reorder(Term,-PValue),fill=PValue)) + | |
geom_bar(stat="identity") + | |
ggtitle("Function annotation") + | |
xlab("Gene count") + | |
ylab(NULL) +scale_fill_gradient(low="blue", high="red") | |
a=read.table("./up.txt",sep="\t",header = T) | |
ggplot(data=a, aes(x=Count, y=reorder(Term,-PValue),fill=PValue)) + | |
geom_bar(stat="identity") + | |
ggtitle("Function annotation") + | |
xlab("Gene count") + | |
ylab(NULL) +scale_fill_gradient(low="blue", high="red")+facet_grid(Category~.,scales="free",space="free") | |
a=read.table("./down.txt",sep="\t",header = T) | |
ggplot(data=a, aes(x=Count, y=reorder(Term,-PValue),fill=PValue)) + | |
geom_bar(stat="identity") + | |
ggtitle("Function annotation") + | |
xlab("Gene count") + | |
ylab(NULL) +scale_fill_gradient(low="blue", high="red")+ | |
facet_grid(Category~.,scales="free",space="free") |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Category Term Count PValue | |
KEGG hsa05221:Acute myeloid leukemia 4 0.002345308 | |
GO_MF GO:0005524~ATP binding 13 0.013790123 | |
GO_MF GO:0031782~type 4 melanocortin receptor binding 2 0.015782965 | |
GO_MF GO:0031781~type 3 melanocortin receptor binding 2 0.015782965 | |
GO_BP GO:0010626~negative regulation of Schwann cell proliferation 2 0.016336818 | |
GO_BP GO:0009408~response to heat 3 0.016619629 | |
GO_BP GO:0006091~generation of precursor metabolites and energy 3 0.020038205 | |
GO_CC GO:0005829~cytosol 21 0.038827297 | |
GO_BP GO:0090084~negative regulation of inclusion body assembly 2 0.040350065 | |
GO_MF GO:0043225~anion transmembrane-transporting ATPase activity 2 0.042814823 | |
GO_BP GO:0046718~viral entry into host cell 3 0.042820725 | |
GO_MF GO:0016491~oxidoreductase activity 4 0.045166591 | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment