Created
February 1, 2021 06:57
-
-
Save jnpaulson/a5b181cc49a521913d2d78682fb45d3d to your computer and use it in GitHub Desktop.
GTEx download of 6p and DE for lung + whole blood
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Actually perform the differential expression analysis | |
library(yarn) | |
library(dplyr) | |
library(readr) | |
library(biomaRt) | |
library(limma) | |
source("voomweights.R") | |
obj <- readRDS("~/Desktop/gtex_v6p_lung_blood_norm.rds") | |
gender = pData(obj)$GENDER | |
batch = factor(as.character(pData(obj)$SMNABTCHT)) | |
tissue <- factor(pData(obj)$SMTSD,levels=c("Whole Blood","Lung")) | |
design = model.matrix(~tissue+batch+gender) | |
transformedCounts = assayData(obj)[["normalizedMatrix"]] | |
voomOutput = voomWeightsCustomized(transformedCounts,design) | |
fit = lmFit(voomOutput,design) | |
fit = eBayes(fit) | |
gl = fData(obj)$hgnc_symbol | |
tt = topTable(fit,number=Inf,genelist=gl,coef="tissueLung") | |
saveRDS(list(fit,tt),file="~/Desktop/results.rdata") |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Download and prepare the datafile | |
library(yarn) | |
library(dplyr) | |
library(readr) | |
library(biomaRt) | |
library(limma) | |
cnts <- suppressWarnings(read_delim("https://storage.googleapis.com/gtex_analysis_v6p/rna_seq_data/GTEx_Analysis_v6p_RNA-seq_RNA-SeQCv1.1.8_gene_reads.gct.gz",delim='\t',skip=2)) | |
genes <- unlist(cnts[, 1]) | |
geneNames <- unlist(cnts[, 2]) | |
counts <- cnts[, -c(1:2)] | |
counts <- as.matrix(counts) | |
rownames(counts) <- genes | |
throwAway <- which(rowSums(counts) == 0) | |
counts <- counts[-throwAway, ] | |
es <- ExpressionSet(as.matrix(counts)) | |
phenoFile <- "https://storage.googleapis.com/gtex_analysis_v6p/annotations/GTEx_Data_V6_Annotations_SampleAttributesDS.txt" | |
pd <- suppressWarnings(read_tsv(phenoFile)) %>% as.data.frame | |
rownames(pd) <- pd[, "SAMPID"] | |
ids <- sapply(strsplit(pd[, "SAMPID"], "-"), function(i) paste(i[1:2], | |
collapse = "-")) | |
pd <- cbind(pd,SUBJID = ids) | |
pheno2File<- "https://storage.googleapis.com/gtex_analysis_v6p/annotations/GTEx_Data_V6_Annotations_SubjectPhenotypesDS.txt" | |
pd2 <- suppressWarnings(read_tsv(pheno2File)) %>% as.data.frame | |
pdfinal <- left_join(pd,pd2) | |
pdfinal <- pdfinal[match(colnames(counts),pdfinal[["SAMPID"]]),] | |
phenoData(es) <- AnnotatedDataFrame(pdfinal) | |
genes <- sub("\\..*", "", rownames(counts)) | |
ensembl = useEnsembl(biomart="ensembl",GRCh=37, dataset="hsapiens_gene_ensembl") | |
attributes <- c("ensembl_gene_id", "hgnc_symbol", "chromosome_name", | |
"start_position", "end_position", "gene_biotype") | |
esFinal <- annotateFromBiomart(obj = es, genes = genes,attributes=attributes) | |
saveRDS(esFinal, file = "~/Desktop/gtex_v6p.rds") |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Filter to solely whole blood and lung samples | |
# We checked and this is the 'only' USEFRZ cohort | |
library(yarn) | |
library(dplyr) | |
library(readr) | |
library(biomaRt) | |
library(limma) | |
obj <- readRDS("~/Desktop/gtex_v6p.rds") | |
obj <- filterSamples(esFinal,c("Whole Blood","Lung"), | |
groups='SMTSD',keepOnly=TRUE) | |
obj = filterLowGenes(obj,"SMTSD") | |
obj <- filterGenes(obj) | |
### Normalize using qsmooth | |
obj = normalizeTissueAware(obj,"SMTSD") | |
saveRDS(obj,file="~/Desktop/gtex_v6p_lung_blood_norm.rds") | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# customized voom weights for already transformed values | |
voomWeightsCustomized <- function(log2counts,design,lib.size = NULL,is.cpm = FALSE) { | |
if (is.null(design)) { | |
design <- model.matrix(~ 1 ) | |
} | |
out<-list() | |
if (is.cpm == TRUE) { | |
fit <- lmFit(log2counts, design) | |
xx <- fit$Amean + mean(log2(lib.size + 1)) - log2(1e+06) | |
yy <- sqrt(fit$sigma) | |
l <- lowess(xx, yy, f = .5) | |
f <- approxfun(l, rule = 2) | |
fitted.values <- fit$coef %*% t(fit$design) | |
fitted.cpm <- 2^fitted.values | |
fitted.count <- 1e-06 * t(t(fitted.cpm) * (lib.size + 1)) | |
fitted.logcount <- log2(fitted.count) | |
w <- 1/f(fitted.logcount)^4 | |
dim(w) <- dim(fitted.logcount) | |
} | |
if (is.cpm == FALSE) { | |
fit <- lmFit(log2counts, design) | |
xx <- fit$Amean | |
yy <- sqrt(fit$sigma) | |
l <- lowess(xx, yy, f = .5) | |
f <- approxfun(l, rule = 2) | |
fitted.values <- fit$coef %*% t(fit$design) | |
fitted.logcount <- log2(fitted.values) | |
w <- 1/f(fitted.logcount)^4 | |
dim(w) <- dim(fitted.logcount) | |
} | |
out$E = log2counts | |
out$weights = w | |
out$design = design | |
new("EList",out) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment