Last active
March 3, 2025 10:38
-
-
Save tiagochst/e324bead84c4accef753f51bc7e4dbd1 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# --------------------------- | |
# Accessing the material | |
# https://tinyurl.com/bioc2017-ELMER | |
# --------------------------- | |
library("Bioc2017.TCGAbiolinks.ELMER") | |
Biobase::openVignette("Bioc2017.TCGAbiolinks.ELMER") | |
# --------------------------- | |
# Section 1: | |
# Aims: | |
# 1.1 Accessing GDC | |
# 1.2 Downloading samples | |
# 1.3 Preparing files into a SummarizedExperiment object | |
# --------------------------- | |
# Auxiliary information: | |
# - GDC: The NCI's Genomic Data Commons (GDC) | |
# https://portal.gdc.cancer.gov/ | |
# - Pipelines description | |
# https://docs.gdc.cancer.gov/Data/Bioinformatics_Pipelines/Expression_mRNA_Pipeline/ | |
# - TCGA Barcode description: | |
# https://wiki.nci.nih.gov/display/TCGA/TCGA+barcode | |
# https://gdc.cancer.gov/resources-tcga-users/tcga-code-tables/sample-type-codes | |
# --------------------------- | |
library(TCGAbiolinks) | |
library(SummarizedExperiment) | |
library(DT) | |
library(dplyr) | |
query.exp <- GDCquery(project = "TCGA-LUSC", | |
data.category = "Transcriptome Profiling", | |
data.type = "Gene Expression Quantification", | |
workflow.type = "HTSeq - FPKM-UQ", | |
barcode = c("TCGA-34-5231-01","TCGA-77-7138-01")) | |
GDCdownload(query.exp) | |
exp <- GDCprepare(query = query.exp, | |
save = TRUE, | |
save.filename = "Exp_LUSC.rda", | |
summarizedExperiment = TRUE) | |
# Summarized experiment | |
# http://bioconductor.org/packages/release/bioc/vignettes/SummarizedExperiment/inst/doc/SummarizedExperiment.html | |
query.met <- GDCquery(project = "TCGA-LUSC", | |
data.category = "DNA Methylation", | |
platform = "Illumina Human Methylation 450", | |
barcode = c("TCGA-34-5231-01A-21D-1818-05","TCGA-77-7138-01A-41D-2043-05")) | |
GDCdownload(query.met) | |
met <- GDCprepare(query = query.met, | |
save = TRUE, | |
save.filename = "DNAmethylation_LUSC.rda", | |
summarizedExperiment = TRUE) | |
# --------------------------- | |
# Section 2: | |
# Aims: | |
# 2.1 Create a MultiAssayExperiment | |
# 2.2 Find Differently methylated probes | |
# (Normal tissue sample vs Primary Solid Tumor) | |
# 2.3 Find inverse correlated probes-genes pairs | |
# (probes hypomethylated and gene upregulated) | |
# 2.4 Motif enrichment analysis on the regions with a loss of | |
# DNA methylation | |
# 2.5 Find cadidate regulatory TF | |
# --------------------------- | |
library("Bioc2017.TCGAbiolinks.ELMER") | |
library(MultiAssayExperiment) | |
library(ELMER) | |
lusc.exp | |
lusc.met | |
distal.probes <- get.feature.probe(genome = "hg38", met.platform = "450K") | |
mae <- createMAE(exp = lusc.exp, | |
met = lusc.met, | |
save = TRUE, | |
linearize.exp = TRUE, | |
filter.probes = distal.probes, | |
save.filename = "mae_bioc2017.rda", | |
met.platform = "450K", | |
genome = "hg38", | |
TCGA = TRUE) | |
mae | |
colData(mae)[1:5,] %>% as.data.frame %>% datatable(options = list(scrollX = TRUE)) | |
sampleMap(mae)[1:5,] %>% as.data.frame %>% datatable(options = list(scrollX = TRUE)) | |
group.col <- "definition" | |
group1 <- "Primary solid Tumor" | |
group2 <- "Solid Tissue Normal" | |
dir.out <- "result" | |
Sig.probes <- get.diff.meth(data = mae, | |
group.col = group.col, | |
group1 = group1, | |
group2 = group2, | |
minSubgroupFrac = 0.2, | |
sig.dif = 0.3, | |
diff.dir = "hypo", # Search for hypomethylated probes in group 1 | |
cores = 4, | |
dir.out = dir.out, | |
pvalue = 0.01) | |
nearGenes <- GetNearGenes(data = mae, | |
probes = Sig.probes$probe, | |
numFlankingGenes = 20, # 10 upstream and 10 dowstream genes | |
cores = 4) | |
pair <- get.pair(data = mae, | |
group.col = group.col, | |
group1 = group1, | |
group2 = group2, | |
nearGenes = nearGenes, | |
minSubgroupFrac = 0.4, # % of samples to use in to create groups U/M | |
permu.dir = "result/permu", | |
permu.size = 100, # Please set to 100000 to get significant results | |
pvalue = 0.05, | |
Pe = 0.01, # Please set to 0.001 to get significant results | |
filter.probes = TRUE, # See preAssociationProbeFiltering function | |
filter.percentage = 0.05, | |
filter.portion = 0.3, | |
dir.out = "result", | |
cores = 4, | |
label = "hypo") | |
# 2.4 Motif enrichment analysis on the regions with a loss of | |
# DNA methylation | |
# Hocomoco: http://hocomoco.autosome.ru/ | |
library(ELMER.data) | |
data("Probes.motif.hg19.450K") | |
as.matrix(Probes.motif.hg19.450K[1:3,1:3]) | |
enriched.motif <- get.enriched.motif(data = mae, | |
probes.motif = Probes.motif.hg19.450K, | |
probes = pair$Probe, | |
dir.out = dir.out, | |
label = "hypo", | |
min.incidence = 10, | |
lower.OR = 1.1) | |
motif.enrichment <- read.csv(paste0(dir.out,"/getMotif.hypo.motif.enrichment.csv"), | |
stringsAsFactors=FALSE) | |
names(enriched.motif) # enriched motifs | |
enriched.motif[[1]] | |
# TF classification: http://tfclass.bioinf.med.uni-goettingen.de/tfclass | |
downloader::download("https://github.com/tiagochst/ELMER.data/raw/master/data/TF.family.rda","TF.family.rda") | |
load("TF.family.rda") | |
TF.family$AHR_HUMAN.H10MO.B | |
downloader::download("https://github.com/tiagochst/ELMER.data/raw/master/data/TF.subfamily.rda","TF.subfamily.rda") | |
load("TF.subfamily.rda") | |
## identify regulatory TF for the enriched motifs | |
TF <- get.TFs(data = mae, | |
motif.relevant.TFs = TF.family, | |
group.col = group.col, | |
group1 = group1, | |
group2 = group2, | |
minSubgroupFrac = 0.4, | |
enriched.motif = enriched.motif, | |
dir.out = dir.out, | |
cores = 4, | |
label = "hypo") | |
TF.meth.cor <- get(load(paste0(dir.out,"/getTF.hypo.TFs.with.motif.pvalue.rda"))) | |
save(mae, | |
Sig.probes, | |
pair, nearGenes, | |
motif.enrichment, enriched.motif, | |
TF.meth.cor, TF, | |
group.col, group1, group2, | |
file = paste0(dir.out,"/ELMER_results_hypo.rda")) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment