Created
June 15, 2011 00:55
Revisions
-
DarioS revised this gist
Aug 23, 2011 . 1 changed file with 16 additions and 12 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -5,18 +5,19 @@ isoDiff <- function(counts, what = c("exon", "junction", "intronic"), min.count stop(paste(unknown.what, collapse = ", "), " are invalid values for 'what'.") counts.df <- as.data.frame(counts) features.keep <- counts.df[, "type"] %in% what counts.df <- counts.df[features.keep, ] counts.cols <- metadata(counts)[["counts.cols"]] + 5 # Transform relative to all data frame columns. n.counts.cols <- length(counts.cols) sample.names <- colnames(counts.df)[counts.cols] feat.indices <- unlist(split(1:nrow(counts.df), counts.df$gene)) results.list <- by(counts.df, counts.df[, "gene"], function(x) { feat.counts <- x[, counts.cols] counts.OK <- rowSums(feat.counts) > min.count chisq.result <- if(any(colSums(feat.counts[counts.OK, ]) == 0) || sum(counts.OK) < 2) NULL else chisq.test(x[counts.OK, counts.cols]) feat.counts.full <- do.call(cbind, lapply(1:n.counts.cols, function(x) { expect <- s.diff <- rep(NA, nrow(feat.counts)) obs <- feat.counts[, x] @@ -26,7 +27,8 @@ isoDiff <- function(counts, what = c("exon", "junction", "intronic"), min.count s.diff <- (obs - expect)^2 / expect } sample.counts.full <- cbind(obs, expect, s.diff) colnames(sample.counts.full) <- paste(sample.names[x], c("Observed", "Expected", "Z²")) sample.counts.full })) p.value <- ifelse(!is.null(chisq.result), chisq.result$p.value, 1) @@ -35,14 +37,16 @@ isoDiff <- function(counts, what = c("exon", "junction", "intronic"), min.count ids <- names(results.list) if("gene.symbol" %in% colnames(counts.df)) # Gene names not always given to rnaCounts by the user. gene.stats <- data.frame(gene = ids, gene.symbol = counts.df[match(ids, counts.df[, "gene"]), "gene.symbol"], p.value = sapply(results.list, "[[", 1)) else gene.stats <- data.frame(gene = ids, p.value = sapply(results.list, "[[", 1)) rownames(gene.stats) <- NULL feature.stats <- counts[features.keep] feature.anno.cols <- colnames(counts.df) %in% c("type", "gene", "gene.symbol") values(feature.stats) <- DataFrame(counts.df[, feature.anno.cols], do.call(rbind, sapply(results.list, "[[", 2))[order(feat.indices), ]) list(genes = gene.stats, features = feature.stats) } -
DarioS revised this gist
Aug 9, 2011 . 1 changed file with 36 additions and 8 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -6,15 +6,43 @@ isoDiff <- function(counts, what = c("exon", "junction", "intronic"), min.count counts.df <- as.data.frame(counts) counts.df <- counts.df[counts.df[, "type"] %in% what, ] count.cols <- which(!colnames(counts.df) %in% c("seqnames", "start", "end", "width", "strand", "type", "gene", "gene.symbol")) n.count.cols <- length(count.cols) sample.names <- colnames(counts.df)[count.cols] feat.indices <- unlist(split(1:nrow(counts.df), counts.df$gene)) results.list <- by(counts.df, counts.df[, "gene"], function(x) { feat.counts <- x[, count.cols] counts.OK <- rowSums(feat.counts) > min.count chisq.result <- if(any(colSums(feat.counts[counts.OK, ]) == 0) || sum(counts.OK) < 2) NULL else chisq.test(x[counts.OK, count.cols]) feat.counts.full <- do.call(cbind, lapply(1:n.count.cols, function(x) { expect <- s.diff <- rep(NA, nrow(feat.counts)) obs <- feat.counts[, x] if(!is.null(chisq.result)) { expect[counts.OK] <- chisq.result[["expected"]][, x] s.diff <- (obs - expect)^2 / expect } sample.counts.full <- cbind(obs, expect, s.diff) colnames(sample.counts.full) <- paste(sample.names[x], c("Observed", "Expected", "Scaled Difference")) sample.counts.full })) p.value <- ifelse(!is.null(chisq.result), chisq.result$p.value, 1) list(p.value = p.value, counts = feat.counts.full) }) ids <- names(results.list) if("gene.symbol" %in% colnames(counts.df)) # Gene names not always given to rnaCounts by the user. gene.stats <- data.frame(gene = ids, gene.symbol = counts.df[match(ids, counts.df[, "gene"]), "gene.symbol"], p.value = sapply(results.list, "[[", 1)) else gene.stats <- data.frame(gene = ids, p.value = sapply(results.list, "[[", 1)) rownames(gene.stats) <- NULL feature.anno.cols <- IRanges::sapply(values(counts), class) != "integer" feature.stats <- counts values(feature.stats) <- DataFrame(values(feature.stats)[feature.anno.cols], do.call(rbind, sapply(results.list, "[[", 2))[order(feat.indices), ]) list(genes = gene.stats, features = feature.stats) } -
DarioS revised this gist
Jul 20, 2011 . 1 changed file with 1 addition and 1 deletion.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -11,7 +11,7 @@ isoDiff <- function(counts, what = c("exon", "junction", "intronic"), min.count { counts <- x[, count.cols] counts.OK <- rowSums(counts) > min.count ifelse(any(colSums(counts) == 0) || sum(counts.OK) < 2, 1, chisq.test(x[counts.OK, count.cols])$p.value) }) ids <- unlist(dimnames(p.value)) -
DarioS revised this gist
Jul 20, 2011 . 1 changed file with 9 additions and 6 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,16 +1,19 @@ isoDiff <- function(counts, what = c("exon", "junction", "intronic"), min.count = 0) { unknown.what <- setdiff(what, c("exon", "junction", "intronic")) if(length(unknown.what) > 0) stop(paste(unknown.what, collapse = ", "), " are invalid values for 'what'.") counts.df <- as.data.frame(counts) counts.df <- counts.df[counts.df[, "type"] %in% what, ] count.cols <- !colnames(counts.df) %in% c("seqnames", "start", "end", "width", "strand", "type", "gene", "gene.symbol") p.value <- by(counts.df, counts.df[, "gene"], function(x) { counts <- x[, count.cols] counts.OK <- rowSums(counts) > min.count ifelse(any(colSums(counts) == 0) || sum(counts.OK) < 2, 1, chisq.test(x[non.zero, count.cols])$p.value) }) ids <- unlist(dimnames(p.value)) symbols <- counts.df[match(ids, counts.df[, "gene"]), "gene.symbol"] data.frame(gene = ids, gene.symbol = symbols, p.value = as.numeric(p.value), row.names = 1:length(ids)) -
DarioS revised this gist
Jul 4, 2011 . 1 changed file with 2 additions and 2 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -7,8 +7,8 @@ isoDiff <- function(counts, what = c("exon", "junction")) p.value <- by(counts.df, counts.df[, "gene"], function(x) { counts <- x[, count.cols] non.zero <- rowSums(counts) > 0 ifelse(any(colSums(counts) == 0) || sum(non.zero) < 2, 1, chisq.test(x[non.zero, count.cols])$p.value) }) ids <- unlist(dimnames(p.value)) -
DarioS revised this gist
Jun 15, 2011 . 1 changed file with 2 additions and 1 deletion.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -7,7 +7,8 @@ isoDiff <- function(counts, what = c("exon", "junction")) p.value <- by(counts.df, counts.df[, "gene"], function(x) { counts <- x[, count.cols] non.zero <- rowSums(x) > 0 ifelse(any(colSums(counts) == 0), 1, chisq.test(x[non.zero, count.cols])$p.value) }) ids <- unlist(dimnames(p.value)) -
DarioS created this gist
Jun 15, 2011 .There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,16 @@ isoDiff <- function(counts, what = c("exon", "junction")) { what <- match.arg(what) counts.df <- as.data.frame(counts) counts.df <- counts.df[counts.df[, "type"] == what, ] count.cols <- !colnames(counts.df) %in% c("seqnames", "start", "end", "width", "strand", "type", "gene", "gene.symbol") p.value <- by(counts.df, counts.df[, "gene"], function(x) { counts <- x[, count.cols] ifelse(any(rowSums(counts) == 0) || any(colSums(counts) == 0), 1, chisq.test(x[, count.cols])$p.value) }) ids <- unlist(dimnames(p.value)) symbols <- counts.df[match(ids, counts.df[, "gene"]), "gene.symbol"] data.frame(gene = ids, gene.symbol = symbols, p.value = as.numeric(p.value), row.names = 1:length(ids)) }