DarioS · June 15, 2011 00:55 · Aug 23, 2011 · Aug 9, 2011 · Jul 20, 2011 · Jul 20, 2011
diff --git a/isoDiff.R b/isoDiff.R
@@ -5,18 +5,19 @@ isoDiff <- function(counts, what = c("exon", "junction", "intronic"), min.count
         stop(paste(unknown.what, collapse = ", "), " are invalid values for 'what'.")
 
     counts.df <- as.data.frame(counts)
-    counts.df <- counts.df[counts.df[, "type"] %in% what, ]
-    count.cols <- which(!colnames(counts.df) %in% c("seqnames", "start", "end", "width", "strand", "type", "gene", "gene.symbol"))
-    n.count.cols <- length(count.cols)
-    sample.names <- colnames(counts.df)[count.cols]
+    features.keep <- counts.df[, "type"] %in% what
+    counts.df <- counts.df[features.keep, ]
+    counts.cols <- metadata(counts)[["counts.cols"]] + 5 # Transform relative to all data frame columns.
+    n.counts.cols <- length(counts.cols)
+    sample.names <- colnames(counts.df)[counts.cols]
 
     feat.indices <- unlist(split(1:nrow(counts.df), counts.df$gene))
     results.list <- by(counts.df, counts.df[, "gene"], function(x)
                    {
-                       feat.counts <- x[, count.cols]
+                       feat.counts <- x[, counts.cols]
                        counts.OK <- rowSums(feat.counts) > min.count
-                       chisq.result <- if(any(colSums(feat.counts[counts.OK, ]) == 0) || sum(counts.OK) < 2) NULL else chisq.test(x[counts.OK, count.cols])
-                       feat.counts.full <- do.call(cbind, lapply(1:n.count.cols, function(x)
+                       chisq.result <- if(any(colSums(feat.counts[counts.OK, ]) == 0) || sum(counts.OK) < 2) NULL else chisq.test(x[counts.OK, counts.cols])
+                       feat.counts.full <- do.call(cbind, lapply(1:n.counts.cols, function(x)
                                                           {
                                                               expect <- s.diff <- rep(NA, nrow(feat.counts))
                                                               obs <- feat.counts[, x]
@@ -26,7 +27,8 @@ isoDiff <- function(counts, what = c("exon", "junction", "intronic"), min.count
                                                                 s.diff <- (obs - expect)^2 / expect
                                                               }
                                                               sample.counts.full <- cbind(obs, expect, s.diff)
-                                                              colnames(sample.counts.full) <- paste(sample.names[x], c("Observed", "Expected", "Scaled Difference"))
+                                                              colnames(sample.counts.full) <- paste(sample.names[x],
+                                                                                                    c("Observed", "Expected", "Z²"))
                                                               sample.counts.full
                                                           }))
                        p.value <- ifelse(!is.null(chisq.result), chisq.result$p.value, 1)
@@ -35,14 +37,16 @@ isoDiff <- function(counts, what = c("exon", "junction", "intronic"), min.count
     ids <- names(results.list)
 
     if("gene.symbol" %in% colnames(counts.df)) # Gene names not always given to rnaCounts by the user.
-        gene.stats <- data.frame(gene = ids, gene.symbol = counts.df[match(ids, counts.df[, "gene"]), "gene.symbol"], p.value = sapply(results.list, "[[", 1))
+        gene.stats <- data.frame(gene = ids, gene.symbol = counts.df[match(ids, counts.df[, "gene"]), "gene.symbol"],
+                                 p.value = sapply(results.list, "[[", 1))
     else
         gene.stats <- data.frame(gene = ids, p.value = sapply(results.list, "[[", 1))
     rownames(gene.stats) <- NULL
 
-    feature.anno.cols <- IRanges::sapply(values(counts), class) != "integer"
-    feature.stats <- counts
-    values(feature.stats) <- DataFrame(values(feature.stats)[feature.anno.cols], do.call(rbind, sapply(results.list, "[[", 2))[order(feat.indices), ])
+    feature.stats <- counts[features.keep]
+    feature.anno.cols <- colnames(counts.df) %in% c("type", "gene", "gene.symbol")
+    values(feature.stats) <- DataFrame(counts.df[, feature.anno.cols],
+                                       do.call(rbind, sapply(results.list, "[[", 2))[order(feat.indices), ])
 
     list(genes = gene.stats, features = feature.stats)
 }
diff --git a/isoDiff.R b/isoDiff.R
@@ -6,15 +6,43 @@ isoDiff <- function(counts, what = c("exon", "junction", "intronic"), min.count
 
     counts.df <- as.data.frame(counts)
     counts.df <- counts.df[counts.df[, "type"] %in% what, ]
-    count.cols <- !colnames(counts.df) %in% c("seqnames", "start", "end", "width", "strand", "type", "gene", "gene.symbol")
-    p.value <- by(counts.df, counts.df[, "gene"], function(x)
+    count.cols <- which(!colnames(counts.df) %in% c("seqnames", "start", "end", "width", "strand", "type", "gene", "gene.symbol"))
+    n.count.cols <- length(count.cols)
+    sample.names <- colnames(counts.df)[count.cols]
+
+    feat.indices <- unlist(split(1:nrow(counts.df), counts.df$gene))
+    results.list <- by(counts.df, counts.df[, "gene"], function(x)
                    {
-                       counts <- x[, count.cols]
-                       counts.OK <- rowSums(counts) > min.count
-                       ifelse(any(colSums(counts) == 0) || sum(counts.OK) < 2, 1, chisq.test(x[counts.OK, count.cols])$p.value)
+                       feat.counts <- x[, count.cols]
+                       counts.OK <- rowSums(feat.counts) > min.count
+                       chisq.result <- if(any(colSums(feat.counts[counts.OK, ]) == 0) || sum(counts.OK) < 2) NULL else chisq.test(x[counts.OK, count.cols])
+                       feat.counts.full <- do.call(cbind, lapply(1:n.count.cols, function(x)
+                                                          {
+                                                              expect <- s.diff <- rep(NA, nrow(feat.counts))
+                                                              obs <- feat.counts[, x]
+                                                              if(!is.null(chisq.result))
+                                                              {
+                                                                expect[counts.OK] <- chisq.result[["expected"]][, x]
+                                                                s.diff <- (obs - expect)^2 / expect
+                                                              }
+                                                              sample.counts.full <- cbind(obs, expect, s.diff)
+                                                              colnames(sample.counts.full) <- paste(sample.names[x], c("Observed", "Expected", "Scaled Difference"))
+                                                              sample.counts.full
+                                                          }))
+                       p.value <- ifelse(!is.null(chisq.result), chisq.result$p.value, 1)
+                       list(p.value = p.value, counts = feat.counts.full)
                    })
+    ids <- names(results.list)
 
-    ids <- unlist(dimnames(p.value))
-    symbols <- counts.df[match(ids, counts.df[, "gene"]), "gene.symbol"]
-    data.frame(gene = ids, gene.symbol = symbols, p.value = as.numeric(p.value), row.names = 1:length(ids))
+    if("gene.symbol" %in% colnames(counts.df)) # Gene names not always given to rnaCounts by the user.
+        gene.stats <- data.frame(gene = ids, gene.symbol = counts.df[match(ids, counts.df[, "gene"]), "gene.symbol"], p.value = sapply(results.list, "[[", 1))
+    else
+        gene.stats <- data.frame(gene = ids, p.value = sapply(results.list, "[[", 1))
+    rownames(gene.stats) <- NULL
+
+    feature.anno.cols <- IRanges::sapply(values(counts), class) != "integer"
+    feature.stats <- counts
+    values(feature.stats) <- DataFrame(values(feature.stats)[feature.anno.cols], do.call(rbind, sapply(results.list, "[[", 2))[order(feat.indices), ])
+
+    list(genes = gene.stats, features = feature.stats)
 }
diff --git a/isoDiff.R b/isoDiff.R
@@ -11,7 +11,7 @@ isoDiff <- function(counts, what = c("exon", "junction", "intronic"), min.count
                    {
                        counts <- x[, count.cols]
                        counts.OK <- rowSums(counts) > min.count
-                       ifelse(any(colSums(counts) == 0) || sum(counts.OK) < 2, 1, chisq.test(x[non.zero, count.cols])$p.value)
+                       ifelse(any(colSums(counts) == 0) || sum(counts.OK) < 2, 1, chisq.test(x[counts.OK, count.cols])$p.value)
                    })
 
     ids <- unlist(dimnames(p.value))

diff --git a/isoDiff.R b/isoDiff.R
@@ -1,16 +1,19 @@
-isoDiff <- function(counts, what = c("exon", "junction"))
+isoDiff <- function(counts, what = c("exon", "junction", "intronic"), min.count = 0)
 {
-    what <- match.arg(what)
+    unknown.what <- setdiff(what, c("exon", "junction", "intronic"))
+    if(length(unknown.what) > 0)
+        stop(paste(unknown.what, collapse = ", "), " are invalid values for 'what'.")
+
     counts.df <- as.data.frame(counts)
-    counts.df <- counts.df[counts.df[, "type"] == what, ]
+    counts.df <- counts.df[counts.df[, "type"] %in% what, ]
     count.cols <- !colnames(counts.df) %in% c("seqnames", "start", "end", "width", "strand", "type", "gene", "gene.symbol")
     p.value <- by(counts.df, counts.df[, "gene"], function(x)
                    {
                        counts <- x[, count.cols]
-                       non.zero <- rowSums(counts) > 0
-                       ifelse(any(colSums(counts) == 0) || sum(non.zero) < 2, 1, chisq.test(x[non.zero, count.cols])$p.value)
+                       counts.OK <- rowSums(counts) > min.count
+                       ifelse(any(colSums(counts) == 0) || sum(counts.OK) < 2, 1, chisq.test(x[non.zero, count.cols])$p.value)
                    })
-    
+
     ids <- unlist(dimnames(p.value))
     symbols <- counts.df[match(ids, counts.df[, "gene"]), "gene.symbol"]
     data.frame(gene = ids, gene.symbol = symbols, p.value = as.numeric(p.value), row.names = 1:length(ids))

diff --git a/isoDiff.R b/isoDiff.R
@@ -7,8 +7,8 @@ isoDiff <- function(counts, what = c("exon", "junction"))
     p.value <- by(counts.df, counts.df[, "gene"], function(x)
                    {
                        counts <- x[, count.cols]
-                       non.zero <- rowSums(x) > 0
-                       ifelse(any(colSums(counts) == 0), 1, chisq.test(x[non.zero, count.cols])$p.value)
+                       non.zero <- rowSums(counts) > 0
+                       ifelse(any(colSums(counts) == 0) || sum(non.zero) < 2, 1, chisq.test(x[non.zero, count.cols])$p.value)
                    })
 
     ids <- unlist(dimnames(p.value))

diff --git a/isoDiff.R b/isoDiff.R
@@ -7,7 +7,8 @@ isoDiff <- function(counts, what = c("exon", "junction"))
     p.value <- by(counts.df, counts.df[, "gene"], function(x)
                    {
                        counts <- x[, count.cols]
-                       ifelse(any(rowSums(counts) == 0) || any(colSums(counts) == 0), 1, chisq.test(x[, count.cols])$p.value)
+                       non.zero <- rowSums(x) > 0
+                       ifelse(any(colSums(counts) == 0), 1, chisq.test(x[non.zero, count.cols])$p.value)
                    })
 
     ids <- unlist(dimnames(p.value))

diff --git a/isoDiff.R b/isoDiff.R
@@ -0,0 +1,16 @@
+isoDiff <- function(counts, what = c("exon", "junction"))
+{
+    what <- match.arg(what)
+    counts.df <- as.data.frame(counts)
+    counts.df <- counts.df[counts.df[, "type"] == what, ]
+    count.cols <- !colnames(counts.df) %in% c("seqnames", "start", "end", "width", "strand", "type", "gene", "gene.symbol")
+    p.value <- by(counts.df, counts.df[, "gene"], function(x)
+                   {
+                       counts <- x[, count.cols]
+                       ifelse(any(rowSums(counts) == 0) || any(colSums(counts) == 0), 1, chisq.test(x[, count.cols])$p.value)
+                   })
+
+    ids <- unlist(dimnames(p.value))
+    symbols <- counts.df[match(ids, counts.df[, "gene"]), "gene.symbol"]
+    data.frame(gene = ids, gene.symbol = symbols, p.value = as.numeric(p.value), row.names = 1:length(ids))
+}