Skip to content

Instantly share code, notes, and snippets.

@DarioS
Created June 15, 2011 00:55

Revisions

  1. DarioS revised this gist Aug 23, 2011. 1 changed file with 16 additions and 12 deletions.
    28 changes: 16 additions & 12 deletions isoDiff.R
    Original file line number Diff line number Diff line change
    @@ -5,18 +5,19 @@ isoDiff <- function(counts, what = c("exon", "junction", "intronic"), min.count
    stop(paste(unknown.what, collapse = ", "), " are invalid values for 'what'.")

    counts.df <- as.data.frame(counts)
    counts.df <- counts.df[counts.df[, "type"] %in% what, ]
    count.cols <- which(!colnames(counts.df) %in% c("seqnames", "start", "end", "width", "strand", "type", "gene", "gene.symbol"))
    n.count.cols <- length(count.cols)
    sample.names <- colnames(counts.df)[count.cols]
    features.keep <- counts.df[, "type"] %in% what
    counts.df <- counts.df[features.keep, ]
    counts.cols <- metadata(counts)[["counts.cols"]] + 5 # Transform relative to all data frame columns.
    n.counts.cols <- length(counts.cols)
    sample.names <- colnames(counts.df)[counts.cols]

    feat.indices <- unlist(split(1:nrow(counts.df), counts.df$gene))
    results.list <- by(counts.df, counts.df[, "gene"], function(x)
    {
    feat.counts <- x[, count.cols]
    feat.counts <- x[, counts.cols]
    counts.OK <- rowSums(feat.counts) > min.count
    chisq.result <- if(any(colSums(feat.counts[counts.OK, ]) == 0) || sum(counts.OK) < 2) NULL else chisq.test(x[counts.OK, count.cols])
    feat.counts.full <- do.call(cbind, lapply(1:n.count.cols, function(x)
    chisq.result <- if(any(colSums(feat.counts[counts.OK, ]) == 0) || sum(counts.OK) < 2) NULL else chisq.test(x[counts.OK, counts.cols])
    feat.counts.full <- do.call(cbind, lapply(1:n.counts.cols, function(x)
    {
    expect <- s.diff <- rep(NA, nrow(feat.counts))
    obs <- feat.counts[, x]
    @@ -26,7 +27,8 @@ isoDiff <- function(counts, what = c("exon", "junction", "intronic"), min.count
    s.diff <- (obs - expect)^2 / expect
    }
    sample.counts.full <- cbind(obs, expect, s.diff)
    colnames(sample.counts.full) <- paste(sample.names[x], c("Observed", "Expected", "Scaled Difference"))
    colnames(sample.counts.full) <- paste(sample.names[x],
    c("Observed", "Expected", ""))
    sample.counts.full
    }))
    p.value <- ifelse(!is.null(chisq.result), chisq.result$p.value, 1)
    @@ -35,14 +37,16 @@ isoDiff <- function(counts, what = c("exon", "junction", "intronic"), min.count
    ids <- names(results.list)

    if("gene.symbol" %in% colnames(counts.df)) # Gene names not always given to rnaCounts by the user.
    gene.stats <- data.frame(gene = ids, gene.symbol = counts.df[match(ids, counts.df[, "gene"]), "gene.symbol"], p.value = sapply(results.list, "[[", 1))
    gene.stats <- data.frame(gene = ids, gene.symbol = counts.df[match(ids, counts.df[, "gene"]), "gene.symbol"],
    p.value = sapply(results.list, "[[", 1))
    else
    gene.stats <- data.frame(gene = ids, p.value = sapply(results.list, "[[", 1))
    rownames(gene.stats) <- NULL

    feature.anno.cols <- IRanges::sapply(values(counts), class) != "integer"
    feature.stats <- counts
    values(feature.stats) <- DataFrame(values(feature.stats)[feature.anno.cols], do.call(rbind, sapply(results.list, "[[", 2))[order(feat.indices), ])
    feature.stats <- counts[features.keep]
    feature.anno.cols <- colnames(counts.df) %in% c("type", "gene", "gene.symbol")
    values(feature.stats) <- DataFrame(counts.df[, feature.anno.cols],
    do.call(rbind, sapply(results.list, "[[", 2))[order(feat.indices), ])

    list(genes = gene.stats, features = feature.stats)
    }
  2. DarioS revised this gist Aug 9, 2011. 1 changed file with 36 additions and 8 deletions.
    44 changes: 36 additions & 8 deletions isoDiff.R
    Original file line number Diff line number Diff line change
    @@ -6,15 +6,43 @@ isoDiff <- function(counts, what = c("exon", "junction", "intronic"), min.count

    counts.df <- as.data.frame(counts)
    counts.df <- counts.df[counts.df[, "type"] %in% what, ]
    count.cols <- !colnames(counts.df) %in% c("seqnames", "start", "end", "width", "strand", "type", "gene", "gene.symbol")
    p.value <- by(counts.df, counts.df[, "gene"], function(x)
    count.cols <- which(!colnames(counts.df) %in% c("seqnames", "start", "end", "width", "strand", "type", "gene", "gene.symbol"))
    n.count.cols <- length(count.cols)
    sample.names <- colnames(counts.df)[count.cols]

    feat.indices <- unlist(split(1:nrow(counts.df), counts.df$gene))
    results.list <- by(counts.df, counts.df[, "gene"], function(x)
    {
    counts <- x[, count.cols]
    counts.OK <- rowSums(counts) > min.count
    ifelse(any(colSums(counts) == 0) || sum(counts.OK) < 2, 1, chisq.test(x[counts.OK, count.cols])$p.value)
    feat.counts <- x[, count.cols]
    counts.OK <- rowSums(feat.counts) > min.count
    chisq.result <- if(any(colSums(feat.counts[counts.OK, ]) == 0) || sum(counts.OK) < 2) NULL else chisq.test(x[counts.OK, count.cols])
    feat.counts.full <- do.call(cbind, lapply(1:n.count.cols, function(x)
    {
    expect <- s.diff <- rep(NA, nrow(feat.counts))
    obs <- feat.counts[, x]
    if(!is.null(chisq.result))
    {
    expect[counts.OK] <- chisq.result[["expected"]][, x]
    s.diff <- (obs - expect)^2 / expect
    }
    sample.counts.full <- cbind(obs, expect, s.diff)
    colnames(sample.counts.full) <- paste(sample.names[x], c("Observed", "Expected", "Scaled Difference"))
    sample.counts.full
    }))
    p.value <- ifelse(!is.null(chisq.result), chisq.result$p.value, 1)
    list(p.value = p.value, counts = feat.counts.full)
    })
    ids <- names(results.list)

    ids <- unlist(dimnames(p.value))
    symbols <- counts.df[match(ids, counts.df[, "gene"]), "gene.symbol"]
    data.frame(gene = ids, gene.symbol = symbols, p.value = as.numeric(p.value), row.names = 1:length(ids))
    if("gene.symbol" %in% colnames(counts.df)) # Gene names not always given to rnaCounts by the user.
    gene.stats <- data.frame(gene = ids, gene.symbol = counts.df[match(ids, counts.df[, "gene"]), "gene.symbol"], p.value = sapply(results.list, "[[", 1))
    else
    gene.stats <- data.frame(gene = ids, p.value = sapply(results.list, "[[", 1))
    rownames(gene.stats) <- NULL

    feature.anno.cols <- IRanges::sapply(values(counts), class) != "integer"
    feature.stats <- counts
    values(feature.stats) <- DataFrame(values(feature.stats)[feature.anno.cols], do.call(rbind, sapply(results.list, "[[", 2))[order(feat.indices), ])

    list(genes = gene.stats, features = feature.stats)
    }
  3. DarioS revised this gist Jul 20, 2011. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion isoDiff.R
    Original file line number Diff line number Diff line change
    @@ -11,7 +11,7 @@ isoDiff <- function(counts, what = c("exon", "junction", "intronic"), min.count
    {
    counts <- x[, count.cols]
    counts.OK <- rowSums(counts) > min.count
    ifelse(any(colSums(counts) == 0) || sum(counts.OK) < 2, 1, chisq.test(x[non.zero, count.cols])$p.value)
    ifelse(any(colSums(counts) == 0) || sum(counts.OK) < 2, 1, chisq.test(x[counts.OK, count.cols])$p.value)
    })

    ids <- unlist(dimnames(p.value))
  4. DarioS revised this gist Jul 20, 2011. 1 changed file with 9 additions and 6 deletions.
    15 changes: 9 additions & 6 deletions isoDiff.R
    Original file line number Diff line number Diff line change
    @@ -1,16 +1,19 @@
    isoDiff <- function(counts, what = c("exon", "junction"))
    isoDiff <- function(counts, what = c("exon", "junction", "intronic"), min.count = 0)
    {
    what <- match.arg(what)
    unknown.what <- setdiff(what, c("exon", "junction", "intronic"))
    if(length(unknown.what) > 0)
    stop(paste(unknown.what, collapse = ", "), " are invalid values for 'what'.")

    counts.df <- as.data.frame(counts)
    counts.df <- counts.df[counts.df[, "type"] == what, ]
    counts.df <- counts.df[counts.df[, "type"] %in% what, ]
    count.cols <- !colnames(counts.df) %in% c("seqnames", "start", "end", "width", "strand", "type", "gene", "gene.symbol")
    p.value <- by(counts.df, counts.df[, "gene"], function(x)
    {
    counts <- x[, count.cols]
    non.zero <- rowSums(counts) > 0
    ifelse(any(colSums(counts) == 0) || sum(non.zero) < 2, 1, chisq.test(x[non.zero, count.cols])$p.value)
    counts.OK <- rowSums(counts) > min.count
    ifelse(any(colSums(counts) == 0) || sum(counts.OK) < 2, 1, chisq.test(x[non.zero, count.cols])$p.value)
    })

    ids <- unlist(dimnames(p.value))
    symbols <- counts.df[match(ids, counts.df[, "gene"]), "gene.symbol"]
    data.frame(gene = ids, gene.symbol = symbols, p.value = as.numeric(p.value), row.names = 1:length(ids))
  5. DarioS revised this gist Jul 4, 2011. 1 changed file with 2 additions and 2 deletions.
    4 changes: 2 additions & 2 deletions isoDiff.R
    Original file line number Diff line number Diff line change
    @@ -7,8 +7,8 @@ isoDiff <- function(counts, what = c("exon", "junction"))
    p.value <- by(counts.df, counts.df[, "gene"], function(x)
    {
    counts <- x[, count.cols]
    non.zero <- rowSums(x) > 0
    ifelse(any(colSums(counts) == 0), 1, chisq.test(x[non.zero, count.cols])$p.value)
    non.zero <- rowSums(counts) > 0
    ifelse(any(colSums(counts) == 0) || sum(non.zero) < 2, 1, chisq.test(x[non.zero, count.cols])$p.value)
    })

    ids <- unlist(dimnames(p.value))
  6. DarioS revised this gist Jun 15, 2011. 1 changed file with 2 additions and 1 deletion.
    3 changes: 2 additions & 1 deletion isoDiff.R
    Original file line number Diff line number Diff line change
    @@ -7,7 +7,8 @@ isoDiff <- function(counts, what = c("exon", "junction"))
    p.value <- by(counts.df, counts.df[, "gene"], function(x)
    {
    counts <- x[, count.cols]
    ifelse(any(rowSums(counts) == 0) || any(colSums(counts) == 0), 1, chisq.test(x[, count.cols])$p.value)
    non.zero <- rowSums(x) > 0
    ifelse(any(colSums(counts) == 0), 1, chisq.test(x[non.zero, count.cols])$p.value)
    })

    ids <- unlist(dimnames(p.value))
  7. DarioS created this gist Jun 15, 2011.
    16 changes: 16 additions & 0 deletions isoDiff.R
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,16 @@
    isoDiff <- function(counts, what = c("exon", "junction"))
    {
    what <- match.arg(what)
    counts.df <- as.data.frame(counts)
    counts.df <- counts.df[counts.df[, "type"] == what, ]
    count.cols <- !colnames(counts.df) %in% c("seqnames", "start", "end", "width", "strand", "type", "gene", "gene.symbol")
    p.value <- by(counts.df, counts.df[, "gene"], function(x)
    {
    counts <- x[, count.cols]
    ifelse(any(rowSums(counts) == 0) || any(colSums(counts) == 0), 1, chisq.test(x[, count.cols])$p.value)
    })

    ids <- unlist(dimnames(p.value))
    symbols <- counts.df[match(ids, counts.df[, "gene"]), "gene.symbol"]
    data.frame(gene = ids, gene.symbol = symbols, p.value = as.numeric(p.value), row.names = 1:length(ids))
    }