Created
August 27, 2014 00:09
-
-
Save PeteHaitch/fdb66d360446ff96ed4b to your computer and use it in GitHub Desktop.
Reproducible example for question on Bioc-Devel https://stat.ethz.ch/pipermail/bioc-devel/2014-August/006106.html
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Necessary packages (BioC-devel) | |
library(GenomicRanges) | |
library(S4Vectors) | |
## Class A using a DataFrameOrNULL in internalPos slot | |
setClassUnion(name = "DataFrameOrNULL", members = c("DataFrame", "NULL")) | |
setClass("A", | |
contains = "GRanges", | |
representation( | |
internalPos = "DataFrameOrNULL", | |
size = "integer"), | |
prototype( | |
internalPos = NULL, | |
size = NA_integer_) | |
) | |
setMethod(GenomicRanges:::extraColumnSlotNames, "A", | |
function(x) { | |
c("internalPos") | |
}) | |
A <- function(seqnames = Rle(), tuples = matrix(), | |
strand = Rle("*", length(seqnames)), ..., | |
seqlengths = NULL, seqinfo = NULL) { | |
# Get size of tuples | |
if (all(is.na(tuples))) { | |
size <- NA_integer_ | |
} else { | |
size <- ncol(tuples) | |
} | |
# Create IRanges | |
if (!is.na(size)) { | |
ranges <- IRanges(start = tuples[, 1], end = tuples[, size]) | |
} else { | |
ranges <- IRanges() | |
} | |
# Create internalPos | |
if (is.na(size) || size < 3) { | |
internalPos <- NULL | |
} else { | |
internalPos <- DataFrame(tuples[, seq(from = 2L, to = size - 1, by = 1L), | |
drop = FALSE]) | |
} | |
# Create GRanges | |
gr <- GRanges(seqnames = seqnames, ranges = ranges, strand = strand, | |
seqlengths = seqlengths, seqinfo = seqinfo, ...) | |
new("A", gr, internalPos = internalPos, size = size) | |
} | |
# Class B using matrixOrNULL in internalPos slot | |
setClassUnion(name = "matrixOrNULL", members = c("matrix", "NULL")) | |
setClass("B", | |
contains = "GRanges", | |
representation( | |
internalPos = "matrixOrNULL", | |
size = "integer"), | |
prototype( | |
internalPos = NULL, | |
size = NA_integer_) | |
) | |
setMethod(GenomicRanges:::extraColumnSlotNames, "B", | |
function(x) { | |
c("internalPos") | |
}) | |
B <- function(seqnames = Rle(), tuples = matrix(), | |
strand = Rle("*", length(seqnames)), ..., | |
seqlengths = NULL, seqinfo = NULL) { | |
# Get size of tuples | |
if (all(is.na(tuples))) { | |
size <- NA_integer_ | |
} else { | |
size <- ncol(tuples) | |
} | |
# Create IRanges | |
if (!is.na(size)) { | |
ranges <- IRanges(start = tuples[, 1], end = tuples[, size]) | |
} else { | |
ranges <- IRanges() | |
} | |
# Create internalPos | |
if (is.na(size) || size < 3) { | |
internalPos <- NULL | |
} else { | |
internalPos <- tuples[, seq(from = 2L, to = size - 1, by = 1L), | |
drop = FALSE] | |
} | |
# Create GRanges | |
gr <- GRanges(seqnames = seqnames, ranges = ranges, strand = strand, | |
seqlengths = seqlengths, seqinfo = seqinfo, ...) | |
new("B", gr, internalPos = internalPos, size = size) | |
} | |
# Function to make two tuples object of class A or B of given size and replace | |
# an element of the first with an element of the second to create a third | |
# object. Essentially, make a call to S4Vectors::replaceROWS with each class. | |
# If size < 3 then internalPos is NULL otherwise it is a DataFrame | |
# (resp. matrix) with "size - 2" columns. | |
f <- function(size = 3, class = c("A", "B")) { | |
seqinfo <- Seqinfo(paste0("chr", 1:3), c(1000, 2000, 1500), NA, "mock1") | |
if (class == "A") { | |
# Create object of class A | |
x <- A(seqnames = Rle('chr1', 10), | |
tuples = matrix(seq(1, size * 10), ncol = size), seqinfo = seqinfo) | |
# Create another object of class A | |
value <- A(seqnames = Rle('chr3', 1), | |
tuples = matrix(seq(size * 10 + 1, size * 10 + size), ncol = size), | |
seqinfo = seqinfo) | |
# Try to replace the first element of a with aa | |
xx <- replaceROWS(x, 1, value) | |
} else if (class == "B") { | |
# Create object of class B | |
x <- B(seqnames = Rle('chr1', 1), | |
tuples = matrix(seq(1, size * 10), ncol = size), seqinfo = seqinfo) | |
# Create another object of class A | |
value <- B(seqnames = Rle('chr3', 1), | |
tuples = matrix(seq(size * 10 + 1, size * 10 + size), ncol = size), | |
seqinfo = seqinfo) | |
# Try to replace the first element of a with aa | |
xx <- replaceROWS(x, 1, value) | |
} | |
return(xx) | |
} | |
## Define a replaceROWS function with signature NULL | |
## Perhaps more generally useful than my package? | |
setMethod("replaceROWS", | |
"NULL", | |
function(x, i, value) { | |
NULL | |
} | |
) | |
## Class A - works regardless of size. By this I mean replaceROWS also updates | |
# the internalPos slot. | |
# NB: the show method (which is inherited from GenomicRanges) will fail when | |
# size < 3 but this isn't important here. Use str() to see that the object | |
# was indeed properly updated. | |
a1 <- f(1, "A") | |
a1@internalPos | |
a2 <- f(2, "A") | |
a2@internalPos | |
# Works if size > 2. By this I mean it also updates the internalPos slot. | |
a3 <- f(3, "A") | |
a3@internalPos | |
a4 <- f(4, "A") | |
a4@internalPos | |
## Class B - works if size < 3, fails otherwise. | |
# NB: the show method (which is inherited from GenomicRanges) will fail when | |
# size < 3 but this isn't important here. Use str() to see that the object | |
# was indeed properly updated. | |
b1 <- f(1, "B") | |
b1@internalPos | |
b2 <- f(2, "B") | |
b2@internalPos | |
# Fails if size = 3 due to "ans_ecs <- GenomicRanges:::extraColumnSlotsAsDF(x)" | |
# in the replaceROWS method for B (which is inherited from GenomicRanges). | |
# This means that the subsequent call to | |
# "update(x, seqnames = ans_seqnames, ranges = ans_ranges, strand = ans_strand, | |
# elementMetadata = ans_mcols, .slotList = as.list(ans_ecs))" in replaceROWS | |
# gets a DataFrame, rather than a matrix, for the internalPos slot and | |
# causes the error. | |
b3 <- f(3, "B") | |
# Fails with a slightly different error if size > 3, which is again due to | |
# "ans_ecs <- GenomicRanges:::extraColumnSlotsAsDF(x)" in the replaceROWS | |
# method for B (which is inherited from GenomicRanges). The difference being | |
# that it fails at the subsequent call to | |
# "ans_ecs <- replaceROWS(ans_ecs, i, value_ecs[seq_len(ans_necs)])" in | |
# replaceROWS | |
b4 <- f(4, "B") | |
## What I could do: | |
# (1): Just use class A, i.e. a DataFrame for internalPos, since this works | |
# out-of-the-box. | |
# (2) : I could define a replaceROWS method for class B but it would be nice | |
# for it to work via inheritance to GenomicRanges when the subclass has a | |
# matrix in a slot that is an extraColumnSlots (unless this is not allowed). | |
sessionInfo() | |
# | |
# R version 3.1.1 (2014-07-10) | |
# Platform: x86_64-apple-darwin13.1.0 (64-bit) | |
# | |
# locale: | |
# [1] en_AU.UTF-8/en_AU.UTF-8/en_AU.UTF-8/C/en_AU.UTF-8/en_AU.UTF-8 | |
# | |
# attached base packages: | |
# [1] parallel stats graphics grDevices utils datasets methods | |
# [8] base | |
# | |
# other attached packages: | |
# [1] GenomicRanges_1.17.35 GenomeInfoDb_1.1.18 IRanges_1.99.24 | |
# [4] S4Vectors_0.1.2 BiocGenerics_0.11.4 | |
# | |
# loaded via a namespace (and not attached): | |
# [1] packrat_0.4.0.12 stats4_3.1.1 tools_3.1.1 XVector_0.5.7 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment