I hereby claim:
- I am petehaitch on github.
- I am peterhickey (https://keybase.io/peterhickey) on keybase.
- I have a public key ASCF4DS1IJgSk7plC9JD2iCp-ey_j53id8_e82va8CmkLQo
To claim this, I am signing this object:
I hereby claim:
To claim this, I am signing this object:
library(profmem) | |
library(Matrix) | |
# Read the Matrix Market file into `dgt`.
# NOTE(review): presumably readMM() returns a triplet-format sparse matrix here, as the variable name suggests — confirm.
dgt <- readMM("~/sparse.mtx.gz") | |
# Keep a second copy of the same data coerced to the "dgCMatrix" class.
dgc <- as(dgt, "dgCMatrix") | |
colCountsEqualZero <- function(x) { | |
if (length(x@x) / length(x) > 0.5) { | |
# If majority of data are non-zero | |
Matrix::colSums(x == 0) | |
} else { |
library(profmem) | |
library(Matrix) | |
# Load the matrix and keep a second copy coerced to the "dgCMatrix" class.
dgt <- readMM("~/sparse.mtx.gz") | |
dgc <- as(dgt, "dgCMatrix") | |
# Cut-off used in the element-wise `>` comparisons below.
threshold <- 5 | |
# Total memory allocation (bytes) | |
# Measured for the column sums of `dgt > threshold`; the recorded result follows.
total(profmem(Matrix::colSums(dgt > threshold))) | |
#> [1] 935422632 | |
# Same measurement on the dgCMatrix copy.
total(profmem(Matrix::colSums(dgc > threshold))) |
library(Matrix) | |
library(pryr) | |
library(profmem) | |
library(matrixStats) | |
library(microbenchmark) | |
# Scalar settings for the script.
# NOTE(review): `nrow` and `ncol` reuse the names of base functions. Calls such
# as nrow(x) still resolve to the functions (R skips non-function bindings when
# looking up a call), but distinct names would read more clearly.
nrow <- 20000 | |
ncol <- 600 | |
threshold <- 0 |
library(GenomicRanges) | |
# A fast check that all(x == y) for GenomicRanges objects. | |
.all.equal.GenomicRanges <- function(x, y) { | |
seqinfo <- merge(seqinfo(x), seqinfo(y)) | |
seqlevels <- seqlevels(seqinfo) | |
if (any(diff(match(seqlevels(y), seqlevels)) < 0L)) { | |
stop("the 2 objects to compare have seqlevels in incompatible orders") | |
} | |
ok <- all(identical(seqnames(x), seqnames(y)), |
# Pseudocode | |
isUnsortedListOfAtomics <- function(..., na.rm = FALSE, strictly = FALSE) { | |
args <- list(...) | |
m <- length(args) | |
n <- length(args[[1]]) | |
for (i in (seq_along(args[[1]]) - 1)) { | |
for (j in seq_len(m)) { | |
# All vectors except the last are checked in the same way | |
if (j < m) { |
# (1) Using existing repo
# Clone over SSH. The remote had been scrubbed to the placeholder
# "[email protected]" in this copy; GitHub SSH remotes take the form
# git@github.com:<owner>/<repo>.git.
git clone git@github.com:PeteHaitch/GenomicTuples.git
cd GenomicTuples
# Download update_remotes.sh from the Bioconductor/mirror repository.
# -f makes curl fail on an HTTP error instead of saving the error page,
# which would otherwise be handed to bash on the next line.
curl -fO https://raw.githubusercontent.com/Bioconductor/mirror/master/update_remotes.sh
bash update_remotes.sh
# Bump Version and Date in DESCRIPTION (v1.5.2)
# NOTE: Version isn't up-to-date with SVN; why?
git add DESCRIPTION
git commit -m "Bump version number"
Peter Hickey
20 October 2015
I often find myself with multiple `SE` objects (I'm using `SE` as a shorthand for the `SummarizedExperiment0` and `RangedSummarizedExperiment` classes), each with potentially non-distinct samples and potentially non-overlapping features/ranges. Currently, it is difficult to combine these objects; `rbind()` can only combine objects with the same samples but distinct features/ranges and `cbind()` can only combine objects with the same features/ranges but distinct samples. I think it would be useful to have a "combine" method for `SE` objects that handles the most general situation where each object has potentially non-distinct samples and potentially non-overlapping features/ranges.
library(GenomicRanges) | |
library(microbenchmark) | |
# Dimensions of the simulated data set.
nrows <- 2000000; ncols <- 6 | |
# nrows-row matrix filled with nrows * ncols uniform random draws between 1 and 1e4.
counts <- matrix(runif(nrows * ncols, 1, 1e4), nrows) | |
# One range per row: the first 25% are labelled "chr1" and the remaining 75%
# "chr2"; starts are floored uniform draws between 1e5 and 1e6, every range has
# width 100, and strand is sampled with replacement from "+" and "-".
rowData <- GRanges(rep(c("chr1", "chr2"), c(0.25 * nrows, 0.75 * nrows)), | |
IRanges(floor(runif(nrows, 1e5, 1e6)), width=100), | |
strand=sample(c("+", "-"), nrows, TRUE)) | |
# Six rows named "A".."F" with Treatment alternating "ChIP", "Input".
# NOTE(review): the 3 and 1:6 are hard-coded and only line up with ncols while
# ncols is 6.
colData <- DataFrame(Treatment=rep(c("ChIP", "Input"), 3), | |
row.names=LETTERS[1:6]) |
## Necessary packages (BioC-devel) | |
library(GenomicRanges) | |
library(S4Vectors) | |
## Class A using a DataFrameOrNULL in internalPos slot | |
# Register a class union named "DataFrameOrNULL" whose members are "DataFrame"
# and "NULL", so that something typed as the union can hold either.
setClassUnion(name = "DataFrameOrNULL", members = c("DataFrame", "NULL")) | |
setClass("A", | |
contains = "GRanges", | |
representation( |