Created
March 3, 2019 21:27
-
-
Save sgibb/8c8ace205ae566680cd08022b49fc790 to your computer and use it in GitHub Desktop.
simple on-disk vector
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
setClass("ondiskvec", | |
slots=list(path="character", n="numeric"), | |
prototype=list(path=character(), n=numeric()) | |
) | |
ondiskvec <- function(x, path=tempfile()) { | |
writeBin(as.double(x), path, size=8L) | |
new("ondiskvec", path=path, n=length(x)) | |
} | |
setMethod("length", "ondiskvec", function(x)x@n) | |
setMethod(f="[", | |
signature=signature(x="ondiskvec", i="numeric", j="missing"), | |
definition=function(x, i, j, ..., drop=FALSE) { | |
if (length(i) == 1L) { | |
f <- file(x@path, "rb") | |
on.exit(close(f)) | |
if (i > 1L) { | |
seek(f, where=(i - 1L) * 8L) | |
} | |
readBin(f, "double", n=1L, size=8L) | |
} else { | |
# that's stupid but not used often | |
readBin(x@path, "double", n=x@n, size=8L)[i] | |
} | |
}) | |
setMethod(f="[", | |
signature=signature(x="ondiskvec", i="missing", j="missing"), | |
definition=function(x, i, j, ..., drop=FALSE) { | |
readBin(x@path, "double", n=x@n, size=8L) | |
}) | |
setReplaceMethod(f="[", | |
signature=signature(x="ondiskvec", i="missing", j="missing"), | |
definition=function(x, i, j, ..., value) { | |
if (length(value) != x@n) { | |
stop("Length of 'value' doesn't match length of 'x'.") | |
} | |
writeBin(as.double(value), x@path, size=8L) | |
x | |
}) | |
x <- 1:10 | |
odv <- ondiskvec(x) | |
odv[1] | |
odv[9] | |
odv[10] | |
odv[] | |
odv[] <- 11:20 | |
odv[] | |
library("matter") | |
library("microbenchmark") | |
x <- sample(1e5) | |
m <- matter_vec(x) | |
o <- ondiskvec(x) | |
microbenchmark(m[], o[]) | |
# Unit: microseconds | |
# expr min lq mean median uq max neval | |
# m[] 370.232 502.118 637.9967 625.6630 676.8755 3272.946 100 | |
# o[] 209.726 269.250 398.4346 334.7615 374.1130 3299.462 100 | |
microbenchmark(m[1L], o[1L]) | |
# Unit: microseconds | |
# expr min lq mean median uq max neval | |
# m[1L] 43.511 44.9160 87.74990 47.020 48.3285 3927.426 100 | |
# o[1L] 18.862 19.9515 31.38386 22.748 23.8100 884.608 100 | |
microbenchmark(m[length(m)], o[length(o)]) | |
# Unit: microseconds | |
# expr min lq mean median uq max neval | |
# m[length(m)] 44.719 47.6770 59.22049 50.5620 55.709 182.632 100 | |
# o[length(o)] 27.994 30.4715 38.05458 34.2625 36.499 165.515 100 | |
i <- sample(1e5, 1e2) | |
microbenchmark(m[i], o[i]) | |
# Unit: microseconds | |
# expr min lq mean median uq max neval | |
# m[i] 199.069 237.216 295.7426 274.1865 330.4935 602.635 100 | |
# o[i] 226.902 265.557 423.4638 301.9010 382.5500 4896.891 100 | |
microbenchmark(m[]<-x, o[]<-x, x[]<-x) | |
# Unit: microseconds | |
# expr min lq mean median uq max neval | |
# m[] <- x 845.461 1082.198 1365.1878 1191.546 1395.302 5635.026 100 | |
# o[] <- x 859.788 1306.372 2660.3586 2019.383 3288.673 22707.037 100 | |
# x[] <- x 331.224 364.996 541.4231 470.652 614.356 2288.032 100 | |
microbenchmark(sum(m), sum(o[])) | |
# Unit: microseconds | |
# expr min lq mean median uq max neval | |
# sum(m) 2799.899 2983.1655 3237.6274 3146.5525 3279.2650 7326.494 100 | |
# sum(o[]) 272.509 306.9125 607.9439 451.4855 569.7225 6629.260 100 | |
f <- function(y){ y[] <- y[] + 1L; y} | |
microbenchmark(f(m), f(o), f(x)) | |
# Unit: microseconds | |
# expr min lq mean median uq max neval | |
# f(m) 1466.246 1743.6230 2113.5229 1993.194 2264.881 4326.144 100 | |
# f(o) 1074.079 1370.5895 2301.5754 1675.156 3423.842 5997.356 100 | |
# f(x) 551.204 622.2855 934.9378 797.703 987.192 6961.754 100 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment