Created
November 28, 2014 02:18
-
-
Save Ironholds/3829102e3fbc3a90a8c7 to your computer and use it in GitHub Desktop.
Benchmarks for my upcoming string anonymisation package.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(anonymise)
library(digest)
library(microbenchmark)
# Generate 30,000 random test strings to hash.
# Each string is 30 characters drawn without replacement from the 62
# alphanumeric characters (0-9, a-z, A-Z), so no character repeats within a
# string. Uniqueness across strings is not checked; it is merely overwhelmingly
# likely given the size of the sample space.
uniques <- vapply(
  seq_len(30000),
  function(i) paste(sample(c(0:9, letters, LETTERS), 30), collapse = ""),
  character(1)
)
# anonymise benchmark, MD5.
# The whole `uniques` vector is passed to c_anonymise() in a single call.
# NOTE(review): c_anonymise() is presumably exported by the anonymise package
# loaded at the top of the script -- confirm.
microbenchmark({
  anon_test_out <- c_anonymise(uniques, algorithm = "md5")
})
# Recorded result:
# Unit: milliseconds
#      min    lq     mean   median       uq      max neval
# 77.73334 80.31 81.11888 80.88899 81.73885 85.85944   100
# digest benchmark, MD5, no serialisation, for-loop.
# digest() is called once per element of `uniques`; serialize = FALSE is passed
# so the string itself is hashed rather than its serialised R representation.
# The output vector is preallocated inside the timed expression, so that
# allocation is part of the measurement. The explicit loop is intentional: it
# is the variant being compared against the lapply() form.
microbenchmark({
  anon_test_out <- character(length(uniques))
  for (i in seq_along(uniques)) {
    anon_test_out[i] <- digest(uniques[i], algo = "md5", serialize = FALSE)
  }
})
# Recorded result:
# Unit: seconds
#      min       lq     mean   median       uq      max neval
# 1.397521 1.400441 1.411395 1.402358 1.411605 1.461821   100
# digest benchmark, MD5, no serialisation, lapply.
# digest() is applied to each element of `uniques` via lapply() and the
# resulting list is flattened with unlist(); serialize = FALSE is passed so the
# string itself is hashed rather than its serialised R representation.
microbenchmark({
  anon_test_out <- unlist(
    lapply(uniques, digest, algo = "md5", serialize = FALSE)
  )
})
# Recorded result:
# Unit: seconds
#      min       lq    mean   median       uq      max neval
# 1.277343 1.318001 1.33729 1.336168 1.356127 1.414562   100
# anonymise benchmark, SHA1.
# The whole `uniques` vector is passed to c_anonymise() in a single call.
# NOTE(review): c_anonymise() is presumably exported by the anonymise package
# loaded at the top of the script -- confirm.
microbenchmark({
  anon_test_out <- c_anonymise(uniques, algorithm = "sha1")
})
# Recorded result:
# Unit: milliseconds
#     min       lq     mean   median       uq      max neval
# 89.7401 91.42565 95.07778 93.92603 96.38657 124.4629   100
# digest benchmark, SHA1, no serialisation, for-loop.
# digest() is called once per element of `uniques`; serialize = FALSE is passed
# so the string itself is hashed rather than its serialised R representation.
# The output vector is preallocated inside the timed expression, so that
# allocation is part of the measurement. The explicit loop is intentional: it
# is the variant being compared against the lapply() form.
microbenchmark({
  anon_test_out <- character(length(uniques))
  for (i in seq_along(uniques)) {
    anon_test_out[i] <- digest(uniques[i], algo = "sha1", serialize = FALSE)
  }
})
# Recorded result:
# Unit: seconds
#      min      lq     mean   median       uq      max neval
# 1.395969 1.41004 1.421917 1.414723 1.432235 1.482069   100
# digest benchmark, SHA1, no serialisation, lapply.
# digest() is applied to each element of `uniques` via lapply() and the
# resulting list is flattened with unlist(); serialize = FALSE is passed so the
# string itself is hashed rather than its serialised R representation.
microbenchmark({
  anon_test_out <- unlist(
    lapply(uniques, digest, algo = "sha1", serialize = FALSE)
  )
})
# Recorded result:
# Unit: seconds
#      min       lq     mean   median       uq      max neval
# 1.314194 1.369632 1.384138 1.382087 1.400361 1.480818   100
# anonymise benchmark, SHA256.
# The whole `uniques` vector is passed to c_anonymise() in a single call.
# NOTE(review): c_anonymise() is presumably exported by the anonymise package
# loaded at the top of the script -- confirm.
microbenchmark({
  anon_test_out <- c_anonymise(uniques, algorithm = "sha256")
})
# Recorded result:
# Unit: milliseconds
#      min       lq    mean   median       uq     max neval
# 140.0948 143.5231 145.382 144.3923 145.9739 195.133   100
# digest benchmark, SHA256, no serialisation, for-loop.
# digest() is called once per element of `uniques`; serialize = FALSE is passed
# so the string itself is hashed rather than its serialised R representation.
# The output vector is preallocated inside the timed expression, so that
# allocation is part of the measurement. The explicit loop is intentional: it
# is the variant being compared against the lapply() form.
microbenchmark({
  anon_test_out <- character(length(uniques))
  for (i in seq_along(uniques)) {
    anon_test_out[i] <- digest(uniques[i], algo = "sha256", serialize = FALSE)
  }
})
# Recorded result:
# Unit: seconds
#      min       lq     mean   median      uq      max neval
# 1.431169 1.476169 1.492009 1.495912 1.50535 1.556519   100
# digest benchmark, SHA256, no serialisation, lapply.
# digest() is applied to each element of `uniques` via lapply() and the
# resulting list is flattened with unlist(); serialize = FALSE is passed so the
# string itself is hashed rather than its serialised R representation.
microbenchmark({
  anon_test_out <- unlist(
    lapply(uniques, digest, algo = "sha256", serialize = FALSE)
  )
})
# Recorded result:
# Unit: seconds
#      min      lq     mean   median       uq      max neval
# 1.419841 1.45139 1.470209 1.462869 1.481413 1.728109   100
# anonymise benchmark, SHA512.
# The whole `uniques` vector is passed to c_anonymise() in a single call.
# NOTE(review): c_anonymise() is presumably exported by the anonymise package
# loaded at the top of the script -- confirm.
microbenchmark({
  anon_test_out <- c_anonymise(uniques, algorithm = "sha512")
})
# Recorded result:
# Unit: milliseconds
#      min       lq     mean   median       uq      max neval
# 258.1498 259.3524 260.6519 259.7233 260.6193 305.5327   100
# digest benchmark, SHA512, no serialisation, for-loop.
# digest() is called once per element of `uniques`; serialize = FALSE is passed
# so the string itself is hashed rather than its serialised R representation.
# The output vector is preallocated inside the timed expression, so that
# allocation is part of the measurement. The explicit loop is intentional: it
# is the variant being compared against the lapply() form.
microbenchmark({
  anon_test_out <- character(length(uniques))
  for (i in seq_along(uniques)) {
    anon_test_out[i] <- digest(uniques[i], algo = "sha512", serialize = FALSE)
  }
})
# Recorded result:
# Unit: seconds
#      min       lq     mean   median       uq      max neval
# 1.307411 1.316431 1.334668 1.328401 1.342659 1.481539   100
# digest benchmark, SHA512, no serialisation, lapply.
# digest() is applied to each element of `uniques` via lapply() and the
# resulting list is flattened with unlist(); serialize = FALSE is passed so the
# string itself is hashed rather than its serialised R representation.
microbenchmark({
  anon_test_out <- unlist(
    lapply(uniques, digest, algo = "sha512", serialize = FALSE)
  )
})
# Recorded result:
# Unit: seconds
#     min       lq     mean  median       uq      max neval
# 1.26656 1.302912 1.412134 1.31475 1.338408 1.993292   100
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment