Skip to content

Instantly share code, notes, and snippets.

@jesskfullwood
Last active May 7, 2018 14:17
Show Gist options
  • Save jesskfullwood/2349d8306c708d879d5689fe611daeea to your computer and use it in GitHub Desktop.
Save jesskfullwood/2349d8306c708d879d5689fe611daeea to your computer and use it in GitHub Desktop.
Write benchmarks - R and Rust
## R Output
[1] "generate:"
Time difference of 2.086496 mins
[1] "fwrite:"
Written 40.3% of 10000000 rows in 2 secs using 4 threads. anyBufferGrown=no; maxBuffUsed=47%. Finished in 2
Written 66.5% of 10000000 rows in 3 secs using 4 threads. anyBufferGrown=no; maxBuffUsed=47%. Finished in 1
Written 92.6% of 10000000 rows in 4 secs using 4 threads. anyBufferGrown=no; maxBuffUsed=47%. Finished in 0
Time difference of 3.918112 secs
NULL
[1] "write.csv:"
Time difference of 1.7461 mins
NULL
## Rust output
Wrote 10000000 rows (682MB) in 26.585s (25.676MB/s)
# R version
$ R --version
R version 3.4.4 (2018-03-15) -- "Someone to Lean On"
# Start the R REPL and install the data.table package
$ R
> install.packages("data.table")
# Run
$ Rscript writetest.R
library("data.table")
## Generate `n` random strings, each made of 10 uppercase letters.
## Approach taken from
## https://stackoverflow.com/questions/42734547/generating-random-strings
##
## Letters are drawn one character position at a time (10 draws of `n`
## letters each) and then pasted together element-wise.
randWords <- function(n) {
    letterColumns <- lapply(
        seq_len(10),
        function(position) sample(LETTERS, n, replace = TRUE)
    )
    do.call(paste0, letterColumns)
}
## Build a data.table of `n` rows with six columns of random data:
##   first, sixth   - uniform random doubles (runif)
##   second, fifth  - random 10-letter strings (randWords)
##   third, fourth  - integers sampled with replacement from
##                    -1000000..1000000 (both ends included)
## Columns are generated in declaration order.
genData <- function(n) {
    intPool <- -1000000L:1000000L

    firstCol  <- runif(n)
    secondCol <- randWords(n)
    thirdCol  <- sample(intPool, n, replace = TRUE)
    fourthCol <- sample(intPool, n, replace = TRUE)
    fifthCol  <- randWords(n)
    sixthCol  <- runif(n)

    data.table(
        first  = firstCol,
        second = secondCol,
        third  = thirdCol,
        fourth = fourthCol,
        fifth  = fifthCol,
        sixth  = sixthCol
    )
}
## Create a fresh scratch directory named /tmp/writetest-<10 random letters>
## and return its path.
##
## Stops with an error if the directory could not be created. dir.create()
## on its own only warns and returns FALSE (including when the directory
## already exists), which would let the benchmark run through data
## generation before failing at the first write.
makeTmp <- function() {
    suffix <- randWords(1)[1]
    dir <- paste0("/tmp/writetest-", suffix)
    if (!dir.create(dir, recursive = TRUE)) {
        stop("could not create temporary directory: ", dir)
    }
    dir
}
## Call `f(...)`, print how long the call took, and return its result.
##
## The elapsed time is the difference between two Sys.time() readings taken
## immediately before and after the call; it is printed, not returned.
timeIt <- function(f, ...) {
    began <- Sys.time()
    result <- f(...)
    elapsed <- Sys.time() - began
    print(elapsed)
    result
}
## Benchmark driver: generate 10,000,000 rows once, then time writing them
## with data.table::fwrite and with base write.csv into a scratch directory.
outDir <- makeTmp()

print("generate:")
benchData <- timeIt(genData, 10000000)

print("fwrite:")
fwritePath <- file.path(outDir, "fwrite.csv")
timeIt(fwrite, benchData, file = fwritePath)

print("write.csv:")
writeCsvPath <- file.path(outDir, "write.csv")
timeIt(write.csv, benchData, file = writeCsvPath)

## Remove the scratch directory; delete this line to inspect the output files.
unlink(outDir, recursive = TRUE)
extern crate rand;
extern crate csv;
extern crate tempdir;
use rand::Rng;
use std::time::{Instant, Duration};
use std::fs::File;
const NROWS: usize = 10_000_000;
fn main() {
let tmp = tempdir::TempDir::new("writetest").unwrap();
let path = tmp.path().join("write-test.csv");
let data = gen_data(NROWS);
let mut writer = csv::Writer::from_path(&path).unwrap();
let start = Instant::now();
for record in data {
writer.serialize(record).unwrap();
}
writer.flush().unwrap();
let duration = Instant::now() - start;
let duration_flt = duration.as_secs() as f64 + (duration.subsec_nanos() as f64 / 1e9);
let f = File::open(&path).unwrap();
let bytes_written = f.metadata().unwrap().len();
let rate = bytes_written as f64 / duration_flt / (1024.0 * 1024.0);
println!("Wrote {} rows ({}MB) in {:.03}s ({:.03}MB/s)", NROWS, bytes_written / (1024 * 1024), duration_flt, rate);
}
/// Builds `n` random records of the shape
/// `(f64, String, i32, i8, String, f64)`.
///
/// The two `String` fields come from `rand_str`; the `i32` field is drawn
/// with `gen_range(-1_000_000, 1_000_000)`; the remaining fields use
/// `rng.gen()` for their respective types. Fields are generated left to
/// right for each record.
///
/// NOTE(review): the R script in this gist samples its fourth column from
/// the same -1000000..1000000 range as the third, whereas here the fourth
/// field is an `i8` — confirm whether that difference is intentional.
fn gen_data(n: usize) -> Vec<(f64, String, i32, i8, String, f64)> {
    let mut rng = rand::thread_rng();
    let mut rows = Vec::with_capacity(n);
    for _ in 0..n {
        let first: f64 = rng.gen();
        let second = rand_str(&mut rng);
        let third: i32 = rng.gen_range(-1_000_000, 1_000_000);
        let fourth: i8 = rng.gen();
        let fifth = rand_str(&mut rng);
        let sixth: f64 = rng.gen();
        rows.push((first, second, third, fourth, fifth, sixth));
    }
    rows
}
/// Returns a 10-character string whose characters are each drawn from
/// `rng` via `gen_range(b'a', b'z' + 1)` and converted from byte to `char`.
fn rand_str(rng: &mut rand::ThreadRng) -> String {
    const LEN: usize = 10;
    (0..LEN)
        .map(|_| char::from(rng.gen_range(b'a', b'z' + 1)))
        .collect()
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment