Last active
May 7, 2018 14:17
-
-
Save jesskfullwood/2349d8306c708d879d5689fe611daeea to your computer and use it in GitHub Desktop.
Write benchmarks - R and Rust
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## R Output
[1] "generate:"
Time difference of 2.086496 mins
[1] "fwrite:"
Written 40.3% of 10000000 rows in 2 secs using 4 threads. anyBufferGrown=no; maxBuffUsed=47%. Finished in 2
Written 66.5% of 10000000 rows in 3 secs using 4 threads. anyBufferGrown=no; maxBuffUsed=47%. Finished in 1
Written 92.6% of 10000000 rows in 4 secs using 4 threads. anyBufferGrown=no; maxBuffUsed=47%. Finished in 0
Time difference of 3.918112 secs
NULL
[1] "write.csv:"
Time difference of 1.7461 mins
NULL
## Rust output
Wrote 10000000 rows (682MB) in 26.585s (25.676MB/s)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# R version
$ R --version
R version 3.4.4 (2018-03-15) -- "Someone to Lean On"
# Start R repl and install data.table package
$ R
> install.packages("data.table")
# Run
$ Rscript writetest.R
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library("data.table") | |
## Generate `n` random words, each made of 10 uppercase letters.
## Adapted from https://stackoverflow.com/questions/42734547/generating-random-strings
randWords <- function(n) {
  ## One length-`n` vector of random letters per character position ...
  letterColumns <- lapply(
    seq_len(10),
    function(i) sample(LETTERS, n, replace = TRUE)
  )
  ## ... pasted together element-wise to form the words.
  do.call(paste0, letterColumns)
}
## Build a data.table of `n` random rows with six columns:
## two uniform doubles, two random 10-letter words and two integers
## sampled (with replacement) from -1000000..1000000.
genData <- function(n) {
  intRange <- -1000000:1000000
  ## Columns are drawn in declaration order so the random stream is
  ## consumed in the same sequence as the column layout.
  firstCol <- runif(n)
  secondCol <- randWords(n)
  thirdCol <- sample(intRange, n, replace = TRUE)
  fourthCol <- sample(intRange, n, replace = TRUE)
  fifthCol <- randWords(n)
  sixthCol <- runif(n)
  data.table(
    first = firstCol,
    second = secondCol,
    third = thirdCol,
    fourth = fourthCol,
    fifth = fifthCol,
    sixth = sixthCol
  )
}
## Create a scratch directory named /tmp/writetest-<random word> and
## return its path.
makeTmp <- function() {
  suffix <- randWords(1)[1]
  tmpPath <- paste0("/tmp/writetest-", suffix)
  dir.create(tmpPath, recursive = TRUE)
  tmpPath
}
## Call `f(...)`, print how long the call took, and return its result.
timeIt <- function(f, ...) {
  startedAt <- Sys.time()
  result <- f(...)
  elapsed <- Sys.time() - startedAt
  print(elapsed)
  result
}
## Benchmark driver: generate the data once, then time writing it to CSV
## with data.table's fwrite and with base R's write.csv.
outDir <- makeTmp()

print("generate:")
benchData <- timeIt(genData, 10000000)

print("fwrite:")
timeIt(fwrite, benchData, file = file.path(outDir, "fwrite.csv"))

print("write.csv:")
timeIt(write.csv, benchData, file = file.path(outDir, "write.csv"))

## remove dir. remove this line if you wish to inspect the output
unlink(outDir, recursive = TRUE)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
extern crate rand; | |
extern crate csv; | |
extern crate tempdir; | |
use rand::Rng; | |
use std::time::{Instant, Duration}; | |
use std::fs::File; | |
const NROWS: usize = 10_000_000; | |
fn main() { | |
let tmp = tempdir::TempDir::new("writetest").unwrap(); | |
let path = tmp.path().join("write-test.csv"); | |
let data = gen_data(NROWS); | |
let mut writer = csv::Writer::from_path(&path).unwrap(); | |
let start = Instant::now(); | |
for record in data { | |
writer.serialize(record).unwrap(); | |
} | |
writer.flush().unwrap(); | |
let duration = Instant::now() - start; | |
let duration_flt = duration.as_secs() as f64 + (duration.subsec_nanos() as f64 / 1e9); | |
let f = File::open(&path).unwrap(); | |
let bytes_written = f.metadata().unwrap().len(); | |
let rate = bytes_written as f64 / duration_flt / (1024.0 * 1024.0); | |
println!("Wrote {} rows ({}MB) in {:.03}s ({:.03}MB/s)", NROWS, bytes_written / (1024 * 1024), duration_flt, rate); | |
} | |
fn gen_data(n: usize) -> Vec<(f64, String, i32, i8, String, f64)> { | |
let mut rng = rand::thread_rng(); | |
(0..n).map(|_| ( | |
rng.gen(), | |
rand_str(&mut rng), | |
rng.gen_range(-1_000_000, 1_000_000), | |
rng.gen(), | |
rand_str(&mut rng), | |
rng.gen() | |
)).collect() | |
} | |
fn rand_str(rng: &mut rand::ThreadRng) -> String { | |
let n = 10; | |
let mut out = String::with_capacity(n); | |
(0..n).for_each(|_| out.push(rng.gen_range('a' as u8, 'z' as u8 + 1) as char)); | |
out | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment