bgall · May 19, 2022 08:02
diff --git a/benchmarking_dummy_functions b/benchmarking_dummy_functions
 # Compare the performance of two purportedly fast ways of generating dummy
 # variables from a character vector.
 # NOTE: fastDummies retains the original variable by default while modeldb
 # does not.

 # Dependencies
 library(microbenchmark)
 library(fastDummies)
 library(modeldb)
 library(dplyr)

 # Simulate data: 1 million rows, 1 variable with 26 unique values
 set.seed(123)
 df <- data.frame(x = sample(LETTERS, size = 1000000, replace = TRUE))

 # Benchmarks
 # Number of timed evaluations per expression. Named `n_times` rather than `t`
 # so the script does not mask base::t().
 n_times <- 1000

 # Time modeldb's dummy-variable generation on column `x`.
 result_modeldb <- microbenchmark::microbenchmark(
   z <- df %>% modeldb::add_dummy_variables(x = x, auto_values = TRUE),
   times = n_times
 )

 # Time fastDummies' dummy-column generation on the same data.
 result_fastdums <- microbenchmark::microbenchmark(
   z <- df %>% fastDummies::dummy_cols(remove_first_dummy = TRUE),
   times = n_times
 )

 # Compare: stack both timing tables. Printed explicitly so the comparison is
 # also shown when the script is run via source(), where top-level values are
 # not auto-printed.
 print(result_modeldb %>% bind_rows(result_fastdums))
	# Compare the performance of two purportedly fast ways of generating dummy
	# variables from a character vector.
	# NOTE: fastDummies retains the original variable by default while modeldb
	# does not.

	# Dependencies
	library(microbenchmark)
	library(fastDummies)
	library(modeldb)
	library(dplyr)

	# Simulate data: 1 million rows, 1 variable with 26 unique values
	set.seed(123)
	df <- data.frame(x = sample(LETTERS, size = 1000000, replace = TRUE))

	# Benchmarks
	# Number of timed evaluations per expression. Named `n_times` rather than `t`
	# so the script does not mask base::t().
	n_times <- 1000

	# Time modeldb's dummy-variable generation on column `x`.
	result_modeldb <- microbenchmark::microbenchmark(
	  z <- df %>% modeldb::add_dummy_variables(x = x, auto_values = TRUE),
	  times = n_times
	)

	# Time fastDummies' dummy-column generation on the same data.
	result_fastdums <- microbenchmark::microbenchmark(
	  z <- df %>% fastDummies::dummy_cols(remove_first_dummy = TRUE),
	  times = n_times
	)

	# Compare: stack both timing tables. Printed explicitly so the comparison is
	# also shown when the script is run via source(), where top-level values are
	# not auto-printed.
	print(result_modeldb %>% bind_rows(result_fastdums))
No results found