library(tidyverse)
# overall, the approach is to try and make a function that takes in a vector
# and returns some output, in this case a list
# Build one 0/1 indicator ("dummy") vector per distinct non-missing value
# of `vec`.
#
# Arguments:
#   vec      - the vector to encode. Missing values never become a level.
#   col_name - prefix for the output names. When NULL (the default), the
#              expression supplied as `vec` is deparsed and used instead.
#
# Returns a named list of numeric vectors, each the same length as `vec`,
# named "<col_name>_<value>" in order of first appearance. Positions where
# `vec` is NA are 0 in every element. An empty or all-NA `vec` gives an
# empty list rather than an error.
create_dummies <- function(vec, col_name = NULL) {
  # same idea as the "null pipe": col_name %||% deparse1(substitute(vec))
  # deparse1() always gives a single string; deparse() can split a long
  # expression over several strings, which would produce too many names
  if (is.null(col_name)) {
    col_name <- deparse1(substitute(vec))
  }
  # distinct values, with missings dropped up front
  unique_vals <- unique(vec[!is.na(vec)])
  dummy_list <- lapply(
    unique_vals,
    # `vec == x` is NA where vec is missing; the is.na() guard turns those
    # positions into FALSE so they become 0 instead of NA
    \(x) as.numeric(!is.na(vec) & vec == x)
  )
  # set the names to be named after the unique values
  # this could also be converted into a number sequence with
  # seq_along(unique_vals) instead of "unique_vals" here
  # recycle0 = TRUE makes paste0() return character(0) when there are no
  # unique values, so naming an empty list does not fail
  setNames(
    dummy_list,
    paste0(col_name, "_", unique_vals, recycle0 = TRUE)
  )
}
# Example input: a character vector with three distinct values and two NAs
x <- c("1", NA, "2", NA, "3")
# With no col_name supplied, the name of the variable passed in ("x") is
# used as the prefix; the NA positions are 0 in every dummy
create_dummies(x)
#> $x_1
#> [1] 1 0 0 0 0
#>
#> $x_2
#> [1] 0 0 1 0 0
#>
#> $x_3
#> [1] 0 0 0 0 1
# The prefix follows whatever the input variable is called
any_name <- c("1", NA, "2", NA, "3")
create_dummies(any_name)
#> $any_name_1
#> [1] 1 0 0 0 0
#>
#> $any_name_2
#> [1] 0 0 1 0 0
#>
#> $any_name_3
#> [1] 0 0 0 0 1
# Passing col_name explicitly overrides the variable name as the prefix
create_dummies(x, "zz")
#> $zz_1
#> [1] 1 0 0 0 0
#>
#> $zz_2
#> [1] 0 0 1 0 0
#>
#> $zz_3
#> [1] 0 0 0 0 1
# Add dummies to a data frame
# Append dummy (0/1 indicator) columns for selected columns of `data`.
#
# Arguments:
#   data - a data frame or tibble.
#   cols - a tidyselect specification of the columns to encode; the default
#          picks every character column.
#
# Returns `data` with the dummy columns produced by create_dummies() bound
# on after the existing columns, named "<column>_<value>".
add_dummies <- function(data, cols = where(is.character)) {
  # resolve the selection to plain column names
  target_cols <- names(select(data, {{ cols }}))
  # one named list of dummies per selected column
  per_column <- map(target_cols, \(nm) create_dummies(data[[nm]], nm))
  # flatten the list-of-lists one level, then attach to the original data
  bind_cols(data, list_flatten(per_column))
}
# examples
# Example tibble with four character columns; x and x2 contain NAs
dat <- tibble(
x = c("1", NA, "2", NA, "3"),
x2 = c("1", NA, "1", NA, "3"),
y = c("A", "B", "A", "B", "C"),
w = c("X", "Y", "X", "Y", "Z")
)
# With no `cols` argument every character column is encoded (here all
# four), adding 11 dummy columns to the original 4
dat |>
add_dummies()
#> # A tibble: 5 × 15
#> x x2 y w x_1 x_2 x_3 x2_1 x2_3 y_A y_B y_C w_X
#> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 1 1 A X 1 0 0 1 0 1 0 0 1
#> 2 <NA> <NA> B Y 0 0 0 0 0 0 1 0 0
#> 3 2 1 A X 0 1 0 1 0 1 0 0 1
#> 4 <NA> <NA> B Y 0 0 0 0 0 0 1 0 0
#> 5 3 3 C Z 0 0 1 0 1 0 0 1 0
#> # ℹ 2 more variables: w_Y <dbl>, w_Z <dbl>
# Restrict the encoding to specific columns with a tidyselect expression
dat |>
add_dummies(c(x, y))
#> # A tibble: 5 × 10
#> x x2 y w x_1 x_2 x_3 y_A y_B y_C
#> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 1 1 A X 1 0 0 1 0 0
#> 2 <NA> <NA> B Y 0 0 0 0 1 0
#> 3 2 1 A X 0 1 0 1 0 0
#> 4 <NA> <NA> B Y 0 0 0 0 1 0
#> 5 3 3 C Z 0 0 1 0 0 1
# A single column: x2 has two distinct non-missing values, so two dummies
dat |>
add_dummies(c(x2))
#> # A tibble: 5 × 6
#> x x2 y w x2_1 x2_3
#> <chr> <chr> <chr> <chr> <dbl> <dbl>
#> 1 1 1 A X 1 0
#> 2 <NA> <NA> B Y 0 0
#> 3 2 1 A X 1 0
#> 4 <NA> <NA> B Y 0 0
#> 5 3 3 C Z 0 1
# Created on 2025-07-17 with reprex v2.1.1
# Session info
# Print the R version, platform details and package versions in use, so the
# environment that produced the output above is on record
sessioninfo::session_info()
#> ─ Session info ───────────────────────────────────────────────────────────────
#> setting value
#> version R version 4.5.1 (2025-06-13)
#> os macOS Sonoma 14.5
#> system aarch64, darwin20
#> ui X11
#> language (EN)
#> collate en_US.UTF-8
#> ctype en_US.UTF-8
#> tz Australia/Brisbane
#> date 2025-07-17
#> pandoc 3.4 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/aarch64/ (via rmarkdown)
#> quarto 1.7.31 @ /usr/local/bin/quarto
#>
#> ─ Packages ───────────────────────────────────────────────────────────────────
#> package * version date (UTC) lib source
#> cli 3.6.5 2025-04-23 [1] CRAN (R 4.5.0)
#> digest 0.6.37 2024-08-19 [1] CRAN (R 4.5.0)
#> dplyr * 1.1.4 2023-11-17 [1] CRAN (R 4.5.0)
#> evaluate 1.0.4 2025-06-18 [1] CRAN (R 4.5.0)
#> farver 2.1.2 2024-05-13 [1] CRAN (R 4.5.0)
#> fastmap 1.2.0 2024-05-15 [1] CRAN (R 4.5.0)
#> forcats * 1.0.0 2023-01-29 [1] CRAN (R 4.5.0)
#> fs 1.6.6 2025-04-12 [1] CRAN (R 4.5.0)
#> generics 0.1.4 2025-05-09 [1] CRAN (R 4.5.0)
#> ggplot2 * 3.5.2 2025-04-09 [1] CRAN (R 4.5.0)
#> glue 1.8.0 2024-09-30 [1] CRAN (R 4.5.0)
#> gtable 0.3.6 2024-10-25 [1] CRAN (R 4.5.0)
#> hms 1.1.3 2023-03-21 [1] CRAN (R 4.5.0)
#> htmltools 0.5.8.1 2024-04-04 [1] CRAN (R 4.5.0)
#> knitr 1.50 2025-03-16 [1] CRAN (R 4.5.0)
#> lifecycle 1.0.4 2023-11-07 [1] CRAN (R 4.5.0)
#> lubridate * 1.9.4 2024-12-08 [1] CRAN (R 4.5.0)
#> magrittr 2.0.3 2022-03-30 [1] CRAN (R 4.5.0)
#> pillar 1.11.0 2025-07-04 [1] CRAN (R 4.5.0)
#> pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 4.5.0)
#> purrr * 1.0.4.9000 2025-05-04 [1] Github (tidyverse/purrr@9c8beb4)
#> R6 2.6.1 2025-02-15 [1] CRAN (R 4.5.0)
#> RColorBrewer 1.1-3 2022-04-03 [1] CRAN (R 4.5.0)
#> readr * 2.1.5 2024-01-10 [1] CRAN (R 4.5.0)
#> reprex 2.1.1 2024-07-06 [1] CRAN (R 4.5.0)
#> rlang 1.1.6 2025-04-11 [1] CRAN (R 4.5.0)
#> rmarkdown 2.29 2024-11-04 [1] CRAN (R 4.5.0)
#> rstudioapi 0.17.1 2024-10-22 [1] CRAN (R 4.5.0)
#> scales 1.4.0 2025-04-24 [1] CRAN (R 4.5.0)
#> sessioninfo 1.2.3 2025-02-05 [1] CRAN (R 4.5.0)
#> stringi 1.8.7 2025-03-27 [1] CRAN (R 4.5.0)
#> stringr * 1.5.1 2023-11-14 [1] CRAN (R 4.5.0)
#> tibble * 3.3.0 2025-06-08 [1] CRAN (R 4.5.0)
#> tidyr * 1.3.1 2024-01-24 [1] CRAN (R 4.5.0)
#> tidyselect 1.2.1 2024-03-11 [1] CRAN (R 4.5.0)
#> tidyverse * 2.0.0 2023-02-22 [1] CRAN (R 4.5.0)
#> timechange 0.3.0 2024-01-18 [1] CRAN (R 4.5.0)
#> tzdb 0.5.0 2025-03-15 [1] CRAN (R 4.5.0)
#> utf8 1.2.6 2025-06-08 [1] CRAN (R 4.5.0)
#> vctrs 0.6.5 2023-12-01 [1] CRAN (R 4.5.0)
#> withr 3.0.2 2024-10-28 [1] CRAN (R 4.5.0)
#> xfun 0.52 2025-04-02 [1] CRAN (R 4.5.0)
#> yaml 2.3.10 2024-07-26 [1] CRAN (R 4.5.0)
#>
#> [1] /Users/nick_1/Library/R/arm64/4.5/library
#> [2] /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/library
#> * ── Packages attached to the search path.
#>
#> ──────────────────────────────────────────────────────────────────────────────