Last active
July 10, 2023 23:37
-
-
Save nischalshrestha/672125b1327770ab7061bbf6918b4a62 to your computer and use it in GitHub Desktop.
Meta explorations in R
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse) | |
string_code <- | |
" | |
diamonds |> | |
select(carat, cut, color, clarity, price) |> | |
group_by(color) |> | |
summarise(n = n(), price = mean(price)) | |
" | |
# split up a base pipe chain using simple regex, and return list of exprs | |
stringr::str_split(string_code, "\\|>") %>% | |
purrr::map(~ trimws(.x)) %>% | |
purrr::flatten_chr() %>% | |
purrr::map(~ rlang::parse_expr(.x)) | |
#> [[1]] | |
#> diamonds | |
#> | |
#> [[2]] | |
#> select(carat, cut, color, clarity, price) | |
#> | |
#> [[3]] | |
#> group_by(color) | |
#> | |
#> [[4]] | |
#> summarise(n = n(), price = mean(price)) | |
# LIMITATION: if student has |> in a string like this: | |
string_code <- | |
' | |
"This is a pipe: |>" |> | |
cat() | |
' | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# experimental way to split base |> calls | |
library(tidyverse) | |
# Recurse on a base pipe code and return a list of intermediate expressions. | |
# | |
# @param expr The parent expression | |
# @param top_expr The `character` of the first or top expression symbol/function | |
# @param expr_list The `[]` that maintains the intermediate expressions | |
# by default empty `[]` | |
# | |
# @return The `[expression]` | |
recurse_base_pipe <- function(expr, top_expr, expr_list = list()) { | |
if (length(expr) == 1) return(append(expr, expr_list)) | |
# grab the verb | |
verb_expr <- expr[[1]] | |
# base case if the verb is the first symbol/function call at the top | |
if (identical(verb_expr, top_expr)) { | |
return(append(expr, expr_list)) | |
} | |
# extract verb first argument expr (we will recurse on this) | |
first_arg_expr <- expr[[2]] | |
# extract arguments | |
args <- rlang::call_args(expr) | |
# if the first argument is not a call return | |
if (!length(args)) { | |
return(expr_list) | |
} | |
# if there are more than 1 arg, take args from 2nd and so on | |
if (length(args) > 1) { | |
args <- args[2:length(args)] | |
verb_call_without_first_arg <- rlang::call2(verb_expr, !!!args) | |
} else { | |
# otherwise, we just make a no argument call with the verb | |
verb_call_without_first_arg <- rlang::call2(verb_expr) | |
} | |
# set the new expr_list list | |
expr_list <- append(verb_call_without_first_arg, expr_list) | |
# recurse on the first argument expression | |
recurse_base_pipe(first_arg_expr, top_expr, expr_list) | |
} | |
# This splits a base pipe code into a list of its intermediate expressions. | |
# | |
# It relies on using `getParseData()` to determine what the first expression is | |
# at the top/beginning of the chain. For e.g., for `mtcars |> head()`, we | |
# determine `mtcars` to be the stopping point. | |
# | |
# Then it uses `recurse_base_pipe()` on the quoted version of the code | |
# to recursively figure out the intermediate expressions. | |
# | |
# @param string_code A `character()` of the code | |
# | |
# @return The `[expression]` | |
split_base_pipe <- function(string_code) { | |
parse_data <- getParseData(parse(text = string_code)) | |
# figure out what the starting expression of the chain is | |
top_expr <- parse_data[ | |
parse_data$token %in% c("SYMBOL", "SYMBOL_FUNCTION_CALL"), 'text' | |
][[1]] | |
top_expr <- rlang::sym(top_expr) | |
recurse_base_pipe( | |
rlang::parse_expr(string_code), | |
top_expr | |
) | |
} | |
# examples | |
# starting at a symbol | |
expr1 <- | |
"mtcars |> | |
group_by() |> | |
summarise(mean_mpg = mean(mpg))" | |
split_base_pipe(expr1) | |
#> [[1]] | |
#> mtcars | |
#> | |
#> [[2]] | |
#> group_by() | |
#> | |
#> [[3]] | |
#> summarise(mean_mpg = mean(mpg)) | |
# starting with a function call | |
expr2 <- | |
"data.frame( | |
fruit = c('apple', 'apple', 'orange', 'orange'), | |
cost = c(1.79, 2.79, 0.99, 1.50) | |
) |> | |
group_by(fruit) |> | |
summarise(mean_cost = mean(cost))" | |
split_base_pipe(expr2) | |
#> [[1]] | |
#> data.frame(fruit = c("apple", "apple", "orange", "orange"), cost = c(1.79, | |
#> 2.79, 0.99, 1.5)) | |
#> | |
#> [[2]] | |
#> group_by(fruit) | |
#> | |
#> [[3]] | |
#> summarise(mean_cost = mean(cost)) | |
# base pipes in args | |
expr3 <- | |
"foo('here is a string' |> sum()) |> | |
bar()" | |
split_base_pipe(expr3) | |
#> [[1]] | |
#> foo(sum("here is a string")) | |
#> | |
#> [[2]] | |
#> bar() | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment