library(tidymodels)
library(embed)
library(bonsai)
# Build the benchmark training data from `ames`: expose the `Street` column
# under the name `survey_target` (it is used as the outcome below), then
# bootstrap the rows up to 50,000 observations so fitting takes long enough to
# time.
train_set <- ames |>
  rename(survey_target = Street) |>
  slice_sample(n = 50000, replace = TRUE)
# Preprocessing recipe with `survey_target` as the outcome and every other
# column as a predictor.
lgb_model_recipe <- recipe(survey_target ~ ., data = train_set) |>
  # Reserve factor levels for categories not seen at training time and for
  # missing values, then expand all nominal predictors to indicator columns.
  step_novel(all_nominal_predictors()) |>
  step_unknown(all_nominal_predictors()) |>
  step_dummy(all_nominal_predictors()) |>
  # Drop predictors that carry a single value.
  step_zv(all_predictors()) |>
  # Three-way interactions between the columns whose names start with "MS_",
  # "Ne" and "Bsmt_" (selected after the dummy expansion above).
  step_interact(
    terms = ~ starts_with("MS_"):starts_with("Ne"):starts_with("Bsmt_")
  )
# Model specification ----
# Boosted-tree classifier fitted with the lightgbm engine.
# NOTE(review): `is_unbalance` is supplied inside a nested `params` list rather
# than as a direct engine argument -- confirm the engine forwards it to
# LightGBM as intended.
lgb_spec <- boost_tree(mode = "classification") |>
  set_engine(
    engine = "lightgbm",
    params = list(is_unbalance = "true"),
    eval = "auc"
  )

# Bundle the preprocessing recipe and the model specification.
wf_spec <- workflow(preprocessor = lgb_model_recipe, spec = lgb_spec)
# Time the end-to-end workflow fit (recipe preparation + model training).
tictoc::tic("workflow")
wf_fit <- fit(wf_spec, data = train_set)
tictoc::toc()
#> workflow: 21.974 sec elapsed
# Time the preprocessing on its own: estimate the recipe on the training data
# and return the processed training set, which the later timings reuse.
tictoc::tic("recipe")
prepped <- bake(
  prep(lgb_model_recipe, training = train_set),
  new_data = NULL
)
tictoc::toc()
#> recipe: 11.779 sec elapsed
library(lightgbm)
# Time the parsnip model fit alone, feeding it the already-processed data
# through the x/y interface.
tictoc::tic("parsnip")
parsnip_fit <- fit_xy(
  lgb_spec,
  x = select(prepped, -survey_target),
  y = prepped$survey_target
)
tictoc::toc()
#> parsnip: 8.645 sec elapsed
# Time a direct lightgbm fit on the processed data, bypassing parsnip.
tictoc::tic(msg = "lightgbm")
# capture.output() swallows LightGBM's console logging; the captured text is
# not used afterwards.
junk <- utils::capture.output({
  # The binary objective needs numeric 0/1 labels. `survey_target` is a factor
  # (assumed two-level), and passing it as-is yields its integer codes 1/2,
  # which LightGBM treats as a single positive class. Shift the codes to 0/1,
  # and take the label from `prepped` so features and label come from the same
  # object.
  dtrain <- lgb.Dataset(
    data = as.matrix(select(prepped, -survey_target)),
    label = as.integer(prepped$survey_target) - 1L
  )
  model <- lgb.train(
    params = list(
      objective = "binary",
      metric = "auc"
    ),
    data = dtrain
  )
})
tictoc::toc()
#> lightgbm: 6.251 sec elapsed
# Time a direct lightgbm fit using a parameter list that mirrors the parsnip
# call above, to check whether the parameters explain the timing difference.
tictoc::tic(msg = "lightgbm - parsnip-like")
# capture.output() swallows LightGBM's console logging; the captured text is
# not used afterwards.
junk <- utils::capture.output({
  # The binary objective needs numeric 0/1 labels. `survey_target` is a factor
  # (assumed two-level), and passing it as-is yields its integer codes 1/2,
  # which LightGBM treats as a single positive class. Shift the codes to 0/1,
  # and take the label from `prepped` so features and label come from the same
  # object.
  dtrain <- lgb.Dataset(
    data = as.matrix(select(prepped, -survey_target)),
    label = as.integer(prepped$survey_target) - 1L
  )
  model <- lgb.train(
    params = list(
      num_iterations = 100,
      learning_rate = 0.1,
      max_depth = -1,
      feature_fraction_bynode = 1,
      min_data_in_leaf = 20,
      min_gain_to_split = 0,
      bagging_fraction = 1,
      # NOTE(review): kept nested to mirror the `params = list(...)` engine
      # argument in the model specification; as written this is a parameter
      # literally named `params`, not `is_unbalance` -- confirm that is the
      # intended comparison.
      params = list(is_unbalance = "true"),
      seed = 61689L,
      deterministic = TRUE,
      num_class = 1,
      objective = "binary"
    ),
    data = dtrain
  )
})
tictoc::toc()
#> lightgbm - parsnip-like: 6.071 sec elapsed
# Created on 2024-11-09 with reprex v2.1.0