Skip to content

Instantly share code, notes, and snippets.

@EmilHvitfeldt
Last active November 9, 2024 19:30
Show Gist options
  • Save EmilHvitfeldt/2b435eceeb508c1cc273039a7c81e7f4 to your computer and use it in GitHub Desktop.
Save EmilHvitfeldt/2b435eceeb508c1cc273039a7c81e7f4 to your computer and use it in GitHub Desktop.
Figure out whether we are doing something wrong with LightGBM
library(tidymodels)
library(embed)
library(bonsai)

# Benchmark data: 50,000 rows resampled (with replacement) from `ames`, with
# `Street` renamed so it can serve as the binary outcome `survey_target`.
# The seed is fixed so every run of the script times the same rows.
set.seed(1234)
train_set <- ames |>
  slice_sample(n = 50000, replace = TRUE) |>
  rename(survey_target = Street)

# Preprocessing recipe: novel/unknown level handling and dummy encoding for
# the nominal predictors, a zero-variance filter, then the three-way
# interactions between the MS_*, Ne* and Bsmt_* columns.
lgb_model_recipe <- recipe(survey_target ~ ., data = train_set)
lgb_model_recipe <- step_novel(lgb_model_recipe, all_nominal_predictors())
lgb_model_recipe <- step_unknown(lgb_model_recipe, all_nominal_predictors())
lgb_model_recipe <- step_dummy(lgb_model_recipe, all_nominal_predictors())
lgb_model_recipe <- step_zv(lgb_model_recipe, all_predictors())
lgb_model_recipe <- step_interact(
  lgb_model_recipe,
  terms = ~starts_with("MS_"):starts_with("Ne"):starts_with("Bsmt_")
)

# Model specification
#
# `is_unbalance` is given as a direct engine argument. When it is wrapped in
# `params = list(...)`, the whole list is forwarded to lgb.train() as a nested
# `params` entry (see the "parsnip-like" call further down), which LightGBM
# does not recognise, so the class-imbalance handling is never switched on.
lgb_spec <- boost_tree() |>
  set_engine(
    "lightgbm",
    is_unbalance = TRUE,
    eval = "auc"
  ) |>
  set_mode("classification")

# Bundle the recipe and the model spec, then time the end-to-end workflow fit
# (preprocessing + model training) on the raw training data.
wf_spec <- workflow(preprocessor = lgb_model_recipe, spec = lgb_spec)

tictoc::tic("workflow")
wf_fit <- wf_spec |>
  fit(data = train_set)
tictoc::toc()
#> workflow: 21.974 sec elapsed

# Time the preprocessing on its own: estimate the recipe on the training set
# and return the processed training data.
tictoc::tic("recipe")
prepped <- bake(
  prep(lgb_model_recipe, training = train_set),
  new_data = NULL
)
tictoc::toc()
#> recipe: 11.779 sec elapsed

library(lightgbm)

# Time the parsnip/bonsai fit alone, on the already-processed data.
tictoc::tic("parsnip")
parsnip_fit <- fit_xy(
  lgb_spec,
  x = select(prepped, -survey_target),
  y = prepped$survey_target
)
tictoc::toc()
#> parsnip: 8.645 sec elapsed

# Time a direct LightGBM fit on the processed data, bypassing parsnip.
# The calls run inside capture.output() so console logging emitted while
# building the dataset and training is collected in `junk` instead of printed.
tictoc::tic(msg = "lightgbm")
junk <- utils::capture.output({
  # The binary objective expects numeric 0/1 labels. Passing the factor itself
  # hands LightGBM its 1/2 integer codes, so the levels are recoded here.
  # The label is taken from `prepped` so features and outcome come from the
  # same object.
  dtrain <- lgb.Dataset(
    as.matrix(prepped |> select(-survey_target)),
    label = as.integer(prepped$survey_target) - 1L
  )
  model <- lgb.train(
    params = list(
      objective = "binary",
      metric = "auc"
    ),
    data = dtrain
  )
})
tictoc::toc()
#> lightgbm: 6.251 sec elapsed

# Time a direct LightGBM fit using the parameter set the parsnip engine sends,
# to check whether those parameters explain the timing gap.
# The calls run inside capture.output() so console logging is collected in
# `junk` instead of printed.
tictoc::tic(msg = "lightgbm - parsnip-like")
junk <- utils::capture.output({
  # The binary objective expects numeric 0/1 labels. Passing the factor itself
  # hands LightGBM its 1/2 integer codes, so the levels are recoded here.
  # The label is taken from `prepped` so features and outcome come from the
  # same object.
  dtrain <- lgb.Dataset(
    as.matrix(prepped |> select(-survey_target)),
    label = as.integer(prepped$survey_target) - 1L
  )
  model <- lgb.train(
    params = list(
      num_iterations = 100,
      learning_rate = 0.1,
      max_depth = -1,
      feature_fraction_bynode = 1,
      min_data_in_leaf = 20,
      min_gain_to_split = 0,
      bagging_fraction = 1,
      # Passed as a top-level parameter: nested under a `params = list(...)`
      # entry it is not a parameter name LightGBM knows, so it had no effect.
      is_unbalance = TRUE,
      seed = 61689L,
      deterministic = TRUE,
      num_class = 1,
      objective = "binary"
    ),
    data = dtrain
  )
})
tictoc::toc()
#> lightgbm - parsnip-like: 6.071 sec elapsed

Created on 2024-11-09 with reprex v2.1.0

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment