# dim(preds) vs. dim(predict(...)): what the hell is LightGBM doing?
library(lightgbm)
library(tidyverse)

rm(list = ls())
# We load the default iris dataset shipped with R, recode Species to {0, 1, 2},
# keep the first two species, and take two rows per species
data(iris)
iris = as_data_frame(iris) %>%
  mutate(Species = as.numeric(factor(Species)) - 1) %>%
  filter(Species < 2) %>%
  group_by(Species) %>%
  dplyr::slice(1:2) %>%
  ungroup()

x = as.matrix(iris %>% select(-Species))
y = iris %>% pull(Species)

dtrain <- lgb.Dataset(data = x, label = y)
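# Quick sanity check: after the filtering above, x is a 4 x 4 feature matrix
# and y holds the labels 0, 0, 1, 1
stopifnot(nrow(x) == 4, ncol(x) == 4, all(y == c(0, 0, 1, 1)))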
custom_multiclass_obj = function(preds, dtrain) {
  labels = getinfo(dtrain, "label")
  # print preds as they arrive from LightGBM
  print(data_frame(preds))
  # preds arrives as a flat vector; reshape it into a matrix with rows
  # corresponding to samples and columns corresponding to classes
  preds = matrix(preds, nrow = length(labels))
  # to prevent overflow, normalize preds by row
  preds = preds - apply(preds, 1, max)
  prob = exp(preds) / rowSums(exp(preds))
  # compute gradient: softmax probability minus one-hot label indicator
  grad = prob
  grad[cbind(1:length(labels), labels + 1)] = grad[cbind(1:length(labels), labels + 1)] - 1
  # compute hessian (approximation)
  hess = 2 * prob * (1 - prob)
  return(list(grad = grad, hess = hess))
}
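# Aside on the reshape above: R's matrix() fills column-major, so
# matrix(preds, nrow = length(labels)) assumes the flat preds vector is grouped
# by class (all class-0 scores first, then all class-1 scores). A toy
# illustration with made-up scores for two samples and two classes:
matrix(c(1, 2, 10, 20), nrow = 2)
#      [,1] [,2]
# [1,]    1   10   <- sample 1: class-0 score, class-1 score
# [2,]    2   20   <- sample 2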
# define custom metric (multiclass log loss)
custom_multiclass_metric = function(preds, dtrain) {
  labels = getinfo(dtrain, "label")
  preds = matrix(preds, nrow = length(labels))
  preds = preds - apply(preds, 1, max)
  prob = exp(preds) / rowSums(exp(preds))
  return(list(name = "error",
              value = -mean(log(prob[cbind(1:length(labels), labels + 1)])),
              higher_better = FALSE))
}
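# Note that subtracting the row-wise max does not change the probabilities
# (softmax is shift-invariant); it only guards against overflow in exp().
# A quick numeric check on an arbitrary score vector:
z = c(2, -1, 0.5)
all.equal(exp(z) / sum(exp(z)), exp(z - max(z)) / sum(exp(z - max(z))))
# should be TRUE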
# init_score needs length num_samples * num_class = 4 * 2 = 8
setinfo(dtrain, "init_score", rep(0, 8))
# Estimate model with nrounds = 2 and check out the predictions at the beginning
# of round 2 (the objective is also called at the beginning of round 1, where
# preds equals the zero init_score; that first print is omitted below):
model1 <- lgb.train(list(),
                    dtrain,
                    nrounds = 2,
                    min_data = 1,
                    learning_rate = 1,
                    num_leaves = 2,
                    objective = custom_multiclass_obj,
                    eval = custom_multiclass_metric,
                    num_class = 2)
# # A tibble: 8 x 1
#    preds
#    <dbl>
# 1  0.333
# 2  0.333
# 3 -1.000
# 4  0.333
# 5 -0.333
# 6 -0.333
# 7  1.000
# 8 -0.333
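# Reading the tibble above (values copied from the printed output): the vector
# is grouped by class, not by sample. Entries 1-4 are the class-0 scores for
# samples 1-4 and entries 5-8 the class-1 scores:
preds_round2 = c(0.333, 0.333, -1.000, 0.333, -0.333, -0.333, 1.000, -0.333)
matrix(preds_round2, nrow = 4)  # rows = samples, columns = classes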
# Estimate model with nrounds = 1 and check out the final predictions; because
# the learning rate is equal to one, these should be identical to the
# predictions from above
model2 <- lgb.train(list(),
                    dtrain,
                    nrounds = 1,
                    min_data = 1,
                    learning_rate = 1,
                    num_leaves = 2,
                    objective = custom_multiclass_obj,
                    eval = custom_multiclass_metric,
                    num_class = 2)
print(data_frame(predict(model2, x)))
# # A tibble: 8 x 1
#   `predict(model2, x)`
#   <dbl>
# 1  0.333
# 2 -0.333
# 3  0.333
# 4 -0.333
# 5 -1.000
# 6  1.000
# 7  0.333
# 8 -0.333
# Note that the order is wrong: it does not match the preds vector printed
# inside the custom objective. We need reshape = TRUE:
print(data_frame(reshape = predict(model2, x, reshape = TRUE) %>% as.vector()))
# # A tibble: 8 x 1
#   reshape
#   <dbl>
# 1  0.333
# 2  0.333
# 3 -1.000
# 4  0.333
# 5 -0.333
# 6 -0.333
# 7  1.000
# 8 -0.333
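# The two orderings are related by a transpose: predict() without reshape
# returns a sample-major vector (the class index varies fastest), while the
# flattened reshape = TRUE matrix, like the preds vector inside the custom
# objective, is class-major. A sketch, assuming num_class = 2 as above:
num_class = 2
v = predict(model2, x)  # s1_c0, s1_c1, s2_c0, s2_c1, ...
all.equal(as.vector(t(matrix(v, nrow = num_class))),
          predict(model2, x, reshape = TRUE) %>% as.vector())
# should be TRUE, given the two orderings printed above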
# Conclusion: the predictions that we obtain from R's predict function and the
# vector called `preds` inside the custom objective function are stored in
# different orders!
# This also affects how we deal with base margins! Base margins follow the
# logic of R's predict function.
# All of this is due to the lovely helper function RowFunctionFromDenseMatrix
# in the C API, which appears to be applied inconsistently.
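# A hypothetical convenience helper (not part of LightGBM) that converts the
# sample-major vector returned by predict() into the class-major order used by
# preds inside the custom objective:
to_class_major = function(pred_vec, num_class) {
  as.vector(t(matrix(pred_vec, nrow = num_class)))
}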
# Check base margins
# Estimate model with nrounds = 2 and check out the predictions at the beginning of round 2:
model1 <- lgb.train(list(),
                    dtrain,
                    nrounds = 2,
                    min_data = 1,
                    learning_rate = 1,
                    num_leaves = 2,
                    objective = custom_multiclass_obj,
                    eval = custom_multiclass_metric,
                    num_class = 2)
predict(model1, data = x, num_iteration = 1, reshape = TRUE) %>% as.vector()
# [1] 0.3333333 0.3333333 -1.0000000 0.3333333 -0.3333333 -0.3333333 1.0000000 -0.3333333
# These are the round-1 predictions from above, rounded to three decimals and
# kept in the reshape = TRUE order:
base_margin = c(0.333, 0.333, -1.000, 0.333, -0.333, -0.333, 1.000, -0.333)
setinfo(dtrain, "init_score", base_margin)
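# Equivalently (up to the rounding above), the init_score could be taken
# straight from predict() with reshape = TRUE; left commented out so the
# rounded base_margin above is what actually gets used:
# setinfo(dtrain, "init_score",
#         predict(model1, data = x, num_iteration = 1, reshape = TRUE) %>% as.vector())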
# Estimate model with nrounds = 1 and check out the final predictions; because
# the learning rate is equal to one, these should be identical to the
# predictions from above, up to the rounding in base_margin
model2 <- lgb.train(list(),
                    dtrain,
                    nrounds = 1,
                    min_data = 1,
                    learning_rate = 1,
                    num_leaves = 2,
                    objective = custom_multiclass_obj,
                    eval = custom_multiclass_metric,
                    num_class = 2)
print(data_frame(predict(model1, x, reshape = TRUE) %>% as.vector()))
print(data_frame(predict(model2, x, reshape = TRUE) %>% as.vector() + base_margin))
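# Up to the rounding in base_margin, the two printed columns should agree; a
# loose tolerance absorbs the rounding error:
all.equal(predict(model1, x, reshape = TRUE) %>% as.vector(),
          predict(model2, x, reshape = TRUE) %>% as.vector() + base_margin,
          tolerance = 1e-2)
# should be TRUE if the base-margin logic described above is right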