-
-
Save venescu/5747aeb7e4962041557778a02c52da7c to your computer and use it in GitHub Desktop.
# Automated machine learning with H2O.ai in R
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Install H2O packages ----
# Detach and remove any existing h2o package first, so that the build
# installed below is the one that gets loaded.
if ("package:h2o" %in% search()) {
  detach("package:h2o", unload = TRUE)
}
if ("h2o" %in% rownames(installed.packages())) {
  remove.packages("h2o")
}

# Install the listed dependencies only when they are not already present.
# The installed-package table is scanned once rather than once per package.
pkgs <- c("RCurl", "jsonlite")
missing_pkgs <- setdiff(pkgs, rownames(installed.packages()))
for (pkg in missing_pkgs) {
  install.packages(pkg)
}

# Install h2o from source, using the H2O release repository.
# NOTE(review): the repository URL is plain HTTP; consider switching to HTTPS
# once confirmed that the host serves it.
install.packages(
  "h2o",
  type = "source",
  repos = "http://h2o-release.s3.amazonaws.com/h2o/latest_stable_R"
)

# Load the package, start (or connect to) an H2O instance, and run the
# packaged k-means demo.
library(h2o)
localH2O <- h2o.init()
demo(h2o.kmeans)
# AutoML example ----
library(h2o)
h2o.init()

# Import a sample binary outcome train/test set into H2O
train <- h2o.importFile(
  "https://s3.amazonaws.com/erin-data/higgs/higgs_train_10k.csv"
)
test <- h2o.importFile(
  "https://s3.amazonaws.com/erin-data/higgs/higgs_test_5k.csv"
)

# Identify predictors and response: every column except `y` is a predictor
y <- "response"
x <- setdiff(names(train), y)

# For binary classification, response should be a factor
train[, y] <- as.factor(train[, y])
test[, y] <- as.factor(test[, y])
# NOTE(review): `test` is prepared here but is not passed to h2o.automl() or
# used anywhere else in the visible script.

# Run AutoML for 20 base models; `seed` is fixed at 1.
# NOTE(review): the original comment said the run is limited to 1 hour max
# runtime by default. That depends on the installed H2O version's
# `max_runtime_secs` default -- confirm against the package documentation.
aml <- h2o.automl(
  x = x,
  y = y,
  training_frame = train,
  max_models = 20,
  seed = 1
)

# View the AutoML Leaderboard
lb <- aml@leaderboard
print(lb, n = nrow(lb))  # Print all rows instead of default (6 rows)
# model_id auc logloss mean_per_class_error rmse mse | |
# 1 StackedEnsemble_AllModels_AutoML_20181210_150447 0.7895453 0.5516022 0.3250365 0.4323464 0.1869234 | |
# 2 StackedEnsemble_BestOfFamily_AutoML_20181210_150447 0.7882530 0.5526024 0.3239841 0.4328491 0.1873584 | |
# 3 XGBoost_1_AutoML_20181210_150447 0.7846510 0.5575305 0.3254707 0.4349489 0.1891806 | |
# 4 XGBoost_grid_1_AutoML_20181210_150447_model_4 0.7835232 0.5578542 0.3188188 0.4352486 0.1894413 | |
# 5 XGBoost_grid_1_AutoML_20181210_150447_model_3 0.7830043 0.5596125 0.3250808 0.4357077 0.1898412 | |
# 6 XGBoost_2_AutoML_20181210_150447 0.7813603 0.5588797 0.3470738 0.4359074 0.1900153 | |
# 7 XGBoost_3_AutoML_20181210_150447 0.7808475 0.5595886 0.3307386 0.4361295 0.1902090 | |
# 8 GBM_5_AutoML_20181210_150447 0.7808366 0.5599029 0.3408479 0.4361915 0.1902630 | |
# 9 GBM_2_AutoML_20181210_150447 0.7800361 0.5598060 0.3399258 0.4364149 0.1904580 | |
# 10 GBM_1_AutoML_20181210_150447 0.7798274 0.5608570 0.3350957 0.4366159 0.1906335 | |
# 11 GBM_3_AutoML_20181210_150447 0.7786685 0.5617903 0.3255378 0.4371886 0.1911339 | |
# 12 XGBoost_grid_1_AutoML_20181210_150447_model_2 0.7744105 0.5750165 0.3228112 0.4427003 0.1959836 | |
# 13 GBM_4_AutoML_20181210_150447 0.7714260 0.5697120 0.3374203 0.4410703 0.1945430 | |
# 14 GBM_grid_1_AutoML_20181210_150447_model_1 0.7697524 0.5725826 0.3443314 0.4424524 0.1957641 | |
# 15 GBM_grid_1_AutoML_20181210_150447_model_2 0.7543664 0.9185673 0.3558550 0.4966377 0.2466490 | |
# 16 DRF_1_AutoML_20181210_150447 0.7428924 0.5958832 0.3554027 0.4527742 0.2050045 | |
# 17 XRT_1_AutoML_20181210_150447 0.7420910 0.5993457 0.3565826 0.4531168 0.2053148 | |
# 18 DeepLearning_grid_1_AutoML_20181210_150447_model_2 0.7388505 0.6012286 0.3695292 0.4555318 0.2075092 | |
# 19 XGBoost_grid_1_AutoML_20181210_150447_model_1 0.7257836 0.6013126 0.3820490 0.4565541 0.2084417 | |
# 20 DeepLearning_1_AutoML_20181210_150447 0.6979292 0.6339217 0.3979403 0.4692373 0.2201836 | |
# 21 DeepLearning_grid_1_AutoML_20181210_150447_model_1 0.6847773 0.6694364 0.4081802 0.4799664 0.2303678 | |
# 22 GLM_grid_1_AutoML_20181210_150447_model_1 0.6826481 0.6385205 0.3972341 0.4726827 0.2234290 | |
# | |
# [22 rows x 6 columns] | |
# The leader model is stored in the `leader` slot of the AutoML object.
# Evaluating it at top level prints the model.
aml@leader
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment