Revisions
-
nagadomi revised this gist
Aug 7, 2015 . 1 changed file with 2 additions and 2 deletions. There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -39,7 +39,7 @@ def f(param): 'nthread': 4, 'silent': 1, 'n_estimators': 1000, 'objective': 'multi:softprob', 'max_depth': hp.quniform('max_depth', 1, 10, 1), 'learning_rate': hp.quniform("eta", 0.001, 0.1, 0.001), 'subsample': hp.quniform('subsample', 0.1, 1.0, 0.01), @@ -61,7 +61,7 @@ def f(param): print confusion_matrix(testY, predict) # **best param # {'colsample_bytree': 0.5, 'silent': 1, 'learning_rate': 0.063, 'nthread': 4, 'min_child_weight': 1.0, 'n_estimators': 168, 'subsample': 0.78, 'score': 0.1174425, 'objective': 'multi:softprob', 'max_depth': 3.0, 'gamma': 0.1} # [[25 0 0] # [ 0 23 2] # [ 0 0 25]] -
nagadomi revised this gist
Aug 7, 2015 . 1 changed file with 2 additions and 2 deletions. There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -49,7 +49,7 @@ def f(param): 'min_child_weight': hp.quniform('min_child_weight', 1.0, 20.0, 0.1) } fmin(f, param_space, algo=tpe.suggest, max_evals=200) params = sorted(params, key=lambda param : param['score']) best = params[0] print("**best param") @@ -61,7 +61,7 @@ def f(param): print confusion_matrix(testY, predict) # **best param # {'colsample_bytree': 0.5, 'silent': 1, 'learning_rate': 0.063, 'nthread': 4, 'min_child_weight': 1.0, 'n_estimators': 168, 'subsample': 0.78, 'score': 0.1174425, 'objective': 'binary:logistic', 'max_depth': 3.0, 'gamma': 0.1} # [[25 0 0] # [ 0 23 2] # [ 0 0 25]] -
nagadomi revised this gist
Aug 7, 2015 . 1 changed file with 2 additions and 2 deletions. There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -40,7 +40,7 @@ def f(param): 'silent': 1, 'n_estimators': 1000, 'objective': 'binary:logistic', 'max_depth': hp.quniform('max_depth', 1, 10, 1), 'learning_rate': hp.quniform("eta", 0.001, 0.1, 0.001), 'subsample': hp.quniform('subsample', 0.1, 1.0, 0.01), #'colsample_bytree': hp.quniform('colsample_bytree', 0.1, 1.0, 0.01), @@ -61,7 +61,7 @@ def f(param): print confusion_matrix(testY, predict) # **best param # {'colsample_bytree': 0.75, 'silent': 1, 'learning_rate': 0.028, 'nthread': 4, 'min_child_weight': 1.1, 'n_estimators': 276, 'subsample': 1.0, 'score': 0.1372892, 'objective': 'binary:logistic', 'max_depth': 9.0, 'gamma': 0.30000000000000004} # [[25 0 0] # [ 0 23 2] # [ 0 0 25]] -
nagadomi revised this gist
Aug 7, 2015 . 1 changed file with 1 addition and 1 deletion. There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -6,6 +6,7 @@ from sklearn import cross_validation from sklearn.metrics import confusion_matrix np.random.seed(71) iris = datasets.load_iris() trainX = iris.data[0::2,:] trainY = iris.target[0::2] @@ -48,7 +49,6 @@ def f(param): 'min_child_weight': hp.quniform('min_child_weight', 1.0, 20.0, 0.1) } fmin(f, param_space, algo=tpe.suggest, max_evals=100) params = sorted(params, key=lambda param : param['score']) best = params[0] -
nagadomi revised this gist
Aug 7, 2015 . 1 changed file with 2 additions and 2 deletions. There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -40,7 +40,7 @@ def f(param): 'n_estimators': 1000, 'objective': 'binary:logistic', 'max_depth': hp.quniform('max_depth', 3, 10, 1), 'learning_rate': hp.quniform("eta", 0.001, 0.1, 0.001), 'subsample': hp.quniform('subsample', 0.1, 1.0, 0.01), #'colsample_bytree': hp.quniform('colsample_bytree', 0.1, 1.0, 0.01), 'colsample_bytree': hp.quniform('colsample_bytree', 0.25, 1.0, 0.25), # iris has only 4 feat. 1/4 = 0.25 @@ -61,7 +61,7 @@ def f(param): print confusion_matrix(testY, predict) # **best param # {'colsample_bytree': 0.75, 'silent': 1, 'learning_rate': 0.059000000000000004, 'nthread': 4, 'min_child_weight': 1.5, 'n_estimators': 192, 'subsample': 0.67, 'score': 0.13530040000000002, 'objective': 'binary:logistic', 'max_depth': 3.0, 'gamma': 0.1} # [[25 0 0] # [ 0 23 2] # [ 0 0 25]] -
nagadomi revised this gist
Aug 7, 2015 . 1 changed file with 9 additions and 11 deletions. There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -6,14 +6,19 @@ from sklearn import cross_validation from sklearn.metrics import confusion_matrix iris = datasets.load_iris() trainX = iris.data[0::2,:] trainY = iris.target[0::2] testX = iris.data[1::2,:] testY = iris.target[1::2] K = 10 kfold = cross_validation.KFold(n=len(trainX), n_folds=K, shuffle=True, random_state=0) params = [] def f(param): score = 0 iteration = 0 for train_index, test_index in kfold: xgb_model = xgb.XGBClassifier(**param) xgb_model.fit(trainX[train_index], trainY[train_index], eval_set=[(trainX[test_index], trainY[test_index])], @@ -29,12 +34,6 @@ def f(param): return param["score"] param_space = { 'nthread': 4, 'silent': 1, @@ -61,9 +60,8 @@ def f(param): predict = xgb_model.predict(testX) print confusion_matrix(testY, predict) # **best param # {'colsample_bytree': 1.0, 'silent': 1, 'learning_rate': 0.007, 'nthread': 4, 'min_child_weight': 1.5, 'n_estimators': 805, 'subsample': 0.93, 'score': 0.14176099999999997, 'objective': 'binary:logistic', 'max_depth': 5.0, 'gamma': 0.4} # [[25 0 0] # [ 0 23 2] # [ 0 0 25]] -
nagadomi revised this gist
Aug 7, 2015 . 1 changed file with 1 addition and 2 deletions. There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -9,7 +9,6 @@ K = 10 params = [] def f(param): kf = cross_validation.KFold(n=len(trainX), n_folds=K, shuffle=True, random_state=0) score = 0 @@ -64,7 +63,7 @@ def f(param): #**best param # {'colsample_bytree': 1.0, 'silent': 1, 'learning_rate': 0.007, 'nthread': 4, 'min_child_weight': 1.5, 'n_estimators': 805, 'subsample': 0.93, 'score': 0.14176099999999997, 'objective': 'binary:logistic', 'max_depth': 5.0, 'gamma': 0.4} # [[25 0 0] # [ 0 23 2] # [ 0 0 25]] # -
nagadomi revised this gist
Aug 7, 2015 . 1 changed file with 55 additions and 37 deletions. There are no files selected for viewing
# Gist revision snapshot, reconstructed from the collapsed diff view
# (hunk header: @@ -1,52 +1,70 @@).
"""Tune XGBClassifier hyper-parameters on iris with hyperopt's TPE search.

Each candidate is scored by K-fold cross-validation with early stopping on
the even-indexed half of iris.  The best candidate is then refit on that
half and evaluated on the odd-indexed half with a confusion matrix.
"""
import numpy as np
import scipy as sp
import xgboost as xgb
from hyperopt import hp, fmin, tpe
from sklearn import datasets
from sklearn import cross_validation
from sklearn.metrics import confusion_matrix

K = 10       # number of cross-validation folds
params = []  # one record per evaluated candidate, appended by f()

# Even-indexed samples are used for tuning/training, odd-indexed for testing.
iris = datasets.load_iris()
trainX = iris.data[0::2, :]
trainY = iris.target[0::2]
testX = iris.data[1::2, :]
testY = iris.target[1::2]

# The splitter is built from the training-set size and a fixed random_state,
# so it is created once here instead of on every objective evaluation.
kfold = cross_validation.KFold(n=len(trainX), n_folds=K, shuffle=True,
                               random_state=0)


def f(param):
    """Objective for fmin: mean early-stopped validation score over K folds.

    param -- dict of XGBClassifier keyword arguments drawn from param_space.

    Returns the mean of each fold's ``best_score`` (``mlogloss`` on the
    held-out fold).  As a side effect, a copy of ``param`` is appended to the
    module-level ``params`` list with "score" added and "n_estimators"
    replaced by the mean ``best_iteration``.  The caller's dict is left
    untouched.
    """
    score = 0
    iteration = 0
    for train_index, test_index in kfold:
        xgb_model = xgb.XGBClassifier(**param)
        xgb_model.fit(trainX[train_index], trainY[train_index],
                      eval_set=[(trainX[test_index], trainY[test_index])],
                      early_stopping_rounds=30,
                      eval_metric=('mlogloss'),
                      verbose=False)
        score += xgb_model.best_score
        iteration += xgb_model.best_iteration

    # Record the result on a copy so the sampled dict is not mutated.
    record = dict(param)
    record["score"] = score / K
    record["n_estimators"] = int(iteration / K)
    params.append(record)
    return record["score"]


param_space = {
    'nthread': 4,
    'silent': 1,
    'n_estimators': 1000,  # upper bound; early stopping picks the real count
    # NOTE(review): iris has three classes, and later revisions of this gist
    # use 'multi:softprob' here -- confirm which objective is intended.
    'objective': 'binary:logistic',
    'max_depth': hp.quniform('max_depth', 3, 10, 1),
    'learning_rate': hp.quniform("eta", 0.001, 0.01, 0.001),
    'subsample': hp.quniform('subsample', 0.1, 1.0, 0.01),
    #'colsample_bytree': hp.quniform('colsample_bytree', 0.1, 1.0, 0.01),
    # iris has only 4 feat. 1/4 = 0.25
    'colsample_bytree': hp.quniform('colsample_bytree', 0.25, 1.0, 0.25),
    'gamma': hp.quniform('gamma', 0.1, 20.0, 0.1),
    'min_child_weight': hp.quniform('min_child_weight', 1.0, 20.0, 0.1)
}

np.random.seed(71)
fmin(f, param_space, algo=tpe.suggest, max_evals=100)

# Lowest mean validation score first.
params = sorted(params, key=lambda param: param['score'])
best = params[0]
print("**best param")
print(best)

# "score" is bookkeeping added by f(), not an XGBClassifier argument.
del best["score"]
xgb_model = xgb.XGBClassifier(**best)
xgb_model.fit(trainX, trainY)
predict = xgb_model.predict(testX)
# Parenthesised so the line parses on both Python 2 and Python 3.
print(confusion_matrix(testY, predict))

# Output recorded with this revision:
#**best param
# {'colsample_bytree': 1.0, 'silent': 1, 'learning_rate': 0.007, 'nthread': 4, 'min_child_weight': 1.5, 'n_estimators': 805, 'subsample': 0.93, 'score': 0.14176099999999997, 'objective': 'binary:logistic', 'max_depth': 5.0, 'gamma': 0.4}
#[[25 0 0]
# [ 0 23 2]
# [ 0 0 25]]
#
yagays created this gist
Aug 7, 2015 . There are no files selected for viewing
# Original gist snapshot, reconstructed from the collapsed diff view
# (hunk header: @@ -0,0 +1,52 @@).
"""Tune XGBClassifier on iris with GridSearchCV and RandomizedSearchCV.

Both searches are fit on the even-indexed half of iris with 10-fold CV, and
each prints a confusion matrix for the odd-indexed half.
"""
import numpy as np
import scipy as sp
# Import the subpackage explicitly: `import scipy` alone does not promise
# that `sp.stats` is available.
import scipy.stats
import xgboost as xgb
from sklearn import datasets
from sklearn.metrics import confusion_matrix
# NOTE(review): newer scikit-learn releases provide these classes from
# sklearn.model_selection and name the scorer "neg_log_loss" -- confirm
# against the installed version before running.
from sklearn.grid_search import GridSearchCV
from sklearn.grid_search import RandomizedSearchCV

# Even-indexed samples are used for training, odd-indexed for testing.
iris = datasets.load_iris()
trainX = iris.data[0::2, :]
trainY = iris.target[0::2]
testX = iris.data[1::2, :]
testY = iris.target[1::2]

np.random.seed(131)

# Grid Search
# Each list holds a single value, so the grid has exactly one candidate.
params = {
    'max_depth': [5],
    'subsample': [0.95],
    'colsample_bytree': [1.0],
}
xgb_model = xgb.XGBClassifier()
gs = GridSearchCV(xgb_model,
                  params,
                  cv=10,
                  scoring="log_loss",
                  n_jobs=1,
                  verbose=2)
gs.fit(trainX, trainY)
predict = gs.predict(testX)
# Parenthesised so the line parses on both Python 2 and Python 3.
print(confusion_matrix(testY, predict))

# RandomizedSearchCV
# randint(1, 11) draws integers 1..10; uniform(0.5, 0.5) draws from [0.5, 1.0].
param_distributions = {
    'max_depth': sp.stats.randint(1, 11),
    'subsample': sp.stats.uniform(0.5, 0.5),
    'colsample_bytree': sp.stats.uniform(0.5, 0.5),
}
xgb_model = xgb.XGBClassifier()
rs = RandomizedSearchCV(xgb_model,
                        param_distributions,
                        cv=10,
                        n_iter=20,
                        scoring="log_loss",
                        n_jobs=1,
                        verbose=2)
rs.fit(trainX, trainY)
predict = rs.predict(testX)
print(confusion_matrix(testY, predict))