Skip to content

Instantly share code, notes, and snippets.

@nagadomi
Forked from yagays/iris_xgboost.py
Last active September 22, 2016 01:29

Revisions

  1. nagadomi revised this gist Aug 7, 2015. 1 changed file with 2 additions and 2 deletions.
    4 changes: 2 additions & 2 deletions iris_xgboost.py
    Original file line number Diff line number Diff line change
    @@ -39,7 +39,7 @@ def f(param):
    'nthread': 4,
    'silent': 1,
    'n_estimators': 1000,
    'objective': 'binary:logistic',
    'objective': 'multi:softprob',
    'max_depth': hp.quniform('max_depth', 1, 10, 1),
    'learning_rate': hp.quniform("eta", 0.001, 0.1, 0.001),
    'subsample': hp.quniform('subsample', 0.1, 1.0, 0.01),
    @@ -61,7 +61,7 @@ def f(param):
    print confusion_matrix(testY, predict)

    # **best param
    # {'colsample_bytree': 0.5, 'silent': 1, 'learning_rate': 0.063, 'nthread': 4, 'min_child_weight': 1.0, 'n_estimators': 168, 'subsample': 0.78, 'score': 0.1174425, 'objective': 'binary:logistic', 'max_depth': 3.0, 'gamma': 0.1}
    # {'colsample_bytree': 0.5, 'silent': 1, 'learning_rate': 0.063, 'nthread': 4, 'min_child_weight': 1.0, 'n_estimators': 168, 'subsample': 0.78, 'score': 0.1174425, 'objective': 'multi:softprob', 'max_depth': 3.0, 'gamma': 0.1}
    # [[25 0 0]
    # [ 0 23 2]
    # [ 0 0 25]]
  2. nagadomi revised this gist Aug 7, 2015. 1 changed file with 2 additions and 2 deletions.
    4 changes: 2 additions & 2 deletions iris_xgboost.py
    Original file line number Diff line number Diff line change
    @@ -49,7 +49,7 @@ def f(param):
    'min_child_weight': hp.quniform('min_child_weight', 1.0, 20.0, 0.1)
    }

    fmin(f, param_space, algo=tpe.suggest, max_evals=100)
    fmin(f, param_space, algo=tpe.suggest, max_evals=200)
    params = sorted(params, key=lambda param : param['score'])
    best = params[0]
    print("**best param")
    @@ -61,7 +61,7 @@ def f(param):
    print confusion_matrix(testY, predict)

    # **best param
    # {'colsample_bytree': 0.75, 'silent': 1, 'learning_rate': 0.028, 'nthread': 4, 'min_child_weight': 1.1, 'n_estimators': 276, 'subsample': 1.0, 'score': 0.1372892, 'objective': 'binary:logistic', 'max_depth': 9.0, 'gamma': 0.30000000000000004}
    # {'colsample_bytree': 0.5, 'silent': 1, 'learning_rate': 0.063, 'nthread': 4, 'min_child_weight': 1.0, 'n_estimators': 168, 'subsample': 0.78, 'score': 0.1174425, 'objective': 'binary:logistic', 'max_depth': 3.0, 'gamma': 0.1}
    # [[25 0 0]
    # [ 0 23 2]
    # [ 0 0 25]]
  3. nagadomi revised this gist Aug 7, 2015. 1 changed file with 2 additions and 2 deletions.
    4 changes: 2 additions & 2 deletions iris_xgboost.py
    Original file line number Diff line number Diff line change
    @@ -40,7 +40,7 @@ def f(param):
    'silent': 1,
    'n_estimators': 1000,
    'objective': 'binary:logistic',
    'max_depth': hp.quniform('max_depth', 3, 10, 1),
    'max_depth': hp.quniform('max_depth', 1, 10, 1),
    'learning_rate': hp.quniform("eta", 0.001, 0.1, 0.001),
    'subsample': hp.quniform('subsample', 0.1, 1.0, 0.01),
    #'colsample_bytree': hp.quniform('colsample_bytree', 0.1, 1.0, 0.01),
    @@ -61,7 +61,7 @@ def f(param):
    print confusion_matrix(testY, predict)

    # **best param
    # {'colsample_bytree': 0.75, 'silent': 1, 'learning_rate': 0.059000000000000004, 'nthread': 4, 'min_child_weight': 1.5, 'n_estimators': 192, 'subsample': 0.67, 'score': 0.13530040000000002, 'objective': 'binary:logistic', 'max_depth': 3.0, 'gamma': 0.1}
    # {'colsample_bytree': 0.75, 'silent': 1, 'learning_rate': 0.028, 'nthread': 4, 'min_child_weight': 1.1, 'n_estimators': 276, 'subsample': 1.0, 'score': 0.1372892, 'objective': 'binary:logistic', 'max_depth': 9.0, 'gamma': 0.30000000000000004}
    # [[25 0 0]
    # [ 0 23 2]
    # [ 0 0 25]]
  4. nagadomi revised this gist Aug 7, 2015. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion iris_xgboost.py
    Original file line number Diff line number Diff line change
    @@ -6,6 +6,7 @@
    from sklearn import cross_validation
    from sklearn.metrics import confusion_matrix

    np.random.seed(71)
    iris = datasets.load_iris()
    trainX = iris.data[0::2,:]
    trainY = iris.target[0::2]
    @@ -48,7 +49,6 @@ def f(param):
    'min_child_weight': hp.quniform('min_child_weight', 1.0, 20.0, 0.1)
    }

    np.random.seed(71)
    fmin(f, param_space, algo=tpe.suggest, max_evals=100)
    params = sorted(params, key=lambda param : param['score'])
    best = params[0]
  5. nagadomi revised this gist Aug 7, 2015. 1 changed file with 2 additions and 2 deletions.
    4 changes: 2 additions & 2 deletions iris_xgboost.py
    Original file line number Diff line number Diff line change
    @@ -40,7 +40,7 @@ def f(param):
    'n_estimators': 1000,
    'objective': 'binary:logistic',
    'max_depth': hp.quniform('max_depth', 3, 10, 1),
    'learning_rate': hp.quniform("eta", 0.001, 0.01, 0.001),
    'learning_rate': hp.quniform("eta", 0.001, 0.1, 0.001),
    'subsample': hp.quniform('subsample', 0.1, 1.0, 0.01),
    #'colsample_bytree': hp.quniform('colsample_bytree', 0.1, 1.0, 0.01),
    'colsample_bytree': hp.quniform('colsample_bytree', 0.25, 1.0, 0.25), # iris has only 4 feat. 1/4 = 0.25
    @@ -61,7 +61,7 @@ def f(param):
    print confusion_matrix(testY, predict)

    # **best param
    # {'colsample_bytree': 1.0, 'silent': 1, 'learning_rate': 0.007, 'nthread': 4, 'min_child_weight': 1.5, 'n_estimators': 805, 'subsample': 0.93, 'score': 0.14176099999999997, 'objective': 'binary:logistic', 'max_depth': 5.0, 'gamma': 0.4}
    # {'colsample_bytree': 0.75, 'silent': 1, 'learning_rate': 0.059000000000000004, 'nthread': 4, 'min_child_weight': 1.5, 'n_estimators': 192, 'subsample': 0.67, 'score': 0.13530040000000002, 'objective': 'binary:logistic', 'max_depth': 3.0, 'gamma': 0.1}
    # [[25 0 0]
    # [ 0 23 2]
    # [ 0 0 25]]
  6. nagadomi revised this gist Aug 7, 2015. 1 changed file with 9 additions and 11 deletions.
    20 changes: 9 additions & 11 deletions iris_xgboost.py
    Original file line number Diff line number Diff line change
    @@ -6,14 +6,19 @@
    from sklearn import cross_validation
    from sklearn.metrics import confusion_matrix

    iris = datasets.load_iris()
    trainX = iris.data[0::2,:]
    trainY = iris.target[0::2]
    testX = iris.data[1::2,:]
    testY = iris.target[1::2]
    K = 10
    kfold = cross_validation.KFold(n=len(trainX), n_folds=K,
    shuffle=True, random_state=0)
    params = []
    def f(param):
    kf = cross_validation.KFold(n=len(trainX), n_folds=K,
    shuffle=True, random_state=0)
    score = 0
    iteration = 0
    for train_index, test_index in kf:
    for train_index, test_index in kfold:
    xgb_model = xgb.XGBClassifier(**param)
    xgb_model.fit(trainX[train_index], trainY[train_index],
    eval_set=[(trainX[test_index], trainY[test_index])],
    @@ -29,12 +34,6 @@ def f(param):

    return param["score"]

    iris = datasets.load_iris()
    trainX = iris.data[0::2,:]
    trainY = iris.target[0::2]
    testX = iris.data[1::2,:]
    testY = iris.target[1::2]

    param_space = {
    'nthread': 4,
    'silent': 1,
    @@ -61,9 +60,8 @@ def f(param):
    predict = xgb_model.predict(testX)
    print confusion_matrix(testY, predict)

    #**best param
    # **best param
    # {'colsample_bytree': 1.0, 'silent': 1, 'learning_rate': 0.007, 'nthread': 4, 'min_child_weight': 1.5, 'n_estimators': 805, 'subsample': 0.93, 'score': 0.14176099999999997, 'objective': 'binary:logistic', 'max_depth': 5.0, 'gamma': 0.4}
    # [[25 0 0]
    # [ 0 23 2]
    # [ 0 0 25]]
    #
  7. nagadomi revised this gist Aug 7, 2015. 1 changed file with 1 addition and 2 deletions.
    3 changes: 1 addition & 2 deletions iris_xgboost.py
    Original file line number Diff line number Diff line change
    @@ -9,7 +9,6 @@
    K = 10
    params = []
    def f(param):
    print(len(trainX))
    kf = cross_validation.KFold(n=len(trainX), n_folds=K,
    shuffle=True, random_state=0)
    score = 0
    @@ -64,7 +63,7 @@ def f(param):

    #**best param
    # {'colsample_bytree': 1.0, 'silent': 1, 'learning_rate': 0.007, 'nthread': 4, 'min_child_weight': 1.5, 'n_estimators': 805, 'subsample': 0.93, 'score': 0.14176099999999997, 'objective': 'binary:logistic', 'max_depth': 5.0, 'gamma': 0.4}
    #[[25 0 0]
    # [[25 0 0]
    # [ 0 23 2]
    # [ 0 0 25]]
    #
  8. nagadomi revised this gist Aug 7, 2015. 1 changed file with 55 additions and 37 deletions.
    92 changes: 55 additions & 37 deletions iris_xgboost.py
    Original file line number Diff line number Diff line change
    @@ -1,52 +1,70 @@
    import numpy as np
    import scipy as sp
    import xgboost as xgb
    from hyperopt import hp, fmin, tpe
    from sklearn import datasets
    from sklearn import cross_validation
    from sklearn.metrics import confusion_matrix
    from sklearn.grid_search import GridSearchCV
    from sklearn.grid_search import RandomizedSearchCV

    K = 10
    params = []
    def f(param):
    print(len(trainX))
    kf = cross_validation.KFold(n=len(trainX), n_folds=K,
    shuffle=True, random_state=0)
    score = 0
    iteration = 0
    for train_index, test_index in kf:
    xgb_model = xgb.XGBClassifier(**param)
    xgb_model.fit(trainX[train_index], trainY[train_index],
    eval_set=[(trainX[test_index], trainY[test_index])],
    early_stopping_rounds=30,
    eval_metric=('mlogloss'),
    verbose=False)
    score += xgb_model.best_score
    iteration += xgb_model.best_iteration

    param["score"] = score / K
    param["n_estimators"] = int(iteration / K)
    params.append(param)

    return param["score"]

    iris = datasets.load_iris()
    trainX = iris.data[0::2,:]
    trainY = iris.target[0::2]
    testX = iris.data[1::2,:]
    testY = iris.target[1::2]

    np.random.seed(131)

    # Grid Search
    params={'max_depth': [5],
    'subsample': [0.95],
    'colsample_bytree': [1.0]
    }

    xgb_model = xgb.XGBClassifier()
    gs = GridSearchCV(xgb_model,
    params,
    cv=10,
    scoring="log_loss",
    n_jobs=1,
    verbose=2)
    gs.fit(trainX,trainY)
    predict = gs.predict(testX)
    param_space = {
    'nthread': 4,
    'silent': 1,
    'n_estimators': 1000,
    'objective': 'binary:logistic',
    'max_depth': hp.quniform('max_depth', 3, 10, 1),
    'learning_rate': hp.quniform("eta", 0.001, 0.01, 0.001),
    'subsample': hp.quniform('subsample', 0.1, 1.0, 0.01),
    #'colsample_bytree': hp.quniform('colsample_bytree', 0.1, 1.0, 0.01),
    'colsample_bytree': hp.quniform('colsample_bytree', 0.25, 1.0, 0.25), # iris has only 4 feat. 1/4 = 0.25
    'gamma': hp.quniform('gamma', 0.1, 20.0, 0.1),
    'min_child_weight': hp.quniform('min_child_weight', 1.0, 20.0, 0.1)
    }

    np.random.seed(71)
    fmin(f, param_space, algo=tpe.suggest, max_evals=100)
    params = sorted(params, key=lambda param : param['score'])
    best = params[0]
    print("**best param")
    print(best)
    del best["score"]
    xgb_model = xgb.XGBClassifier(**best)
    xgb_model.fit(trainX, trainY)
    predict = xgb_model.predict(testX)
    print confusion_matrix(testY, predict)

    # RandomizedSearchCV
    param_distributions={'max_depth': sp.stats.randint(1,11),
    'subsample': sp.stats.uniform(0.5,0.5),
    'colsample_bytree': sp.stats.uniform(0.5,0.5)
    }

    xgb_model = xgb.XGBClassifier()
    rs = RandomizedSearchCV(xgb_model,
    param_distributions,
    cv=10,
    n_iter=20,
    scoring="log_loss",
    n_jobs=1,
    verbose=2)
    rs.fit(trainX,trainY)
    predict = rs.predict(testX)

    print confusion_matrix(testY, predict)
    #**best param
    # {'colsample_bytree': 1.0, 'silent': 1, 'learning_rate': 0.007, 'nthread': 4, 'min_child_weight': 1.5, 'n_estimators': 805, 'subsample': 0.93, 'score': 0.14176099999999997, 'objective': 'binary:logistic', 'max_depth': 5.0, 'gamma': 0.4}
    #[[25 0 0]
    # [ 0 23 2]
    # [ 0 0 25]]
    #
  9. @yagays yagays created this gist Aug 7, 2015.
    52 changes: 52 additions & 0 deletions iris_xgboost.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,52 @@
    import numpy as np
    import scipy as sp
    import xgboost as xgb
    from sklearn import datasets
    from sklearn.metrics import confusion_matrix
    from sklearn.grid_search import GridSearchCV
    from sklearn.grid_search import RandomizedSearchCV

    iris = datasets.load_iris()
    trainX = iris.data[0::2,:]
    trainY = iris.target[0::2]
    testX = iris.data[1::2,:]
    testY = iris.target[1::2]

    np.random.seed(131)

    # Grid Search
    params={'max_depth': [5],
    'subsample': [0.95],
    'colsample_bytree': [1.0]
    }

    xgb_model = xgb.XGBClassifier()
    gs = GridSearchCV(xgb_model,
    params,
    cv=10,
    scoring="log_loss",
    n_jobs=1,
    verbose=2)
    gs.fit(trainX,trainY)
    predict = gs.predict(testX)

    print confusion_matrix(testY, predict)

    # RandomizedSearchCV
    param_distributions={'max_depth': sp.stats.randint(1,11),
    'subsample': sp.stats.uniform(0.5,0.5),
    'colsample_bytree': sp.stats.uniform(0.5,0.5)
    }

    xgb_model = xgb.XGBClassifier()
    rs = RandomizedSearchCV(xgb_model,
    param_distributions,
    cv=10,
    n_iter=20,
    scoring="log_loss",
    n_jobs=1,
    verbose=2)
    rs.fit(trainX,trainY)
    predict = rs.predict(testX)

    print confusion_matrix(testY, predict)