nagadomi · September 22, 2016 01:29
diff --git a/iris_xgboost.py b/iris_xgboost.py
 import numpy as np
 import scipy as sp
 import xgboost as xgb
 from hyperopt import hp, fmin, tpe
 from sklearn import datasets
 from sklearn import cross_validation
 from sklearn.metrics import confusion_matrix

 # Seed NumPy's global random generator.
 np.random.seed(71)

 # Number of cross-validation folds; f() also divides its totals by this.
 K = 10
 # Every parameter dict evaluated by f() is appended here.
 params = []

 # Even-indexed iris rows are used for training, odd-indexed rows for testing.
 iris = datasets.load_iris()
 trainX, testX = iris.data[::2], iris.data[1::2]
 trainY, testY = iris.target[::2], iris.target[1::2]

 # Shuffled K-fold splitter over the training rows with a fixed random_state.
 # NOTE(review): uses the legacy sklearn.cross_validation KFold(n=..., n_folds=...)
 # signature -- confirm the installed scikit-learn still provides it.
 kfold = cross_validation.KFold(
     n=len(trainX),
     n_folds=K,
     shuffle=True,
     random_state=0,
 )
 def f(param):
     """Hyperopt objective: mean validation mlogloss of ``param`` over the folds.

     For every (train, validation) index pair in the module-level ``kfold`` an
     ``XGBClassifier`` is built from ``param`` and fitted with early stopping
     (30 rounds, ``mlogloss`` on the validation fold). The per-fold
     ``best_score`` and ``best_iteration`` values are summed and divided by
     ``K``.

     Side effects: ``param`` is modified in place -- ``"max_depth"`` is cast
     to ``int``, ``"score"`` receives the mean best score and
     ``"n_estimators"`` the truncated mean best iteration -- and is then
     appended to the module-level ``params`` list.

     Args:
         param: dict of ``XGBClassifier`` keyword arguments for one trial.

     Returns:
         The mean best validation score (the value hyperopt minimizes).
     """
     # hp.quniform yields floats (e.g. 3.0); a tree depth must be an integer.
     if "max_depth" in param:
         param["max_depth"] = int(param["max_depth"])

     score = 0
     iteration = 0
     for train_index, test_index in kfold:
         xgb_model = xgb.XGBClassifier(**param)
         xgb_model.fit(trainX[train_index], trainY[train_index],
                       eval_set=[(trainX[test_index], trainY[test_index])],
                       early_stopping_rounds=30,
                       eval_metric='mlogloss',
                       verbose=False)
         score += xgb_model.best_score
         iteration += xgb_model.best_iteration

     # Record the averaged results on the trial itself so the best trial can
     # later be reused directly as XGBClassifier arguments.
     param["score"] = score / K
     param["n_estimators"] = int(iteration / K)
     params.append(param)

     return param["score"]

 # Search space passed to hyperopt: fixed XGBClassifier settings plus
 # hp.quniform(label, low, high, step) ranges for the tuned hyperparameters.
 param_space = {
     'nthread': 4,
     'silent': 1,
     'n_estimators': 1000,
     'objective': 'multi:softprob',
     'max_depth': hp.quniform('max_depth', 1, 10, 1),
     'learning_rate': hp.quniform("eta", 0.001, 0.1, 0.001),
     'subsample': hp.quniform('subsample', 0.1, 1.0, 0.01),
     # iris has only 4 features, so columns are sampled in steps of
     # 1/4 = 0.25 (instead of a 0.1..1.0 range with step 0.01).
     'colsample_bytree': hp.quniform('colsample_bytree', 0.25, 1.0, 0.25),
     'gamma': hp.quniform('gamma', 0.1, 20.0, 0.1),
     'min_child_weight': hp.quniform('min_child_weight', 1.0, 20.0, 0.1)
     }

 # Run 200 TPE evaluations of f; f appends every evaluated dict to `params`.
 fmin(f, param_space, algo=tpe.suggest, max_evals=200)
 # Ascending by mean validation score, so the first entry is the best trial.
 params = sorted(params, key=lambda param : param['score'])
 best = params[0]
 print("**best param")
 print(best)
 # "score" was added by f for bookkeeping; drop it before passing the dict
 # to XGBClassifier as keyword arguments.
 del best["score"]
 xgb_model = xgb.XGBClassifier(**best)
 xgb_model.fit(trainX, trainY)
 predict = xgb_model.predict(testX)
 # Call form of print: consistent with the calls above and valid on both
 # Python 2 and Python 3 (the bare print statement is Python 2 only).
 print(confusion_matrix(testY, predict))

 # **best param
 # {'colsample_bytree': 0.5, 'silent': 1, 'learning_rate': 0.063, 'nthread': 4, 'min_child_weight': 1.0, 'n_estimators': 168, 'subsample': 0.78, 'score': 0.1174425, 'objective': 'multi:softprob', 'max_depth': 3.0, 'gamma': 0.1}
 # [[25  0  0]
 #  [ 0 23  2]
 #  [ 0  0 25]]
	import numpy as np
	import scipy as sp
	import xgboost as xgb
	from hyperopt import hp, fmin, tpe
	from sklearn import datasets
	from sklearn import cross_validation
	from sklearn.metrics import confusion_matrix

	# Seed NumPy's global random generator.
	np.random.seed(71)

	# Number of cross-validation folds; f() also divides its totals by this.
	K = 10
	# Every parameter dict evaluated by f() is appended here.
	params = []

	# Even-indexed iris rows are used for training, odd-indexed rows for testing.
	iris = datasets.load_iris()
	trainX, testX = iris.data[::2], iris.data[1::2]
	trainY, testY = iris.target[::2], iris.target[1::2]

	# Shuffled K-fold splitter over the training rows with a fixed random_state.
	# NOTE(review): uses the legacy sklearn.cross_validation KFold(n=..., n_folds=...)
	# signature -- confirm the installed scikit-learn still provides it.
	kfold = cross_validation.KFold(
	    n=len(trainX),
	    n_folds=K,
	    shuffle=True,
	    random_state=0,
	)
	def f(param):
	    """Hyperopt objective: mean validation mlogloss of ``param`` over the folds.

	    For every (train, validation) index pair in the module-level ``kfold`` an
	    ``XGBClassifier`` is built from ``param`` and fitted with early stopping
	    (30 rounds, ``mlogloss`` on the validation fold). The per-fold
	    ``best_score`` and ``best_iteration`` values are summed and divided by
	    ``K``.

	    Side effects: ``param`` is modified in place -- ``"max_depth"`` is cast
	    to ``int``, ``"score"`` receives the mean best score and
	    ``"n_estimators"`` the truncated mean best iteration -- and is then
	    appended to the module-level ``params`` list.

	    Args:
	        param: dict of ``XGBClassifier`` keyword arguments for one trial.

	    Returns:
	        The mean best validation score (the value hyperopt minimizes).
	    """
	    # hp.quniform yields floats (e.g. 3.0); a tree depth must be an integer.
	    if "max_depth" in param:
	        param["max_depth"] = int(param["max_depth"])

	    score = 0
	    iteration = 0
	    for train_index, test_index in kfold:
	        xgb_model = xgb.XGBClassifier(**param)
	        xgb_model.fit(trainX[train_index], trainY[train_index],
	                      eval_set=[(trainX[test_index], trainY[test_index])],
	                      early_stopping_rounds=30,
	                      eval_metric='mlogloss',
	                      verbose=False)
	        score += xgb_model.best_score
	        iteration += xgb_model.best_iteration

	    # Record the averaged results on the trial itself so the best trial can
	    # later be reused directly as XGBClassifier arguments.
	    param["score"] = score / K
	    param["n_estimators"] = int(iteration / K)
	    params.append(param)

	    return param["score"]

	# Search space passed to hyperopt: fixed XGBClassifier settings plus
	# hp.quniform(label, low, high, step) ranges for the tuned hyperparameters.
	param_space = {
	    'nthread': 4,
	    'silent': 1,
	    'n_estimators': 1000,
	    'objective': 'multi:softprob',
	    'max_depth': hp.quniform('max_depth', 1, 10, 1),
	    'learning_rate': hp.quniform("eta", 0.001, 0.1, 0.001),
	    'subsample': hp.quniform('subsample', 0.1, 1.0, 0.01),
	    # iris has only 4 features, so columns are sampled in steps of
	    # 1/4 = 0.25 (instead of a 0.1..1.0 range with step 0.01).
	    'colsample_bytree': hp.quniform('colsample_bytree', 0.25, 1.0, 0.25),
	    'gamma': hp.quniform('gamma', 0.1, 20.0, 0.1),
	    'min_child_weight': hp.quniform('min_child_weight', 1.0, 20.0, 0.1)
	}

	# Run 200 TPE evaluations of f; f appends every evaluated dict to `params`.
	fmin(f, param_space, algo=tpe.suggest, max_evals=200)
	# Ascending by mean validation score, so the first entry is the best trial.
	params = sorted(params, key=lambda param : param['score'])
	best = params[0]
	print("**best param")
	print(best)
	# "score" was added by f for bookkeeping; drop it before passing the dict
	# to XGBClassifier as keyword arguments.
	del best["score"]
	xgb_model = xgb.XGBClassifier(**best)
	xgb_model.fit(trainX, trainY)
	predict = xgb_model.predict(testX)
	# Call form of print: consistent with the calls above and valid on both
	# Python 2 and Python 3 (the bare print statement is Python 2 only).
	print(confusion_matrix(testY, predict))

	# **best param
	# {'colsample_bytree': 0.5, 'silent': 1, 'learning_rate': 0.063, 'nthread': 4, 'min_child_weight': 1.0, 'n_estimators': 168, 'subsample': 0.78, 'score': 0.1174425, 'objective': 'multi:softprob', 'max_depth': 3.0, 'gamma': 0.1}
	# [[25  0  0]
	#  [ 0 23  2]
	#  [ 0  0 25]]