ibayer · July 25, 2012 20:41
diff --git a/strong_rule_enet.py b/strong_rule_enet.py
 import numpy as np
 from scipy import linalg

 MAX_ITER = 100

 #    cdef double l1_reg = alpha * rho * n_samples
 #    cdef double l2_reg = alpha * (1.0 - rho) * n_samples
 def enet_coordinate_descent(X, y, alpha, rho, warm_start=None, max_iter=MAX_ITER):

    n_samples = X.shape[0]
    l1_reg = alpha * rho * n_samples
    l2_reg = alpha * (1.0 - rho) * n_samples

    if warm_start is not None:
        beta = warm_start
    else:
        beta = np.zeros(X.shape[1], dtype=np.float)
    alpha = alpha * X.shape[0]

    for _ in range(max_iter):
        for i in range(X.shape[1]):
            bb = beta.copy()
            bb[i] = 0.
            residual = np.dot(X[:, i], y - np.dot(X, bb).T)
            beta[i] = np.sign(residual) * np.fmax(np.abs(residual) - l1_reg, 0) \
                / (np.dot(X[:, i], X[:, i]) + l2_reg)
    return beta


 def shrinkage(X, y, l1_reg, l2_reg, beta, active_set, max_iter):
    bb = beta.copy()
    for _ in range(max_iter):
        for i in active_set:

            bb[i] = 0
            residual = np.dot(X[:, i], y - np.dot(X, bb).T)
            bb[i] = np.sign(residual) * np.fmax(np.abs(residual) - l1_reg, 0) \
                / (np.dot(X[:, i], X[:, i]) + l2_reg)
    return bb


 def enet_path(X, y, alphas, rho, max_iter=MAX_ITER, verbose=False):
    """
 The strategy is described in "Strong rules for discarding predictors in lasso-type problems"

 alphas must be an increasing sequence of regularization parameters

 WARNING: does not compute intercept
 """
    beta = np.zeros((len(alphas), X.shape[1]), dtype=np.float)

    alphas_scaled = np.array(alphas).copy() * X.shape[0]

    n_samples = X.shape[0]

    active_set = np.arange(X.shape[1]).tolist()
    for k, a in enumerate(alphas_scaled):
        if verbose:
            print 'Current active set ', active_set

        if k > 0:
        # .. Strong rules for discarding predictors in enet-type ..
            tmp = np.abs(np.dot(X.T, y - np.dot(X, beta[k - 1])))
            strong_active_set = tmp < rho * (2 * alphas_scaled[k] - alphas_scaled[k - 1])
            strong_active_set = np.where(strong_active_set)[0]
        else:
            strong_active_set = np.arange(X.shape[1])
        l1_reg = a * rho
        l2_reg = a * (1.0 - rho)
        # solve for the current active set
        beta[k] = shrinkage(X, y, l1_reg, l2_reg, beta[k], active_set, max_iter)
        print "solve for the current active set " + str(beta)

        # check KKT in the strong active set
        kkt_violations = True
        for i in strong_active_set:
            tmp = np.dot(X[:, i], y - np.dot(X, beta[k]))
            if beta[k, i] != 0 and not \
                    np.allclose(tmp, np.abs(l1_reg + l2_reg * beta[k, i])):
                active_set.append(i)
            if beta[k, i] == 0 and abs(tmp) >= np.abs(l1_reg + l2_reg * beta[k, i]):
                active_set.append(i)
        else:
            # passed KKT for all variables in strong active set, we're done
            active_set = np.where(beta[k] != 0)[0].tolist()
            kkt_violations = False
            if verbose:
                print 'No KKT violations on active set'

        # .. recompute with new active set
        if kkt_violations:
            if verbose:
                print 'KKT violated on strong active set'
            beta[k] = shrinkage(X, y, l1_reg, l2_reg, beta[k], active_set, max_iter)

        # .. check KKT on all predictors ..
        kkt_violations = True
        for i in range(X.shape[1]):
            tmp = np.dot(X[:, i], y - np.dot(X, beta[k]))
            if beta[k, i] != 0 and tmp != np.abs(l1_reg + l2_reg * beta[k, i]):
                active_set.append(i)
            if beta[k, i] == 0 and abs(tmp) >= np.abs(l1_reg + l2_reg * beta[k, i]):
                active_set.append(i)
        else:
        # passed KKT for all variables, we're done
            active_set = np.where(beta[k] != 0)[0].tolist()
            kkt_violations = False

        if kkt_violations:
            if verbose:
                print 'KKT violated on full active set'
            beta[k] = shrinkage(X, y, l1_reg, l2_reg, beta[k], active_set, max_iter)


    return beta


 def check_kkt_lasso(xr, coef, penalty, tol=1e-3):
    """
 Check KKT conditions for Lasso

 xr : X'(y - X coef)
 """
    nonzero = (coef != 0)
    return np.all(np.abs(xr[nonzero] - np.sign(coef[nonzero]) * penalty) < tol) \
        and np.all(np.abs(xr[~nonzero] / penalty) <= 1)


 if __name__ == '__main__':
    np.random.seed(0)
    from sklearn import datasets
    diabetes = datasets.load_diabetes()
    X = diabetes.data
    y = diabetes.target
    #print l1_coordinate_descent(X, y, .001)
    print enet_path(X, y, np.linspace(.1, 2., 5), rho=0.5, verbose=True)
diff --git a/test_strong_rule.py b/test_strong_rule.py
 import strong_rule_enet as enet
 import strong_rule_lasso as lasso
 import numpy as np
 from sklearn import datasets
 from sklearn import linear_model
 from numpy.testing import assert_array_almost_equal, assert_almost_equal, \
                          assert_equal
 from sklearn.datasets.samples_generator import make_regression

 np.random.seed(0)
 diabetes = datasets.load_diabetes()
 X = diabetes.data
 y = diabetes.target

 MAX_ITER = 100


 def test_lasso_coordinate_descent():
    X, y = make_regression(n_samples=200, n_features=50, n_informative=5,
                     random_state=0)

    clf = linear_model.Lasso(alpha=0.001, max_iter=MAX_ITER)
    clf.fit(X, y)
    coef = lasso.l1_coordinate_descent(X, y, .001, max_iter=MAX_ITER)
    assert_array_almost_equal(coef, clf.coef_, 3)


 def test_enet_coordinate_descent():
    MAX_ITER = 100
    X, y = make_regression(n_samples=200, n_features=50, n_informative=5,
                     random_state=0)
    clf = linear_model.ElasticNet(alpha=0.001, rho=0.7, max_iter=MAX_ITER)
    clf.fit(X, y)
    coef = enet.enet_coordinate_descent(X, y, alpha=0.001, rho=0.7, max_iter=MAX_ITER)
    assert_array_almost_equal(coef, clf.coef_, 3)


 def test_lasso_path():
    MAX_ITER = 100
    X, y = make_regression(n_samples=50, n_features=200, n_informative=5,
                        random_state=0)
    alphas = np.linspace(.1, 2., 5)
    path_coefs = lasso.l1_path(X, y, alphas, max_iter=MAX_ITER, verbose=True)

    clf_1 = linear_model.Lasso(alpha=alphas[1], max_iter=MAX_ITER)
    clf_1.fit(X, y)
    clf_4 = linear_model.Lasso(alpha=alphas[4], max_iter=MAX_ITER)
    clf_4.fit(X, y)

    assert_array_almost_equal(path_coefs[1], clf_1.coef_, 1)
    assert_array_almost_equal(path_coefs[4], clf_4.coef_, 1)


 def test_enet_path_one_step():
    MAX_ITER = 100
    X, y = make_regression(n_samples=50, n_features=200, n_informative=5,
                        random_state=0)
    rho = 0.9
    alphas = np.linspace(.1, 2., 5)
    path_coefs = enet.enet_path(X, y, alphas, rho,max_iter=MAX_ITER, verbose=True)

    clf_0 = linear_model.ElasticNet(alpha=alphas[0], rho=rho, max_iter=MAX_ITER)
    clf_0.fit(X, y)
    assert_array_almost_equal(path_coefs[0], clf_0.coef_, 2)


 def test_enet_path():
    MAX_ITER = 100
    X, y = make_regression(n_samples=50, n_features=200, n_informative=5,
                        random_state=0)
    rho = 0.9
    alphas = np.linspace(.1, 2., 5)
    path_coefs = enet.enet_path(X, y, alphas, rho,max_iter=MAX_ITER, verbose=True)

    clf_1 = linear_model.ElasticNet(alpha=alphas[1], rho=rho, max_iter=MAX_ITER)
    clf_1.fit(X, y)
    clf_4 = linear_model.ElasticNet(alpha=alphas[4], rho=rho, max_iter=MAX_ITER)
    clf_4.fit(X, y)

    assert_array_almost_equal(path_coefs[1], clf_1.coef_, 1)
    assert_array_almost_equal(path_coefs[4], clf_4.coef_, 1)


 def test_lasso_path_big_data():
    MAX_ITER = 100
    X, y = make_regression(n_samples=50, n_features=200, n_informative=5,
                        random_state=0)
    alphas = np.linspace(.1, 2., 5)
    path_coefs = lasso.l1_path(X, y, alphas, max_iter=MAX_ITER, verbose=True)

    clf_1 = linear_model.Lasso(alpha=alphas[1], max_iter=MAX_ITER)
    clf_1.fit(X, y)
    clf_4 = linear_model.Lasso(alpha=alphas[4], max_iter=MAX_ITER)
    clf_4.fit(X, y)

    assert_array_almost_equal(path_coefs[1], clf_1.coef_, 2)
    assert_array_almost_equal(path_coefs[4], clf_4.coef_, 2)


 def test_enet_path_big_data():
    MAX_ITER = 100
    X, y = make_regression(n_samples=50, n_features=200, n_informative=5,
                        random_state=0)
    rho = 0.9
    alphas = np.linspace(.1, 2., 5)
    path_coefs = enet.enet_path(X, y, alphas, rho,max_iter=MAX_ITER, verbose=True)

    clf_1 = linear_model.ElasticNet(alpha=alphas[1], rho=rho, max_iter=MAX_ITER)
    clf_1.fit(X, y)
    clf_4 = linear_model.ElasticNet(alpha=alphas[4], rho=rho, max_iter=MAX_ITER)
    clf_4.fit(X, y)
    assert_array_almost_equal(path_coefs[1], clf_1.coef_, 2)
    assert_array_almost_equal(path_coefs[4], clf_4.coef_, 2)
	import numpy as np
	from scipy import linalg

	MAX_ITER = 100

	# cdef double l1_reg = alpha * rho * n_samples
	# cdef double l2_reg = alpha * (1.0 - rho) * n_samples
	def enet_coordinate_descent(X, y, alpha, rho, warm_start=None, max_iter=MAX_ITER):

	n_samples = X.shape[0]
	l1_reg = alpha * rho * n_samples
	l2_reg = alpha * (1.0 - rho) * n_samples

	if warm_start is not None:
	beta = warm_start
	else:
	beta = np.zeros(X.shape[1], dtype=np.float)
	alpha = alpha * X.shape[0]

	for _ in range(max_iter):
	for i in range(X.shape[1]):
	bb = beta.copy()
	bb[i] = 0.
	residual = np.dot(X[:, i], y - np.dot(X, bb).T)
	beta[i] = np.sign(residual) * np.fmax(np.abs(residual) - l1_reg, 0) \
	/ (np.dot(X[:, i], X[:, i]) + l2_reg)
	return beta


	def shrinkage(X, y, l1_reg, l2_reg, beta, active_set, max_iter):
	bb = beta.copy()
	for _ in range(max_iter):
	for i in active_set:

	bb[i] = 0
	residual = np.dot(X[:, i], y - np.dot(X, bb).T)
	bb[i] = np.sign(residual) * np.fmax(np.abs(residual) - l1_reg, 0) \
	/ (np.dot(X[:, i], X[:, i]) + l2_reg)
	return bb


	def enet_path(X, y, alphas, rho, max_iter=MAX_ITER, verbose=False):
	"""
	The strategy is described in "Strong rules for discarding predictors in lasso-type problems"

	alphas must be an increasing sequence of regularization parameters

	WARNING: does not compute intercept
	"""
	beta = np.zeros((len(alphas), X.shape[1]), dtype=np.float)

	alphas_scaled = np.array(alphas).copy() * X.shape[0]

	n_samples = X.shape[0]

	active_set = np.arange(X.shape[1]).tolist()
	for k, a in enumerate(alphas_scaled):
	if verbose:
	print 'Current active set ', active_set

	if k > 0:
	# .. Strong rules for discarding predictors in enet-type ..
	tmp = np.abs(np.dot(X.T, y - np.dot(X, beta[k - 1])))
	strong_active_set = tmp < rho * (2 * alphas_scaled[k] - alphas_scaled[k - 1])
	strong_active_set = np.where(strong_active_set)[0]
	else:
	strong_active_set = np.arange(X.shape[1])
	l1_reg = a * rho
	l2_reg = a * (1.0 - rho)
	# solve for the current active set
	beta[k] = shrinkage(X, y, l1_reg, l2_reg, beta[k], active_set, max_iter)
	print "solve for the current active set " + str(beta)

	# check KKT in the strong active set
	kkt_violations = True
	for i in strong_active_set:
	tmp = np.dot(X[:, i], y - np.dot(X, beta[k]))
	if beta[k, i] != 0 and not \
	np.allclose(tmp, np.abs(l1_reg + l2_reg * beta[k, i])):
	active_set.append(i)
	if beta[k, i] == 0 and abs(tmp) >= np.abs(l1_reg + l2_reg * beta[k, i]):
	active_set.append(i)
	else:
	# passed KKT for all variables in strong active set, we're done
	active_set = np.where(beta[k] != 0)[0].tolist()
	kkt_violations = False
	if verbose:
	print 'No KKT violations on active set'

	# .. recompute with new active set
	if kkt_violations:
	if verbose:
	print 'KKT violated on strong active set'
	beta[k] = shrinkage(X, y, l1_reg, l2_reg, beta[k], active_set, max_iter)

	# .. check KKT on all predictors ..
	kkt_violations = True
	for i in range(X.shape[1]):
	tmp = np.dot(X[:, i], y - np.dot(X, beta[k]))
	if beta[k, i] != 0 and tmp != np.abs(l1_reg + l2_reg * beta[k, i]):
	active_set.append(i)
	if beta[k, i] == 0 and abs(tmp) >= np.abs(l1_reg + l2_reg * beta[k, i]):
	active_set.append(i)
	else:
	# passed KKT for all variables, we're done
	active_set = np.where(beta[k] != 0)[0].tolist()
	kkt_violations = False

	if kkt_violations:
	if verbose:
	print 'KKT violated on full active set'
	beta[k] = shrinkage(X, y, l1_reg, l2_reg, beta[k], active_set, max_iter)


	return beta


	def check_kkt_lasso(xr, coef, penalty, tol=1e-3):
	"""
	Check KKT conditions for Lasso

	xr : X'(y - X coef)
	"""
	nonzero = (coef != 0)
	return np.all(np.abs(xr[nonzero] - np.sign(coef[nonzero]) * penalty) < tol) \
	and np.all(np.abs(xr[~nonzero] / penalty) <= 1)


	if __name__ == '__main__':
	np.random.seed(0)
	from sklearn import datasets
	diabetes = datasets.load_diabetes()
	X = diabetes.data
	y = diabetes.target
	#print l1_coordinate_descent(X, y, .001)
	print enet_path(X, y, np.linspace(.1, 2., 5), rho=0.5, verbose=True)
	import strong_rule_enet as enet
	import strong_rule_lasso as lasso
	import numpy as np
	from sklearn import datasets
	from sklearn import linear_model
	from numpy.testing import assert_array_almost_equal, assert_almost_equal, \
	assert_equal
	from sklearn.datasets.samples_generator import make_regression

	np.random.seed(0)
	diabetes = datasets.load_diabetes()
	X = diabetes.data
	y = diabetes.target

	MAX_ITER = 100


	def test_lasso_coordinate_descent():
	X, y = make_regression(n_samples=200, n_features=50, n_informative=5,
	random_state=0)

	clf = linear_model.Lasso(alpha=0.001, max_iter=MAX_ITER)
	clf.fit(X, y)
	coef = lasso.l1_coordinate_descent(X, y, .001, max_iter=MAX_ITER)
	assert_array_almost_equal(coef, clf.coef_, 3)


	def test_enet_coordinate_descent():
	MAX_ITER = 100
	X, y = make_regression(n_samples=200, n_features=50, n_informative=5,
	random_state=0)
	clf = linear_model.ElasticNet(alpha=0.001, rho=0.7, max_iter=MAX_ITER)
	clf.fit(X, y)
	coef = enet.enet_coordinate_descent(X, y, alpha=0.001, rho=0.7, max_iter=MAX_ITER)
	assert_array_almost_equal(coef, clf.coef_, 3)


	def test_lasso_path():
	MAX_ITER = 100
	X, y = make_regression(n_samples=50, n_features=200, n_informative=5,
	random_state=0)
	alphas = np.linspace(.1, 2., 5)
	path_coefs = lasso.l1_path(X, y, alphas, max_iter=MAX_ITER, verbose=True)

	clf_1 = linear_model.Lasso(alpha=alphas[1], max_iter=MAX_ITER)
	clf_1.fit(X, y)
	clf_4 = linear_model.Lasso(alpha=alphas[4], max_iter=MAX_ITER)
	clf_4.fit(X, y)

	assert_array_almost_equal(path_coefs[1], clf_1.coef_, 1)
	assert_array_almost_equal(path_coefs[4], clf_4.coef_, 1)


	def test_enet_path_one_step():
	MAX_ITER = 100
	X, y = make_regression(n_samples=50, n_features=200, n_informative=5,
	random_state=0)
	rho = 0.9
	alphas = np.linspace(.1, 2., 5)
	path_coefs = enet.enet_path(X, y, alphas, rho,max_iter=MAX_ITER, verbose=True)

	clf_0 = linear_model.ElasticNet(alpha=alphas[0], rho=rho, max_iter=MAX_ITER)
	clf_0.fit(X, y)
	assert_array_almost_equal(path_coefs[0], clf_0.coef_, 2)


	def test_enet_path():
	MAX_ITER = 100
	X, y = make_regression(n_samples=50, n_features=200, n_informative=5,
	random_state=0)
	rho = 0.9
	alphas = np.linspace(.1, 2., 5)
	path_coefs = enet.enet_path(X, y, alphas, rho,max_iter=MAX_ITER, verbose=True)

	clf_1 = linear_model.ElasticNet(alpha=alphas[1], rho=rho, max_iter=MAX_ITER)
	clf_1.fit(X, y)
	clf_4 = linear_model.ElasticNet(alpha=alphas[4], rho=rho, max_iter=MAX_ITER)
	clf_4.fit(X, y)

	assert_array_almost_equal(path_coefs[1], clf_1.coef_, 1)
	assert_array_almost_equal(path_coefs[4], clf_4.coef_, 1)


	def test_lasso_path_big_data():
	MAX_ITER = 100
	X, y = make_regression(n_samples=50, n_features=200, n_informative=5,
	random_state=0)
	alphas = np.linspace(.1, 2., 5)
	path_coefs = lasso.l1_path(X, y, alphas, max_iter=MAX_ITER, verbose=True)

	clf_1 = linear_model.Lasso(alpha=alphas[1], max_iter=MAX_ITER)
	clf_1.fit(X, y)
	clf_4 = linear_model.Lasso(alpha=alphas[4], max_iter=MAX_ITER)
	clf_4.fit(X, y)

	assert_array_almost_equal(path_coefs[1], clf_1.coef_, 2)
	assert_array_almost_equal(path_coefs[4], clf_4.coef_, 2)


	def test_enet_path_big_data():
	MAX_ITER = 100
	X, y = make_regression(n_samples=50, n_features=200, n_informative=5,
	random_state=0)
	rho = 0.9
	alphas = np.linspace(.1, 2., 5)
	path_coefs = enet.enet_path(X, y, alphas, rho,max_iter=MAX_ITER, verbose=True)

	clf_1 = linear_model.ElasticNet(alpha=alphas[1], rho=rho, max_iter=MAX_ITER)
	clf_1.fit(X, y)
	clf_4 = linear_model.ElasticNet(alpha=alphas[4], rho=rho, max_iter=MAX_ITER)
	clf_4.fit(X, y)
	assert_array_almost_equal(path_coefs[1], clf_1.coef_, 2)
	assert_array_almost_equal(path_coefs[4], clf_4.coef_, 2)