@ibayer · Created July 25, 2012 20:41
strong rule for enet
import numpy as np
from scipy import linalg
MAX_ITER = 100
# cdef double l1_reg = alpha * rho * n_samples
# cdef double l2_reg = alpha * (1.0 - rho) * n_samples
def enet_coordinate_descent(X, y, alpha, rho, warm_start=None, max_iter=MAX_ITER):
    n_samples = X.shape[0]
    l1_reg = alpha * rho * n_samples
    l2_reg = alpha * (1.0 - rho) * n_samples
    if warm_start is not None:
        beta = warm_start
    else:
        beta = np.zeros(X.shape[1], dtype=np.float64)
    for _ in range(max_iter):
        for i in range(X.shape[1]):
            # residual correlation with coordinate i left out
            bb = beta.copy()
            bb[i] = 0.
            residual = np.dot(X[:, i], y - np.dot(X, bb))
            # soft-thresholding update for the elastic net coordinate problem
            beta[i] = np.sign(residual) * np.fmax(np.abs(residual) - l1_reg, 0) \
                / (np.dot(X[:, i], X[:, i]) + l2_reg)
    return beta
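
# A minimal sanity-check sketch (not part of the original gist): on a design
# with orthonormal columns, the coordinate update above has the closed form
# beta_i = sign(x_i'y) * max(|x_i'y| - l1_reg, 0) / (1 + l2_reg), so a single
# call to enet_coordinate_descent should reproduce it to machine precision.
# The helper name and test values below are illustrative assumptions.
def _demo_orthonormal_soft_threshold(alpha=0.1, rho=0.5):
    rng = np.random.RandomState(0)
    Q, _ = linalg.qr(rng.randn(20, 5), mode='economic')  # orthonormal columns
    y_demo = rng.randn(20)
    l1_reg = alpha * rho * Q.shape[0]
    l2_reg = alpha * (1.0 - rho) * Q.shape[0]
    corr = np.dot(Q.T, y_demo)
    closed_form = np.sign(corr) * np.fmax(np.abs(corr) - l1_reg, 0) / (1.0 + l2_reg)
    assert np.allclose(enet_coordinate_descent(Q, y_demo, alpha, rho), closed_form)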
def shrinkage(X, y, l1_reg, l2_reg, beta, active_set, max_iter):
    """Coordinate descent restricted to the coordinates in active_set."""
    bb = beta.copy()
    for _ in range(max_iter):
        for i in active_set:
            bb[i] = 0
            residual = np.dot(X[:, i], y - np.dot(X, bb))
            bb[i] = np.sign(residual) * np.fmax(np.abs(residual) - l1_reg, 0) \
                / (np.dot(X[:, i], X[:, i]) + l2_reg)
    return bb
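
# Usage sketch (an assumption, not in the original gist): shrinkage only
# updates the coordinates listed in active_set; every other coordinate keeps
# its warm-start value. This is what makes strong-rule screening pay off.
def _demo_shrinkage_active_set():
    rng = np.random.RandomState(1)
    X_demo, y_demo = rng.randn(30, 4), rng.randn(30)
    beta0 = np.full(4, 0.5)
    out = shrinkage(X_demo, y_demo, l1_reg=1.0, l2_reg=1.0,
                    beta=beta0, active_set=[0, 2], max_iter=10)
    assert out[1] == 0.5 and out[3] == 0.5  # untouched coordinates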
def enet_path(X, y, alphas, rho, max_iter=MAX_ITER, verbose=False):
    """
    Compute the elastic net path with sequential strong rule screening.

    The strategy is described in "Strong rules for discarding predictors
    in lasso-type problems" (Tibshirani et al., 2012).

    alphas must be an increasing sequence of regularization parameters.

    WARNING: does not compute the intercept.
    """
    beta = np.zeros((len(alphas), X.shape[1]), dtype=np.float64)
    alphas_scaled = np.array(alphas).copy() * X.shape[0]
    active_set = np.arange(X.shape[1]).tolist()
    for k, a in enumerate(alphas_scaled):
        if verbose:
            print('Current active set ', active_set)
        if k > 0:
            # .. strong rule for discarding predictors in enet-type problems:
            # predictors screened out here are the ones re-checked against
            # the KKT conditions below ..
            tmp = np.abs(np.dot(X.T, y - np.dot(X, beta[k - 1])))
            strong_active_set = tmp < rho * (2 * alphas_scaled[k]
                                             - alphas_scaled[k - 1])
            strong_active_set = np.where(strong_active_set)[0]
        else:
            strong_active_set = np.arange(X.shape[1])
        l1_reg = a * rho
        l2_reg = a * (1.0 - rho)
        # .. solve for the current active set ..
        beta[k] = shrinkage(X, y, l1_reg, l2_reg, beta[k], active_set, max_iter)
        if verbose:
            print('solved for the current active set; beta[k] =', beta[k])
        # .. check KKT in the strong active set ..
        kkt_violations = False
        for i in strong_active_set:
            tmp = np.dot(X[:, i], y - np.dot(X, beta[k]))
            # stationarity: x_i'(y - X beta) = l1_reg * sign(beta_i) + l2_reg * beta_i
            if beta[k, i] != 0 and not \
                    np.allclose(tmp, l1_reg * np.sign(beta[k, i])
                                + l2_reg * beta[k, i]):
                active_set.append(i)
                kkt_violations = True
            # subgradient condition at zero: |x_i'(y - X beta)| <= l1_reg
            if beta[k, i] == 0 and abs(tmp) > l1_reg:
                active_set.append(i)
                kkt_violations = True
        if not kkt_violations:
            # passed KKT for all variables in strong active set
            active_set = np.where(beta[k] != 0)[0].tolist()
            if verbose:
                print('No KKT violations on strong active set')
        else:
            # .. recompute with the enlarged active set ..
            if verbose:
                print('KKT violated on strong active set')
            beta[k] = shrinkage(X, y, l1_reg, l2_reg, beta[k], active_set,
                                max_iter)
        # .. check KKT on all predictors ..
        kkt_violations = False
        for i in range(X.shape[1]):
            tmp = np.dot(X[:, i], y - np.dot(X, beta[k]))
            if beta[k, i] != 0 and not \
                    np.allclose(tmp, l1_reg * np.sign(beta[k, i])
                                + l2_reg * beta[k, i]):
                active_set.append(i)
                kkt_violations = True
            if beta[k, i] == 0 and abs(tmp) > l1_reg:
                active_set.append(i)
                kkt_violations = True
        if not kkt_violations:
            # passed KKT for all variables, we're done with this alpha
            active_set = np.where(beta[k] != 0)[0].tolist()
        else:
            if verbose:
                print('KKT violated on the full predictor set')
            beta[k] = shrinkage(X, y, l1_reg, l2_reg, beta[k], active_set,
                                max_iter)
    return beta
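
# Illustrative sketch (hypothetical helper, not in the original gist): the
# sequential screen used in enet_path drops predictor j at alpha_k whenever
#     |x_j'(y - X beta(alpha_{k-1}))| < rho * (2 * alpha_k - alpha_{k-1})
# (with alphas scaled by n_samples). This just counts how many predictors
# the rule would discard when moving between two consecutive alphas.
def _demo_strong_rule_screen(X, y, alpha_prev, alpha_cur, rho, beta_prev):
    n_samples = X.shape[0]
    resid_corr = np.abs(np.dot(X.T, y - np.dot(X, beta_prev)))
    threshold = rho * (2 * alpha_cur - alpha_prev) * n_samples
    return np.sum(resid_corr < threshold)  # number of discarded predictors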
def check_kkt_lasso(xr, coef, penalty, tol=1e-3):
    """
    Check KKT conditions for the Lasso.

    xr : X'(y - X coef)
    """
    nonzero = (coef != 0)
    return np.all(np.abs(xr[nonzero] - np.sign(coef[nonzero]) * penalty) < tol) \
        and np.all(np.abs(xr[~nonzero] / penalty) <= 1)
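
# Usage sketch (an assumption, not in the original gist): check_kkt_lasso can
# verify a scikit-learn Lasso fit. sklearn minimizes
# (1 / (2 * n_samples)) * ||y - X w||^2 + alpha * ||w||_1, so the penalty in
# the unscaled KKT system is alpha * n_samples. With a tight solver tolerance
# this should print True.
def _demo_check_kkt_lasso():
    from sklearn import linear_model
    rng = np.random.RandomState(2)
    X_demo, y_demo = rng.randn(100, 10), rng.randn(100)
    clf = linear_model.Lasso(alpha=0.1, fit_intercept=False,
                             tol=1e-10, max_iter=100000)
    clf.fit(X_demo, y_demo)
    xr = np.dot(X_demo.T, y_demo - np.dot(X_demo, clf.coef_))
    print(check_kkt_lasso(xr, clf.coef_, penalty=0.1 * X_demo.shape[0]))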
if __name__ == '__main__':
    np.random.seed(0)
    from sklearn import datasets
    diabetes = datasets.load_diabetes()
    X = diabetes.data
    y = diabetes.target
    # print(l1_coordinate_descent(X, y, .001))
    print(enet_path(X, y, np.linspace(.1, 2., 5), rho=0.5, verbose=True))
# ---------------------------------------------------------------------------
# Tests (second gist file; imports the module above as strong_rule_enet and
# its lasso counterpart as strong_rule_lasso)
# ---------------------------------------------------------------------------
import strong_rule_enet as enet
import strong_rule_lasso as lasso

import numpy as np
from sklearn import datasets
from sklearn import linear_model
from sklearn.datasets import make_regression
from numpy.testing import assert_array_almost_equal

np.random.seed(0)
diabetes = datasets.load_diabetes()
X = diabetes.data
y = diabetes.target
MAX_ITER = 100
def test_lasso_coordinate_descent():
    X, y = make_regression(n_samples=200, n_features=50, n_informative=5,
                           random_state=0)
    clf = linear_model.Lasso(alpha=0.001, max_iter=MAX_ITER)
    clf.fit(X, y)
    coef = lasso.l1_coordinate_descent(X, y, .001, max_iter=MAX_ITER)
    assert_array_almost_equal(coef, clf.coef_, 3)
def test_enet_coordinate_descent():
    MAX_ITER = 100
    X, y = make_regression(n_samples=200, n_features=50, n_informative=5,
                           random_state=0)
    clf = linear_model.ElasticNet(alpha=0.001, l1_ratio=0.7, max_iter=MAX_ITER)
    clf.fit(X, y)
    coef = enet.enet_coordinate_descent(X, y, alpha=0.001, rho=0.7,
                                        max_iter=MAX_ITER)
    assert_array_almost_equal(coef, clf.coef_, 3)
def test_lasso_path():
    MAX_ITER = 100
    X, y = make_regression(n_samples=50, n_features=200, n_informative=5,
                           random_state=0)
    alphas = np.linspace(.1, 2., 5)
    path_coefs = lasso.l1_path(X, y, alphas, max_iter=MAX_ITER, verbose=True)
    clf_1 = linear_model.Lasso(alpha=alphas[1], max_iter=MAX_ITER)
    clf_1.fit(X, y)
    clf_4 = linear_model.Lasso(alpha=alphas[4], max_iter=MAX_ITER)
    clf_4.fit(X, y)
    assert_array_almost_equal(path_coefs[1], clf_1.coef_, 1)
    assert_array_almost_equal(path_coefs[4], clf_4.coef_, 1)
def test_enet_path_one_step():
    MAX_ITER = 100
    X, y = make_regression(n_samples=50, n_features=200, n_informative=5,
                           random_state=0)
    rho = 0.9
    alphas = np.linspace(.1, 2., 5)
    path_coefs = enet.enet_path(X, y, alphas, rho, max_iter=MAX_ITER,
                                verbose=True)
    clf_0 = linear_model.ElasticNet(alpha=alphas[0], l1_ratio=rho,
                                    max_iter=MAX_ITER)
    clf_0.fit(X, y)
    assert_array_almost_equal(path_coefs[0], clf_0.coef_, 2)
def test_enet_path():
    MAX_ITER = 100
    X, y = make_regression(n_samples=50, n_features=200, n_informative=5,
                           random_state=0)
    rho = 0.9
    alphas = np.linspace(.1, 2., 5)
    path_coefs = enet.enet_path(X, y, alphas, rho, max_iter=MAX_ITER,
                                verbose=True)
    clf_1 = linear_model.ElasticNet(alpha=alphas[1], l1_ratio=rho,
                                    max_iter=MAX_ITER)
    clf_1.fit(X, y)
    clf_4 = linear_model.ElasticNet(alpha=alphas[4], l1_ratio=rho,
                                    max_iter=MAX_ITER)
    clf_4.fit(X, y)
    assert_array_almost_equal(path_coefs[1], clf_1.coef_, 1)
    assert_array_almost_equal(path_coefs[4], clf_4.coef_, 1)
def test_lasso_path_big_data():
    MAX_ITER = 100
    X, y = make_regression(n_samples=50, n_features=200, n_informative=5,
                           random_state=0)
    alphas = np.linspace(.1, 2., 5)
    path_coefs = lasso.l1_path(X, y, alphas, max_iter=MAX_ITER, verbose=True)
    clf_1 = linear_model.Lasso(alpha=alphas[1], max_iter=MAX_ITER)
    clf_1.fit(X, y)
    clf_4 = linear_model.Lasso(alpha=alphas[4], max_iter=MAX_ITER)
    clf_4.fit(X, y)
    assert_array_almost_equal(path_coefs[1], clf_1.coef_, 2)
    assert_array_almost_equal(path_coefs[4], clf_4.coef_, 2)
def test_enet_path_big_data():
    MAX_ITER = 100
    X, y = make_regression(n_samples=50, n_features=200, n_informative=5,
                           random_state=0)
    rho = 0.9
    alphas = np.linspace(.1, 2., 5)
    path_coefs = enet.enet_path(X, y, alphas, rho, max_iter=MAX_ITER,
                                verbose=True)
    clf_1 = linear_model.ElasticNet(alpha=alphas[1], l1_ratio=rho,
                                    max_iter=MAX_ITER)
    clf_1.fit(X, y)
    clf_4 = linear_model.ElasticNet(alpha=alphas[4], l1_ratio=rho,
                                    max_iter=MAX_ITER)
    clf_4.fit(X, y)
    assert_array_almost_equal(path_coefs[1], clf_1.coef_, 2)
    assert_array_almost_equal(path_coefs[4], clf_4.coef_, 2)