Created
July 25, 2012 20:41
-
-
Save ibayer/3178547 to your computer and use it in GitHub Desktop.
strong rule for enet
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
from scipy import linalg | |
MAX_ITER = 100 | |
# cdef double l1_reg = alpha * rho * n_samples | |
# cdef double l2_reg = alpha * (1.0 - rho) * n_samples | |
def enet_coordinate_descent(X, y, alpha, rho, warm_start=None, max_iter=MAX_ITER): | |
n_samples = X.shape[0] | |
l1_reg = alpha * rho * n_samples | |
l2_reg = alpha * (1.0 - rho) * n_samples | |
if warm_start is not None: | |
beta = warm_start | |
else: | |
beta = np.zeros(X.shape[1], dtype=np.float) | |
alpha = alpha * X.shape[0] | |
for _ in range(max_iter): | |
for i in range(X.shape[1]): | |
bb = beta.copy() | |
bb[i] = 0. | |
residual = np.dot(X[:, i], y - np.dot(X, bb).T) | |
beta[i] = np.sign(residual) * np.fmax(np.abs(residual) - l1_reg, 0) \ | |
/ (np.dot(X[:, i], X[:, i]) + l2_reg) | |
return beta | |
def shrinkage(X, y, l1_reg, l2_reg, beta, active_set, max_iter): | |
bb = beta.copy() | |
for _ in range(max_iter): | |
for i in active_set: | |
bb[i] = 0 | |
residual = np.dot(X[:, i], y - np.dot(X, bb).T) | |
bb[i] = np.sign(residual) * np.fmax(np.abs(residual) - l1_reg, 0) \ | |
/ (np.dot(X[:, i], X[:, i]) + l2_reg) | |
return bb | |
def enet_path(X, y, alphas, rho, max_iter=MAX_ITER, verbose=False): | |
""" | |
The strategy is described in "Strong rules for discarding predictors in lasso-type problems" | |
alphas must be an increasing sequence of regularization parameters | |
WARNING: does not compute intercept | |
""" | |
beta = np.zeros((len(alphas), X.shape[1]), dtype=np.float) | |
alphas_scaled = np.array(alphas).copy() * X.shape[0] | |
n_samples = X.shape[0] | |
active_set = np.arange(X.shape[1]).tolist() | |
for k, a in enumerate(alphas_scaled): | |
if verbose: | |
print 'Current active set ', active_set | |
if k > 0: | |
# .. Strong rules for discarding predictors in enet-type .. | |
tmp = np.abs(np.dot(X.T, y - np.dot(X, beta[k - 1]))) | |
strong_active_set = tmp < rho * (2 * alphas_scaled[k] - alphas_scaled[k - 1]) | |
strong_active_set = np.where(strong_active_set)[0] | |
else: | |
strong_active_set = np.arange(X.shape[1]) | |
l1_reg = a * rho | |
l2_reg = a * (1.0 - rho) | |
# solve for the current active set | |
beta[k] = shrinkage(X, y, l1_reg, l2_reg, beta[k], active_set, max_iter) | |
print "solve for the current active set " + str(beta) | |
# check KKT in the strong active set | |
kkt_violations = True | |
for i in strong_active_set: | |
tmp = np.dot(X[:, i], y - np.dot(X, beta[k])) | |
if beta[k, i] != 0 and not \ | |
np.allclose(tmp, np.abs(l1_reg + l2_reg * beta[k, i])): | |
active_set.append(i) | |
if beta[k, i] == 0 and abs(tmp) >= np.abs(l1_reg + l2_reg * beta[k, i]): | |
active_set.append(i) | |
else: | |
# passed KKT for all variables in strong active set, we're done | |
active_set = np.where(beta[k] != 0)[0].tolist() | |
kkt_violations = False | |
if verbose: | |
print 'No KKT violations on active set' | |
# .. recompute with new active set | |
if kkt_violations: | |
if verbose: | |
print 'KKT violated on strong active set' | |
beta[k] = shrinkage(X, y, l1_reg, l2_reg, beta[k], active_set, max_iter) | |
# .. check KKT on all predictors .. | |
kkt_violations = True | |
for i in range(X.shape[1]): | |
tmp = np.dot(X[:, i], y - np.dot(X, beta[k])) | |
if beta[k, i] != 0 and tmp != np.abs(l1_reg + l2_reg * beta[k, i]): | |
active_set.append(i) | |
if beta[k, i] == 0 and abs(tmp) >= np.abs(l1_reg + l2_reg * beta[k, i]): | |
active_set.append(i) | |
else: | |
# passed KKT for all variables, we're done | |
active_set = np.where(beta[k] != 0)[0].tolist() | |
kkt_violations = False | |
if kkt_violations: | |
if verbose: | |
print 'KKT violated on full active set' | |
beta[k] = shrinkage(X, y, l1_reg, l2_reg, beta[k], active_set, max_iter) | |
return beta | |
def check_kkt_lasso(xr, coef, penalty, tol=1e-3): | |
""" | |
Check KKT conditions for Lasso | |
xr : X'(y - X coef) | |
""" | |
nonzero = (coef != 0) | |
return np.all(np.abs(xr[nonzero] - np.sign(coef[nonzero]) * penalty) < tol) \ | |
and np.all(np.abs(xr[~nonzero] / penalty) <= 1) | |
if __name__ == '__main__': | |
np.random.seed(0) | |
from sklearn import datasets | |
diabetes = datasets.load_diabetes() | |
X = diabetes.data | |
y = diabetes.target | |
#print l1_coordinate_descent(X, y, .001) | |
print enet_path(X, y, np.linspace(.1, 2., 5), rho=0.5, verbose=True) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import strong_rule_enet as enet | |
import strong_rule_lasso as lasso | |
import numpy as np | |
from sklearn import datasets | |
from sklearn import linear_model | |
from numpy.testing import assert_array_almost_equal, assert_almost_equal, \ | |
assert_equal | |
from sklearn.datasets.samples_generator import make_regression | |
np.random.seed(0) | |
diabetes = datasets.load_diabetes() | |
X = diabetes.data | |
y = diabetes.target | |
MAX_ITER = 100 | |
def test_lasso_coordinate_descent(): | |
X, y = make_regression(n_samples=200, n_features=50, n_informative=5, | |
random_state=0) | |
clf = linear_model.Lasso(alpha=0.001, max_iter=MAX_ITER) | |
clf.fit(X, y) | |
coef = lasso.l1_coordinate_descent(X, y, .001, max_iter=MAX_ITER) | |
assert_array_almost_equal(coef, clf.coef_, 3) | |
def test_enet_coordinate_descent(): | |
MAX_ITER = 100 | |
X, y = make_regression(n_samples=200, n_features=50, n_informative=5, | |
random_state=0) | |
clf = linear_model.ElasticNet(alpha=0.001, rho=0.7, max_iter=MAX_ITER) | |
clf.fit(X, y) | |
coef = enet.enet_coordinate_descent(X, y, alpha=0.001, rho=0.7, max_iter=MAX_ITER) | |
assert_array_almost_equal(coef, clf.coef_, 3) | |
def test_lasso_path(): | |
MAX_ITER = 100 | |
X, y = make_regression(n_samples=50, n_features=200, n_informative=5, | |
random_state=0) | |
alphas = np.linspace(.1, 2., 5) | |
path_coefs = lasso.l1_path(X, y, alphas, max_iter=MAX_ITER, verbose=True) | |
clf_1 = linear_model.Lasso(alpha=alphas[1], max_iter=MAX_ITER) | |
clf_1.fit(X, y) | |
clf_4 = linear_model.Lasso(alpha=alphas[4], max_iter=MAX_ITER) | |
clf_4.fit(X, y) | |
assert_array_almost_equal(path_coefs[1], clf_1.coef_, 1) | |
assert_array_almost_equal(path_coefs[4], clf_4.coef_, 1) | |
def test_enet_path_one_step(): | |
MAX_ITER = 100 | |
X, y = make_regression(n_samples=50, n_features=200, n_informative=5, | |
random_state=0) | |
rho = 0.9 | |
alphas = np.linspace(.1, 2., 5) | |
path_coefs = enet.enet_path(X, y, alphas, rho,max_iter=MAX_ITER, verbose=True) | |
clf_0 = linear_model.ElasticNet(alpha=alphas[0], rho=rho, max_iter=MAX_ITER) | |
clf_0.fit(X, y) | |
assert_array_almost_equal(path_coefs[0], clf_0.coef_, 2) | |
def test_enet_path(): | |
MAX_ITER = 100 | |
X, y = make_regression(n_samples=50, n_features=200, n_informative=5, | |
random_state=0) | |
rho = 0.9 | |
alphas = np.linspace(.1, 2., 5) | |
path_coefs = enet.enet_path(X, y, alphas, rho,max_iter=MAX_ITER, verbose=True) | |
clf_1 = linear_model.ElasticNet(alpha=alphas[1], rho=rho, max_iter=MAX_ITER) | |
clf_1.fit(X, y) | |
clf_4 = linear_model.ElasticNet(alpha=alphas[4], rho=rho, max_iter=MAX_ITER) | |
clf_4.fit(X, y) | |
assert_array_almost_equal(path_coefs[1], clf_1.coef_, 1) | |
assert_array_almost_equal(path_coefs[4], clf_4.coef_, 1) | |
def test_lasso_path_big_data(): | |
MAX_ITER = 100 | |
X, y = make_regression(n_samples=50, n_features=200, n_informative=5, | |
random_state=0) | |
alphas = np.linspace(.1, 2., 5) | |
path_coefs = lasso.l1_path(X, y, alphas, max_iter=MAX_ITER, verbose=True) | |
clf_1 = linear_model.Lasso(alpha=alphas[1], max_iter=MAX_ITER) | |
clf_1.fit(X, y) | |
clf_4 = linear_model.Lasso(alpha=alphas[4], max_iter=MAX_ITER) | |
clf_4.fit(X, y) | |
assert_array_almost_equal(path_coefs[1], clf_1.coef_, 2) | |
assert_array_almost_equal(path_coefs[4], clf_4.coef_, 2) | |
def test_enet_path_big_data(): | |
MAX_ITER = 100 | |
X, y = make_regression(n_samples=50, n_features=200, n_informative=5, | |
random_state=0) | |
rho = 0.9 | |
alphas = np.linspace(.1, 2., 5) | |
path_coefs = enet.enet_path(X, y, alphas, rho,max_iter=MAX_ITER, verbose=True) | |
clf_1 = linear_model.ElasticNet(alpha=alphas[1], rho=rho, max_iter=MAX_ITER) | |
clf_1.fit(X, y) | |
clf_4 = linear_model.ElasticNet(alpha=alphas[4], rho=rho, max_iter=MAX_ITER) | |
clf_4.fit(X, y) | |
assert_array_almost_equal(path_coefs[1], clf_1.coef_, 2) | |
assert_array_almost_equal(path_coefs[4], clf_4.coef_, 2) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment