from sklearn.datasets import make_regression
from sklearn.linear_model import Lasso
import numpy as np
from scipy.optimize import check_grad, minimize

X, y, w_true = make_regression(n_samples=200, n_features=500, random_state=0, coef=True)
w_init = np.ones_like(w_true)
lam = 10

def l(w, X, y, lam=10):
    # Lasso objective: mean squared error plus an L1 penalty
    return 0.5 * np.mean((np.dot(X, w) - y) ** 2) + lam * np.sum(np.abs(w))

def l_grad(w, X, y, lam=10):
    # Subgradient: sign(w) stands in for the gradient of |w|, which is undefined at 0
    residuals = np.dot(X, w) - y
    n = len(X)
    return 1 / n * np.dot(X.T, residuals) + lam * np.sign(w)

# Check I didn't screw up defining the subgradient: it should match the
# numerical gradient when w is far from zero:
print(check_grad(l, l_grad, w_init, X, y))

# The subgradient check should break down when w is near zero:
print(check_grad(l, l_grad, np.zeros_like(w_true), X, y))

# It should be fine near zero when lambda is zero:
print(check_grad(l, l_grad, np.zeros_like(w_true), X, y, 0))

# Lasso recovers a sparse solution:
lasso_model = Lasso(alpha=lam)
lasso_model.fit(X, y)
print(lasso_model.coef_)  # actually sparse

# Naively optimizing with the subgradient does not:
res = minimize(l, w_init, args=(X, y, lam), jac=l_grad)
print(res.x)  # the "zero" coefficients are very close to zero but not exactly zero,
              # which messes with backprop; it's even worse with float32