""" | |
Benchmarks of enet_coordinate_descent vs. enet_coordinate_descent | |
using the true solution as warm-start | |
First, we fix a training set and increase the number of | |
samples. Then we plot the computation time as function of | |
the number of samples. | |
In the second benchmark, we increase the number of dimensions of the | |
training set. Then we plot the computation time as function of | |
the number of dimensions. | |
In both cases, only 10% of the features are informative. | |
""" | |
import gc
from time import time
import numpy as np

from sklearn.datasets.samples_generator import make_regression
def compute_bench(alpha, rho, n_samples, n_features, precompute):
    # NOTE: `precompute` is currently unused.
    cold_start_results = []
    warm_start_results = []
    it = 0

    for ns in n_samples:
        for nf in n_features:
            it += 1
            print '=================='
            print 'Iteration %s of %s' % (it, max(len(n_samples),
                                                  len(n_features)))
            print '=================='
            n_informative = nf // 10
            X, y, coef_ = make_regression(n_samples=ns, n_features=nf,
                                          n_informative=n_informative,
                                          noise=0.1, coef=True)

            X /= np.sqrt(np.sum(X ** 2, axis=0))  # normalize columns to unit l2 norm

            l1_reg = alpha * rho * ns
            l2_reg = alpha * (1.0 - rho) * ns
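            # The Cython solver minimizes
            #     1/2 * ||y - X w||^2_2 + l1_reg * ||w||_1
            #         + 1/2 * l2_reg * ||w||^2_2,
            # hence the factor `ns`: ElasticNet's `alpha` is defined per
            # sample, while the solver works on the unscaled residual.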
            X = np.asfortranarray(X)  # cd_fast expects Fortran-ordered data
            w = np.zeros(nf)
            warm_start = coef_.copy()

            gc.collect()
            print "enet fit (cold start)"
            stime = time()
            enet_coordinate_descent(w, l1_reg, l2_reg,
                        X, y, max_iter=10000, tol=1e-9, positive=False)
            cold_start_results.append(time() - stime)

            gc.collect()
            print "enet fit, warm start with true solution"
            stime = time()
            enet_coordinate_descent(warm_start, l1_reg, l2_reg,
                        X, y, max_iter=10000, tol=1e-9, positive=False)
            warm_start_results.append(time() - stime)

    return cold_start_results, warm_start_results
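

# A hedged sketch (not part of the original benchmark): the same cold/warm
# comparison through the public estimator API. It assumes a scikit-learn
# release where ElasticNet exposes `l1_ratio` and `warm_start` (newer
# versions; the 2012-era API spelled `l1_ratio` as `rho`).
def compute_bench_public_api(X, y, coef_true, alpha, rho):
    from sklearn.linear_model import ElasticNet
    enet = ElasticNet(alpha=alpha, l1_ratio=rho, warm_start=True,
                      max_iter=10000, tol=1e-9)
    stime = time()
    enet.fit(X, y)                 # cold start: coefficients start at zero
    cold = time() - stime
    enet.coef_ = coef_true.copy()  # seed the next fit with the true solution
    stime = time()
    enet.fit(X, y)                 # warm start reuses enet.coef_
    return cold, time() - stime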
if __name__ == '__main__':
    from sklearn.linear_model.cd_fast import enet_coordinate_descent
    import pylab as pl

    alpha = 5.0  # regularization parameter
    rho = 0.95   # mixing parameter between l1 and l2 penalty

    # Benchmark 1: fixed number of features, growing number of samples.
    n_features = 10
    list_n_samples = np.linspace(100, 1000000, 5).astype(np.int)
    cold_start_results, warm_start_results = compute_bench(alpha, rho,
                            list_n_samples, [n_features], precompute=True)

    pl.clf()
    pl.subplot(211)
    pl.plot(list_n_samples, cold_start_results, 'b-',
            label='cold start')
    pl.plot(list_n_samples, warm_start_results, 'r-',
            label='warm start (true solution)')
    pl.title('Enet benchmark (%d features - alpha=%s)' % (n_features, alpha))
    pl.legend(loc='upper left')
    pl.xlabel('number of samples')
    pl.ylabel('time (in seconds)')
    pl.axis('tight')

    # Benchmark 2: fixed number of samples, growing number of features.
    n_samples = 2000
    list_n_features = np.linspace(500, 3000, 5).astype(np.int)
    cold_start_results, warm_start_results = compute_bench(alpha, rho,
                            [n_samples], list_n_features, precompute=False)

    pl.subplot(212)
    pl.plot(list_n_features, cold_start_results, 'b-', label='cold start')
    pl.plot(list_n_features, warm_start_results, 'r-',
            label='warm start (true solution)')
    pl.title('Enet benchmark (%d samples - alpha=%s)' % (n_samples, alpha))
    pl.legend(loc='upper left')
    pl.xlabel('number of features')
    pl.ylabel('time (in seconds)')
    pl.axis('tight')
    pl.show()