bench enet refactoring
@ibayer, created August 3, 2012
"""
Benchmarks of refactored against current enet implementation
First, we fix a training set and increase the number of
samples. Then we plot the computation time as function of
the number of samples.
In the second benchmark, we increase the number of dimensions of the
training set. Then we plot the computation time as function of
the number of dimensions.
In both cases, only 10% of the features are informative.
"""
import gc
from time import time

import numpy as np

from sklearn.datasets.samples_generator import make_regression
from numpy.testing import assert_almost_equal
def compute_bench(alpha, rho, n_samples, n_features, precompute):

    enet_results = []
    enet_ref_results = []

    n_test_samples = 0
    it = 0
    max_iter = 1000000
    tol = 1e-6

    for ns in n_samples:
        for nf in n_features:
            it += 1
            print '=================='
            print 'Iteration %s of %s' % (it, max(len(n_samples),
                                                  len(n_features)))
            print '=================='
            n_informative = nf // 10

            X, Y, coef_ = make_regression(n_samples=ns, n_features=nf,
                                          n_informative=n_informative,
                                          noise=0.1, coef=True)

            X /= np.sqrt(np.sum(X ** 2, axis=0))  # normalize columns to unit l2 norm
            X = np.asfortranarray(X)

            # map (alpha, rho) to the (l1, l2) parameterization used by the
            # coordinate descent solvers
            l1_reg = alpha * rho * ns
            l2_reg = alpha * (1.0 - rho) * ns
            w = np.zeros(nf)
            w_ref = np.zeros(nf)

            gc.collect()
            print "- benching Enet"
            # clf = ElasticNet(alpha=alpha, rho=rho, fit_intercept=False,
            #                  max_iter=max_iter, tol=tol, precompute=False)
            tstart = time()
            # clf.fit(X, Y)
            coef, dual_gap, eps = \
                enet_coordinate_descent(w, l1_reg, l2_reg, X, Y,
                                        max_iter=max_iter, tol=tol,
                                        positive=False)
            enet_results.append(time() - tstart)

            gc.collect()
            print "- benching refactored Enet"
            tstart = time()
            # clf.fit(X, Y)
            coef_ref, gap_ref, eps_ref = enet_cd(w_ref, l1_reg, l2_reg, X, Y,
                                                 max_iter=max_iter, tol=tol,
                                                 calc_dual_gap=True)
            enet_ref_results.append(time() - tstart)

            print "gap_ref: %s, dual_gap: %s" % (gap_ref, dual_gap)
            # both solvers should reach (almost) the same duality gap
            assert_almost_equal(gap_ref, dual_gap, 6)

    return enet_results, enet_ref_results
if __name__ == '__main__':
    # enet_cd comes from the refactoring branch under test; it is not part of
    # released scikit-learn
    from sklearn.linear_model import ElasticNet, enet_cd
    from sklearn.linear_model.cd_fast import enet_coordinate_descent
    import pylab as pl

    alpha = 0.5  # regularization parameter
    rho = 0.80
    n_features = 10
    list_n_samples = np.linspace(100, 1000000, 5).astype(np.int)

    enet_results, enet_ref_results = compute_bench(alpha, rho, list_n_samples,
                                                   [n_features],
                                                   precompute=True)

    pl.clf()
    pl.subplot(211)
    pl.plot(list_n_samples, enet_results, 'b-', label='enet')
    pl.plot(list_n_samples, enet_ref_results, 'r-', label='refactored enet')
    pl.title('enet benchmark (%d features - alpha=%s, rho=%s)'
             % (n_features, alpha, rho))
    pl.legend(loc='upper left')
    pl.xlabel('number of samples')
    pl.ylabel('time (in seconds)')
    pl.axis('tight')

    n_samples = 2000
    list_n_features = np.linspace(500, 3000, 5).astype(np.int)

    enet_results, enet_ref_results = compute_bench(alpha, rho, [n_samples],
                                                   list_n_features,
                                                   precompute=False)

    pl.subplot(212)
    pl.plot(list_n_features, enet_results, 'b-', label='enet')
    pl.plot(list_n_features, enet_ref_results, 'r-', label='refactored enet')
    pl.title('enet benchmark (%d samples - alpha=%s, rho=%s)'
             % (n_samples, alpha, rho))
    pl.legend(loc='upper left')
    pl.xlabel('number of features')
    pl.ylabel('time (in seconds)')
    pl.axis('tight')
    pl.show()
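
For reference, both enet_coordinate_descent and the refactored enet_cd minimize
the same objective, 0.5 * ||y - X w||^2 + l1_reg * ||w||_1 + 0.5 * l2_reg * ||w||^2,
by cyclic coordinate descent with soft-thresholding. The following is a minimal
pure-NumPy sketch of that update, written for this note (enet_cd_sketch is a
made-up name): it illustrates the technique, not the Cython implementations
being benchmarked, and it stops on relative coefficient change rather than on
the duality gap.

import numpy as np


def enet_cd_sketch(X, y, l1_reg, l2_reg, max_iter=1000, tol=1e-6):
    # naive cyclic coordinate descent for
    #   0.5 * ||y - X w||^2 + l1_reg * ||w||_1 + 0.5 * l2_reg * ||w||^2
    n_samples, n_features = X.shape
    w = np.zeros(n_features)
    norm_cols = (X ** 2).sum(axis=0)
    R = y - np.dot(X, w)  # residual, kept up to date incrementally below
    for _ in range(max_iter):
        w_max = 0.0
        d_w_max = 0.0
        for j in range(n_features):
            w_j = w[j]
            if w_j != 0.0:
                R += w_j * X[:, j]  # remove feature j's contribution
            rho_j = np.dot(X[:, j], R)  # correlation of feature j with residual
            # soft-thresholding update for coordinate j
            w[j] = (np.sign(rho_j) * max(abs(rho_j) - l1_reg, 0.0)
                    / (norm_cols[j] + l2_reg))
            if w[j] != 0.0:
                R -= w[j] * X[:, j]  # put the updated contribution back
            d_w_max = max(d_w_max, abs(w[j] - w_j))
            w_max = max(w_max, abs(w[j]))
        if w_max == 0.0 or d_w_max / w_max < tol:
            break
    return w

Since the benchmark normalizes the columns of X to unit l2 norm, norm_cols[j]
is 1 here and the denominator reduces to 1.0 + l2_reg.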
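
The assert_almost_equal check in compute_bench compares the duality gaps
reported by the two solvers. For this (l1_reg, l2_reg) parameterization the gap
can be computed roughly as follows; this is a sketch following the standard
elastic net dual, not code taken from either implementation.

import numpy as np


def enet_duality_gap(X, y, w, l1_reg, l2_reg):
    # duality gap for
    #   0.5 * ||y - X w||^2 + l1_reg * ||w||_1 + 0.5 * l2_reg * ||w||^2
    R = y - np.dot(X, w)  # residual
    XtA = np.dot(X.T, R) - l2_reg * w
    dual_norm_XtA = np.max(np.abs(XtA))
    R_norm2 = np.dot(R, R)
    w_norm2 = np.dot(w, w)
    # scale the residual so the corresponding dual point is feasible
    if dual_norm_XtA > l1_reg:
        const = l1_reg / dual_norm_XtA
        gap = 0.5 * (R_norm2 + const ** 2 * R_norm2)
    else:
        const = 1.0
        gap = R_norm2
    gap += (l1_reg * np.sum(np.abs(w)) - const * np.dot(R, y)
            + 0.5 * l2_reg * (1.0 + const ** 2) * w_norm2)
    return gap

A gap close to zero certifies that w is close to the optimum, which is why the
benchmark can use it to confirm both solvers reached solutions of the same
quality.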