Skip to content

Instantly share code, notes, and snippets.

@bombol
Last active June 27, 2016 18:53
Show Gist options
  • Select an option

  • Save bombol/8e8f363f7000169a4e42d34b76126f68 to your computer and use it in GitHub Desktop.

Select an option

Save bombol/8e8f363f7000169a4e42d34b76126f68 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 107,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"\n",
"import numpy as np\n",
"import theano\n",
"import theano.tensor as T\n",
"import lasagne\n",
" \n",
"from sklearn.datasets import load_iris\n",
"from sklearn.cross_validation import train_test_split\n",
"\n",
"# Load and store features as X and targets as y\n",
"iris = load_iris()\n",
"X = iris.data\n",
"y = iris.target"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Here we set one of the conditionals to True if we wish to do binary classification"
]
},
{
"cell_type": "code",
"execution_count": 108,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# default is to do 3-class classification\n",
"y_ = y; X_ = X\n",
"nonlin = lasagne.nonlinearities.softmax \n",
"loss_fn = lasagne.objectives.categorical_crossentropy\n",
"units = 3\n",
"\n",
"# IF THIS IS TRUE, WE ELIMINATE THE CLASS WHERE Y=2 AND DO BINARY CLASSIFICATION\n",
"# WITH SIGMOID AND BINARY CROSS ENTROPY\n",
"if True:\n",
" y_ = y[y<2].reshape(-1,1)\n",
" X_ = X[y<2]\n",
" units = 1\n",
" nonlin = lasagne.nonlinearities.sigmoid\n",
" loss_fn = lasagne.objectives.binary_crossentropy\n",
" \n",
"# IF THIS IS TRUE, WE ELIMINATE THE CLASS WHERE Y=2 AND DO BINARY CLASSIFICATION\n",
"# WITH SOFTMAX AND CATEGORICAL CROSS ENTROPY\n",
"if False:\n",
" y_ = y[y<2]\n",
" X_ = X[y<2]\n",
" units = 2\n",
" #nonlin = lasagne.nonlinearities.softmax\n",
" #loss_fn = lasagne.objectives.categorical_crossentropy\n",
" "
]
},
{
"cell_type": "code",
"execution_count": 109,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"array([[0],\n",
" [1],\n",
" [0],\n",
" [1],\n",
" [0]])"
]
},
"execution_count": 109,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\n",
"X_train, X_test, y_train, y_test = train_test_split(X_, y_, train_size=0.8)\n",
"X_val = X_test[:15]\n",
"y_val = y_test[:15]\n",
"X_test = X_test[15:]\n",
"y_test = y_test[15:]\n",
"\n",
"y_test # print y values to make sure they are the proper form"
]
},
{
"cell_type": "code",
"execution_count": 110,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def batch_gen(X, y, N):\n",
" while True:\n",
" idx = np.random.choice(len(y), N)\n",
" yield X[idx].astype('float32'), y[idx].astype('int32')"
]
},
{
"cell_type": "code",
"execution_count": 111,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"\n",
"# 4-dim vector on input\n",
"l_in = lasagne.layers.InputLayer((None, 4))\n",
"# 3-dim ivector on output\n",
"l_out = lasagne.layers.DenseLayer(l_in, num_units=units, nonlinearity=nonlin)\n",
" \n",
"X_sym = T.matrix('X')\n",
"y_sym = T.ivector('y')\n",
" \n",
"# Expression for the output distribution\n",
"output = lasagne.layers.get_output(l_out, X_sym)\n",
"pred = output.argmax(-1)\n",
" \n",
"# Loss function\n",
"loss = T.mean(loss_fn(output, y_sym))\n",
"acc = T.mean(T.eq(pred, y_sym))\n",
" \n",
"# We retrieve the parameters\n",
"params = lasagne.layers.get_all_params(l_out)\n",
" \n",
"# Compute the gradient of the loss function with respect to the parameters.\n",
"# The stochastic gradient descent updates the parameters\n",
"grad = T.grad(loss, params)\n",
"updates = lasagne.updates.sgd(grad, params, learning_rate=0.05)\n",
" \n",
"# Define a training function\n",
"f_train = theano.function([X_sym, y_sym], [loss, acc], updates=updates)\n",
" \n",
"# A validation function, similar but it doesn't alter the parameters\n",
"f_val = theano.function([X_sym, y_sym], [loss, acc])\n",
" \n",
"# Prediction function,\n",
"f_predict = theano.function([X_sym], pred)\n",
" \n"
]
},
{
"cell_type": "code",
"execution_count": 112,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Batch size choice and the number of batches per epoch\n",
"BATCH_SIZE = 5\n",
"N_BATCHES = len(X_train) // BATCH_SIZE\n",
"N_VAL_BATCHES = len(X_val) // BATCH_SIZE\n",
" \n",
"# Minibatch generators\n",
"train_batches = batch_gen(X_train, y_train, BATCH_SIZE)\n",
"val_batches = batch_gen(X_val, y_val, BATCH_SIZE)\n"
]
},
{
"cell_type": "code",
"execution_count": 113,
"metadata": {
"collapsed": false
},
"outputs": [
{
"ename": "TypeError",
"evalue": "('Bad input argument to theano function with name \"<ipython-input-111-ba0ce04c1bbb>:27\" at index 1 (0-based)', 'Wrong number of dimensions: expected 1, got 2 with shape (5, 1).')",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m<ipython-input-113-1ed1b25fbf6c>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m 5\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0m_\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mN_BATCHES\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 6\u001b[0m \u001b[0mX\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnext\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtrain_batches\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 7\u001b[1;33m \u001b[0mloss\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0macc\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mf_train\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mX\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 8\u001b[0m \u001b[0mtrain_loss\u001b[0m \u001b[1;33m+=\u001b[0m \u001b[0mloss\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 9\u001b[0m \u001b[0mtrain_acc\u001b[0m \u001b[1;33m+=\u001b[0m \u001b[0macc\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m/home/syedtahin/anaconda2/lib/python2.7/site-packages/theano/compile/function_module.pyc\u001b[0m in \u001b[0;36m__call__\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m 819\u001b[0m s.storage[0] = s.type.filter(\n\u001b[0;32m 820\u001b[0m \u001b[0marg\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mstrict\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0ms\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstrict\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 821\u001b[1;33m allow_downcast=s.allow_downcast)\n\u001b[0m\u001b[0;32m 822\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 823\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mException\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m/home/syedtahin/anaconda2/lib/python2.7/site-packages/theano/tensor/type.pyc\u001b[0m in \u001b[0;36mfilter\u001b[1;34m(self, data, strict, allow_downcast)\u001b[0m\n\u001b[0;32m 176\u001b[0m raise TypeError(\"Wrong number of dimensions: expected %s,\"\n\u001b[0;32m 177\u001b[0m \" got %s with shape %s.\" % (self.ndim, data.ndim,\n\u001b[1;32m--> 178\u001b[1;33m data.shape))\n\u001b[0m\u001b[0;32m 179\u001b[0m \u001b[1;32mif\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0mdata\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mflags\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0maligned\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 180\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;31mTypeError\u001b[0m: ('Bad input argument to theano function with name \"<ipython-input-111-ba0ce04c1bbb>:27\" at index 1 (0-based)', 'Wrong number of dimensions: expected 1, got 2 with shape (5, 1).')"
]
}
],
"source": [
"\n",
"for epoch in range(10):\n",
" train_loss = 0\n",
" train_acc = 0\n",
" for _ in range(N_BATCHES):\n",
" X, y = next(train_batches)\n",
" loss, acc = f_train(X, y)\n",
" train_loss += loss\n",
" train_acc += acc\n",
" train_loss /= N_BATCHES\n",
" train_acc /= N_BATCHES\n",
" \n",
" val_loss = 0\n",
" val_acc = 0\n",
" \n",
" for _ in range(N_BATCHES):\n",
" X, y = next(train_batches)\n",
" loss, acc = f_val(X, y)\n",
" val_loss += loss\n",
" val_acc += acc\n",
" val_loss /= N_BATCHES\n",
" val_acc /= N_BATCHES\n",
" \n",
" print('Epoch {}, Train (val) loss {:.03f} ({:.03f}) ratio {:.03f}'.format(\n",
" epoch, train_loss, val_loss, val_loss/train_loss))\n",
" print('Train (val) accuracy {:.03f} ({:.03f})'.format(train_acc, val_acc))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### ORIGINAL COMPLETE EXAMPLE"
]
},
{
"cell_type": "code",
"execution_count": 91,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 0, Train (val) loss 0.827 (0.804) ratio 0.972\n",
"Train (val) accuracy 0.650 (0.692)\n",
"Epoch 1, Train (val) loss 0.518 (0.562) ratio 1.085\n",
"Train (val) accuracy 0.750 (0.667)\n",
"Epoch 2, Train (val) loss 0.641 (0.683) ratio 1.066\n",
"Train (val) accuracy 0.617 (0.708)\n",
"Epoch 3, Train (val) loss 0.630 (0.503) ratio 0.799\n",
"Train (val) accuracy 0.650 (0.650)\n",
"Epoch 4, Train (val) loss 0.513 (0.637) ratio 1.241\n",
"Train (val) accuracy 0.742 (0.700)\n",
"Epoch 5, Train (val) loss 0.509 (0.668) ratio 1.311\n",
"Train (val) accuracy 0.792 (0.700)\n",
"Epoch 6, Train (val) loss 0.497 (0.459) ratio 0.924\n",
"Train (val) accuracy 0.750 (0.667)\n",
"Epoch 7, Train (val) loss 0.567 (0.363) ratio 0.640\n",
"Train (val) accuracy 0.683 (0.908)\n",
"Epoch 8, Train (val) loss 0.450 (0.378) ratio 0.839\n",
"Train (val) accuracy 0.817 (0.767)\n",
"Epoch 9, Train (val) loss 0.481 (0.465) ratio 0.967\n",
"Train (val) accuracy 0.733 (0.633)\n"
]
}
],
"source": [
"\n",
"import numpy as np\n",
"import theano\n",
"import theano.tensor as T\n",
"import lasagne\n",
"\n",
"from sklearn.datasets import load_iris\n",
"from sklearn.cross_validation import train_test_split\n",
"\n",
"\n",
"def to_categorical(y, nb_classes=None):\n",
" y = np.asarray(y, dtype='int32')\n",
" if not nb_classes:\n",
" nb_classes = np.max(y)+1\n",
" Y = np.zeros((len(y), nb_classes))\n",
" for i in range(len(y)):\n",
" Y[i, y[i]] = 1.\n",
" return Y\n",
"\n",
"# Load and store features as X and targets as y\n",
"iris = load_iris()\n",
"X = iris.data\n",
"y = iris.target\n",
"\n",
"X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8)\n",
"X_val = X_test[:15]\n",
"y_val = y_test[:15]\n",
"X_test = X_test[15:]\n",
"y_test = y_test[15:]\n",
"\n",
"# Categorize them for use in categorical cross-entropy\n",
"categorized_train = np.asarray(y_train, dtype='int32')\n",
"categorized_test = np.asarray(y_test, dtype='int32')\n",
"categorized_val = np.asarray(y_val, dtype='int32')\n",
"\n",
"def batch_gen(X, y, N):\n",
" while True:\n",
" idx = np.random.choice(len(y), N)\n",
" yield X[idx].astype('float32'), y[idx].astype('int32')\n",
"\n",
"\n",
"# 4-dim vector on input\n",
"l_in = lasagne.layers.InputLayer((None, 4))\n",
"# 3-dim ivector on output\n",
"l_out = lasagne.layers.DenseLayer(l_in, num_units=3, nonlinearity=lasagne.nonlinearities.softmax)\n",
"\n",
"X_sym = T.matrix('X')\n",
"y_sym = T.ivector('y')\n",
"\n",
"# Expression for the output distribution\n",
"output = lasagne.layers.get_output(l_out, X_sym)\n",
"pred = output.argmax(-1)\n",
"\n",
"# Loss function\n",
"loss = T.mean(lasagne.objectives.categorical_crossentropy(output, y_sym))\n",
"acc = T.mean(T.eq(pred, y_sym))\n",
"\n",
"# We retrieve the parameters\n",
"params = lasagne.layers.get_all_params(l_out)\n",
"\n",
"# Compute the gradient of the loss function with respect to the parameters.\n",
"# The stochastic gradient descent updates the parameters\n",
"grad = T.grad(loss, params)\n",
"updates = lasagne.updates.sgd(grad, params, learning_rate=0.05)\n",
"\n",
"# Define a training function\n",
"f_train = theano.function([X_sym, y_sym], [loss, acc], updates=updates)\n",
"\n",
"# A validation function, similar but it doesn't alter the parameters\n",
"f_val = theano.function([X_sym, y_sym], [loss, acc])\n",
"\n",
"# Prediction function,\n",
"f_predict = theano.function([X_sym], pred)\n",
"\n",
"# Batch size choice and the number of batches per epoch\n",
"BATCH_SIZE = 5\n",
"N_BATCHES = len(X_train) // BATCH_SIZE\n",
"N_VAL_BATCHES = len(X_val) // BATCH_SIZE\n",
"\n",
"# Minibatch generators\n",
"train_batches = batch_gen(X_train, categorized_train, BATCH_SIZE)\n",
"val_batches = batch_gen(X_val, categorized_val, BATCH_SIZE)\n",
"\n",
"for epoch in range(10):\n",
" train_loss = 0\n",
" train_acc = 0\n",
" for _ in range(N_BATCHES):\n",
" X, y = next(train_batches)\n",
" loss, acc = f_train(X, y)\n",
" train_loss += loss\n",
" train_acc += acc\n",
" train_loss /= N_BATCHES\n",
" train_acc /= N_BATCHES\n",
"\n",
" val_loss = 0\n",
" val_acc = 0\n",
"\n",
" for _ in range(N_BATCHES):\n",
" X, y = next(train_batches)\n",
" loss, acc = f_val(X, y)\n",
" val_loss += loss\n",
" val_acc += acc\n",
" val_loss /= N_BATCHES\n",
" val_acc /= N_BATCHES\n",
"\n",
" print('Epoch {}, Train (val) loss {:.03f} ({:.03f}) ratio {:.03f}'.format(\n",
" epoch, train_loss, val_loss, val_loss/train_loss))\n",
" print('Train (val) accuracy {:.03f} ({:.03f})'.format(train_acc, val_acc))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.11"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment