Created
April 11, 2016 19:44
-
-
Save aegorenkov/6a78fd0074664318a32b13c7ed0bc781 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "code", | |
| "execution_count": 1, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>ri</th>\n", | |
| " <th>na</th>\n", | |
| " <th>mg</th>\n", | |
| " <th>al</th>\n", | |
| " <th>si</th>\n", | |
| " <th>k</th>\n", | |
| " <th>ca</th>\n", | |
| " <th>ba</th>\n", | |
| " <th>fe</th>\n", | |
| " <th>glass_type</th>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>id</th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>22</th>\n", | |
| " <td>1.51966</td>\n", | |
| " <td>14.77</td>\n", | |
| " <td>3.75</td>\n", | |
| " <td>0.29</td>\n", | |
| " <td>72.02</td>\n", | |
| " <td>0.03</td>\n", | |
| " <td>9.00</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0.00</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>185</th>\n", | |
| " <td>1.51115</td>\n", | |
| " <td>17.38</td>\n", | |
| " <td>0.00</td>\n", | |
| " <td>0.34</td>\n", | |
| " <td>75.41</td>\n", | |
| " <td>0.00</td>\n", | |
| " <td>6.65</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0.00</td>\n", | |
| " <td>6</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>40</th>\n", | |
| " <td>1.52213</td>\n", | |
| " <td>14.21</td>\n", | |
| " <td>3.82</td>\n", | |
| " <td>0.47</td>\n", | |
| " <td>71.77</td>\n", | |
| " <td>0.11</td>\n", | |
| " <td>9.57</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0.00</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>39</th>\n", | |
| " <td>1.52213</td>\n", | |
| " <td>14.21</td>\n", | |
| " <td>3.82</td>\n", | |
| " <td>0.47</td>\n", | |
| " <td>71.77</td>\n", | |
| " <td>0.11</td>\n", | |
| " <td>9.57</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0.00</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>51</th>\n", | |
| " <td>1.52320</td>\n", | |
| " <td>13.72</td>\n", | |
| " <td>3.72</td>\n", | |
| " <td>0.51</td>\n", | |
| " <td>71.75</td>\n", | |
| " <td>0.09</td>\n", | |
| " <td>10.06</td>\n", | |
| " <td>0</td>\n", | |
| " <td>0.16</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " ri na mg al si k ca ba fe glass_type\n", | |
| "id \n", | |
| "22 1.51966 14.77 3.75 0.29 72.02 0.03 9.00 0 0.00 1\n", | |
| "185 1.51115 17.38 0.00 0.34 75.41 0.00 6.65 0 0.00 6\n", | |
| "40 1.52213 14.21 3.82 0.47 71.77 0.11 9.57 0 0.00 1\n", | |
| "39 1.52213 14.21 3.82 0.47 71.77 0.11 9.57 0 0.00 1\n", | |
| "51 1.52320 13.72 3.72 0.51 71.75 0.09 10.06 0 0.16 1" | |
| ] | |
| }, | |
| "execution_count": 1, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "import pandas as pd\n", | |
| "from sklearn.linear_model import LogisticRegression\n", | |
| "\n", | |
| "url = 'http://archive.ics.uci.edu/ml/machine-learning-databases/glass/glass.data'\n", | |
| "col_names = ['id','ri','na','mg','al','si','k','ca','ba','fe','glass_type']\n", | |
| "glass = pd.read_csv(url, names=col_names, index_col='id')\n", | |
| "glass.sort_values('al', inplace=True)\n", | |
| "glass.head()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 2, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "glass['household'] = glass.glass_type.map({1:0, 2:0, 3:0, 5:1, 6:1, 7:1})" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 3, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "glass_normal = glass" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 4, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "glass_doubled = pd.concat([glass_normal, glass[glass.household == 1]], axis=0)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 5, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "0 163\n", | |
| "1 51\n", | |
| "Name: household, dtype: int64" | |
| ] | |
| }, | |
| "execution_count": 5, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "glass_normal.household.value_counts()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 6, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "0 163\n", | |
| "1 102\n", | |
| "Name: household, dtype: int64" | |
| ] | |
| }, | |
| "execution_count": 6, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "glass_doubled.household.value_counts()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 7, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "array([[ 4.18040386]])" | |
| ] | |
| }, | |
| "execution_count": 7, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# Fit an unweighted logistic regression on the original (imbalanced) data\n", | |
| "\n", | |
| "logreg = LogisticRegression(C=1e9)\n", | |
| "feature_cols = ['al']\n", | |
| "X = glass_normal[feature_cols]\n", | |
| "y = glass_normal.household\n", | |
| "logreg.fit(X, y)\n", | |
| "logreg.coef_" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 8, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "array([[ 3.85191349]])" | |
| ] | |
| }, | |
| "execution_count": 8, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# Fit a class-weighted logistic regression (class 1 weighted 2x) on the original data\n", | |
| "\n", | |
| "logreg = LogisticRegression(C=1e9, class_weight={0:1, 1:2})\n", | |
| "feature_cols = ['al']\n", | |
| "X = glass_normal[feature_cols]\n", | |
| "y = glass_normal.household\n", | |
| "logreg.fit(X, y)\n", | |
| "logreg.coef_" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 9, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "array([[ 3.85191349]])" | |
| ] | |
| }, | |
| "execution_count": 9, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# Fit an unweighted logistic regression on the oversampled data (class 1 rows duplicated)\n", | |
| "\n", | |
| "logreg = LogisticRegression(C=1e9)\n", | |
| "feature_cols = ['al']\n", | |
| "X = glass_doubled[feature_cols]\n", | |
| "y = glass_doubled.household\n", | |
| "logreg.fit(X, y)\n", | |
| "logreg.coef_" | |
| ] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 2", | |
| "language": "python", | |
| "name": "python2" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 2 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython2", | |
| "version": "2.7.11" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 0 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment