Created
May 3, 2015 04:22
-
-
Save cqcn1991/a330a5eaa7e9621e56fb to your computer and use it in GitHub Desktop.
This file has been truncated, but you can view the full file.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"metadata": { | |
"name": "", | |
"signature": "sha256:9685c5d0f663be78b3bccac808e2fbc1c80e6546f15a0451c07ba555efc96e8b" | |
}, | |
"nbformat": 3, | |
"nbformat_minor": 0, | |
"worksheets": [ | |
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"import sklearn\n", | |
"import sklearn.tree\n", | |
"import sklearn.datasets\n", | |
"import sklearn.preprocessing\n", | |
"import sklearn.decomposition\n", | |
"import sklearn.ensemble\n", | |
"\n", | |
"import urllib2\n", | |
"import bs4\n", | |
"import itertools\n", | |
"\n", | |
"import pandas as pd\n", | |
"import pandas.tools.plotting \n", | |
"\n", | |
"import numpy as np\n", | |
"\n", | |
"import matplotlib\n", | |
"from matplotlib.colors import ListedColormap\n", | |
"%matplotlib inline\n", | |
"import matplotlib.pyplot as plt\n", | |
"\n", | |
"import seaborn as sns\n", | |
"sns.set_style(\"white\")\n", | |
"\n", | |
"cmap_bold = ListedColormap(['#FF0000', '#00FF00', '#0000FF', '#000000'])" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 142 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/abalone/abalone.data'\n", | |
"url = 'abalone.csv'\n", | |
"columns = ['sex','length','diameter','height','wholeWeight','shuckedWeight','visceraWeight','shellWeight','rings']" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 95 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"data_orig = pd.io.parsers.read_csv(url, header=None, names=columns)\n", | |
"data_orig.head(3)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>sex</th>\n", | |
" <th>length</th>\n", | |
" <th>diameter</th>\n", | |
" <th>height</th>\n", | |
" <th>wholeWeight</th>\n", | |
" <th>shuckedWeight</th>\n", | |
" <th>visceraWeight</th>\n", | |
" <th>shellWeight</th>\n", | |
" <th>rings</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td> M</td>\n", | |
" <td> 0.455</td>\n", | |
" <td> 0.365</td>\n", | |
" <td> 0.095</td>\n", | |
" <td> 0.5140</td>\n", | |
" <td> 0.2245</td>\n", | |
" <td> 0.1010</td>\n", | |
" <td> 0.15</td>\n", | |
" <td> 15</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td> M</td>\n", | |
" <td> 0.350</td>\n", | |
" <td> 0.265</td>\n", | |
" <td> 0.090</td>\n", | |
" <td> 0.2255</td>\n", | |
" <td> 0.0995</td>\n", | |
" <td> 0.0485</td>\n", | |
" <td> 0.07</td>\n", | |
" <td> 7</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td> F</td>\n", | |
" <td> 0.530</td>\n", | |
" <td> 0.420</td>\n", | |
" <td> 0.135</td>\n", | |
" <td> 0.6770</td>\n", | |
" <td> 0.2565</td>\n", | |
" <td> 0.1415</td>\n", | |
" <td> 0.21</td>\n", | |
" <td> 9</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 165, | |
"text": [ | |
" sex length diameter height wholeWeight shuckedWeight visceraWeight \\\n", | |
"0 M 0.455 0.365 0.095 0.5140 0.2245 0.1010 \n", | |
"1 M 0.350 0.265 0.090 0.2255 0.0995 0.0485 \n", | |
"2 F 0.530 0.420 0.135 0.6770 0.2565 0.1415 \n", | |
"\n", | |
" shellWeight rings \n", | |
"0 0.15 15 \n", | |
"1 0.07 7 \n", | |
"2 0.21 9 " | |
] | |
} | |
], | |
"prompt_number": 165 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
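"# Encode the categorical 'sex' label as an integer code.\n", | |
"# 0: infant, 1: female, 2: male\n", | |
"# NOTE(review): `data` is not created in any cell above this one; it must already exist in the kernel -- confirm where it is defined.\n", | |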
"def parse_label(label):\n", | |
" options = {'I': 0, 'F': 1, 'M': 2}\n", | |
" return options[label]\n", | |
"\n", | |
"data['sex'] = data_orig['sex'].map(parse_label)\n", | |
"data.head()" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>sex</th>\n", | |
" <th>length</th>\n", | |
" <th>diameter</th>\n", | |
" <th>height</th>\n", | |
" <th>wholeWeight</th>\n", | |
" <th>shuckedWeight</th>\n", | |
" <th>visceraWeight</th>\n", | |
" <th>shellWeight</th>\n", | |
" <th>rings</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td> 2</td>\n", | |
" <td> 0.455</td>\n", | |
" <td> 0.365</td>\n", | |
" <td> 0.095</td>\n", | |
" <td> 0.5140</td>\n", | |
" <td> 0.2245</td>\n", | |
" <td> 0.1010</td>\n", | |
" <td> 0.150</td>\n", | |
" <td> 3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td> 2</td>\n", | |
" <td> 0.350</td>\n", | |
" <td> 0.265</td>\n", | |
" <td> 0.090</td>\n", | |
" <td> 0.2255</td>\n", | |
" <td> 0.0995</td>\n", | |
" <td> 0.0485</td>\n", | |
" <td> 0.070</td>\n", | |
" <td> 0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td> 1</td>\n", | |
" <td> 0.530</td>\n", | |
" <td> 0.420</td>\n", | |
" <td> 0.135</td>\n", | |
" <td> 0.6770</td>\n", | |
" <td> 0.2565</td>\n", | |
" <td> 0.1415</td>\n", | |
" <td> 0.210</td>\n", | |
" <td> 1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td> 2</td>\n", | |
" <td> 0.440</td>\n", | |
" <td> 0.365</td>\n", | |
" <td> 0.125</td>\n", | |
" <td> 0.5160</td>\n", | |
" <td> 0.2155</td>\n", | |
" <td> 0.1140</td>\n", | |
" <td> 0.155</td>\n", | |
" <td> 2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td> 0</td>\n", | |
" <td> 0.330</td>\n", | |
" <td> 0.255</td>\n", | |
" <td> 0.080</td>\n", | |
" <td> 0.2050</td>\n", | |
" <td> 0.0895</td>\n", | |
" <td> 0.0395</td>\n", | |
" <td> 0.055</td>\n", | |
" <td> 0</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 166, | |
"text": [ | |
" sex length diameter height wholeWeight shuckedWeight visceraWeight \\\n", | |
"0 2 0.455 0.365 0.095 0.5140 0.2245 0.1010 \n", | |
"1 2 0.350 0.265 0.090 0.2255 0.0995 0.0485 \n", | |
"2 1 0.530 0.420 0.135 0.6770 0.2565 0.1415 \n", | |
"3 2 0.440 0.365 0.125 0.5160 0.2155 0.1140 \n", | |
"4 0 0.330 0.255 0.080 0.2050 0.0895 0.0395 \n", | |
"\n", | |
" shellWeight rings \n", | |
"0 0.150 3 \n", | |
"1 0.070 0 \n", | |
"2 0.210 1 \n", | |
"3 0.155 2 \n", | |
"4 0.055 0 " | |
] | |
} | |
], | |
"prompt_number": 166 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"sns.pairplot(data, hue='sex')" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 98, | |
"text": [ | |
"<seaborn.axisgrid.PairGrid at 0x40a13898>" | |
] | |
}, | |
{ | |
"metadata": {}, | |
"output_type": "display_data", |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment