Skip to content

Instantly share code, notes, and snippets.

@cqcn1991
Created May 3, 2015 04:22
Show Gist options
  • Save cqcn1991/a330a5eaa7e9621e56fb to your computer and use it in GitHub Desktop.
Save cqcn1991/a330a5eaa7e9621e56fb to your computer and use it in GitHub Desktop.
This file has been truncated, but you can view the full file.
{
"metadata": {
"name": "",
"signature": "sha256:9685c5d0f663be78b3bccac808e2fbc1c80e6546f15a0451c07ba555efc96e8b"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"import sklearn\n",
"import sklearn.tree\n",
"import sklearn.datasets\n",
"import sklearn.preprocessing\n",
"import sklearn.decomposition\n",
"import sklearn.ensemble\n",
"\n",
"import urllib2\n",
"import bs4\n",
"import itertools\n",
"\n",
"import pandas as pd\n",
"import pandas.tools.plotting \n",
"\n",
"import numpy as np\n",
"\n",
"import matplotlib\n",
"from matplotlib.colors import ListedColormap\n",
"%matplotlib inline\n",
"import matplotlib.pyplot as plt\n",
"\n",
"import seaborn as sns\n",
"sns.set_style(\"white\")\n",
"\n",
"cmap_bold = ListedColormap(['#FF0000', '#00FF00', '#0000FF', '#000000'])"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 142
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Upstream location of the data set, kept under its own name so it is not lost\n",
"# when `url` points at a file on disk.\n",
"DATA_SOURCE_URL = 'https://archive.ics.uci.edu/ml/machine-learning-databases/abalone/abalone.data'\n",
"# Path actually read by the notebook -- presumably a local copy of the file above (TODO confirm).\n",
"url = 'abalone.csv'\n",
"# Column names supplied by hand; the loading cell reads the file with header=None.\n",
"columns = ['sex','length','diameter','height','wholeWeight','shuckedWeight','visceraWeight','shellWeight','rings']"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 95
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Load the raw table; no header row is read from the file, the names come from `columns`.\n",
"data_orig = pd.read_csv(url, names=columns, header=None)\n",
"data_orig.head(n=3)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>sex</th>\n",
" <th>length</th>\n",
" <th>diameter</th>\n",
" <th>height</th>\n",
" <th>wholeWeight</th>\n",
" <th>shuckedWeight</th>\n",
" <th>visceraWeight</th>\n",
" <th>shellWeight</th>\n",
" <th>rings</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td> M</td>\n",
" <td> 0.455</td>\n",
" <td> 0.365</td>\n",
" <td> 0.095</td>\n",
" <td> 0.5140</td>\n",
" <td> 0.2245</td>\n",
" <td> 0.1010</td>\n",
" <td> 0.15</td>\n",
" <td> 15</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td> M</td>\n",
" <td> 0.350</td>\n",
" <td> 0.265</td>\n",
" <td> 0.090</td>\n",
" <td> 0.2255</td>\n",
" <td> 0.0995</td>\n",
" <td> 0.0485</td>\n",
" <td> 0.07</td>\n",
" <td> 7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td> F</td>\n",
" <td> 0.530</td>\n",
" <td> 0.420</td>\n",
" <td> 0.135</td>\n",
" <td> 0.6770</td>\n",
" <td> 0.2565</td>\n",
" <td> 0.1415</td>\n",
" <td> 0.21</td>\n",
" <td> 9</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 165,
"text": [
" sex length diameter height wholeWeight shuckedWeight visceraWeight \\\n",
"0 M 0.455 0.365 0.095 0.5140 0.2245 0.1010 \n",
"1 M 0.350 0.265 0.090 0.2255 0.0995 0.0485 \n",
"2 F 0.530 0.420 0.135 0.6770 0.2565 0.1415 \n",
"\n",
" shellWeight rings \n",
"0 0.15 15 \n",
"1 0.07 7 \n",
"2 0.21 9 "
]
}
],
"prompt_number": 165
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Integer codes for the categorical `sex` column: 0 = infant, 1 = female, 2 = male.\n",
"SEX_CODES = {'I': 0, 'F': 1, 'M': 2}\n",
"\n",
"def parse_label(label):\n",
"    \"\"\"Return the integer code for a one-letter sex label ('I', 'F' or 'M').\n",
"\n",
"    Raises KeyError for any other label.\n",
"    \"\"\"\n",
"    return SEX_CODES[label]\n",
"\n",
"# Build `data` from a copy of the raw frame so this cell runs on a fresh kernel\n",
"# and `data_orig` keeps the original letter labels.\n",
"# NOTE(review): the saved output of this cell shows `rings` already recoded to small\n",
"# integers (0-3), which no visible cell does -- presumably a missing cell; confirm.\n",
"data = data_orig.copy()\n",
"data['sex'] = data_orig['sex'].map(parse_label)\n",
"data.head()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>sex</th>\n",
" <th>length</th>\n",
" <th>diameter</th>\n",
" <th>height</th>\n",
" <th>wholeWeight</th>\n",
" <th>shuckedWeight</th>\n",
" <th>visceraWeight</th>\n",
" <th>shellWeight</th>\n",
" <th>rings</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td> 2</td>\n",
" <td> 0.455</td>\n",
" <td> 0.365</td>\n",
" <td> 0.095</td>\n",
" <td> 0.5140</td>\n",
" <td> 0.2245</td>\n",
" <td> 0.1010</td>\n",
" <td> 0.150</td>\n",
" <td> 3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td> 2</td>\n",
" <td> 0.350</td>\n",
" <td> 0.265</td>\n",
" <td> 0.090</td>\n",
" <td> 0.2255</td>\n",
" <td> 0.0995</td>\n",
" <td> 0.0485</td>\n",
" <td> 0.070</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td> 1</td>\n",
" <td> 0.530</td>\n",
" <td> 0.420</td>\n",
" <td> 0.135</td>\n",
" <td> 0.6770</td>\n",
" <td> 0.2565</td>\n",
" <td> 0.1415</td>\n",
" <td> 0.210</td>\n",
" <td> 1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td> 2</td>\n",
" <td> 0.440</td>\n",
" <td> 0.365</td>\n",
" <td> 0.125</td>\n",
" <td> 0.5160</td>\n",
" <td> 0.2155</td>\n",
" <td> 0.1140</td>\n",
" <td> 0.155</td>\n",
" <td> 2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td> 0</td>\n",
" <td> 0.330</td>\n",
" <td> 0.255</td>\n",
" <td> 0.080</td>\n",
" <td> 0.2050</td>\n",
" <td> 0.0895</td>\n",
" <td> 0.0395</td>\n",
" <td> 0.055</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 166,
"text": [
" sex length diameter height wholeWeight shuckedWeight visceraWeight \\\n",
"0 2 0.455 0.365 0.095 0.5140 0.2245 0.1010 \n",
"1 2 0.350 0.265 0.090 0.2255 0.0995 0.0485 \n",
"2 1 0.530 0.420 0.135 0.6770 0.2565 0.1415 \n",
"3 2 0.440 0.365 0.125 0.5160 0.2155 0.1140 \n",
"4 0 0.330 0.255 0.080 0.2050 0.0895 0.0395 \n",
"\n",
" shellWeight rings \n",
"0 0.150 3 \n",
"1 0.070 0 \n",
"2 0.210 1 \n",
"3 0.155 2 \n",
"4 0.055 0 "
]
}
],
"prompt_number": 166
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Pairwise-relationship grid for the columns of `data`, coloured by the encoded `sex` value.\n",
"sns.pairplot(data=data, hue='sex')"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 98,
"text": [
"<seaborn.axisgrid.PairGrid at 0x40a13898>"
]
},
{
"metadata": {},
"output_type": "display_data",
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment