Last active
July 24, 2018 22:51
-
-
Save benadaba/d569cc99fca2d4485ea66f576abc955c to your computer and use it in GitHub Desktop.
Predict Income Level Using Microsoft Azure And Consume The Model
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 163, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style>\n", | |
" .dataframe thead tr:only-child th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: left;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>customerID</th>\n", | |
" <th>age</th>\n", | |
" <th>workclass</th>\n", | |
" <th>fnlwgt</th>\n", | |
" <th>education</th>\n", | |
" <th>education-num</th>\n", | |
" <th>marital-status</th>\n", | |
" <th>occupation</th>\n", | |
" <th>relationship</th>\n", | |
" <th>race</th>\n", | |
" <th>sex</th>\n", | |
" <th>capital-gain</th>\n", | |
" <th>capital-loss</th>\n", | |
" <th>hours-per-week</th>\n", | |
" <th>native-country</th>\n", | |
" <th>income</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>11020</td>\n", | |
" <td>45</td>\n", | |
" <td>1</td>\n", | |
" <td>3</td>\n", | |
" <td>1</td>\n", | |
" <td>3</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>black</td>\n", | |
" <td>male</td>\n", | |
" <td>243424</td>\n", | |
" <td>54</td>\n", | |
" <td>32</td>\n", | |
" <td>ghana</td>\n", | |
" <td>334434</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>11021</td>\n", | |
" <td>45</td>\n", | |
" <td>1</td>\n", | |
" <td>3</td>\n", | |
" <td>1</td>\n", | |
" <td>3</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>black</td>\n", | |
" <td>male</td>\n", | |
" <td>567</td>\n", | |
" <td>54</td>\n", | |
" <td>67</td>\n", | |
" <td>germany</td>\n", | |
" <td>334435</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>11022</td>\n", | |
" <td>45</td>\n", | |
" <td>1</td>\n", | |
" <td>3</td>\n", | |
" <td>1</td>\n", | |
" <td>3</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>black</td>\n", | |
" <td>male</td>\n", | |
" <td>45446</td>\n", | |
" <td>54</td>\n", | |
" <td>43</td>\n", | |
" <td>germany</td>\n", | |
" <td>334436</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>11023</td>\n", | |
" <td>45</td>\n", | |
" <td>1</td>\n", | |
" <td>3</td>\n", | |
" <td>1</td>\n", | |
" <td>3</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>black</td>\n", | |
" <td>male</td>\n", | |
" <td>76786</td>\n", | |
" <td>54</td>\n", | |
" <td>67</td>\n", | |
" <td>germany</td>\n", | |
" <td>334437</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>11024</td>\n", | |
" <td>45</td>\n", | |
" <td>1</td>\n", | |
" <td>3</td>\n", | |
" <td>1</td>\n", | |
" <td>3</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>black</td>\n", | |
" <td>male</td>\n", | |
" <td>243424</td>\n", | |
" <td>54</td>\n", | |
" <td>12</td>\n", | |
" <td>germany</td>\n", | |
" <td>334438</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" customerID age workclass fnlwgt education education-num \\\n", | |
"0 11020 45 1 3 1 3 \n", | |
"1 11021 45 1 3 1 3 \n", | |
"2 11022 45 1 3 1 3 \n", | |
"3 11023 45 1 3 1 3 \n", | |
"4 11024 45 1 3 1 3 \n", | |
"\n", | |
" marital-status occupation relationship race sex capital-gain \\\n", | |
"0 1 1 0 black male 243424 \n", | |
"1 1 1 0 black male 567 \n", | |
"2 1 1 0 black male 45446 \n", | |
"3 1 1 0 black male 76786 \n", | |
"4 1 1 0 black male 243424 \n", | |
"\n", | |
" capital-loss hours-per-week native-country income \n", | |
"0 54 32 ghana 334434 \n", | |
"1 54 67 germany 334435 \n", | |
"2 54 43 germany 334436 \n", | |
"3 54 67 germany 334437 \n", | |
"4 54 12 germany 334438 " | |
] | |
}, | |
"execution_count": 163, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"#import the relevant files\n", | |
"import pandas as pd\n", | |
"import json\n", | |
"import re\n", | |
"import numpy as np\n", | |
"\n", | |
"\n", | |
"sample_data_original = pd.read_csv(\"sample_data.csv\")\n", | |
"sample_data_original.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 164, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style>\n", | |
" .dataframe thead tr:only-child th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: left;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>age</th>\n", | |
" <th>workclass</th>\n", | |
" <th>fnlwgt</th>\n", | |
" <th>education</th>\n", | |
" <th>education-num</th>\n", | |
" <th>marital-status</th>\n", | |
" <th>occupation</th>\n", | |
" <th>relationship</th>\n", | |
" <th>race</th>\n", | |
" <th>sex</th>\n", | |
" <th>capital-gain</th>\n", | |
" <th>capital-loss</th>\n", | |
" <th>hours-per-week</th>\n", | |
" <th>native-country</th>\n", | |
" <th>income</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>45</td>\n", | |
" <td>1</td>\n", | |
" <td>3</td>\n", | |
" <td>1</td>\n", | |
" <td>3</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>black</td>\n", | |
" <td>male</td>\n", | |
" <td>243424</td>\n", | |
" <td>54</td>\n", | |
" <td>32</td>\n", | |
" <td>ghana</td>\n", | |
" <td>334434</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>45</td>\n", | |
" <td>1</td>\n", | |
" <td>3</td>\n", | |
" <td>1</td>\n", | |
" <td>3</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>black</td>\n", | |
" <td>male</td>\n", | |
" <td>567</td>\n", | |
" <td>54</td>\n", | |
" <td>67</td>\n", | |
" <td>germany</td>\n", | |
" <td>334435</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>45</td>\n", | |
" <td>1</td>\n", | |
" <td>3</td>\n", | |
" <td>1</td>\n", | |
" <td>3</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>black</td>\n", | |
" <td>male</td>\n", | |
" <td>45446</td>\n", | |
" <td>54</td>\n", | |
" <td>43</td>\n", | |
" <td>germany</td>\n", | |
" <td>334436</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>45</td>\n", | |
" <td>1</td>\n", | |
" <td>3</td>\n", | |
" <td>1</td>\n", | |
" <td>3</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>black</td>\n", | |
" <td>male</td>\n", | |
" <td>76786</td>\n", | |
" <td>54</td>\n", | |
" <td>67</td>\n", | |
" <td>germany</td>\n", | |
" <td>334437</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>45</td>\n", | |
" <td>1</td>\n", | |
" <td>3</td>\n", | |
" <td>1</td>\n", | |
" <td>3</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>black</td>\n", | |
" <td>male</td>\n", | |
" <td>243424</td>\n", | |
" <td>54</td>\n", | |
" <td>12</td>\n", | |
" <td>germany</td>\n", | |
" <td>334438</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" age workclass fnlwgt education education-num marital-status \\\n", | |
"0 45 1 3 1 3 1 \n", | |
"1 45 1 3 1 3 1 \n", | |
"2 45 1 3 1 3 1 \n", | |
"3 45 1 3 1 3 1 \n", | |
"4 45 1 3 1 3 1 \n", | |
"\n", | |
" occupation relationship race sex capital-gain capital-loss \\\n", | |
"0 1 0 black male 243424 54 \n", | |
"1 1 0 black male 567 54 \n", | |
"2 1 0 black male 45446 54 \n", | |
"3 1 0 black male 76786 54 \n", | |
"4 1 0 black male 243424 54 \n", | |
"\n", | |
" hours-per-week native-country income \n", | |
"0 32 ghana 334434 \n", | |
"1 67 germany 334435 \n", | |
"2 43 germany 334436 \n", | |
"3 67 germany 334437 \n", | |
"4 12 germany 334438 " | |
] | |
}, | |
"execution_count": 164, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Keep the first 10 records, customerID included, so that the predictions can be\n", | |
"# matched back to the customers later on.\n", | |
"sample_data_10 = sample_data_original.iloc[:10]\n", | |
"# The customerID column is not needed in the model, so leave it out of the model input.\n", | |
"sample_data = sample_data_original.drop('customerID', axis=1)\n", | |
"sample_data.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 165, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Serialise the first 10 rows as a JSON array of records and save it to temp.json\n", | |
"first_ten_as_json = sample_data.head(10).to_json(orient='records')\n", | |
"with open('temp.json', 'w') as json_file:\n", | |
"    json_file.write(first_ten_as_json)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 166, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"[{u'age': 45,\n", | |
" u'capital-gain': 243424,\n", | |
" u'capital-loss': 54,\n", | |
" u'education': 1,\n", | |
" u'education-num': 3,\n", | |
" u'fnlwgt': 3,\n", | |
" u'hours-per-week': 32,\n", | |
" u'income': 334434,\n", | |
" u'marital-status': 1,\n", | |
" u'native-country': u'ghana',\n", | |
" u'occupation': 1,\n", | |
" u'race': u'black',\n", | |
" u'relationship': 0,\n", | |
" u'sex': u'male',\n", | |
" u'workclass': 1},\n", | |
" {u'age': 45,\n", | |
" u'capital-gain': 567,\n", | |
" u'capital-loss': 54,\n", | |
" u'education': 1,\n", | |
" u'education-num': 3,\n", | |
" u'fnlwgt': 3,\n", | |
" u'hours-per-week': 67,\n", | |
" u'income': 334435,\n", | |
" u'marital-status': 1,\n", | |
" u'native-country': u'germany',\n", | |
" u'occupation': 1,\n", | |
" u'race': u'black',\n", | |
" u'relationship': 0,\n", | |
" u'sex': u'male',\n", | |
" u'workclass': 1}]" | |
] | |
}, | |
"execution_count": 166, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Read the saved records back from temp.json\n", | |
"with open('temp.json') as json_file:\n", | |
"    json_data = json.loads(json_file.read())\n", | |
"\n", | |
"# Show the first two records as a quick check\n", | |
"json_data[:2]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 168, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"{\"Results\":{\"output1\":[{\"age\":\"45\",\"workclass\":\"1\",\"education-num\":\"3\",\"marital-status\":\"1\",\"occupation\":\"1\",\"relationship\":\"0\",\"race\":\"black\",\"sex\":\"male\",\"capital-gain\":\"243424\",\"capital-loss\":\"54\",\"hours-per-week\":\"32\",\"native-country\":\"ghana\",\"income\":\"334434\",\"Scored Labels\":\">50K\",\"Scored Probabilities\":\"1\"},{\"age\":\"45\",\"workclass\":\"1\",\"education-num\":\"3\",\"marital-status\":\"1\",\"occupation\":\"1\",\"relationship\":\"0\",\"race\":\"black\",\"sex\":\"male\",\"capital-gain\":\"567\",\"capital-loss\":\"54\",\"hours-per-week\":\"67\",\"native-country\":\"germany\",\"income\":\"334435\",\"Scored Labels\":\"<=50K\",\"Scored Probabilities\":\"0\"},{\"age\":\"45\",\"workclass\":\"1\",\"education-num\":\"3\",\"marital-status\":\"1\",\"occupation\":\"1\",\"relationship\":\"0\",\"race\":\"black\",\"sex\":\"male\",\"capital-gain\":\"45446\",\"capital-loss\":\"54\",\"hours-per-week\":\"43\",\"native-country\":\"germany\",\"income\":\"334436\",\"Scored Labels\":\">50K\",\"Scored Probabilities\":\"1\"},{\"age\":\"45\",\"workclass\":\"1\",\"education-num\":\"3\",\"marital-status\":\"1\",\"occupation\":\"1\",\"relationship\":\"0\",\"race\":\"black\",\"sex\":\"male\",\"capital-gain\":\"76786\",\"capital-loss\":\"54\",\"hours-per-week\":\"67\",\"native-country\":\"germany\",\"income\":\"334437\",\"Scored Labels\":\">50K\",\"Scored Probabilities\":\"1\"},{\"age\":\"45\",\"workclass\":\"1\",\"education-num\":\"3\",\"marital-status\":\"1\",\"occupation\":\"1\",\"relationship\":\"0\",\"race\":\"black\",\"sex\":\"male\",\"capital-gain\":\"243424\",\"capital-loss\":\"54\",\"hours-per-week\":\"12\",\"native-country\":\"germany\",\"income\":\"334438\",\"Scored Labels\":\">50K\",\"Scored 
Probabilities\":\"1\"},{\"age\":\"45\",\"workclass\":\"1\",\"education-num\":\"3\",\"marital-status\":\"1\",\"occupation\":\"1\",\"relationship\":\"0\",\"race\":\"black\",\"sex\":\"male\",\"capital-gain\":\"556\",\"capital-loss\":\"54\",\"hours-per-week\":\"67\",\"native-country\":\"germany\",\"income\":\"334439\",\"Scored Labels\":\"<=50K\",\"Scored Probabilities\":\"0\"},{\"age\":\"45\",\"workclass\":\"1\",\"education-num\":\"3\",\"marital-status\":\"1\",\"occupation\":\"1\",\"relationship\":\"0\",\"race\":\"black\",\"sex\":\"male\",\"capital-gain\":\"234\",\"capital-loss\":\"54\",\"hours-per-week\":\"11\",\"native-country\":\"germany\",\"income\":\"334440\",\"Scored Labels\":\"<=50K\",\"Scored Probabilities\":\"0\"},{\"age\":\"45\",\"workclass\":\"1\",\"education-num\":\"3\",\"marital-status\":\"1\",\"occupation\":\"1\",\"relationship\":\"0\",\"race\":\"black\",\"sex\":\"male\",\"capital-gain\":\"645\",\"capital-loss\":\"54\",\"hours-per-week\":\"67\",\"native-country\":\"germany\",\"income\":\"334441\",\"Scored Labels\":\"<=50K\",\"Scored Probabilities\":\"0\"},{\"age\":\"45\",\"workclass\":\"1\",\"education-num\":\"3\",\"marital-status\":\"1\",\"occupation\":\"1\",\"relationship\":\"0\",\"race\":\"black\",\"sex\":\"male\",\"capital-gain\":\"645\",\"capital-loss\":\"54\",\"hours-per-week\":\"67\",\"native-country\":\"germany\",\"income\":\"334442\",\"Scored Labels\":\"<=50K\",\"Scored Probabilities\":\"0\"},{\"age\":\"45\",\"workclass\":\"1\",\"education-num\":\"3\",\"marital-status\":\"1\",\"occupation\":\"1\",\"relationship\":\"0\",\"race\":\"black\",\"sex\":\"male\",\"capital-gain\":\"645\",\"capital-loss\":\"54\",\"hours-per-week\":\"67\",\"native-country\":\"germany\",\"income\":\"334443\",\"Scored Labels\":\"<=50K\",\"Scored Probabilities\":\"0\"}]}}\n" | |
] | |
} | |
], | |
"source": [ | |
"#let's consume the Built model from Azure\n", | |
"import urllib2\n", | |
"import json\n", | |
"\n", | |
"\n", | |
"# Request payload: \"input1\" carries the records loaded from temp.json\n", | |
"data = {\n", | |
"    \"Inputs\": {\n", | |
"        \"input1\": json_data,  # supply our json as the input array\n", | |
"    },\n", | |
"    \"GlobalParameters\": {\n", | |
"    }\n", | |
"}\n", | |
"\n", | |
"body = str.encode(json.dumps(data))\n", | |
"\n", | |
"url = '<your-api-uri>'\n", | |
"api_key = '<your-api-key>' # Replace this with the API key for the web service\n", | |
"headers = {'Content-Type':'application/json', 'Authorization':('Bearer '+ api_key)}\n", | |
"\n", | |
"req = urllib2.Request(url, body, headers)\n", | |
"\n", | |
"try:\n", | |
"    response = urllib2.urlopen(req)\n", | |
"    try:\n", | |
"        # Raw response text; the prediction labels are extracted from it in a later cell\n", | |
"        result = response.read()\n", | |
"    finally:\n", | |
"        # Release the connection even if reading the response fails\n", | |
"        response.close()\n", | |
"    print(result)\n", | |
"except urllib2.HTTPError as error:  # 'as' form (Python 2.6+) instead of the legacy comma syntax\n", | |
"    print(\"The request failed with status code: \" + str(error.code))\n", | |
"\n", | |
"    # Print the headers - they include the request ID and the timestamp, which are useful for debugging the failure\n", | |
"    print(error.info())\n", | |
"    print(json.loads(error.read()))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 169, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# define a function to extract the predicted values\n", | |
"def get_predictions(results_set):\n", | |
"    \"\"\"Extract the predicted labels from a scoring web-service response.\n", | |
"\n", | |
"    Parameters\n", | |
"    ----------\n", | |
"    results_set : str\n", | |
"        Raw response text containing one \"Scored Labels\":\"<value>\" pair per\n", | |
"        scored record.\n", | |
"\n", | |
"    Returns\n", | |
"    -------\n", | |
"    list of str\n", | |
"        The \"Scored Labels\" values in the order they appear in the response;\n", | |
"        an empty list when the response contains none.\n", | |
"    \"\"\"\n", | |
"    # Search the argument itself rather than the notebook-global `result`, and stop\n", | |
"    # each match at the closing quote so one label can never run into the next record.\n", | |
"    return re.findall(r'\"Scored Labels\":\"([^\"]*)\"', results_set)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 170, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"['>50K',\n", | |
" '<=50K',\n", | |
" '>50K',\n", | |
" '>50K',\n", | |
" '>50K',\n", | |
" '<=50K',\n", | |
" '<=50K',\n", | |
" '<=50K',\n", | |
" '<=50K',\n", | |
" '<=50K']" | |
] | |
}, | |
"execution_count": 170, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"predictions= get_predictions(result)\n", | |
"predictions" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 171, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style>\n", | |
" .dataframe thead tr:only-child th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: left;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Predictions</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>>50K</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td><=50K</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>>50K</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>>50K</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>>50K</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td><=50K</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td><=50K</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td><=50K</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td><=50K</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9</th>\n", | |
" <td><=50K</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Predictions\n", | |
"0 >50K\n", | |
"1 <=50K\n", | |
"2 >50K\n", | |
"3 >50K\n", | |
"4 >50K\n", | |
"5 <=50K\n", | |
"6 <=50K\n", | |
"7 <=50K\n", | |
"8 <=50K\n", | |
"9 <=50K" | |
] | |
}, | |
"execution_count": 171, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Convert the list of predictions to a one-column dataframe.\n", | |
"# Naming the column at construction also works for an empty list, whereas assigning\n", | |
"# `columns` afterwards raises a length-mismatch error when there are no predictions.\n", | |
"preds = pd.DataFrame(predictions, columns=['Predictions'])\n", | |
"preds" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 172, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style>\n", | |
" .dataframe thead tr:only-child th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: left;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>customerID</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>11020</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>11021</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>11022</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>11023</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>11024</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>11025</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>11026</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>11027</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td>11028</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9</th>\n", | |
" <td>11029</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" customerID\n", | |
"0 11020\n", | |
"1 11021\n", | |
"2 11022\n", | |
"3 11023\n", | |
"4 11024\n", | |
"5 11025\n", | |
"6 11026\n", | |
"7 11027\n", | |
"8 11028\n", | |
"9 11029" | |
] | |
}, | |
"execution_count": 172, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Get only the customer ids. Select the column by name (as the earlier drop does)\n", | |
"# so the result does not depend on customerID being the first column of the CSV.\n", | |
"CustomerIDs = sample_data_10[['customerID']]\n", | |
"CustomerIDs" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 173, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style>\n", | |
" .dataframe thead tr:only-child th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: left;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>customerID</th>\n", | |
" <th>Predictions</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>11020</td>\n", | |
" <td>>50K</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>11021</td>\n", | |
" <td><=50K</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>11022</td>\n", | |
" <td>>50K</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>11023</td>\n", | |
" <td>>50K</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>11024</td>\n", | |
" <td>>50K</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>11025</td>\n", | |
" <td><=50K</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>11026</td>\n", | |
" <td><=50K</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>11027</td>\n", | |
" <td><=50K</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td>11028</td>\n", | |
" <td><=50K</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9</th>\n", | |
" <td>11029</td>\n", | |
" <td><=50K</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" customerID Predictions\n", | |
"0 11020 >50K\n", | |
"1 11021 <=50K\n", | |
"2 11022 >50K\n", | |
"3 11023 >50K\n", | |
"4 11024 >50K\n", | |
"5 11025 <=50K\n", | |
"6 11026 <=50K\n", | |
"7 11027 <=50K\n", | |
"8 11028 <=50K\n", | |
"9 11029 <=50K" | |
] | |
}, | |
"execution_count": 173, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"#concatenate the CustomerIDs to the Predictions -column-wise\n", | |
"final_customer_preds = pd.concat([CustomerIDs, preds], axis=1)\n", | |
"final_customer_preds" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 2", | |
"language": "python", | |
"name": "python2" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 2 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython2", | |
"version": "2.7.13" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment