Skip to content

Instantly share code, notes, and snippets.

@moxpower
Last active February 4, 2016 11:30
Show Gist options
  • Save moxpower/79c345d035f4551c6316 to your computer and use it in GitHub Desktop.
Save moxpower/79c345d035f4551c6316 to your computer and use it in GitHub Desktop.
jupyter notebook to test nbviewer
{
"cells": [
{
"cell_type": "code",
"execution_count": 52,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# %load poi_id.py\n",
"#!/usr/bin/python\n",
"\n",
"import sys\n",
"import pickle\n",
"import pandas as pd\n",
"import numpy as np\n",
"from pandas.tools.plotting import scatter_matrix\n",
"sys.path.append(\"../tools/\")\n",
"from email_preprocess import preprocess\n",
"\n",
"\n",
"from feature_format import featureFormat, targetFeatureSplit\n",
"from tester import dump_classifier_and_data\n",
"\n",
"### Task 1: Select what features you'll use.\n",
"### features_list is a list of strings, each of which is a feature name.\n",
"### The first feature must be \"poi\".\n",
"features_list = ['poi','salary', 'exercised_stock_options','total_payments'] # You will need to use more features\n",
"\n",
"### Load the dictionary containing the dataset\n",
"# NOTE: pickle.load can execute arbitrary code -- only load a dataset file you trust.\n",
"# Open in binary mode so the pickle bytes are read unmodified on every platform.\n",
"with open(\"final_project_dataset.pkl\", \"rb\") as data_file:\n",
"    data_dict = pickle.load(data_file)\n",
"\n",
"### Task 2: Remove outliers\n",
"data_dict.pop(\"TOTAL\",0)\n",
"\n",
"### Task 3: Create new feature(s)\n",
"### Store to my_dataset for easy export below.\n",
"my_dataset = data_dict\n",
"\n",
"### Extract features and labels from dataset for local testing\n",
"data = featureFormat(my_dataset, features_list, sort_keys = False)\n",
"labels, features = targetFeatureSplit(data)\n",
"\n",
"### Task 4: Try a variety of classifiers\n",
"### Please name your classifier clf for easy export below.\n",
"### Note that if you want to do PCA or other multi-stage operations,\n",
"### you'll need to use Pipelines. For more info:\n",
"### http://scikit-learn.org/stable/modules/pipeline.html\n",
"\n",
"# Provided to give you a starting point. Try a variety of classifiers.\n",
"from sklearn.naive_bayes import GaussianNB\n",
"clf = GaussianNB()\n",
"\n",
"### Task 5: Tune your classifier to achieve better than .3 precision and recall \n",
"### using our testing script. Check the tester.py script in the final project\n",
"### folder for details on the evaluation method, especially the test_classifier\n",
"### function. Because of the small size of the dataset, the script uses\n",
"### stratified shuffle split cross validation. For more info: \n",
"### http://scikit-learn.org/stable/modules/generated/sklearn.cross_validation.StratifiedShuffleSplit.html\n",
"\n",
"# Example starting point. Try investigating other evaluation techniques!\n",
"from sklearn.cross_validation import train_test_split\n",
"features_train, features_test, labels_train, labels_test = \\\n",
" train_test_split(features, labels, test_size=0.3, random_state=42)\n",
"\n",
"### Task 6: Dump your classifier, dataset, and features_list so anyone can\n",
"### check your results. You do not need to change anything below, but make sure\n",
"### that the version of poi_id.py that you submit can be run on its own and\n",
"### generates the necessary .pkl files for validating your results.\n",
"\n",
"dump_classifier_and_data(clf, my_dataset, features_list)"
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
"\n",
"# Scatter the first two selected features against each other.\n",
"# Rows are indexed by position instead of being unpacked into a fixed number\n",
"# of names, so this cell keeps working when more features are added to\n",
"# features_list. One scatter call replaces the per-point (and duplicated) calls.\n",
"x_values = [row[0] for row in features]\n",
"y_values = [row[1] for row in features]\n",
"plt.scatter(x_values, y_values)\n",
"# features_list[0] is the 'poi' label, so the feature columns start at index 1.\n",
"plt.xlabel(features_list[1])\n",
"plt.ylabel(features_list[2])\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Labelled frame of the selected features (features_list minus the leading 'poi'),\n",
"# shown as pairwise scatter plots with histograms on the diagonal.\n",
"feat = pd.DataFrame(data=features, columns=features_list[1:])\n",
"scatter_matrix(frame=feat, alpha=0.2, diagonal='hist')\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>salary</th>\n",
" <th>exercised_stock_options</th>\n",
" <th>total_payments</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>139</td>\n",
" <td>139</td>\n",
" <td>139</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>192117</td>\n",
" <td>2150471</td>\n",
" <td>2340318</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>197336</td>\n",
" <td>4865021</td>\n",
" <td>8994791</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>182110</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>211844</td>\n",
" <td>651850</td>\n",
" <td>999356</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>272161</td>\n",
" <td>1741654</td>\n",
" <td>1991740</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>1111258</td>\n",
" <td>34348384</td>\n",
" <td>103559793</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" salary exercised_stock_options total_payments\n",
"count 139 139 139\n",
"mean 192117 2150471 2340318\n",
"std 197336 4865021 8994791\n",
"min 0 0 0\n",
"25% 0 0 182110\n",
"50% 211844 651850 999356\n",
"75% 272161 1741654 1991740\n",
"max 1111258 34348384 103559793"
]
},
"execution_count": 55,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.round(feat.describe(),0)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Visualize features:"
]
},
{
"cell_type": "code",
"execution_count": 57,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<script type=\"text/javascript\">\n",
"if ($(\"#dg-css\").length == 0){\n",
" $(\"head\").append([\n",
" \"<link href='https://cdn.rawgit.com/quantopian/qgrid/72d356cb123fab413dba73ec46616e4916fbd827/qgrid/qgridjs//lib/slick.grid.css' rel='stylesheet'>\",\n",
" \"<link href='https://cdn.rawgit.com/quantopian/qgrid/72d356cb123fab413dba73ec46616e4916fbd827/qgrid/qgridjs//lib/slick-default-theme.css' rel='stylesheet'>\",\n",
" \"<link href='http://cdnjs.cloudflare.com/ajax/libs/jqueryui/1.10.4/css/jquery-ui.min.css' rel='stylesheet'>\",\n",
" \"<link id='dg-css' href='https://cdn.rawgit.com/quantopian/qgrid/72d356cb123fab413dba73ec46616e4916fbd827/qgrid/qgridjs//qgrid.css' rel='stylesheet'>\"\n",
" ]);\n",
"}\n",
"</script>\n",
"<div class='q-grid-container'>\n",
"<div id='69f331e6-ef52-455b-bfb4-013d61391ea2' class='q-grid'></div>\n",
"</div>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/javascript": [
"var path_dictionary = {\n",
" jquery_drag: \"https://cdn.rawgit.com/quantopian/qgrid/72d356cb123fab413dba73ec46616e4916fbd827/qgrid/qgridjs//lib/jquery.event.drag-2.2\",\n",
" slick_core: \"https://cdn.rawgit.com/quantopian/qgrid/72d356cb123fab413dba73ec46616e4916fbd827/qgrid/qgridjs//lib/slick.core.2.2\",\n",
" slick_data_view: \"https://cdn.rawgit.com/quantopian/qgrid/72d356cb123fab413dba73ec46616e4916fbd827/qgrid/qgridjs//lib/slick.dataview.2.2\",\n",
" slick_check_box_column: \"https://cdn.rawgit.com/quantopian/qgrid/72d356cb123fab413dba73ec46616e4916fbd827/qgrid/qgridjs//lib/slick.checkboxselectcolumn\",\n",
" slick_row_selection_model: \"https://cdn.rawgit.com/quantopian/qgrid/72d356cb123fab413dba73ec46616e4916fbd827/qgrid/qgridjs//lib/slick.rowselectionmodel\",\n",
" slick_grid: \"https://cdn.rawgit.com/quantopian/qgrid/72d356cb123fab413dba73ec46616e4916fbd827/qgrid/qgridjs//lib/slick.grid.2.2\",\n",
" data_grid: \"https://cdn.rawgit.com/quantopian/qgrid/72d356cb123fab413dba73ec46616e4916fbd827/qgrid/qgridjs//qgrid\",\n",
" date_filter: \"https://cdn.rawgit.com/quantopian/qgrid/72d356cb123fab413dba73ec46616e4916fbd827/qgrid/qgridjs//qgrid.datefilter\",\n",
" slider_filter: \"https://cdn.rawgit.com/quantopian/qgrid/72d356cb123fab413dba73ec46616e4916fbd827/qgrid/qgridjs//qgrid.sliderfilter\",\n",
" text_filter: \"https://cdn.rawgit.com/quantopian/qgrid/72d356cb123fab413dba73ec46616e4916fbd827/qgrid/qgridjs//qgrid.textfilter\",\n",
" filter_base: \"https://cdn.rawgit.com/quantopian/qgrid/72d356cb123fab413dba73ec46616e4916fbd827/qgrid/qgridjs//qgrid.filterbase\",\n",
" handlebars: \"https://cdnjs.cloudflare.com/ajax/libs/handlebars.js/2.0.0/handlebars.min\"\n",
"};\n",
"\n",
"var existing_config = require.s.contexts._.config;\n",
"if (!existing_config.paths['underscore']){\n",
" path_dictionary['underscore'] = \"https://cdnjs.cloudflare.com/ajax/libs/underscore.js/1.7.0/underscore-min\";\n",
"}\n",
"\n",
"if (!existing_config.paths['moment']){\n",
" path_dictionary['moment'] = \"https://cdnjs.cloudflare.com/ajax/libs/moment.js/2.8.3/moment.min\";\n",
"}\n",
"\n",
"if (!existing_config.paths['jqueryui']){\n",
" path_dictionary['jqueryui'] = \"https://ajax.googleapis.com/ajax/libs/jqueryui/1.11.1/jquery-ui.min\";\n",
"}\n",
"\n",
"require.config({\n",
" paths: path_dictionary\n",
"});\n",
"\n",
"if (typeof jQuery === 'function') {\n",
" define('jquery', function() { return jQuery; });\n",
"}\n",
"\n",
"require([\n",
" 'jquery',\n",
" 'jquery_drag',\n",
" 'slick_core',\n",
" 'slick_data_view'\n",
"],\n",
"function($){\n",
" $('#69f331e6-ef52-455b-bfb4-013d61391ea2').closest('.rendered_html').removeClass('rendered_html');\n",
" require([\n",
" 'slick_check_box_column',\n",
" 'slick_row_selection_model',\n",
" 'slick_grid'\n",
" ], function(){\n",
" require([\"data_grid\"], function(dgrid){\n",
" var grid = new dgrid.QGrid('#69f331e6-ef52-455b-bfb4-013d61391ea2', [{\"null\":0,\"salary\":365788.0,\"exercised_stock_options\":0.0,\"total_payments\":1061827.0},{\"null\":1,\"salary\":267102.0,\"exercised_stock_options\":6680544.0,\"total_payments\":5634343.0},{\"null\":2,\"salary\":170941.0,\"exercised_stock_options\":4890344.0,\"total_payments\":211725.0},{\"null\":3,\"salary\":0.0,\"exercised_stock_options\":651850.0,\"total_payments\":0.0},{\"null\":4,\"salary\":243293.0,\"exercised_stock_options\":5538001.0,\"total_payments\":288682.0},{\"null\":5,\"salary\":267093.0,\"exercised_stock_options\":0.0,\"total_payments\":628522.0},{\"null\":6,\"salary\":0.0,\"exercised_stock_options\":493489.0,\"total_payments\":1848227.0},{\"null\":7,\"salary\":370448.0,\"exercised_stock_options\":1104054.0,\"total_payments\":4099771.0},{\"null\":8,\"salary\":0.0,\"exercised_stock_options\":5210569.0,\"total_payments\":3131860.0},{\"null\":9,\"salary\":197091.0,\"exercised_stock_options\":880290.0,\"total_payments\":1737629.0},{\"null\":10,\"salary\":130724.0,\"exercised_stock_options\":2282768.0,\"total_payments\":3100224.0},{\"null\":11,\"salary\":288589.0,\"exercised_stock_options\":0.0,\"total_payments\":1130461.0},{\"null\":12,\"salary\":248546.0,\"exercised_stock_options\":765313.0,\"total_payments\":2014835.0},{\"null\":13,\"salary\":257486.0,\"exercised_stock_options\":0.0,\"total_payments\":1545059.0},{\"null\":14,\"salary\":0.0,\"exercised_stock_options\":2218275.0,\"total_payments\":966522.0},{\"null\":15,\"salary\":0.0,\"exercised_stock_options\":372205.0,\"total_payments\":0.0},{\"null\":16,\"salary\":288542.0,\"exercised_stock_options\":0.0,\"total_payments\":1490344.0},{\"null\":17,\"salary\":251654.0,\"exercised_stock_options\":1056320.0,\"total_payments\":3202070.0},{\"null\":18,\"salary\":288558.0,\"exercised_stock_options\":185063.0,\"total_payments\":551174.0},{\"null\":19,\"salary\":63744.0,\"exercised_stock_options\":0.0,\"total_payments\":762135.0},{\"nu
ll\":20,\"salary\":0.0,\"exercised_stock_options\":1030329.0,\"total_payments\":87410.0},{\"null\":21,\"salary\":357091.0,\"exercised_stock_options\":4346544.0,\"total_payments\":1798780.0},{\"null\":22,\"salary\":271442.0,\"exercised_stock_options\":81042.0,\"total_payments\":3471141.0},{\"null\":23,\"salary\":0.0,\"exercised_stock_options\":3285.0,\"total_payments\":102500.0},{\"null\":24,\"salary\":304110.0,\"exercised_stock_options\":1441898.0,\"total_payments\":3038702.0},{\"null\":25,\"salary\":0.0,\"exercised_stock_options\":0.0,\"total_payments\":189583.0},{\"null\":26,\"salary\":187922.0,\"exercised_stock_options\":0.0,\"total_payments\":618850.0},{\"null\":27,\"salary\":0.0,\"exercised_stock_options\":0.0,\"total_payments\":228656.0},{\"null\":28,\"salary\":213625.0,\"exercised_stock_options\":1465734.0,\"total_payments\":2047593.0},{\"null\":29,\"salary\":249201.0,\"exercised_stock_options\":1635238.0,\"total_payments\":1099100.0},{\"null\":30,\"salary\":0.0,\"exercised_stock_options\":98718.0,\"total_payments\":0.0},{\"null\":31,\"salary\":231330.0,\"exercised_stock_options\":0.0,\"total_payments\":969068.0},{\"null\":32,\"salary\":0.0,\"exercised_stock_options\":0.0,\"total_payments\":475.0},{\"null\":33,\"salary\":182245.0,\"exercised_stock_options\":601438.0,\"total_payments\":2692324.0},{\"null\":34,\"salary\":0.0,\"exercised_stock_options\":0.0,\"total_payments\":148.0},{\"null\":35,\"salary\":211788.0,\"exercised_stock_options\":0.0,\"total_payments\":2081796.0},{\"null\":36,\"salary\":0.0,\"exercised_stock_options\":850477.0,\"total_payments\":0.0},{\"null\":37,\"salary\":0.0,\"exercised_stock_options\":0.0,\"total_payments\":7961.0},{\"null\":38,\"salary\":0.0,\"exercised_stock_options\":664375.0,\"total_payments\":0.0},{\"null\":39,\"salary\":224305.0,\"exercised_stock_options\":0.0,\"total_payments\":2652612.0},{\"null\":40,\"salary\":273746.0,\"exercised_stock_options\":0.0,\"total_payments\":2664228.0},{\"null\":41,\"salary\":339288.0,\"exerc
ised_stock_options\":4158995.0,\"total_payments\":10425757.0},{\"null\":42,\"salary\":216582.0,\"exercised_stock_options\":1624396.0,\"total_payments\":228474.0},{\"null\":43,\"salary\":210500.0,\"exercised_stock_options\":2027865.0,\"total_payments\":1204583.0},{\"null\":44,\"salary\":0.0,\"exercised_stock_options\":0.0,\"total_payments\":213071.0},{\"null\":45,\"salary\":272880.0,\"exercised_stock_options\":436515.0,\"total_payments\":2146973.0},{\"null\":46,\"salary\":477.0,\"exercised_stock_options\":4046157.0,\"total_payments\":916197.0},{\"null\":47,\"salary\":0.0,\"exercised_stock_options\":371750.0,\"total_payments\":77492.0},{\"null\":48,\"salary\":269076.0,\"exercised_stock_options\":607837.0,\"total_payments\":1057548.0},{\"null\":49,\"salary\":428780.0,\"exercised_stock_options\":1835558.0,\"total_payments\":4335388.0},{\"null\":50,\"salary\":211844.0,\"exercised_stock_options\":1624396.0,\"total_payments\":2003885.0},{\"null\":51,\"salary\":0.0,\"exercised_stock_options\":412878.0,\"total_payments\":87492.0},{\"null\":52,\"salary\":206121.0,\"exercised_stock_options\":17378.0,\"total_payments\":1208649.0},{\"null\":53,\"salary\":174246.0,\"exercised_stock_options\":0.0,\"total_payments\":704896.0},{\"null\":54,\"salary\":510364.0,\"exercised_stock_options\":3282960.0,\"total_payments\":4677574.0},{\"null\":55,\"salary\":365038.0,\"exercised_stock_options\":1623010.0,\"total_payments\":2101364.0},{\"null\":56,\"salary\":365163.0,\"exercised_stock_options\":2291113.0,\"total_payments\":4747979.0},{\"null\":57,\"salary\":162779.0,\"exercised_stock_options\":1362375.0,\"total_payments\":999356.0},{\"null\":58,\"salary\":0.0,\"exercised_stock_options\":139130.0,\"total_payments\":0.0},{\"null\":59,\"salary\":236457.0,\"exercised_stock_options\":2549361.0,\"total_payments\":875889.0},{\"null\":60,\"salary\":0.0,\"exercised_stock_options\":0.0,\"total_payments\":2151.0},{\"null\":61,\"salary\":1072321.0,\"exercised_stock_options\":34348384.0,\"total_payments\"
:103559793.0},{\"null\":62,\"salary\":261516.0,\"exercised_stock_options\":0.0,\"total_payments\":1271582.0},{\"null\":63,\"salary\":329078.0,\"exercised_stock_options\":1637034.0,\"total_payments\":1321557.0},{\"null\":64,\"salary\":0.0,\"exercised_stock_options\":757301.0,\"total_payments\":0.0},{\"null\":65,\"salary\":184899.0,\"exercised_stock_options\":0.0,\"total_payments\":807956.0},{\"null\":66,\"salary\":192008.0,\"exercised_stock_options\":83237.0,\"total_payments\":900585.0},{\"null\":67,\"salary\":263413.0,\"exercised_stock_options\":506765.0,\"total_payments\":1318763.0},{\"null\":68,\"salary\":262663.0,\"exercised_stock_options\":187500.0,\"total_payments\":1297461.0},{\"null\":69,\"salary\":0.0,\"exercised_stock_options\":759557.0,\"total_payments\":0.0},{\"null\":70,\"salary\":0.0,\"exercised_stock_options\":1753766.0,\"total_payments\":0.0},{\"null\":71,\"salary\":374125.0,\"exercised_stock_options\":608750.0,\"total_payments\":3859065.0},{\"null\":72,\"salary\":278601.0,\"exercised_stock_options\":0.0,\"total_payments\":2669589.0},{\"null\":73,\"salary\":0.0,\"exercised_stock_options\":9803.0,\"total_payments\":0.0},{\"null\":74,\"salary\":199157.0,\"exercised_stock_options\":664461.0,\"total_payments\":1414857.0},{\"null\":75,\"salary\":0.0,\"exercised_stock_options\":59539.0,\"total_payments\":0.0},{\"null\":76,\"salary\":96840.0,\"exercised_stock_options\":7509039.0,\"total_payments\":111529.0},{\"null\":77,\"salary\":80818.0,\"exercised_stock_options\":1599641.0,\"total_payments\":860136.0},{\"null\":78,\"salary\":213999.0,\"exercised_stock_options\":953136.0,\"total_payments\":5501630.0},{\"null\":79,\"salary\":210692.0,\"exercised_stock_options\":1451869.0,\"total_payments\":2093263.0},{\"null\":80,\"salary\":222093.0,\"exercised_stock_options\":4452476.0,\"total_payments\":911453.0},{\"null\":81,\"salary\":440698.0,\"exercised_stock_options\":0.0,\"total_payments\":2424083.0},{\"null\":82,\"salary\":0.0,\"exercised_stock_options\":343434.0,\
"total_payments\":181755.0},{\"null\":83,\"salary\":240189.0,\"exercised_stock_options\":0.0,\"total_payments\":1639297.0},{\"null\":84,\"salary\":420636.0,\"exercised_stock_options\":19794175.0,\"total_payments\":505050.0},{\"null\":85,\"salary\":275101.0,\"exercised_stock_options\":850010.0,\"total_payments\":1086821.0},{\"null\":86,\"salary\":314288.0,\"exercised_stock_options\":117551.0,\"total_payments\":1101393.0},{\"null\":87,\"salary\":94941.0,\"exercised_stock_options\":5266578.0,\"total_payments\":249787.0},{\"null\":88,\"salary\":0.0,\"exercised_stock_options\":0.0,\"total_payments\":27942.0},{\"null\":89,\"salary\":239502.0,\"exercised_stock_options\":0.0,\"total_payments\":1112087.0},{\"null\":90,\"salary\":1111258.0,\"exercised_stock_options\":19250000.0,\"total_payments\":8682716.0},{\"null\":91,\"salary\":0.0,\"exercised_stock_options\":0.0,\"total_payments\":1279.0},{\"null\":92,\"salary\":0.0,\"exercised_stock_options\":1426469.0,\"total_payments\":0.0},{\"null\":93,\"salary\":0.0,\"exercised_stock_options\":886231.0,\"total_payments\":564348.0},{\"null\":94,\"salary\":6615.0,\"exercised_stock_options\":0.0,\"total_payments\":1146658.0},{\"null\":95,\"salary\":655037.0,\"exercised_stock_options\":28798.0,\"total_payments\":1386690.0},{\"null\":96,\"salary\":0.0,\"exercised_stock_options\":0.0,\"total_payments\":362096.0},{\"null\":97,\"salary\":0.0,\"exercised_stock_options\":0.0,\"total_payments\":774401.0},{\"null\":98,\"salary\":404338.0,\"exercised_stock_options\":2022048.0,\"total_payments\":1747522.0},{\"null\":99,\"salary\":0.0,\"exercised_stock_options\":1324578.0,\"total_payments\":0.0},{\"null\":100,\"salary\":259996.0,\"exercised_stock_options\":1668260.0,\"total_payments\":1034395.0},{\"null\":101,\"salary\":317543.0,\"exercised_stock_options\":1297049.0,\"total_payments\":1934359.0},{\"null\":102,\"salary\":0.0,\"exercised_stock_options\":5127155.0,\"total_payments\":0.0},{\"null\":103,\"salary\":201955.0,\"exercised_stock_options\":17
29541.0,\"total_payments\":4484442.0},{\"null\":104,\"salary\":248146.0,\"exercised_stock_options\":281073.0,\"total_payments\":1576511.0},{\"null\":105,\"salary\":0.0,\"exercised_stock_options\":431750.0,\"total_payments\":83750.0},{\"null\":106,\"salary\":0.0,\"exercised_stock_options\":0.0,\"total_payments\":84992.0},{\"null\":107,\"salary\":0.0,\"exercised_stock_options\":0.0,\"total_payments\":49288.0},{\"null\":108,\"salary\":0.0,\"exercised_stock_options\":257817.0,\"total_payments\":182466.0},{\"null\":109,\"salary\":0.0,\"exercised_stock_options\":754966.0,\"total_payments\":0.0},{\"null\":110,\"salary\":76399.0,\"exercised_stock_options\":4160672.0,\"total_payments\":394475.0},{\"null\":111,\"salary\":262788.0,\"exercised_stock_options\":8191755.0,\"total_payments\":2368151.0},{\"null\":112,\"salary\":0.0,\"exercised_stock_options\":2604490.0,\"total_payments\":15456290.0},{\"null\":113,\"salary\":261809.0,\"exercised_stock_options\":0.0,\"total_payments\":477557.0},{\"null\":114,\"salary\":248017.0,\"exercised_stock_options\":825464.0,\"total_payments\":1054637.0},{\"null\":115,\"salary\":0.0,\"exercised_stock_options\":192758.0,\"total_payments\":55097.0},{\"null\":116,\"salary\":229284.0,\"exercised_stock_options\":400478.0,\"total_payments\":812194.0},{\"null\":117,\"salary\":231946.0,\"exercised_stock_options\":636246.0,\"total_payments\":1566469.0},{\"null\":118,\"salary\":221003.0,\"exercised_stock_options\":0.0,\"total_payments\":319941.0},{\"null\":119,\"salary\":158403.0,\"exercised_stock_options\":8308552.0,\"total_payments\":360300.0},{\"null\":120,\"salary\":0.0,\"exercised_stock_options\":30766064.0,\"total_payments\":91093.0},{\"null\":121,\"salary\":250100.0,\"exercised_stock_options\":1550019.0,\"total_payments\":1410464.0},{\"null\":122,\"salary\":492375.0,\"exercised_stock_options\":8831913.0,\"total_payments\":550981.0},{\"null\":123,\"salary\":1060932.0,\"exercised_stock_options\":10433518.0,\"total_payments\":17252530.0},{\"null\":124
,\"salary\":261879.0,\"exercised_stock_options\":15364167.0,\"total_payments\":3123383.0},{\"null\":125,\"salary\":239671.0,\"exercised_stock_options\":0.0,\"total_payments\":827696.0},{\"null\":126,\"salary\":0.0,\"exercised_stock_options\":176378.0,\"total_payments\":50591.0},{\"null\":127,\"salary\":304588.0,\"exercised_stock_options\":940257.0,\"total_payments\":3676340.0},{\"null\":128,\"salary\":309946.0,\"exercised_stock_options\":671737.0,\"total_payments\":1587421.0},{\"null\":129,\"salary\":0.0,\"exercised_stock_options\":0.0,\"total_payments\":3750.0},{\"null\":130,\"salary\":85274.0,\"exercised_stock_options\":2165172.0,\"total_payments\":1979596.0},{\"null\":131,\"salary\":247338.0,\"exercised_stock_options\":591250.0,\"total_payments\":399393.0},{\"null\":132,\"salary\":349487.0,\"exercised_stock_options\":2070306.0,\"total_payments\":8407016.0},{\"null\":133,\"salary\":330546.0,\"exercised_stock_options\":2542813.0,\"total_payments\":2355702.0},{\"null\":134,\"salary\":0.0,\"exercised_stock_options\":0.0,\"total_payments\":119292.0},{\"null\":135,\"salary\":415189.0,\"exercised_stock_options\":0.0,\"total_payments\":1868758.0},{\"null\":136,\"salary\":265214.0,\"exercised_stock_options\":3181250.0,\"total_payments\":1092663.0},{\"null\":137,\"salary\":278601.0,\"exercised_stock_options\":765920.0,\"total_payments\":875760.0},{\"null\":138,\"salary\":274975.0,\"exercised_stock_options\":384728.0,\"total_payments\":1272284.0}], [{\"field\": null, \"type\": \"Integer\"}, {\"field\": \"salary\", \"type\": \"Float\"}, {\"field\": \"exercised_stock_options\", \"type\": \"Float\"}, {\"field\": \"total_payments\", \"type\": \"Float\"}]);\n",
" grid.initialize_slick_grid({\"fullWidthRows\": true, \"rowHeight\": 28, \"enableColumnReorder\": false, \"enableTextSelectionOnCells\": true, \"syncColumnCellResize\": true, \"forceFitColumns\": true});\n",
" });\n",
" });\n",
"});\n"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import qgrid\n",
"qgrid.nbinstall(overwrite=True)\n",
"qgrid.set_defaults(remote_js=True, precision=4)\n",
"qgrid.show_grid(feat)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>0</th>\n",
" <th>1</th>\n",
" <th>2</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>365788</td>\n",
" <td>0</td>\n",
" <td>1061827</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>267102</td>\n",
" <td>6680544</td>\n",
" <td>5634343</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>170941</td>\n",
" <td>4890344</td>\n",
" <td>211725</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0</td>\n",
" <td>651850</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>243293</td>\n",
" <td>5538001</td>\n",
" <td>288682</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>267093</td>\n",
" <td>0</td>\n",
" <td>628522</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>0</td>\n",
" <td>493489</td>\n",
" <td>1848227</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>370448</td>\n",
" <td>1104054</td>\n",
" <td>4099771</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>0</td>\n",
" <td>5210569</td>\n",
" <td>3131860</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>197091</td>\n",
" <td>880290</td>\n",
" <td>1737629</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>130724</td>\n",
" <td>2282768</td>\n",
" <td>3100224</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>288589</td>\n",
" <td>0</td>\n",
" <td>1130461</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>248546</td>\n",
" <td>765313</td>\n",
" <td>2014835</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>257486</td>\n",
" <td>0</td>\n",
" <td>1545059</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>0</td>\n",
" <td>2218275</td>\n",
" <td>966522</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>0</td>\n",
" <td>372205</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>288542</td>\n",
" <td>0</td>\n",
" <td>1490344</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>251654</td>\n",
" <td>1056320</td>\n",
" <td>3202070</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>288558</td>\n",
" <td>185063</td>\n",
" <td>551174</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>63744</td>\n",
" <td>0</td>\n",
" <td>762135</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20</th>\n",
" <td>0</td>\n",
" <td>1030329</td>\n",
" <td>87410</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21</th>\n",
" <td>357091</td>\n",
" <td>4346544</td>\n",
" <td>1798780</td>\n",
" </tr>\n",
" <tr>\n",
" <th>22</th>\n",
" <td>271442</td>\n",
" <td>81042</td>\n",
" <td>3471141</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23</th>\n",
" <td>0</td>\n",
" <td>3285</td>\n",
" <td>102500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24</th>\n",
" <td>304110</td>\n",
" <td>1441898</td>\n",
" <td>3038702</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>189583</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26</th>\n",
" <td>187922</td>\n",
" <td>0</td>\n",
" <td>618850</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>228656</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28</th>\n",
" <td>213625</td>\n",
" <td>1465734</td>\n",
" <td>2047593</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29</th>\n",
" <td>249201</td>\n",
" <td>1635238</td>\n",
" <td>1099100</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>109</th>\n",
" <td>0</td>\n",
" <td>754966</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>110</th>\n",
" <td>76399</td>\n",
" <td>4160672</td>\n",
" <td>394475</td>\n",
" </tr>\n",
" <tr>\n",
" <th>111</th>\n",
" <td>262788</td>\n",
" <td>8191755</td>\n",
" <td>2368151</td>\n",
" </tr>\n",
" <tr>\n",
" <th>112</th>\n",
" <td>0</td>\n",
" <td>2604490</td>\n",
" <td>15456290</td>\n",
" </tr>\n",
" <tr>\n",
" <th>113</th>\n",
" <td>261809</td>\n",
" <td>0</td>\n",
" <td>477557</td>\n",
" </tr>\n",
" <tr>\n",
" <th>114</th>\n",
" <td>248017</td>\n",
" <td>825464</td>\n",
" <td>1054637</td>\n",
" </tr>\n",
" <tr>\n",
" <th>115</th>\n",
" <td>0</td>\n",
" <td>192758</td>\n",
" <td>55097</td>\n",
" </tr>\n",
" <tr>\n",
" <th>116</th>\n",
" <td>229284</td>\n",
" <td>400478</td>\n",
" <td>812194</td>\n",
" </tr>\n",
" <tr>\n",
" <th>117</th>\n",
" <td>231946</td>\n",
" <td>636246</td>\n",
" <td>1566469</td>\n",
" </tr>\n",
" <tr>\n",
" <th>118</th>\n",
" <td>221003</td>\n",
" <td>0</td>\n",
" <td>319941</td>\n",
" </tr>\n",
" <tr>\n",
" <th>119</th>\n",
" <td>158403</td>\n",
" <td>8308552</td>\n",
" <td>360300</td>\n",
" </tr>\n",
" <tr>\n",
" <th>120</th>\n",
" <td>0</td>\n",
" <td>30766064</td>\n",
" <td>91093</td>\n",
" </tr>\n",
" <tr>\n",
" <th>121</th>\n",
" <td>250100</td>\n",
" <td>1550019</td>\n",
" <td>1410464</td>\n",
" </tr>\n",
" <tr>\n",
" <th>122</th>\n",
" <td>492375</td>\n",
" <td>8831913</td>\n",
" <td>550981</td>\n",
" </tr>\n",
" <tr>\n",
" <th>123</th>\n",
" <td>1060932</td>\n",
" <td>10433518</td>\n",
" <td>17252530</td>\n",
" </tr>\n",
" <tr>\n",
" <th>124</th>\n",
" <td>261879</td>\n",
" <td>15364167</td>\n",
" <td>3123383</td>\n",
" </tr>\n",
" <tr>\n",
" <th>125</th>\n",
" <td>239671</td>\n",
" <td>0</td>\n",
" <td>827696</td>\n",
" </tr>\n",
" <tr>\n",
" <th>126</th>\n",
" <td>0</td>\n",
" <td>176378</td>\n",
" <td>50591</td>\n",
" </tr>\n",
" <tr>\n",
" <th>127</th>\n",
" <td>304588</td>\n",
" <td>940257</td>\n",
" <td>3676340</td>\n",
" </tr>\n",
" <tr>\n",
" <th>128</th>\n",
" <td>309946</td>\n",
" <td>671737</td>\n",
" <td>1587421</td>\n",
" </tr>\n",
" <tr>\n",
" <th>129</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>3750</td>\n",
" </tr>\n",
" <tr>\n",
" <th>130</th>\n",
" <td>85274</td>\n",
" <td>2165172</td>\n",
" <td>1979596</td>\n",
" </tr>\n",
" <tr>\n",
" <th>131</th>\n",
" <td>247338</td>\n",
" <td>591250</td>\n",
" <td>399393</td>\n",
" </tr>\n",
" <tr>\n",
" <th>132</th>\n",
" <td>349487</td>\n",
" <td>2070306</td>\n",
" <td>8407016</td>\n",
" </tr>\n",
" <tr>\n",
" <th>133</th>\n",
" <td>330546</td>\n",
" <td>2542813</td>\n",
" <td>2355702</td>\n",
" </tr>\n",
" <tr>\n",
" <th>134</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>119292</td>\n",
" </tr>\n",
" <tr>\n",
" <th>135</th>\n",
" <td>415189</td>\n",
" <td>0</td>\n",
" <td>1868758</td>\n",
" </tr>\n",
" <tr>\n",
" <th>136</th>\n",
" <td>265214</td>\n",
" <td>3181250</td>\n",
" <td>1092663</td>\n",
" </tr>\n",
" <tr>\n",
" <th>137</th>\n",
" <td>278601</td>\n",
" <td>765920</td>\n",
" <td>875760</td>\n",
" </tr>\n",
" <tr>\n",
" <th>138</th>\n",
" <td>274975</td>\n",
" <td>384728</td>\n",
" <td>1272284</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>139 rows × 3 columns</p>\n",
"</div>"
],
"text/plain": [
" 0 1 2\n",
"0 365788 0 1061827\n",
"1 267102 6680544 5634343\n",
"2 170941 4890344 211725\n",
"3 0 651850 0\n",
"4 243293 5538001 288682\n",
"5 267093 0 628522\n",
"6 0 493489 1848227\n",
"7 370448 1104054 4099771\n",
"8 0 5210569 3131860\n",
"9 197091 880290 1737629\n",
"10 130724 2282768 3100224\n",
"11 288589 0 1130461\n",
"12 248546 765313 2014835\n",
"13 257486 0 1545059\n",
"14 0 2218275 966522\n",
"15 0 372205 0\n",
"16 288542 0 1490344\n",
"17 251654 1056320 3202070\n",
"18 288558 185063 551174\n",
"19 63744 0 762135\n",
"20 0 1030329 87410\n",
"21 357091 4346544 1798780\n",
"22 271442 81042 3471141\n",
"23 0 3285 102500\n",
"24 304110 1441898 3038702\n",
"25 0 0 189583\n",
"26 187922 0 618850\n",
"27 0 0 228656\n",
"28 213625 1465734 2047593\n",
"29 249201 1635238 1099100\n",
".. ... ... ...\n",
"109 0 754966 0\n",
"110 76399 4160672 394475\n",
"111 262788 8191755 2368151\n",
"112 0 2604490 15456290\n",
"113 261809 0 477557\n",
"114 248017 825464 1054637\n",
"115 0 192758 55097\n",
"116 229284 400478 812194\n",
"117 231946 636246 1566469\n",
"118 221003 0 319941\n",
"119 158403 8308552 360300\n",
"120 0 30766064 91093\n",
"121 250100 1550019 1410464\n",
"122 492375 8831913 550981\n",
"123 1060932 10433518 17252530\n",
"124 261879 15364167 3123383\n",
"125 239671 0 827696\n",
"126 0 176378 50591\n",
"127 304588 940257 3676340\n",
"128 309946 671737 1587421\n",
"129 0 0 3750\n",
"130 85274 2165172 1979596\n",
"131 247338 591250 399393\n",
"132 349487 2070306 8407016\n",
"133 330546 2542813 2355702\n",
"134 0 0 119292\n",
"135 415189 0 1868758\n",
"136 265214 3181250 1092663\n",
"137 278601 765920 875760\n",
"138 274975 384728 1272284\n",
"\n",
"[139 rows x 3 columns]"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = pd.DataFrame(features)\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 126,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import numpy as np\n",
"features=np.array(features)"
]
},
{
"cell_type": "code",
"execution_count": 131,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Salary values (column 0 of `features`) that exceed the 2,000,000 threshold.\n",
"sal = [salary for salary in features[:, 0] if salary > 2000000]"
]
},
{
"cell_type": "code",
"execution_count": 132,
"metadata": {
"collapsed": false
},
"outputs": [
{
"ename": "IndexError",
"evalue": "list index out of range",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mIndexError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-132-4e4bcd9c691f>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mitem\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mmy_dataset\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mitems\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0;32mif\u001b[0m \u001b[0mitem\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'salary'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0msal\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0;32mprint\u001b[0m \u001b[0mitem\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'salary'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mIndexError\u001b[0m: list index out of range"
]
}
],
"source": [
"# Print the salary and name of every person whose salary is one of the values\n",
"# collected in `sal`. A membership test is used instead of sal[0] so the cell\n",
"# simply prints nothing when `sal` is empty rather than raising IndexError.\n",
"for key, item in my_dataset.items():\n",
"    if item['salary'] in sal:\n",
"        print item['salary'], key"
]
},
{
"cell_type": "code",
"execution_count": 133,
"metadata": {
"collapsed": false
},
"outputs": [
{
"ename": "KeyError",
"evalue": "'TOTAL'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-133-eaba9dcb8a27>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdata_dict\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"TOTAL\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;31mKeyError\u001b[0m: 'TOTAL'"
]
}
],
"source": []
},
{
"cell_type": "code",
"execution_count": 119,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.10"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment