Created
August 28, 2016 22:18
-
-
Save TDahlberg/f0ac84468003e66aa3f302d17b776cdf to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 20, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"import pandas as pd\n", | |
"from geopy.geocoders import GoogleV3\n", | |
"import json\n", | |
"import googlemaps" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 105, | |
"metadata": { | |
"collapsed": false, | |
"scrolled": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# Add the data using pandas and check that it's working correctly\n", | |
"data = pd.read_excel('/Users/tylerdahlberg/projects/profdev/chantal_geocoding/Mod TRACKSmapping Allentown.xlsx',sheetname='Location Information')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 106, | |
"metadata": { | |
"collapsed": false, | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"(366, 14)\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/plain": [ | |
"0 NaN\n", | |
"1 NaN\n", | |
"2 NaN\n", | |
"3 5th & Linden, Allentown, PA\n", | |
"4 NaN\n", | |
"Name: gcaddress, dtype: object" | |
] | |
}, | |
"execution_count": 106, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Create a merged address field\n", | |
"data['gcaddress'] = data['CrossStreet A'] + \" & \" + data['CrossStreet B'] + \", \" + data['City'] + \", \" + data['State (eg PA)']\n", | |
"print(data.shape)\n", | |
"data['gcaddress'].head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"(108, 14)" | |
] | |
}, | |
"execution_count": 12, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"'''# Drop rows with missing addresse values\n", | |
"cleandata = data.dropna(subset=['gcaddress'])\n", | |
"lessdata = cleandata.groupby(['gcaddress']).head(1)\n", | |
"lessdata.shape'''" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Geocode using googlemaps" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 107, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"#### Test using google maps\n", | |
"#gmaps.geocode('1716 Mount Vernon St Philadelphia PA')[0]['geometry']['location']['lat']\n", | |
"#[0]['']['lat']\n", | |
"gmaps = googlemaps.Client(key='',queries_per_second=50)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 108, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"data['lat'] = ''\n", | |
"data['lon'] = ''" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 109, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Geocoding complete\n" | |
] | |
} | |
], | |
"source": [ | |
"data['lat'] = data['gcaddress'].apply(gmaps.geocode).apply(lambda x: (x[0]['geometry']['location']['lat']))\n", | |
"data['lon'] = data['gcaddress'].apply(gmaps.geocode).apply(lambda x: (x[0]['geometry']['location']['lng']))\n", | |
"print('Geocoding complete')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 110, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Item_Number</th>\n", | |
" <th>CrossStreet A Question Name</th>\n", | |
" <th>CrossStreet A</th>\n", | |
" <th>CrossStreet B Question Name</th>\n", | |
" <th>CrossStreet B</th>\n", | |
" <th>Distance Descriptor</th>\n", | |
" <th>City</th>\n", | |
" <th>State (eg PA)</th>\n", | |
" <th>Zipcode</th>\n", | |
" <th>Item 1D (Participant ID)</th>\n", | |
" <th>Optional: Exact Street Address</th>\n", | |
" <th>Community</th>\n", | |
" <th>Notes</th>\n", | |
" <th>gcaddress</th>\n", | |
" <th>lat</th>\n", | |
" <th>lon</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>Item_6</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>0.5 to 1</td>\n", | |
" <td>Allentown</td>\n", | |
" <td>PA</td>\n", | |
" <td>18102</td>\n", | |
" <td>Allen316c2</td>\n", | |
" <td>101 Tilghman St</td>\n", | |
" <td>Allentown</td>\n", | |
" <td>Elias Market \"Syrian place\"?</td>\n", | |
" <td>NaN</td>\n", | |
" <td>18.775632</td>\n", | |
" <td>100.773042</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>Item_4</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>1 to 2</td>\n", | |
" <td>Allentown</td>\n", | |
" <td>PA</td>\n", | |
" <td>18102</td>\n", | |
" <td>Allen324s7</td>\n", | |
" <td>106 N 17th Street</td>\n", | |
" <td>Allentown</td>\n", | |
" <td>\"William\" school (William Allen HS?)</td>\n", | |
" <td>NaN</td>\n", | |
" <td>18.775632</td>\n", | |
" <td>100.773042</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>Item_3</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>1 to 2</td>\n", | |
" <td>Allentown</td>\n", | |
" <td>PA</td>\n", | |
" <td>18102</td>\n", | |
" <td>Allen316c5</td>\n", | |
" <td>106 N 17th Street</td>\n", | |
" <td>Allentown</td>\n", | |
" <td>William Allen High School</td>\n", | |
" <td>NaN</td>\n", | |
" <td>18.775632</td>\n", | |
" <td>100.773042</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>Item_11</td>\n", | |
" <td>Pantry_1a</td>\n", | |
" <td>5th</td>\n", | |
" <td>Pantry_1b</td>\n", | |
" <td>Linden</td>\n", | |
" <td>0.5 to 1</td>\n", | |
" <td>Allentown</td>\n", | |
" <td>PA</td>\n", | |
" <td>18102</td>\n", | |
" <td>Allen324c4</td>\n", | |
" <td>108 N 5th St</td>\n", | |
" <td>Allentown</td>\n", | |
" <td>Grace Episcopal Church</td>\n", | |
" <td>5th & Linden, Allentown, PA</td>\n", | |
" <td>40.604685</td>\n", | |
" <td>-75.468577</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>Item_11</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>0.5 to 1</td>\n", | |
" <td>Allentown</td>\n", | |
" <td>PA</td>\n", | |
" <td>18102</td>\n", | |
" <td>Allen324s7</td>\n", | |
" <td>112 N 5th Street</td>\n", | |
" <td>Allentown</td>\n", | |
" <td>New Bethany Ministries</td>\n", | |
" <td>NaN</td>\n", | |
" <td>18.775632</td>\n", | |
" <td>100.773042</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Item_Number CrossStreet A Question Name CrossStreet A \\\n", | |
"0 Item_6 NaN NaN \n", | |
"1 Item_4 NaN NaN \n", | |
"2 Item_3 NaN NaN \n", | |
"3 Item_11 Pantry_1a 5th \n", | |
"4 Item_11 NaN NaN \n", | |
"\n", | |
" CrossStreet B Question Name CrossStreet B Distance Descriptor City \\\n", | |
"0 NaN NaN 0.5 to 1 Allentown \n", | |
"1 NaN NaN 1 to 2 Allentown \n", | |
"2 NaN NaN 1 to 2 Allentown \n", | |
"3 Pantry_1b Linden 0.5 to 1 Allentown \n", | |
"4 NaN NaN 0.5 to 1 Allentown \n", | |
"\n", | |
" State (eg PA) Zipcode Item 1D (Participant ID) \\\n", | |
"0 PA 18102 Allen316c2 \n", | |
"1 PA 18102 Allen324s7 \n", | |
"2 PA 18102 Allen316c5 \n", | |
"3 PA 18102 Allen324c4 \n", | |
"4 PA 18102 Allen324s7 \n", | |
"\n", | |
" Optional: Exact Street Address Community \\\n", | |
"0 101 Tilghman St Allentown \n", | |
"1 106 N 17th Street Allentown \n", | |
"2 106 N 17th Street Allentown \n", | |
"3 108 N 5th St Allentown \n", | |
"4 112 N 5th Street Allentown \n", | |
"\n", | |
" Notes gcaddress \\\n", | |
"0 Elias Market \"Syrian place\"? NaN \n", | |
"1 \"William\" school (William Allen HS?) NaN \n", | |
"2 William Allen High School NaN \n", | |
"3 Grace Episcopal Church 5th & Linden, Allentown, PA \n", | |
"4 New Bethany Ministries NaN \n", | |
"\n", | |
" lat lon \n", | |
"0 18.775632 100.773042 \n", | |
"1 18.775632 100.773042 \n", | |
"2 18.775632 100.773042 \n", | |
"3 40.604685 -75.468577 \n", | |
"4 18.775632 100.773042 " | |
] | |
}, | |
"execution_count": 110, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"data.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 111, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"data.to_csv('/Users/tylerdahlberg/projects/profdev/chantal_geocoding/geocoded/geocoded_Mod TRACKSmapping Allentown.csv')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python [Root]", | |
"language": "python", | |
"name": "Python [Root]" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.5.2" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment