Last active
October 18, 2022 18:22
-
-
Save hancush/92fff02dbcc8d7a7c19963bb41d07615 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
---
title: "Geocoding template"
output: html_notebook
---
# Instructions
- Upload your file to the data/ directory of this workspace by clicking Upload in the menu bar of the file browser in the lower right pane. Make note of the name of the column containing the addresses you want to geocode.
- Follow these instructions to get an API key for the Google Geocoding API in the Google Console: https://developers.google.com/maps/documentation/geocoding/cloud-setup
  - N.B.: You have to set up a billing account to use the Google Geocoding API. See details on pricing here: https://developers.google.com/maps/documentation/geocoding/usage-and-billing#pricing-for-product
- Run this notebook by clicking Run in the menu bar of the text editor in the upper left pane. You will be prompted to select your input file with a file picker, then prompted to provide the address column name and your API key in the R console (the bottom left pane). In the console, enter each value, then press Enter on your keyboard to submit.
- Once the script completes, the file browser in the lower right pane should contain a file called geocodedAddressData.csv. To download the new data file to your computer, check the box next to it, then in the file browser menu bar, click More > Export...
```{r}
# Geocode every address in a user-selected CSV file with the Google Geocoding
# API (via ggmap) and write the input rows, plus lat / lon / geoAddress / zip
# columns, to geocodedAddressData.csv. Rows that cannot be geocoded keep NA in
# the added columns.

# Get user inputs
inputFile <- file.choose(new = TRUE)
addressColumn <- readline(prompt = "Enter the name of the column containing the address to geocode: ")
apiKey <- readline(prompt = "Enter your Google Geocoding API key: ")

# Load and configure libraries
library(rlist)
library(ggmap)
register_google(apiKey)

# Read input file
addressData <- read.csv(inputFile, stringsAsFactors = FALSE)

# Fail fast on a mistyped column name instead of failing obscurely on every row
if (!addressColumn %in% names(addressData)) {
  stop(
    "Column '", addressColumn, "' was not found in the input file. ",
    "Available columns: ", paste(names(addressData), collapse = ", "),
    call. = FALSE
  )
}

# Add columns for latitude, longitude, the geocoded address, and ZIP code
addressData$lat <- NA
addressData$lon <- NA
addressData$geoAddress <- NA
addressData$zip <- NA

# seq_len() yields an empty sequence for a file with no rows (1:0 would not)
for (i in seq_len(nrow(addressData))) {
  address <- addressData[[addressColumn]][i]
  message("Geocoding ", address, "...")

  # OPTIONAL: If you need to do any pre-processing of your address data, add
  # those lines here, e.g., Remove .5 from address numbers
  # address <- gsub('\\.5\\b', '', address)

  # Use the value returned by tryCatch() to signal failure. Assigning a flag
  # inside the handler only changes a variable local to the handler function,
  # so the loop would never see it.
  result <- tryCatch(
    geocode(address, output = "all", source = "google"),
    error = function(condition) {
      message("Could not geocode ", address)
      message(conditionMessage(condition))
      NULL
    }
  )

  # Skip addresses that cannot be geocoded (output should contain NA for
  # geocoding columns for failed addresses). This covers both a signalled
  # error (NULL) and a response that carries no results.
  if (!is.list(result) || length(result$results) == 0) {
    if (!is.null(result)) message("Could not geocode ", address)
    next
  }

  topResult <- result$results[[1]]
  addressData$lon[i] <- as.numeric(topResult$geometry$location$lng)
  addressData$lat[i] <- as.numeric(topResult$geometry$location$lat)
  addressData$geoAddress[i] <- as.character(topResult$formatted_address)

  tryCatch({
    # address_components is a list of objects representing each part of the
    # parsed address. Extract the value from the object of type "postal_code"
    # (i.e., ZIP).
    # https://rdrr.io/cran/rlist/man/list.first.html
    zipComponent <- list.first(
      topResult$address_components, "postal_code" %in% types
    )
    if (is.null(zipComponent)) {
      message("Response did not contain ZIP code: ", address)
    } else {
      addressData$zip[i] <- zipComponent$short_name
    }
  }, error = function(condition) {
    message("Response did not contain ZIP code: ", address)
  })
}

write.csv(addressData, "geocodedAddressData.csv", row.names = FALSE)
```
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment