Created
October 31, 2019 21:53
-
-
Save mkearney/11d4a0d78421af7bdf8aea87b05d1629 to your computer and use it in GitHub Desktop.
Get all accounts followed by members of the U.S. Congress
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## load rtweet and congress116
library(rtweet)
library(congress116)
## create long-version of congress116 data (and drop rows w/o screen names):
## one row per official handle, plus one extra row for every member who also
## has a personal handle; both rows carry the member's bioguide ID
sns <- with(congress116, {
  has_personal <- !is.na(screen_name_personal)
  data.frame(
    bioguide = c(bioguide, bioguide[has_personal]),
    handle = c(screen_name_official, screen_name_personal[has_personal]),
    stringsAsFactors = FALSE
  )
})
## missing personal handles were excluded above; drop missing official handles
## too, so that no NA handle is ever passed on to the API calls
sns <- sns[!is.na(sns$handle), , drop = FALSE]
rownames(sns) <- NULL
## function for: printing
##
## Prints one progress line of the form "@<handle>      :   123 friends".
##   x    screen name (character scalar); left-aligned in a 16-character column
##   fds  friends data for that screen name; only its row count (NROW) is used,
##        so NULL is reported as 0 friends
## Output goes to the console via cat(); nothing useful is returned.
print_status <- function(x, fds) {
  ## sprintf("%-16s") pads short handles and leaves longer ones untouched
  ## (building the padding with rep() fails once the handle exceeds 16 chars)
  handle <- sprintf("%-16s", x)
  cat(
    paste0("@", handle, ": ", sprintf("%5d friends", NROW(fds))),
    fill = TRUE
  )
}
## function for: rate-limit and error managing version of rtweet::get_friends
##
## Calls rate_limit_sleep() first (which may wait and which maintains the
## global token `.tkn`), then forwards `...` to get_friends() with that token.
## If the request errors, the error message is reported and the request is
## retried exactly once after a one-second pause; an error raised by the retry
## is not caught and propagates to the caller.
get_friends2 <- function(...) {
  rate_limit_sleep()
  tryCatch(
    get_friends(..., token = .tkn),
    error = function(e) {
      ## surface the failure instead of swallowing it silently
      message("get_friends() failed (", conditionMessage(e), "); retrying once")
      Sys.sleep(1)
      get_friends(..., token = .tkn)
    }
  )
}
## function for: error managing version of rtweet::rate_limit
##
## Forwards `...` to rate_limit(). If the call errors, the error message is
## reported and the call is retried exactly once after a one-second pause; an
## error raised by the retry propagates to the caller.
rate_limit2 <- function(...) {
  tryCatch(
    rate_limit(...),
    error = function(e) {
      ## surface the failure instead of swallowing it silently
      message("rate_limit() failed (", conditionMessage(e), "); retrying once")
      Sys.sleep(1)
      rate_limit(...)
    }
  )
}
## function for: sleep until rate limit reset
##
## Meant to be called once before every get_friends request. Book-keeping is
## stored in global variables (assigned with `<<-`) so it survives between
## calls:
##   .tkn       token to use for the next request
##   .rl_count  requests counted against the current token (out of 15)
##   .regtoken  TRUE while .tkn is the regular token from get_token()
##   .bearable  cached result of the "read-write" access-level check
## While fewer than 14 requests have been counted, the counter is incremented
## and the function returns immediately. Otherwise the token is switched
## (bearer token <-> regular token, when the access level allows it), the rate
## limit of the newly selected token is queried, and the function sleeps until
## that limit resets only if the token has no requests remaining.
## Always returns invisibly.
rate_limit_sleep <- function() {
  ## TRUE when the regular token reports "read-write" access; looked up once
  ## and cached in the global .bearable
  is_bearable <- function() {
    if (!exists(".bearable")) {
      .bearable <<- grepl("read-write", rtweet:::api_access_level(get_token()))
    }
    .bearable
  }

  ## first call: start with the regular token
  if (!exists(".tkn")) {
    .tkn <<- get_token()
  }
  ## first call: seed the counter from the requests already used
  if (!exists(".rl_count")) {
    .rl_count <<- 15 - rate_limit2("get_friends", token = .tkn)$remaining
    .regtoken <<- TRUE
  }

  ## fast path: budget left on the current token, just count this request
  if (.rl_count < 14L) {
    .rl_count <<- .rl_count + 1L
    return(invisible())
  }

  ## current token is (nearly) used up: switch to the other token if possible,
  ## otherwise fall back to / stay on the regular token
  if (.regtoken && is_bearable()) {
    .tkn <<- bearer_token()
    .regtoken <<- FALSE
  } else {
    .tkn <<- get_token()
    .regtoken <<- TRUE
  }

  ## re-sync the counter with what the API reports for the selected token
  rl <- rate_limit2("get_friends", token = .tkn)
  .rl_count <<- 15 - rl$remaining
  if (.rl_count < 15) {
    return(invisible())
  }

  ## nothing remaining on this token: wait for its reset (plus one second),
  ## unless the reset time has already passed
  s <- as.numeric(difftime(rl$reset_at, Sys.time(), units = "secs"))
  if (s < 0) {
    return(invisible())
  }
  cat("Sleeping for about", round(s / 60, 2), "minutes...\n")
  Sys.sleep(s + 1)
}
## initialize output list: one element (friends data) per row of sns
fds <- vector("list", nrow(sns))
## for loop
for (i in seq_along(fds)) {
  ## get friends list - and extract next cursor (page) value
  fds[[i]] <- get_friends2(sns$handle[i])
  np <- next_cursor(fds[[i]])
  ## if user follows more than 5,000 accounts, make additional calls using np;
  ## stop on an empty, missing (NA) or zero cursor - an NA cursor would
  ## otherwise make the condition NA and abort the whole run
  while (length(np) > 0 && !is.na(np) && np != 0) {
    fdsi <- get_friends2(sns$handle[i], page = np)
    np <- next_cursor(fdsi)
    fds[[i]] <- rbind(fds[[i]], fdsi)
  }
  ## add congress ID to output data frame (skipped when nothing was returned)
  if (NROW(fds[[i]]) > 0) {
    fds[[i]]$bioguide <- sns$bioguide[i]
  }
  ## print message
  print_status(sns$handle[i], fds[[i]])
}
To calculate how long the code will take, use the following:
## estimated run time in hours: 60 handles per hour as the baseline ...
h <- nrow(sns) / 60
## ... and half of that when the token reports "read-write" access (the same
## check rate_limit_sleep() uses to decide whether it may switch to a bearer
## token)
if (grepl("read-write", rtweet:::api_access_level(get_token()))) {
  h <- h / 2
}
cat("This will take a little over", round(h, 2), "hours\n")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This script is useful because it...