Last active
October 29, 2020 02:36
-
-
Save elliottmorris/b1a368c0eee194764f484267e98efc1f to your computer and use it in GitHub Desktop.
Charts the poll-level trend in 2020 polls
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse) | |
library(lubridate) | |
library(pbapply) | |
url<- 'https://docs.google.com/spreadsheets/d/e/2PACX-1vQ56fySJKLL18Lipu1_i3ID9JE06voJEz2EXm6JW4Vh11zmndyTwejMavuNntzIWLY0RyhA1UsVEen0/pub?gid=0&single=true&output=csv' | |
all_polls <- read_csv(url) | |
# remove any polls if biden or trump blank | |
all_polls <- all_polls %>% filter(!is.na(biden),!is.na(trump))#, include == "TRUE") | |
poll_shifts <- all_polls %>% | |
mutate(date = as_date(entry.date.time..et., format = '%m/%d/%Y %H:%M',tz='US/Eastern'), | |
number.of.observations = ifelse(number.of.observations > 1500, | |
1500,number.of.observations)) %>% | |
mutate(sample_size_weight = sqrt(number.of.observations / mean(number.of.observations))) %>% | |
arrange(date) %>% | |
group_by(state, pollster) %>% | |
mutate(shift_biden_margin = biden_margin - lag(biden_margin)) %>% | |
group_by(state) %>% | |
mutate(n_polls = n()) %>% | |
filter(n_polls > 10,!is.na(shift_biden_margin)) %>% | |
mutate(shift_biden_margin = ifelse(is.nan(shift_biden_margin),NA,shift_biden_margin)) | |
poll_shifts <- expand_grid(date = unique(poll_shifts$date), | |
state = unique(poll_shifts$state)) %>% | |
left_join(poll_shifts) | |
poll_shifts <- pblapply(unique(poll_shifts$state), | |
cl = 8, | |
function(x){ | |
print(x) | |
tmp_state <- poll_shifts %>% filter(state == x) | |
lapply(as_date(unique(tmp_state$date)), | |
function(y){ | |
tmp_date <- tmp_state %>% | |
filter(date <= y) %>% | |
group_by(date,state) %>% | |
summarise(shift_biden_margin = weighted.mean(shift_biden_margin, number.of.observations,na.rm=T), | |
sample_size_weight = sum(sample_size_weight,na.rm=T)) | |
sample_size_weight <- tmp_date %>% | |
mutate(shift_biden_margin = ifelse(is.nan(shift_biden_margin),NA,shift_biden_margin), | |
sample_size_weight = ifelse(sample_size_weight == 0,NA,sample_size_weight)) | |
average_shift <- weighted.mean(tmp_date$shift_biden_margin, tmp_date$sample_size_weight * | |
exp(-0.1 * as.numeric(difftime(y, tmp_date$date,units='days'))), | |
na.rm=T) | |
return( | |
tmp_date %>% | |
tail(1) %>% | |
mutate(average_shift = average_shift) | |
) | |
}) %>% | |
bind_rows %>% | |
return | |
}) %>% | |
bind_rows | |
state_order <- poll_shifts %>% | |
group_by(state) %>% | |
summarise(final_shift = last(average_shift)) %>% | |
arrange(desc(final_shift)) %>% pull(state) | |
poll_shifts <- poll_shifts %>% | |
mutate(state = factor(state,levels=state_order)) | |
ggplot(poll_shifts,aes(x=date, y=shift_biden_margin)) + | |
geom_hline(yintercept = 0,col='gray40') + | |
geom_point(shape=1,alpha=0.3,aes(size=sample_size_weight),show.legend = F) + | |
scale_size(range=c(0.5,3)) + | |
geom_smooth(aes(y=shift_biden_margin),span = 0.2,method='loess',se=F,col='black',size=0.5) + | |
theme_minimal() + | |
theme(panel.grid.minor = element_blank()) + | |
scale_y_continuous(breaks = seq(-20,20,5)) + | |
labs(x='Date',y='',title='Change in Biden margin since last poll', | |
subtitle="Each point is a poll, showing the change in Biden's margin since the last poll from that pollster") + | |
scale_x_date(limits=c(ymd("2020-09-01"),ymd("2020-11-03"))) | |
ggplot(poll_shifts,aes(x=date, y=shift_biden_margin)) + | |
geom_hline(yintercept = 0,col='gray40') + | |
geom_point(shape=1,alpha=0.3,aes(size=sample_size_weight),show.legend = F) + | |
scale_size(range=c(0.5,3)) + | |
geom_line(aes(y=average_shift,group=state)) + | |
#geom_smooth(aes(y=shift_biden_margin,group=state),span = 1,method='loess',se=F,col='black',size=0.5) + | |
theme_minimal() + | |
theme(panel.grid.minor = element_blank()) + | |
scale_y_continuous(breaks = seq(-20,20,5)) + | |
facet_wrap(~state) + | |
labs(x='Date',y='',title='Change in Biden margin since last poll', | |
subtitle="Each point is a poll, showing the change in Biden's margin since the last poll from that pollster") + | |
scale_x_date(limits=c(ymd("2020-09-01"),ymd("2020-11-03"))) | |
avg_last_week <- mean(poll_shifts %>% filter(date >= (Sys.Date()-7)) %>% pull(shift_biden_margin),na.rm=T) | |
poll_shifts %>% | |
filter(date >= (Sys.Date()-7)) %>% | |
ggplot(.,aes(x=shift_biden_margin)) + | |
geom_histogram(binwidth=0.5) + | |
#geom_density() + | |
geom_vline(xintercept = 0) + | |
geom_vline(xintercept = avg_last_week,col='red',linetype=2) + | |
theme_minimal() + | |
theme(panel.grid.minor = element_blank()) + | |
labs(x='Change in Biden margin since last poll', | |
y='Number of polls') + | |
scale_x_continuous(breaks = seq(-20,20,1),limits=c(-10,10)) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment