Created
November 18, 2016 03:36
-
-
Save rinze/781426c433fa1cbbb3b79b7492187d68 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(readr) | |
library(dplyr) | |
library(ggplot2) | |
# Return the leading significant digit of each element of `x`.
#
# `x` may be numeric or character. Each element is converted to text and the
# first character in "1".."9" is returned, so a sign, leading zeros, a decimal
# point or an exponent suffix (e.g. "1e+05") do not affect the result.
#
# Returns a character vector the same length as `x`; elements that contain no
# non-zero digit (0, NA, Inf, "") yield NA_character_.
get_first_digit <- function(x) {
  txt <- as.character(x)
  # regexpr() gives the position of the first match, -1 when there is none
  # and NA for NA input.
  pos <- as.integer(regexpr("[1-9]", txt))
  digit <- substr(txt, pos, pos)
  digit[is.na(pos) | pos < 1L] <- NA_character_
  digit
}
# Benford's-law check on the 2016 US presidential results by county.
# The code below uses two columns of the downloaded table: `name` (candidate)
# and `votes` (vote count for that candidate in one county).
votes <- read_csv("https://github.com/Prooffreader/election_2016_data/raw/master/data/presidential_general_election_2016_by_county.csv")

# Not interested in 0 votes. filter() also drops rows whose count is NA,
# which logical `[` indexing would have turned into all-NA rows.
# get_first_digit() is vectorized, so it is applied to the whole column.
votes <- votes %>%
  filter(.data$votes != 0) %>%
  mutate(first_digit = get_first_digit(.data$votes))

# Number of counties per candidate and leading digit.
votes_dist <- votes %>%
  group_by(name, first_digit) %>%
  summarise(n = n()) %>%
  ungroup()

# Total number of counties per candidate (denominator of the observed ratio).
votes_candidate <- votes_dist %>%
  group_by(name) %>%
  summarise(n_counties = sum(n))

votes_dist <- left_join(votes_dist, votes_candidate, by = "name")
votes_dist$observed <- votes_dist$n / votes_dist$n_counties

# Expected probs: Benford's law, P(d) = log10(1 + 1 / d) for d = 1..9
benford <- log10(1 + (1 / 1:9))

# Get only 4 main candidates: keep those with results in more than
# `min_counties` counties.
min_counties <- 2500
votes_dist <- votes_dist[votes_dist$n_counties > min_counties, ]

# Look the expected value up from each row's own digit instead of recycling
# the 9-element vector over the rows: recycling silently misaligns when a
# candidate lacks one of the digits, and tibble only recycles size-1 vectors.
votes_dist$expected <- benford[match(votes_dist$first_digit, as.character(1:9))]

# Line = expected Benford distribution, points = observed ratios,
# one panel per candidate.
plt1 <- ggplot(votes_dist) +
  geom_line(aes(x = first_digit, y = expected), group = 1) +
  geom_point(aes(x = first_digit, y = observed)) +
  facet_wrap(~ name) +
  ggtitle("Benford's law applied to USA 2016 Elections by county") +
  ylab("Observed ratio") +
  xlab("First digit")

print(plt1)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment