Last active
February 7, 2019 21:21
-
-
Save mkearney/496f59da53ded2778c8ec50efef39ae9 to your computer and use it in GitHub Desktop.
Sentiment of tax-related tweets sent to:realDonaldTrump
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## install any CRAN packages this script needs that are not installed yet
pkgs <- c("dplyr", "rtweet", "ggplot2", "syuzhet", "ggbeeswarm", "remotes")
## installed.packages() returns a character matrix with one row per package;
## compare against its row names (the package names) only, because matching
## against every cell can be fooled by values in other columns (e.g., a
## package whose Imports field is exactly "dplyr")
missing_pkgs <- setdiff(pkgs, rownames(installed.packages()))
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}
## install {dataviz} theme from github
remotes::install_github("mkearney/dataviz")
## infix shorthand for paste0(): concatenates `lhs` and `rhs` with no
## separator (vectorised the same way paste0() is), which makes it handy
## for splitting long string literals across lines
`%P%` <- function(lhs, rhs) {
  paste0(lhs, rhs)
}
## load rtweet & ggplot2 | |
library(rtweet) | |
library(ggplot2) | |
## search for up to 5000 english-language, tax-related tweets addressed
## to:realdonaldtrump (retweets excluded), then keep a single row per user:
## duplicated(fromLast = TRUE) flags every row except the last one for each
## user_id, so only that last row survives the filter
## NOTE(review): presumably results arrive newest-first, which would make the
## surviving row each user's earliest tweet -- confirm against rtweet docs
yes_tax <- search_tweets(
  paste0(
    "to:realdonaldtrump (my taxes) OR (i tax return) OR (my tax return) ",
    "OR (our tax return) OR (i owe taxes) OR (my tax refund) lang:en"
  ),
  n = 5000,
  include_rts = FALSE
) %>%
  dplyr::filter(!duplicated(user_id, fromLast = TRUE))
## comparison set: up to 5000 english-language tweets addressed
## to:realdonaldtrump that mention neither "tax" nor "taxes" (retweets
## excluded), reduced to one row per user by keeping the last row seen
## for each user_id
not_tax <- search_tweets(
  "to:realdonaldtrump -tax -taxes lang:en",
  n = 5000,
  include_rts = FALSE
) %>%
  dplyr::filter(!duplicated(user_id, fromLast = TRUE))
## tag every row of each data set with its topic label; these labels are
## what the two groups are told apart by once the data sets are combined
not_tax[["topic"]] <- "Not Taxes"
yes_tax[["topic"]] <- "About Taxes"
## build the analysis data set with an equal number of tweets per topic
## use the smaller group's size as the per-topic N: sample() without
## replacement errors if asked for more rows than the population holds,
## which would happen whenever 'yes_tax' is larger than 'not_tax'
n_each <- min(nrow(not_tax), nrow(yes_tax))
not_tax_sample <- dplyr::slice(
  not_tax, sample(seq_len(nrow(not_tax)), n_each)
)
## only down-sample 'yes_tax' when it is the larger group; otherwise keep
## every row in its original order
yes_tax_sample <- if (nrow(yes_tax) > n_each) {
  dplyr::slice(yes_tax, sample(seq_len(nrow(yes_tax)), n_each))
} else {
  yes_tax
}
## stack the two groups (non-tax rows first, then tax rows)
tt <- rbind(not_tax_sample, yes_tax_sample)
## estimate sentiment of each tweet's text with the 'afinn' dictionary
tt$sent <- syuzhet::get_sentiment(tt$text, method = "afinn")
## base beeswarm plot: topic is mapped to x and sentiment to y, then the
## coordinates are flipped so each topic is drawn as a horizontal band
## (which gives the plot its density-like look)
base_plot <- tt %>%
  ## sent scores are integers, so shift the two topics a quarter point in
  ## opposite directions to keep their points from landing on top of
  ## each other
  dplyr::mutate(
    sent = sent + ifelse(topic == "Not Taxes", 0.25, -0.25)
  ) %>%
  ggplot(mapping = aes(x = topic, y = sent, color = topic)) +
  ggbeeswarm::geom_beeswarm(alpha = 0.5) +
  scale_x_discrete(position = "top") +
  coord_flip()
## finished plot: fixed colour per topic, title/subtitle/axis labels,
## the {dataviz} theme, and no legend (the axis already names each topic)
fancy_plot <- base_plot +
  scale_color_manual(
    values = c("About Taxes" = "#dd2222", "Not Taxes" = "#1133ee")
  ) +
  labs(
    title = "Sentiment of tweets to @realDonaldTrump by topic",
    subtitle = paste0(
      "Estimates for tweets calculated using the 'afinn' ",
      "sentiment dictionary"
    ),
    x = NULL,
    y = "Sentiment"
  ) +
  ## the complete theme goes first so the legend tweak below is not
  ## overwritten by it
  dataviz::theme_mwk(base_size = 16) +
  theme(legend.position = "none")
## save plot image to dropbox
## pass the plot explicitly instead of writing `fancy_plot + ggsave(...)`:
## that form saves whatever last_plot() happens to be and then tries to add
## ggsave()'s return value (the file path) to the plot, which errors in
## current ggplot2
ggsave("~/Dropbox/sent-tax-tweets.png", plot = fancy_plot)
## display the plot as well
fancy_plot
## per-topic descriptives of the (unshifted) sentiment scores: number of
## tweets, mean with 5% trimmed from each end, and standard deviation
dplyr::summarise(
  dplyr::group_by(tt, topic),
  n = dplyr::n(),
  sent_mean = mean(sent, trim = 0.05),
  sent_sd = sd(sent)
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
When I ran this code on Feb 7th, 2019, the plot output looked like this: