mkearney · February 7, 2019 21:21 · mkearney · Feb 7, 2019
diff --git a/tax-tweets-to-trump.R b/tax-tweets-to-trump.R
 ## install packages (from CRAN) if not already
 pkgs <- c("dplyr", "rtweet", "ggplot2", "syuzhet", "ggbeeswarm", "remotes")
 ## compare against installed package *names* only: installed.packages()
 ## returns a matrix, and matching against the whole matrix also scans its
 ## other columns (versions, dependency strings, ...)
 missing_pkgs <- setdiff(pkgs, rownames(installed.packages()))
 if (length(missing_pkgs) > 0) {
   install.packages(missing_pkgs)
 }

 ## install {dataviz} theme from github
 remotes::install_github("mkearney/dataviz")

 ## infix shorthand for paste0(): joins lhs and rhs with no separator
 `%P%` <- function(lhs, rhs) {
   paste(lhs, rhs, sep = "")
 }

 ## load rtweet & ggplot2
 library(rtweet)
 library(ggplot2)

 ## tax-related [english] tweets sent to:trump (retweets excluded), reduced to
 ## one row per user: duplicated(fromLast = TRUE) keeps each user_id's last
 ## row in the returned order (presumably that user's earliest tweet if
 ## results come back newest-first -- TODO confirm)
 yes_tax <- dplyr::filter(
   search_tweets(
     paste0(
       "to:realdonaldtrump (my taxes) OR (i tax return) OR (my tax return) ",
       "OR (our tax return) OR (i owe taxes) OR (my tax refund) lang:en"
     ),
     n = 5000,
     include_rts = FALSE
   ),
   !duplicated(user_id, fromLast = TRUE)
 )

 ## non-tax related [english] tweets sent to:trump (retweets excluded),
 ## reduced to one row per user: duplicated(fromLast = TRUE) keeps each
 ## user_id's last row in the returned order
 not_tax <- dplyr::filter(
   search_tweets(
     "to:realdonaldtrump -tax -taxes lang:en",
     n = 5000,
     include_rts = FALSE
   ),
   !duplicated(user_id, fromLast = TRUE)
 )

 ## label each data set with its topic
 not_tax$topic <- "Not Taxes"
 yes_tax$topic <- "About Taxes"

 ## draw (without replacement) as many random 'not_tax' rows as there are
 ## 'yes_tax' rows, then stack the two sets: sampled non-tax rows first
 tt <- rbind(
   dplyr::slice(not_tax, sample.int(nrow(not_tax), nrow(yes_tax))),
   yes_tax
 )

 ## score the text of every tweet with the 'afinn' method
 tt$sent <- syuzhet::get_sentiment(tt$text, method = "afinn")

 ## generate density-like plot (axes flipped for a horizontal-looking layout)
 ## sent scores are integers, so each topic is shifted by a quarter point in
 ## opposite directions (+.25 for "Not Taxes", -.25 otherwise) to avoid overlap
 base_plot <- ggplot(
   dplyr::mutate(tt, sent = sent + ifelse(topic == "Not Taxes", .25, -.25)),
   aes(x = topic, y = sent, color = topic)
 ) +
   ggbeeswarm::geom_beeswarm(alpha = .5) +
   scale_x_discrete(position = "top") +
   coord_flip()

 ## layer topic colors, labels, and the {dataviz} theme onto the base plot
 fancy_plot <- base_plot +
   scale_color_manual(
     values = c("Not Taxes" = "#1133ee", "About Taxes" = "#dd2222")
   ) +
   labs(
     title = "Sentiment of tweets to @realDonaldTrump by topic",
     subtitle = "Estimates for tweets calculated using the 'afinn' " %P%
       "sentiment dictionary",
     x = NULL,
     y = "Sentiment"
   ) +
   dataviz::theme_mwk(base_size = 16) +
   ## kept after the theme call above so the legend setting is applied last
   theme(legend.position = "none")

 ## save plot image to dropbox
 ## the plot is passed explicitly: adding ggsave() to a plot with `+` only
 ## works when ggsave() returns NULL and saves whatever last_plot() holds; it
 ## fails on ggplot2 versions where ggsave() returns the file name
 ggsave("~/Dropbox/sent-tax-tweets.png", plot = fancy_plot)

 ## also display the plot
 fancy_plot

 ## per-topic count, 5%-trimmed mean, and standard deviation of sentiment
 dplyr::summarise(
   dplyr::group_by(tt, topic),
   n = dplyr::n(),
   sent_mean = mean(sent, trim = .05),
   sent_sd = sd(sent)
 )
	## install packages (from CRAN) if not already
	pkgs <- c("dplyr", "rtweet", "ggplot2", "syuzhet", "ggbeeswarm", "remotes")
	## compare against installed package *names* only: installed.packages()
	## returns a matrix, and matching against the whole matrix also scans its
	## other columns (versions, dependency strings, ...)
	missing_pkgs <- setdiff(pkgs, rownames(installed.packages()))
	if (length(missing_pkgs) > 0) {
	  install.packages(missing_pkgs)
	}

	## install {dataviz} theme from github
	remotes::install_github("mkearney/dataviz")

	## infix shorthand for paste0(): joins lhs and rhs with no separator
	`%P%` <- function(lhs, rhs) {
	  paste(lhs, rhs, sep = "")
	}

	## load rtweet & ggplot2
	library(rtweet)
	library(ggplot2)

	## tax-related [english] tweets sent to:trump (retweets excluded), reduced to
	## one row per user: duplicated(fromLast = TRUE) keeps each user_id's last
	## row in the returned order (presumably that user's earliest tweet if
	## results come back newest-first -- TODO confirm)
	yes_tax <- dplyr::filter(
	  search_tweets(
	    paste0(
	      "to:realdonaldtrump (my taxes) OR (i tax return) OR (my tax return) ",
	      "OR (our tax return) OR (i owe taxes) OR (my tax refund) lang:en"
	    ),
	    n = 5000,
	    include_rts = FALSE
	  ),
	  !duplicated(user_id, fromLast = TRUE)
	)

	## non-tax related [english] tweets sent to:trump (retweets excluded),
	## reduced to one row per user: duplicated(fromLast = TRUE) keeps each
	## user_id's last row in the returned order
	not_tax <- dplyr::filter(
	  search_tweets(
	    "to:realdonaldtrump -tax -taxes lang:en",
	    n = 5000,
	    include_rts = FALSE
	  ),
	  !duplicated(user_id, fromLast = TRUE)
	)

	## label each data set with its topic
	not_tax$topic <- "Not Taxes"
	yes_tax$topic <- "About Taxes"

	## draw (without replacement) as many random 'not_tax' rows as there are
	## 'yes_tax' rows, then stack the two sets: sampled non-tax rows first
	tt <- rbind(
	  dplyr::slice(not_tax, sample.int(nrow(not_tax), nrow(yes_tax))),
	  yes_tax
	)

	## score the text of every tweet with the 'afinn' method
	tt$sent <- syuzhet::get_sentiment(tt$text, method = "afinn")

	## generate density-like plot (axes flipped for a horizontal-looking layout)
	## sent scores are integers, so each topic is shifted by a quarter point in
	## opposite directions (+.25 for "Not Taxes", -.25 otherwise) to avoid overlap
	base_plot <- ggplot(
	  dplyr::mutate(tt, sent = sent + ifelse(topic == "Not Taxes", .25, -.25)),
	  aes(x = topic, y = sent, color = topic)
	) +
	  ggbeeswarm::geom_beeswarm(alpha = .5) +
	  scale_x_discrete(position = "top") +
	  coord_flip()

	## layer topic colors, labels, and the {dataviz} theme onto the base plot
	fancy_plot <- base_plot +
	  scale_color_manual(
	    values = c("Not Taxes" = "#1133ee", "About Taxes" = "#dd2222")
	  ) +
	  labs(
	    title = "Sentiment of tweets to @realDonaldTrump by topic",
	    subtitle = "Estimates for tweets calculated using the 'afinn' " %P%
	      "sentiment dictionary",
	    x = NULL,
	    y = "Sentiment"
	  ) +
	  dataviz::theme_mwk(base_size = 16) +
	  ## kept after the theme call above so the legend setting is applied last
	  theme(legend.position = "none")

	## save plot image to dropbox
	## the plot is passed explicitly: adding ggsave() to a plot with `+` only
	## works when ggsave() returns NULL and saves whatever last_plot() holds; it
	## fails on ggplot2 versions where ggsave() returns the file name
	ggsave("~/Dropbox/sent-tax-tweets.png", plot = fancy_plot)

	## also display the plot
	fancy_plot

	## per-topic count, 5%-trimmed mean, and standard deviation of sentiment
	dplyr::summarise(
	  dplyr::group_by(tt, topic),
	  n = dplyr::n(),
	  sent_mean = mean(sent, trim = .05),
	  sent_sd = sd(sent)
	)