trinker · July 8, 2020 19:11 · trinker · Jul 8, 2020 · trinker · Jul 8, 2020
diff --git a/quanteda_wordcloud.R b/quanteda_wordcloud.R
 ## Load dependencies
 library(quanteda)
 library(sentimentr)
 library(tidyverse)
 library(lexicon)
 ## Debate transcripts; this data set ships with the sentimentr package
 dat <- presidential_debates_2012
 dat

 ## Build a corpus whose document text comes from the `dialogue` column
 corp <- corpus(dat, text_field = "dialogue")

 ## Terms to exclude from every word cloud: two ready-made word lists
 ## (presumably from the lexicon package loaded above) plus a handful of
 ## debate-specific words that would otherwise dominate the plots
 stopwords <- c(
   sw_fry_100,
   function_words,
   c("obama", "because", "romney", "going", "our", "president")
 )

 # Basic word cloud for Romney
 # Build a document-feature matrix from Romney's rows only, cleaning the
 # tokens along the way, then keep only reasonably frequent terms.
 dfmat1 <- dfm(
   corpus_subset(corp, person == "ROMNEY"),  # keep only Romney's documents
   remove = stopwords,                       # drop overly common words
   remove_numbers = TRUE,
   remove_punct = TRUE,
   remove_url = TRUE,
   remove_symbols = TRUE,
   stem = FALSE                              # keep full word forms
 ) %>%
   dfm_trim(min_termfreq = 8)  # minimum number of times a term must occur

 # Use TRUE/FALSE rather than T/F: T and F are ordinary variables that can
 # be reassigned.
 par(xpd = FALSE)
 textplot_wordcloud(dfmat1, color = rev(RColorBrewer::brewer.pal(10, "RdBu")))
 title("Romney", adj = 1, line = 1, font = 2, col.main = "orange")

 # Basic word cloud for Obama
 # Same pipeline as the Romney cloud, restricted to Obama's rows.
 dfmat2 <- dfm(
   corpus_subset(corp, person == "OBAMA"),  # keep only Obama's documents
   remove = stopwords,                      # drop overly common words
   remove_numbers = TRUE,
   remove_punct = TRUE,
   remove_url = TRUE,
   remove_symbols = TRUE,
   stem = FALSE                             # keep full word forms
 ) %>%
   dfm_trim(min_termfreq = 8)  # minimum number of times a term must occur

 # Open a new graphics device so this cloud does not replace the previous plot
 dev.new()
 # Use TRUE/FALSE rather than T/F: T and F are ordinary variables that can
 # be reassigned.
 par(xpd = FALSE)
 textplot_wordcloud(dfmat2, color = rev(RColorBrewer::brewer.pal(10, "RdBu")))
 title("Obama", adj = 1, line = 1, font = 2, col.main = "orange")


 ## Now a little more complicated.
 ## Loop through each person/time combination and plot a word cloud for each.
 ## Stem along the way.

 # Keep only the two candidates' rows
 subset_dat <- dat %>%
   filter(person %in% c("ROMNEY", "OBAMA"))

 # One row per distinct (person, time) pair, plus a "time: person" plot title;
 # every column is converted to character so the values can be compared
 # directly inside the loop.
 subs <- subset_dat %>%
   select(person, time) %>%
   distinct() %>%
   mutate(
     title = paste(time, person, sep = ": "),
     across(everything(), as.character)
   )

 corp2 <- corpus(subset_dat, text_field = "dialogue")

 # The palette does not depend on the iteration, so build it once
 cloud_colors <- rev(RColorBrewer::brewer.pal(10, "RdBu"))

 for (i in seq_len(nrow(subs))) {

   # Word cloud for the i-th person/time combination
   dfmat <- dfm(
     # keep only the documents for this person and this time
     corpus_subset(corp2, person == subs[["person"]][i] & time == subs[["time"]][i]),
     remove = stopwords,  # drop overly common words
     remove_numbers = TRUE,
     remove_punct = TRUE,
     remove_url = TRUE,
     remove_symbols = TRUE,
     stem = TRUE          # collapse word variants to a common stem
   ) %>%
     dfm_trim(min_termfreq = 5)  # minimum number of times a term must occur

   # One graphics device per plot so earlier clouds stay visible
   dev.new()
   # Use TRUE/FALSE rather than T/F: T and F can be reassigned
   par(xpd = FALSE)
   textplot_wordcloud(dfmat, color = cloud_colors)
   title(subs[["title"]][i], adj = 1, line = 1, font = 2, col.main = "orange")
 }
	## Load dependencies
	library(quanteda)
	library(sentimentr)
	library(tidyverse)
	library(lexicon)
	## Debate transcripts; this data set ships with the sentimentr package
	dat <- presidential_debates_2012
	dat

	## Build a corpus whose document text comes from the `dialogue` column
	corp <- corpus(dat, text_field = "dialogue")

	## Terms to exclude from every word cloud: two ready-made word lists
	## (presumably from the lexicon package loaded above) plus a handful of
	## debate-specific words that would otherwise dominate the plots
	stopwords <- c(
	  sw_fry_100,
	  function_words,
	  c("obama", "because", "romney", "going", "our", "president")
	)

	# Basic word cloud for Romney
	# Build a document-feature matrix from Romney's rows only, cleaning the
	# tokens along the way, then keep only reasonably frequent terms.
	dfmat1 <- dfm(
	  corpus_subset(corp, person == "ROMNEY"),  # keep only Romney's documents
	  remove = stopwords,                       # drop overly common words
	  remove_numbers = TRUE,
	  remove_punct = TRUE,
	  remove_url = TRUE,
	  remove_symbols = TRUE,
	  stem = FALSE                              # keep full word forms
	) %>%
	  dfm_trim(min_termfreq = 8)  # minimum number of times a term must occur

	# Use TRUE/FALSE rather than T/F: T and F are ordinary variables that can
	# be reassigned.
	par(xpd = FALSE)
	textplot_wordcloud(dfmat1, color = rev(RColorBrewer::brewer.pal(10, "RdBu")))
	title("Romney", adj = 1, line = 1, font = 2, col.main = "orange")

	# Basic word cloud for Obama
	# Same pipeline as the Romney cloud, restricted to Obama's rows.
	dfmat2 <- dfm(
	  corpus_subset(corp, person == "OBAMA"),  # keep only Obama's documents
	  remove = stopwords,                      # drop overly common words
	  remove_numbers = TRUE,
	  remove_punct = TRUE,
	  remove_url = TRUE,
	  remove_symbols = TRUE,
	  stem = FALSE                             # keep full word forms
	) %>%
	  dfm_trim(min_termfreq = 8)  # minimum number of times a term must occur

	# Open a new graphics device so this cloud does not replace the previous plot
	dev.new()
	# Use TRUE/FALSE rather than T/F: T and F are ordinary variables that can
	# be reassigned.
	par(xpd = FALSE)
	textplot_wordcloud(dfmat2, color = rev(RColorBrewer::brewer.pal(10, "RdBu")))
	title("Obama", adj = 1, line = 1, font = 2, col.main = "orange")


	## Now a little more complicated.
	## Loop through each person/time combination and plot a word cloud for each.
	## Stem along the way.

	# Keep only the two candidates' rows
	subset_dat <- dat %>%
	  filter(person %in% c("ROMNEY", "OBAMA"))

	# One row per distinct (person, time) pair, plus a "time: person" plot title;
	# every column is converted to character so the values can be compared
	# directly inside the loop.
	subs <- subset_dat %>%
	  select(person, time) %>%
	  distinct() %>%
	  mutate(
	    title = paste(time, person, sep = ": "),
	    across(everything(), as.character)
	  )

	corp2 <- corpus(subset_dat, text_field = "dialogue")

	# The palette does not depend on the iteration, so build it once
	cloud_colors <- rev(RColorBrewer::brewer.pal(10, "RdBu"))

	for (i in seq_len(nrow(subs))) {

	  # Word cloud for the i-th person/time combination
	  dfmat <- dfm(
	    # keep only the documents for this person and this time
	    corpus_subset(corp2, person == subs[["person"]][i] & time == subs[["time"]][i]),
	    remove = stopwords,  # drop overly common words
	    remove_numbers = TRUE,
	    remove_punct = TRUE,
	    remove_url = TRUE,
	    remove_symbols = TRUE,
	    stem = TRUE          # collapse word variants to a common stem
	  ) %>%
	    dfm_trim(min_termfreq = 5)  # minimum number of times a term must occur

	  # One graphics device per plot so earlier clouds stay visible
	  dev.new()
	  # Use TRUE/FALSE rather than T/F: T and F can be reassigned
	  par(xpd = FALSE)
	  textplot_wordcloud(dfmat, color = cloud_colors)
	  title(subs[["title"]][i], adj = 1, line = 1, font = 2, col.main = "orange")
	}