Last active
November 16, 2025 08:33
-
-
Save andrewheiss/32c824a542b3fdea064e61c39edada81 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse)
library(tidytext)
library(schrute)
library(rcartocolor)
library(ggh4x)

# Tokenize the dialogue ----

# Get all the words as single rows (one row per word), dropping stop words
all_words <- schrute::theoffice |>
  mutate(season_cat = factor(season)) |>
  unnest_tokens(output = word, input = text) |>
  anti_join(stop_words, by = join_by(word))

# Join the AFINN sentiment values to the words; the inner join keeps only
# words that appear in the AFINN lexicon
words_afinn <- all_words |>
  inner_join(get_sentiments("afinn"), by = join_by(word))

# Make the manual facet grid ----

# PHEW this is convoluted! The seasons have different numbers of episodes *and*
# some episodes are doubled (like S3E10 and S3E11). To get the grid to look nice
# with one row per season, we need to make a matrix that has a value for each
# episode slot and NA for gaps. Like, if there were three short seasons with
# 2, 3, and 5 episodes each (and one of the episodes in the first season was
# doubled, so it takes up two slots), the matrix would need to look like this:
#
#   1  1  2 NA NA
#   3  4  5 NA NA
#   6  7  8  9 10
#

# Find all the seasons and episodes and make a sequential index. Sort
# explicitly so the index does not depend on the row order of the source data.
episode_details <- schrute::theoffice |>
  distinct(season, episode) |>
  arrange(season, episode) |>
  mutate(episode_number = row_number())

# Find the highest episode number in each season
season_lengths <- episode_details |>
  summarize(max_ep = max(episode), .by = season)

# Make a new skeleton dataframe with one row for every episode slot from 1 to
# the season's highest episode number, filling in gaps with the preceding
# episode's index (so S3E10 and S3E11 are both overall episode 38). The fill is
# done within each season so a value can never carry over from the end of one
# season into the start of the next.
episode_details_doubled <- season_lengths |>
  reframe(episode = seq_len(max_ep), .by = season) |>
  left_join(episode_details, by = join_by(season, episode)) |>
  group_by(season) |>
  fill(episode_number, .direction = "down") |>
  ungroup()

# Build a matrix with rows for seasons and columns for episodes. Slots past the
# end of a season have no match in the join and stay NA.
n_seasons <- nrow(season_lengths)
max_episodes <- max(season_lengths$max_ep)

layout_matrix <- expand_grid(
  season = season_lengths$season,
  episode = seq_len(max_episodes)
) |>
  left_join(episode_details_doubled, by = join_by(season, episode)) |>
  arrange(season, episode) |>
  pull(episode_number) |>
  matrix(nrow = n_seasons, ncol = max_episodes, byrow = TRUE)

# Plot ----

# Finally plot this thing using that layout for facets. `index %/% 30` bins the
# dialogue lines into chunks of 30, and each bar is the mean AFINN value of the
# words in one chunk of one episode.
words_afinn |>
  group_by(season_cat, episode, index = index %/% 30) |>
  summarize(avg_sentiment = mean(value), .groups = "drop") |>
  mutate(season_episode = paste0(
    "S", season_cat, "E", str_pad(episode, width = 2, pad = "0")
  )) |>
  ggplot(aes(x = index, y = avg_sentiment, fill = avg_sentiment)) +
  geom_col() +
  scale_fill_carto_c(
    palette = "Temps",
    direction = -1,
    limits = c(-5, 5),
    guide = "none"
  ) +
  labs(
    x = NULL,
    y = NULL,
    title = "Average sentiment across all 9 seasons of The Office",
    subtitle = "Each bar represents the average sentiment over 30 lines of dialogue",
    caption = "Source: {schrute}"
  ) +
  facet_manual(vars(season_episode), design = layout_matrix, scales = "free_x") +
  coord_cartesian(ylim = c(-1.5, 2.5)) +
  theme_void(base_family = "Archivo Narrow") +
  theme(
    strip.text = element_text(size = 6, hjust = 0),
    panel.background = element_rect(fill = "grey97"),
    plot.title = element_text(face = "bold", margin = margin(b = 4)),
    plot.subtitle = element_text(size = rel(0.9), margin = margin(b = 8)),
    plot.margin = unit(c(0.5, 0.5, 0.5, 0.5), units = "lines"),
    plot.caption = element_text(size = rel(0.7), hjust = 0)
  ) +
  ggview::canvas(width = 10, height = 5.5)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment