malcolmbarrett · January 27, 2025 19:20
diff --git a/po_plot.R b/po_plot.R
 plot_data <- data_observed |> 
  select(starts_with("y"), exposure) |> 
  mutate(id = row_number()) |> 
  pivot_longer(
    starts_with("y"),
    names_prefix = "y_",
    names_to = "potential_outcome",
    values_to = "happiness"
  ) |> 
  mutate(
    observed = if_else(exposure == potential_outcome, "observed", "unobserved"),
    potential_outcome = paste0("potential outcome: y(", potential_outcome, ")"),
    exposure = if_else(exposure == "vanilla", "actually ate\nvanilla", "actually ate\nchocolate")
  )

 avg_labels <- plot_data |> 
  group_by(potential_outcome, exposure, observed) |> 
  summarize(
    happiness = mean(happiness),
    .groups = "drop"
  ) |> 
  filter(exposure == "actually ate\nvanilla") |> 
  mutate(
    exposure_lbl = str_replace_all(exposure, "actually ate\n", ""),
    po_lbl = str_replace_all(potential_outcome, "potential outcome: ", ""),
    label = glue::glue(
      "average {po_lbl} for the {exposure_lbl} group ({observed})"
    ) |> str_wrap(19)
  )

 id1 <- plot_data |>
  filter(id == 1) |> 
  mutate(label = glue::glue("potential outcome for ID 1 ({observed})") |> str_wrap(19))

 exchangeability_annotation <- tibble(
  x = 5.43, xend = 5.33, 
  y = 1, yend = 2,
  potential_outcome = "potential outcome: y(chocolate)",
  label = "For exchangeability to hold, we need these averages to be about the same" |> str_wrap(19)
 )

 plot_data |> 
  ggplot(aes(happiness, exposure, color = observed, shape = observed)) + 
  geom_dotplot(aes(fill = observed), alpha = .5, color = "grey50", stackratio = 1.3, dotsize = .8,
               show.legend = FALSE) + 
  stat_summary(fun = "mean", size = 2.75, geom = "point") + 
  stat_summary(
    fun = "mean",
    geom = "text",
    aes(label = round(after_stat(x), 1)),
    vjust = 1.8,
    show.legend = FALSE
  ) + 
  
  # Annotation for individual ID 1's observed potential outcome
  geom_curve(
    data = id1,
    aes(
      x = happiness - 1.5, xend = happiness - .4, y = 2.25, yend = 2.025
    ),
    curvature = 0.2,
    arrow = arrow(length = unit(0.02, "npc")),
    inherit.aes = FALSE,
    color = "grey60"
  ) +
  geom_label(
    data = id1,
    aes(
      label = label
    ),
    nudge_y = .4,
    nudge_x = -4,
    hjust = "left",
    color = "grey40",
    size = 3.000001, 
    label.size = NA
  ) +
  
  # Annotation for vanilla group's average potential outcome
  geom_curve(
    data = avg_labels,
    aes(
      x = happiness + 1, xend = happiness - .1, y = 2.3, yend = 2.05
    ),
    curvature = 0.5,
    arrow = arrow(length = unit(0.02, "npc")),
    inherit.aes = FALSE,
    color = "grey60"
  ) +
  geom_label(
    data = avg_labels,
    aes(
      label = label
    ),
    nudge_y = .3,
    nudge_x = 1.2,
    hjust = "left",
    color = "grey40",
    size = 3.000001, 
    label.size = NA
  ) +
  
  # Annotation for exchangeability between groups
  geom_curve(
    data = exchangeability_annotation,
    aes(x = x, xend = xend + .2, y = y + .05, yend = yend - .2),
    curvature = .1,
    arrow = arrow(length = unit(0.02, "npc"), ends = "both"),
    inherit.aes = FALSE,
    color = "grey60"
  ) +
  geom_label(
    data = exchangeability_annotation,
    aes(x = x + .05, y = 1.44, label = label),
    inherit.aes = FALSE,
    hjust = "left",
    nudge_x = 0.5,
    color = "grey40",
    size = 3.000001, 
    label.size = NA
  ) +
  
  facet_wrap(~potential_outcome) + 
  labs(
    y = "actual exposure", 
    color = NULL,
    shape = NULL,
    fill = NULL
  ) + 
  theme(
    panel.grid.major.y = element_blank(),
    panel.border = element_rect(color = "grey40", fill = NA, linewidth = 0.8),
    axis.title.y = element_blank()
  ) +
  coord_cartesian(clip = "off") + 
  scale_shape_manual(values = c(20, 21)) +
  scale_fill_manual(values = c(observed = "grey40", unobserved = "white")) +
  scale_x_continuous(breaks = seq(0, 12, by = 2.5), limits = c(NA, 12))
	plot_data <- data_observed \|>
	select(starts_with("y"), exposure) \|>
	mutate(id = row_number()) \|>
	pivot_longer(
	starts_with("y"),
	names_prefix = "y_",
	names_to = "potential_outcome",
	values_to = "happiness"
	) \|>
	mutate(
	observed = if_else(exposure == potential_outcome, "observed", "unobserved"),
	potential_outcome = paste0("potential outcome: y(", potential_outcome, ")"),
	exposure = if_else(exposure == "vanilla", "actually ate\nvanilla", "actually ate\nchocolate")
	)

	avg_labels <- plot_data \|>
	group_by(potential_outcome, exposure, observed) \|>
	summarize(
	happiness = mean(happiness),
	.groups = "drop"
	) \|>
	filter(exposure == "actually ate\nvanilla") \|>
	mutate(
	exposure_lbl = str_replace_all(exposure, "actually ate\n", ""),
	po_lbl = str_replace_all(potential_outcome, "potential outcome: ", ""),
	label = glue::glue(
	"average {po_lbl} for the {exposure_lbl} group ({observed})"
	) \|> str_wrap(19)
	)

	id1 <- plot_data \|>
	filter(id == 1) \|>
	mutate(label = glue::glue("potential outcome for ID 1 ({observed})") \|> str_wrap(19))

	exchangeability_annotation <- tibble(
	x = 5.43, xend = 5.33,
	y = 1, yend = 2,
	potential_outcome = "potential outcome: y(chocolate)",
	label = "For exchangeability to hold, we need these averages to be about the same" \|> str_wrap(19)
	)

	plot_data \|>
	ggplot(aes(happiness, exposure, color = observed, shape = observed)) +
	geom_dotplot(aes(fill = observed), alpha = .5, color = "grey50", stackratio = 1.3, dotsize = .8,
	show.legend = FALSE) +
	stat_summary(fun = "mean", size = 2.75, geom = "point") +
	stat_summary(
	fun = "mean",
	geom = "text",
	aes(label = round(after_stat(x), 1)),
	vjust = 1.8,
	show.legend = FALSE
	) +

	# Annotation for individual ID 1's observed potential outcome
	geom_curve(
	data = id1,
	aes(
	x = happiness - 1.5, xend = happiness - .4, y = 2.25, yend = 2.025
	),
	curvature = 0.2,
	arrow = arrow(length = unit(0.02, "npc")),
	inherit.aes = FALSE,
	color = "grey60"
	) +
	geom_label(
	data = id1,
	aes(
	label = label
	),
	nudge_y = .4,
	nudge_x = -4,
	hjust = "left",
	color = "grey40",
	size = 3.000001,
	label.size = NA
	) +

	# Annotation for vanilla group's average potential outcome
	geom_curve(
	data = avg_labels,
	aes(
	x = happiness + 1, xend = happiness - .1, y = 2.3, yend = 2.05
	),
	curvature = 0.5,
	arrow = arrow(length = unit(0.02, "npc")),
	inherit.aes = FALSE,
	color = "grey60"
	) +
	geom_label(
	data = avg_labels,
	aes(
	label = label
	),
	nudge_y = .3,
	nudge_x = 1.2,
	hjust = "left",
	color = "grey40",
	size = 3.000001,
	label.size = NA
	) +

	# Annotation for exchangeability between groups
	geom_curve(
	data = exchangeability_annotation,
	aes(x = x, xend = xend + .2, y = y + .05, yend = yend - .2),
	curvature = .1,
	arrow = arrow(length = unit(0.02, "npc"), ends = "both"),
	inherit.aes = FALSE,
	color = "grey60"
	) +
	geom_label(
	data = exchangeability_annotation,
	aes(x = x + .05, y = 1.44, label = label),
	inherit.aes = FALSE,
	hjust = "left",
	nudge_x = 0.5,
	color = "grey40",
	size = 3.000001,
	label.size = NA
	) +

	facet_wrap(~potential_outcome) +
	labs(
	y = "actual exposure",
	color = NULL,
	shape = NULL,
	fill = NULL
	) +
	theme(
	panel.grid.major.y = element_blank(),
	panel.border = element_rect(color = "grey40", fill = NA, linewidth = 0.8),
	axis.title.y = element_blank()
	) +
	coord_cartesian(clip = "off") +
	scale_shape_manual(values = c(20, 21)) +
	scale_fill_manual(values = c(observed = "grey40", unobserved = "white")) +
	scale_x_continuous(breaks = seq(0, 12, by = 2.5), limits = c(NA, 12))