benzipperer · December 13, 2023 14:51
diff --git a/weighted_percentiles.R b/weighted_percentiles.R
 library(tidyverse)
 library(MetricsWeighted)

 # here's how to calculate multiple weighted percentiles by year 
 # and reshape them so data is long in year but wide in percentiles
 # below I explain this step by step

 # Load the data: call epiextractr::load_org() for 1979-2022, passing the
 # year, orgwgt and wage variables, then keep only rows with a positive
 # wage (filter() also discards rows where the comparison is NA).
 cps_data <- epiextractr::load_org(
   1979:2022,
   year, orgwgt, wage
 ) %>%
   filter(wage > 0)

 # Percentiles of interest, written on the 0-100 scale.
 p <- c(10, 50, 90)

 # Full recipe: for every year compute the weighted wage percentiles in p
 # (divided by 100 because probs is on the 0-1 scale), giving one row per
 # year/percentile pair, then spread the percentiles into columns so the
 # result is long in year and wide in percentiles.
 cps_data %>%
   reframe(
     .by = year,
     percentile = p,
     value = weighted_quantile(wage, probs = p / 100, w = orgwgt)
   ) %>%
   pivot_wider(
     id_cols = year,
     names_from = percentile,
     values_from = value
   )
  

 #############################################  
 ### INCREMENTAL STEP BY STEP INSTRUCTIONS ###
 #############################################
 # Step 1 - simplest case: restrict to 2022 and compute the weighted
 # median wage, returned in a single summary column named p50.
 cps_data %>%
   filter(year == 2022) %>%
   summarise(
     p50 = weighted_median(wage, w = orgwgt)
   )

 # Step 2 - any other percentile: weighted_quantile() takes the target in
 # its probs argument, expressed between 0 and 1 (0.10 = 10th percentile).
 cps_data %>%
   filter(year == 2022) %>%
   summarise(
     p10 = weighted_quantile(wage, probs = 0.10, w = orgwgt)
   )

 # Step 3 - several percentiles at once: supply a vector of percentiles and
 # replace summarize() with reframe(), which may return multiple rows of
 # results rather than a single summary row.
 # probs is still on the 0-1 scale, hence p / 100.
 p <- c(10, 50, 90)

 cps_data %>%
   filter(year == 2022) %>%
   reframe(
     percentile = p,
     value = weighted_quantile(wage, probs = p / 100, w = orgwgt)
   )

 # Step 4 - every year: drop the 2022 filter and let reframe()'s .by
 # argument repeat the calculation separately for each year.
 cps_data %>%
   reframe(
     .by = year,
     percentile = p,
     value = weighted_quantile(wage, probs = p / 100, w = orgwgt)
   )

 # Step 5 - reshape: pivot the percentile column into separate columns so
 # the result is long in years and wide in percentiles.
 cps_data %>%
   reframe(
     .by = year,
     percentile = p,
     value = weighted_quantile(wage, probs = p / 100, w = orgwgt)
   ) %>%
   pivot_wider(
     id_cols = year,
     names_from = percentile,
     values_from = value
   )

 # Extra credit - friendlier column names: append "th" to each percentile
 # label before pivoting, so the wide columns are 10th, 50th and 90th.
 cps_data %>%
   reframe(
     .by = year,
     percentile = paste0(p, "th"),
     value = weighted_quantile(wage, probs = p / 100, w = orgwgt)
   ) %>%
   pivot_wider(
     id_cols = year,
     names_from = percentile,
     values_from = value
   )
	library(tidyverse)
	library(MetricsWeighted)

	# here's how to calculate multiple weighted percentiles by year
	# and reshape them so data is long in year but wide in percentiles
	# below I explain this step by step

	# Load the data: call epiextractr::load_org() for 1979-2022, passing the
	# year, orgwgt and wage variables, then keep only rows with a positive
	# wage (filter() also discards rows where the comparison is NA).
	cps_data <- epiextractr::load_org(
	  1979:2022,
	  year, orgwgt, wage
	) %>%
	  filter(wage > 0)

	# Percentiles of interest, written on the 0-100 scale.
	p <- c(10, 50, 90)

	# Full recipe: for every year compute the weighted wage percentiles in p
	# (divided by 100 because probs is on the 0-1 scale), giving one row per
	# year/percentile pair, then spread the percentiles into columns so the
	# result is long in year and wide in percentiles.
	cps_data %>%
	  reframe(
	    .by = year,
	    percentile = p,
	    value = weighted_quantile(wage, probs = p / 100, w = orgwgt)
	  ) %>%
	  pivot_wider(
	    id_cols = year,
	    names_from = percentile,
	    values_from = value
	  )


	#############################################
	### INCREMENTAL STEP BY STEP INSTRUCTIONS ###
	#############################################
	# Step 1 - simplest case: restrict to 2022 and compute the weighted
	# median wage, returned in a single summary column named p50.
	cps_data %>%
	  filter(year == 2022) %>%
	  summarise(
	    p50 = weighted_median(wage, w = orgwgt)
	  )

	# Step 2 - any other percentile: weighted_quantile() takes the target in
	# its probs argument, expressed between 0 and 1 (0.10 = 10th percentile).
	cps_data %>%
	  filter(year == 2022) %>%
	  summarise(
	    p10 = weighted_quantile(wage, probs = 0.10, w = orgwgt)
	  )

	# Step 3 - several percentiles at once: supply a vector of percentiles and
	# replace summarize() with reframe(), which may return multiple rows of
	# results rather than a single summary row.
	# probs is still on the 0-1 scale, hence p / 100.
	p <- c(10, 50, 90)

	cps_data %>%
	  filter(year == 2022) %>%
	  reframe(
	    percentile = p,
	    value = weighted_quantile(wage, probs = p / 100, w = orgwgt)
	  )

	# Step 4 - every year: drop the 2022 filter and let reframe()'s .by
	# argument repeat the calculation separately for each year.
	cps_data %>%
	  reframe(
	    .by = year,
	    percentile = p,
	    value = weighted_quantile(wage, probs = p / 100, w = orgwgt)
	  )

	# Step 5 - reshape: pivot the percentile column into separate columns so
	# the result is long in years and wide in percentiles.
	cps_data %>%
	  reframe(
	    .by = year,
	    percentile = p,
	    value = weighted_quantile(wage, probs = p / 100, w = orgwgt)
	  ) %>%
	  pivot_wider(
	    id_cols = year,
	    names_from = percentile,
	    values_from = value
	  )

	# Extra credit - friendlier column names: append "th" to each percentile
	# label before pivoting, so the wide columns are 10th, 50th and 90th.
	cps_data %>%
	  reframe(
	    .by = year,
	    percentile = paste0(p, "th"),
	    value = weighted_quantile(wage, probs = p / 100, w = orgwgt)
	  ) %>%
	  pivot_wider(
	    id_cols = year,
	    names_from = percentile,
	    values_from = value
	  )