Skip to content

Instantly share code, notes, and snippets.

@favstats
Created June 5, 2025 19:16
Show Gist options
  • Save favstats/ac37f6a7c881dddfa1c156bfb3e2dbdf to your computer and use it in GitHub Desktop.
Save favstats/ac37f6a7c881dddfa1c156bfb3e2dbdf to your computer and use it in GitHub Desktop.
#' @title Get Adaptive Delay Based on App Usage
#' @description Maps the highest of the three Meta API usage percentages
#' (calls, CPU time, total time) to a sleep delay in seconds.
#' Inspired by the [Wesleyan Media Project's utilities](https://github.com/Wesleyan-Media-Project/fb_ads_import/blob/main/race2022_utils.R).
#'
#' @details The delay is chosen from the largest of the supplied values using
#' strict comparisons: above 95 gives 90 s, above 90 gives 30 s, above 80
#' gives 10 s, above 75 gives 5 s, above 50 gives 3 s, anything else gives 1 s.
#' Missing values (`NA`, `NaN`, `NULL`) are ignored; if no usable value is
#' left, the minimum delay of 1 second is returned without a warning.
#'
#' @param call_pct Numeric. Percentage of allowed call count used.
#' @param cpu_pct Numeric. Percentage of allowed CPU time used.
#' @param time_pct Numeric. Percentage of allowed total time used.
#'
#' @return A single numeric value indicating the number of seconds to sleep.
#' @export
get_delay_value <- function(call_pct = 0, cpu_pct = 0, time_pct = 0) {
  usage <- c(call_pct, cpu_pct, time_pct)
  usage <- usage[!is.na(usage)]

  # Nothing usable was reported: fall back to the minimum delay rather than
  # letting max() warn and return -Inf on an empty vector.
  if (length(usage) == 0) {
    return(1)
  }

  max_usage <- max(usage)

  # delays[k + 1] applies when max_usage strictly exceeds exactly k thresholds.
  thresholds <- c(50, 75, 80, 90, 95)
  delays <- c(1, 3, 5, 10, 30, 90)
  delays[sum(max_usage > thresholds) + 1]
}
#' @title Paginated Meta API Fetch with Usage-Aware Throttling
#' @description Fetches results from the Meta Ad Library API using pagination. It adapts delays based on Meta's API usage headers (`x-business-use-case-usage`) and shows progress.
#' Inspired by the [Wesleyan Media Project](https://github.com/Wesleyan-Media-Project/fb_ads_import/blob/main/race2022_utils.R).
#'
#' @details The first request is built from `query` (plus the access token);
#' every later request follows the `paging$next` URL returned by the API.
#' After each page the usage header, when present, is inspected and the
#' function sleeps for `get_delay_value()` seconds if the highest usage figure
#' exceeds `max_usage_limit`. If a request fails (HTTP error status, an
#' `error` object in the response, or a body that is not valid JSON), a
#' warning is raised and the pages retrieved so far are returned.
#'
#' @param query Named list. Query parameters for the Meta Ad Library API.
#'   Each value is URL-encoded as a single string, so values must be length one.
#' @param token Character. Facebook Graph API access token.
#' @param max_pages Integer. Max number of pages to fetch (default: 50).
#' @param max_usage_limit Numeric (0–100). Threshold beyond which throttling applies (default: 50).
#' @param verbose Logical. If TRUE, shows a progress bar and status messages (default: FALSE).
#' @param api_health Logical. If TRUE, prints the usage percentages reported by
#'   the API after each page (default: FALSE).
#'
#' @return A tibble of combined results from all successfully retrieved API pages.
#' @export
paginate_meta_api <- function(query, token, max_pages = 50, max_usage_limit = 50, verbose = FALSE, api_health = FALSE) {
  # ---- Check dependency on radlibrary ----
  if (!requireNamespace("Radlibrary", quietly = TRUE)) {
    stop("The 'Radlibrary' package is required but not installed. Please install it to use this function.")
  }

  # ---- Prepare for iteration ----
  all_data <- list()
  next_url <- NULL
  if (verbose) {
    message("Starting Meta Ad Library API pagination...")
    pb <- progress::progress_bar$new(
      format = " Fetching [:bar] Page :current/:total (:percent) :message",
      total = max_pages,
      width = 60,
      clear = FALSE,
      show_after = 0
    )
  }

  for (i in seq_len(max_pages)) {
    if (verbose) pb$tick(tokens = list(message = ""))

    # ---- Initial or paginated request ----
    if (is.null(next_url)) {
      query[["access_token"]] <- token
      # Build "name=value" pairs by hand. No pipe operator is used here
      # because the function does not attach magrittr.
      query_pairs <- purrr::imap_chr(query, function(value, name) {
        paste0(
          utils::URLencode(name),
          "=",
          utils::URLencode(as.character(value), reserved = TRUE)
        )
      })
      url <- paste0(
        "https://graph.facebook.com/v20.0/ads_archive?",
        paste(query_pairs, collapse = "&")
      )
    } else {
      url <- next_url
    }

    raw <- httr::GET(url)
    # A non-JSON body (e.g. an HTML error page) must not abort the whole run.
    parsed <- tryCatch(
      jsonlite::fromJSON(
        httr::content(raw, as = "text", encoding = "UTF-8"),
        simplifyDataFrame = FALSE
      ),
      error = function(e) NULL
    )

    # ---- Stop on failed requests, keeping what was already fetched ----
    api_error <- if (is.list(parsed)) parsed$error else NULL
    if (httr::http_error(raw) || !is.list(parsed) || !is.null(api_error)) {
      detail <- if (is.list(api_error) && !is.null(api_error$message)) {
        api_error$message
      } else {
        "no error message returned"
      }
      warning(
        sprintf(
          "[Page %s] Meta API request failed (HTTP %s): %s. Returning pages retrieved so far.",
          i, httr::status_code(raw), detail
        ),
        call. = FALSE
      )
      break
    }

    # ---- Convert response to tibble ----
    result <- structure(
      list(data = parsed$data, fields = query[["fields"]]),
      class = "adlib_data_response"
    )
    all_data[[i]] <- Radlibrary::as_tibble(result, censor_access_token = TRUE)

    # ---- Check throttling ----
    app_use <- httr::headers(raw)[["x-business-use-case-usage"]]
    if (!is.null(app_use)) {
      # The header is a JSON object; its first element holds the usage figures.
      usage <- jsonlite::fromJSON(app_use)[[1]]
      max_usage <- max(usage$call_count, usage$total_cputime, usage$total_time, na.rm = TRUE)
      if (api_health) {
        message(glue::glue("[Page {i}] Current API usage: calls={usage$call_count}%, cpu={usage$total_cputime}%, time={usage$total_time}%"))
      }
      if (max_usage > max_usage_limit) {
        delay <- get_delay_value(usage$call_count, usage$total_cputime, usage$total_time)
        if (verbose) message(glue::glue("[Page {i}] Sleeping for {delay}s due to high usage..."))
        Sys.sleep(delay)
      }
    }

    # ---- Pagination handling ----
    next_url <- parsed$paging$`next`
    if (is.null(next_url)) {
      if (verbose) message(glue::glue("[Page {i}] No further pages. Stopping."))
      break
    }
  }

  if (verbose) message("All pages retrieved. Binding results...")
  dplyr::bind_rows(all_data)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment