Created
June 5, 2025 19:16
-
-
Save favstats/ac37f6a7c881dddfa1c156bfb3e2dbdf to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#' @title Get Adaptive Delay Based on App Usage
#' @description Calculates the appropriate sleep delay based on Meta API usage
#'   percentages (calls, CPU time, total time). The largest of the three
#'   values decides the delay.
#'   Inspired by the [Wesleyan Media Project's utilities](https://github.com/Wesleyan-Media-Project/fb_ads_import/blob/main/race2022_utils.R).
#'
#' @details The delay is looked up from the highest usage value:
#'   above 95 -> 90s, above 90 -> 30s, above 80 -> 10s, above 75 -> 5s,
#'   above 50 -> 3s, otherwise 1s. Missing values are ignored; when no usable
#'   value is supplied at all the minimum delay of 1s is returned.
#'
#' @param call_pct Numeric. Percentage of allowed call count used.
#' @param cpu_pct Numeric. Percentage of allowed CPU time used.
#' @param time_pct Numeric. Percentage of allowed total time used.
#'
#' @return A numeric value indicating the number of seconds to sleep.
#' @export
get_delay_value <- function(call_pct = 0, cpu_pct = 0, time_pct = 0) {
  usage <- c(call_pct, cpu_pct, time_pct)
  usage <- usage[!is.na(usage)]

  # Nothing usable (all NA or NULL): fall back to the minimum delay instead of
  # letting max() warn and return -Inf.
  if (length(usage) == 0L) {
    return(1)
  }

  max_usage <- max(usage)

  # Lower bounds (exclusive) of each usage band and the delay for each band.
  # delays[1] applies at or below the first threshold.
  thresholds <- c(50, 75, 80, 90, 95)
  delays <- c(1, 3, 5, 10, 30, 90)

  # left.open = TRUE makes every band (lower, upper], matching strict `>`.
  delays[findInterval(max_usage, thresholds, left.open = TRUE) + 1L]
}
#' @title Paginated Meta API Fetch with Usage-Aware Throttling
#' @description Fetches results from the Meta Ad Library API using pagination.
#'   After each page the `x-business-use-case-usage` response header is read
#'   and, when the highest usage figure exceeds `max_usage_limit`, the function
#'   sleeps for the number of seconds returned by `get_delay_value()`.
#'   Inspired by the [Wesleyan Media Project](https://github.com/Wesleyan-Media-Project/fb_ads_import/blob/main/race2022_utils.R).
#'
#' @param query Named list. Query parameters for the Meta Ad Library API. Each
#'   value is converted with `as.character()` and URL-encoded.
#' @param token Character. Facebook Graph API access token.
#' @param max_pages Integer. Max number of pages to fetch (default: 50).
#' @param max_usage_limit Numeric (0–100). Threshold beyond which throttling applies (default: 50).
#' @param verbose Logical. If TRUE, shows a progress bar and status messages
#'   (default: FALSE).
#' @param api_health Logical. If TRUE, prints the usage percentages reported by
#'   the API after every page (default: FALSE).
#'
#' @return A tibble of combined results from all API pages. If a request fails
#'   or the API reports an error, a warning is raised and the pages collected
#'   up to that point are returned.
#' @export
paginate_meta_api <- function(query, token, max_pages = 50, max_usage_limit = 50, verbose = FALSE, api_health = FALSE) {
  # ---- Check dependency on Radlibrary ----
  if (!requireNamespace("Radlibrary", quietly = TRUE)) {
    stop("The 'Radlibrary' package is required but not installed. Please install it to use this function.")
  }

  # ---- Prepare for iteration ----
  all_data <- list()
  next_url <- NULL

  if (verbose) {
    message("Starting Meta Ad Library API pagination...")
    pb <- progress::progress_bar$new(
      format = " Fetching [:bar] Page :current/:total (:percent) :message",
      total = max_pages,
      width = 60,
      clear = FALSE,
      show_after = 0
    )
  }

  for (i in seq_len(max_pages)) {
    if (verbose) pb$tick(tokens = list(message = ""))

    # ---- Initial or paginated request ----
    if (is.null(next_url)) {
      # First page: build the request URL from the query parameters.
      query[["access_token"]] <- token
      query_string <- paste(
        purrr::imap_chr(
          query,
          ~ paste0(
            utils::URLencode(.y),
            "=",
            utils::URLencode(as.character(.x), reserved = TRUE)
          )
        ),
        collapse = "&"
      )
      url <- paste0("https://graph.facebook.com/v20.0/ads_archive?", query_string)
    } else {
      # Later pages: follow the URL from the previous response.
      url <- next_url
    }

    raw <- httr::GET(url)

    # A non-JSON body (e.g. a gateway error page) must not abort the whole run.
    parsed <- tryCatch(
      jsonlite::fromJSON(
        httr::content(raw, as = "text", encoding = "UTF-8"),
        simplifyDataFrame = FALSE
      ),
      error = function(e) NULL
    )

    # ---- Stop on failed requests, keeping what was already fetched ----
    api_error <- if (is.list(parsed)) parsed[["error"]] else NULL
    if (!is.list(parsed) || httr::http_error(raw) || !is.null(api_error)) {
      reason <- if (is.list(api_error) && !is.null(api_error[["message"]])) {
        api_error[["message"]]
      } else {
        paste("HTTP status", httr::status_code(raw))
      }
      # The URL is deliberately left out of the warning: it contains the token.
      warning(
        "Meta API request for page ", i, " failed: ", reason,
        ". Returning the pages collected so far.",
        call. = FALSE
      )
      break
    }

    # ---- Convert response to tibble ----
    result <- structure(
      list(data = parsed$data, fields = query$fields),
      class = "adlib_data_response"
    )
    all_data[[i]] <- Radlibrary::as_tibble(result, censor_access_token = TRUE)

    # ---- Check throttling ----
    app_use <- httr::headers(raw)[["x-business-use-case-usage"]]
    if (!is.null(app_use)) {
      # The header is a JSON object; its first element holds the usage figures.
      usage <- jsonlite::fromJSON(app_use)[[1]]
      max_usage <- max(usage$call_count, usage$total_cputime, usage$total_time, na.rm = TRUE)

      if (api_health) {
        message(glue::glue("[Page {i}] Current API usage: calls={usage$call_count}%, cpu={usage$total_cputime}%, time={usage$total_time}%"))
      }

      if (max_usage > max_usage_limit) {
        delay <- get_delay_value(usage$call_count, usage$total_cputime, usage$total_time)
        if (verbose) message(glue::glue("[Page {i}] Sleeping for {delay}s due to high usage..."))
        Sys.sleep(delay)
      }
    }

    # ---- Pagination handling ----
    next_url <- parsed$paging$`next`
    if (is.null(next_url)) {
      if (verbose) message(glue::glue("[Page {i}] No further pages. Stopping."))
      break
    }
  }

  if (verbose) message("All pages retrieved. Binding results...")
  dplyr::bind_rows(all_data)
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment