Skip to content

Instantly share code, notes, and snippets.

@timriffe
Last active March 24, 2025 18:03
Show Gist options
  • Save timriffe/f820143e939f9262b3083122e1ce49ae to your computer and use it in GitHub Desktop.
Save timriffe/f820143e939f9262b3083122e1ce49ae to your computer and use it in GitHub Desktop.
Directly estimate the second-order citations of a paper using the Semantic Scholar API (functions only, intended to be loaded with source())
# Packages needed to query the Semantic Scholar API and combine the results
library(httr)
library(jsonlite)
library(dplyr)
#' Fetch the papers that cite a given DOI from the Semantic Scholar Graph API
#'
#' Pages through the `/citations` endpoint for `doi`, `page_size` records at a
#' time, and stacks the `citingPaper` records of every page into one data
#' frame. Paging stops when a page comes back empty or short, when at least
#' `max_results` rows have been collected, or when a page could not be
#' retrieved after repeated rate-limit (HTTP 429) responses.
#'
#' @param doi A single character string holding the DOI of the cited paper.
#' @param page_size Number of records requested per call (sent as `limit`).
#' @param max_results Upper bound on the number of rows returned; the result
#'   is truncated to this many rows.
#' @return A data frame with one row per citing paper, holding the requested
#'   fields (`paperId`, `title`, `citationCount`). A zero-row data frame with
#'   those columns is returned when no citing papers were retrieved.
#' @details Any HTTP status other than 200 or 429 raises an error. If a page
#'   is still rate-limited after 5 attempts a warning is issued and the rows
#'   collected so far are returned.
get_citing_papers <- function(doi, page_size = 100, max_results = 10000) {
  stopifnot(
    is.character(doi), length(doi) == 1L, !is.na(doi),
    is.numeric(page_size), length(page_size) == 1L, page_size >= 1,
    is.numeric(max_results), length(max_results) == 1L, max_results >= 1
  )

  base_url <- paste0(
    "https://api.semanticscholar.org/graph/v1/paper/DOI:",
    URLencode(doi, reserved = TRUE),
    "/citations"
  )
  fields <- "citingPaper.paperId,citingPaper.title,citingPaper.citationCount"
  max_attempts <- 5

  # Collect each page in a list and bind once at the end, so the accumulator
  # is always defined and we never rbind inside the loop.
  pages <- list()
  total_fetched <- 0
  offset <- 0
  done <- FALSE

  while (!done) {
    success <- FALSE
    # How many times has this page been rate-limited?
    attempts <- 0

    while (!success && attempts < max_attempts) {
      response <- GET(
        base_url,
        query = list(fields = fields, limit = page_size, offset = offset)
      )
      status <- status_code(response)

      if (status == 200) {
        success <- TRUE
        data <- fromJSON(content(response, "text", encoding = "UTF-8"))
        citing_papers <- data$data$citingPaper

        # An empty `data` array parses to something without a `citingPaper`
        # data frame, so treat anything that is not a data frame as 0 rows.
        n_new <- if (is.data.frame(citing_papers)) nrow(citing_papers) else 0L

        if (n_new == 0L) {
          done <- TRUE
        } else {
          pages[[length(pages) + 1L]] <- citing_papers
          total_fetched <- total_fetched + n_new
          offset <- offset + n_new
          message("Total fetched so far: ", total_fetched)
          # A short page means the listing is exhausted.
          done <- n_new < page_size || total_fetched >= max_results
        }

        # Forced delay between successful calls, be polite; there is no
        # point in waiting once no further request will be made.
        if (!done) {
          Sys.sleep(5)
        }
      } else if (status == 429) {
        wait_time <- 15 + attempts * 5 # Increase wait with each failed grab
        message(
          "Rate limit. Now we wait ", wait_time, " seconds before re-trying"
        )
        Sys.sleep(wait_time)
        attempts <- attempts + 1
      } else {
        stop("Request failed with status: ", status)
      }
    }

    if (!success) {
      warning("Failed after multiple retries.")
      break
    }
  }

  if (length(pages) == 0L) {
    # Nothing retrieved: return an empty frame with the expected columns so
    # callers can still use e.g. `$citationCount`.
    return(data.frame(
      paperId = character(),
      title = character(),
      citationCount = integer(),
      stringsAsFactors = FALSE
    ))
  }

  all_citing_papers <- bind_rows(pages)

  # The last page may overshoot the requested cap; trim to honour it.
  if (nrow(all_citing_papers) > max_results) {
    all_citing_papers <- all_citing_papers[seq_len(max_results), , drop = FALSE]
  }
  all_citing_papers
}
#' Count second-order citations of a paper
#'
#' Retrieves the papers citing `doi` via `get_citing_papers()` and adds up
#' their `citationCount` values, ignoring missing counts.
#'
#' @param doi DOI of the paper of interest, passed to `get_citing_papers()`.
#' @param max_results Passed through to `get_citing_papers()`.
#' @return A list with two elements: `citing_papers`, the value returned by
#'   `get_citing_papers()`, and `second_order_citations`, the sum of its
#'   `citationCount` column.
get_second_order_citations <- function(doi, max_results = 10000) {
  # page_size is deliberately left at get_citing_papers()'s default
  first_order <- get_citing_papers(doi, max_results = max_results)

  result <- list(citing_papers = first_order)
  result$second_order_citations <- sum(first_order$citationCount, na.rm = TRUE)
  return(result)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment