Skip to content

Instantly share code, notes, and snippets.

@timriffe
Created March 20, 2025 08:32
Show Gist options
  • Save timriffe/8d2e2d58ec0fea6ce1b0a8de01ed739d to your computer and use it in GitHub Desktop.
Save timriffe/8d2e2d58ec0fea6ce1b0a8de01ed739d to your computer and use it in GitHub Desktop.
# Compiles the publication and citation list for OPIK output for a given year range; requires some metadata stored in Google Drive.
library(scholar)
library(googlesheets4)
library(tidyverse)
# Authenticate googlesheets4 with the account that can open the sheets below.
# NOTE(review): the email literal looks like a redacted placeholder — replace
# it with a real account before running.
gs4_auth(email = "[email protected]")
# Author metadata sheet. The code further down reads the columns `scholar_id`
# (Google Scholar profile id) and `desde` / `hasta` (first and last
# publication year to include for that author).
authors <- read_sheet(ss = "https://docs.google.com/spreadsheets/d/18RWKaMvkUOGp_URLwRu-3jTLUz2iI4y3oN_hu8MLoRg/edit#gid=0")
# Print the table for a quick visual check.
authors
# Download each author's Google Scholar publication list, keeping only the
# publications whose year falls inside that author's `desde`..`hasta` window,
# and tag every row with the author's Scholar id.
#
# The result list is preallocated with one slot per author, and seq_len()
# makes the loop a no-op for an empty sheet (1:nrow(authors) would iterate
# over c(1, 0) instead).
pubsi <- vector("list", nrow(authors))
for (i in seq_len(nrow(authors))) {
  pubsi[[i]] <- get_publications(
    id = authors$scholar_id[i],
    sortby = "year",
    flush = TRUE
  ) %>%
    dplyr::filter(between(year, authors$desde[i], authors$hasta[i])) %>%
    mutate(author_id = authors$scholar_id[i])
  # Pause between requests — presumably to avoid being rate-limited by
  # Google Scholar; confirm before shortening.
  Sys.sleep(1)
}

# Visual spot check of the first author's records. Only attempted in an
# interactive session and when at least one author was processed, so the
# script does not fail here when run in batch mode or on an empty sheet.
if (interactive() && length(pubsi) > 0) {
  pubsi[[1]] %>% View()
}
# Build the cleaned publication table `db`:
#   1. stack the per-author tables,
#   2. flag rows to keep (first occurrence of each `pubid`, and not published
#      in an excluded outlet such as working papers, preprints, conference
#      material or web pages),
#   3. drop the flagged-out rows and sort by year,
#   4. harmonise journal names.
# The `keep` and `author_id` columns are left in `db`; they are removed later
# when the results are written out.

# Outlets excluded when `journal` matches exactly.
excluded_outlets <- c("", "Figshare")

# Outlets excluded when `journal` contains one of these (case-sensitive).
# Dots are escaped so that host names match literally rather than as
# "any character".
excluded_regex <- paste(
  c(
    "MPIDR",
    "Conference",
    "PAA",
    "demogr\\.mpg\\.de",
    "www\\.mortality\\.org",
    "https://rdrr\\.",
    "ISA Forum",
    "project-WP",
    "OPIK Working",
    "OPIK-Working",
    "Congreso",
    "Population Association of"
  ),
  collapse = "|"
)

# Outlets excluded when the lower-cased `journal` contains one of these.
excluded_regex_lower <- paste(
  c("rxiv", "documento de trabajo", "the conversation"),
  collapse = "|"
)

db <-
  pubsi %>%
  bind_rows() %>%
  mutate(
    # grepl() and %in% never return NA, so `keep` is always TRUE or FALSE;
    # rows with a missing journal are retained.
    keep = !duplicated(pubid) &
      !(journal %in% excluded_outlets) &
      !grepl(excluded_regex, journal) &
      !grepl(excluded_regex_lower, tolower(journal))
  ) %>%
  dplyr::filter(keep) %>%
  arrange(year) %>%
  mutate(
    journal = case_when(
      grepl("international journal of integrated care", tolower(journal)) ~
        "International Journal of Integrated Care",
      grepl("plos one", tolower(journal)) ~
        "PLOS ONE",
      # Compare accent-free, lower-cased names so spelling variants of the
      # Spanish journal collapse to one canonical title.
      tolower(stringi::stri_trans_general(journal, id = "Latin-ASCII")) %in%
        c("rev esp salud publica", "revista espanola de salud publica") ~
        "Revista Española de Salud Pública",
      # Title-case only the names that were not canonicalised above, so the
      # canonical spellings (e.g. "PLOS ONE") are not turned into "Plos One".
      TRUE ~ str_to_title(journal)
    )
  )
# Publish the cleaned table to the "resultados" tab of the output
# spreadsheet, leaving out the helper columns `author_id` and `keep`.
write_sheet(
  data = select(db, -c(author_id, keep)),
  ss = "https://docs.google.com/spreadsheets/d/1nR_v_DH-tV-xvcT246UqnC3TJAZBjLDhwpaI8m9Q6B4/edit#gid=0",
  sheet = "resultados"
)
# From here, many duplicates were discovered; these need to be merged in Google Scholar.
# Print the distinct journal names left in `db` for manual inspection.
unique(db$journal)
# start on journal impact statistics.
# SCIMAGO
# devtools::install_github("ikashnitsky/sjrdata")
# library(sjrdata)
# sjr_journals %>%
# dplyr::filter(title %in% (db$journal %>% unique()) )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment