Created
October 6, 2021 17:56
-
-
Save wpetry/e3bcf2173aa5a081f5ba78f242155786 to your computer and use it in GitHub Desktop.
Download search results from a Web of Science web query
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#################################################-
## Download search results from a Web of Science web query ----
## W.K. Petry
##
## example usage:
## get_wos_query(url = "https://www.webofscience.com/wos/woscc/summary/83c53a2b-5a39-4468-84f0-c9ffcfba5e91-01b947ba/relevance/1", profile = fprof)
#################################################-
## Define function to fetch WoS query hits ----
#################################################-
#' Export every hit of a Web of Science web query as RIS files
#'
#' Starts a Selenium-driven browser, opens the query result page, dismisses
#' the onboarding popups, reads the number of records reported on the page and
#' then walks through the export dialog once per batch of up to 1000 records
#' (the per-export limit of the dialog), choosing the RIS format and the
#' 'Full Record' content option. The files themselves are saved by the
#' browser; where they land depends on the browser profile passed in.
#'
#' @param url URL of a Web of Science result summary page.
#' @param browser Browser to drive; one of "firefox", "chrome", "phantomjs".
#' @param profile Optional extra capabilities handed to
#'   `RSelenium::rsDriver()` (e.g. a Firefox profile that suppresses the
#'   download dialog).
#' @return Invisibly, the number of records reported by the result page.
get_wos_query <- function(url, browser = c("firefox", "chrome", "phantomjs"),
                          profile = NULL) {
  # Fail loudly when the dependency is missing; require() would only
  # return FALSE and let the function die later on an undefined name.
  if (!requireNamespace("RSelenium", quietly = TRUE)) {
    stop("Package 'RSelenium' is required but is not installed.",
         call. = FALSE)
  }
  browser <- match.arg(browser)

  # Keep the driver local and guarantee the Selenium server is shut down even
  # when a page element cannot be found half-way through the export.
  rD <- RSelenium::rsDriver(browser = browser, extraCapabilities = profile)
  on.exit(rD$server$stop(), add = TRUE)
  Sys.sleep(2)
  remDr <- rD[["client"]]

  # Small wrappers around the repeated find-by-xpath calls.
  find_xpath <- function(xpath) {
    remDr$findElement(using = "xpath", value = xpath)
  }
  click_xpath <- function(xpath) {
    find_xpath(xpath)$clickElement()
  }
  # Popups are not guaranteed to be shown (and their ids are hard-coded), so
  # a missing popup is reported instead of aborting the whole download.
  dismiss_popup <- function(xpath) {
    tryCatch(
      click_xpath(xpath),
      error = function(e) {
        message("Popup not dismissed (", xpath, "): ", conditionMessage(e))
      }
    )
  }

  # Locators for the result page.
  count_xpath <- "/html/body/app-wos/div/div/main/div/app-input-route/app-base-summary-component/app-search-friendly-display/div[1]/app-general-search-friendly-display/h1/span"
  export_menu_xpath <- "/html/body/app-wos/div/div/main/div/app-input-route/app-base-summary-component/div/div[2]/app-page-controls[1]/div/app-export-option/div/app-export-menu/div/button/span[1]"
  ris_button_xpath <- '//*[@id="exportToRisButton"]'

  # Locators inside the export overlay; they all share one long prefix.
  overlay <- "/html/body/app-wos/div/div/main/app-input-route/app-export-overlay/div/div[3]/div[2]/app-export-out-details/div/div[2]/div"
  range_radio_xpath <- paste0(overlay, "/fieldset/mat-radio-group/div[3]/mat-radio-button/label/span[1]/span[1]")
  content_select_xpath <- paste0(overlay, "/div[1]/wos-select/button/span[1]")
  full_record_xpath <- paste0(overlay, "/div[1]/wos-select/div/div/div[2]/div[3]/span")
  export_button_xpath <- paste0(overlay, "/div[2]/button[1]")
  start_input_xpath <- '//*[@id="mat-input-0"]'
  end_input_xpath <- '//*[@id="mat-input-1"]'

  # go to the query result page
  remDr$navigate(url)
  Sys.sleep(2)

  # close the annoying popups
  dismiss_popup('//*[@id="pendo-close-guide-8fdced48"]')
  Sys.sleep(2)
  dismiss_popup('//*[@id="pendo-button-e580fcec"]')
  Sys.sleep(2.21)
  dismiss_popup('//*[@id="pendo-button-506b4382"]')
  Sys.sleep(1)

  # find number of records to be exported; gsub() removes every thousands
  # separator (sub() would leave "1,000,000" as "1000,000" -> NA)
  count_text <- find_xpath(count_xpath)$getElementText()[[1]]
  nrecs <- suppressWarnings(
    as.integer(gsub(",", "", count_text, fixed = TRUE))
  )
  if (is.na(nrecs)) {
    stop("Could not parse the record count from the result page (got '",
         count_text, "').", call. = FALSE)
  }
  if (nrecs < 1L) {
    message("The query reports no records; nothing to export.")
    return(invisible(nrecs))
  }

  for (first in seq(1L, nrecs, by = 1000L)) {
    # every batch after the first starts from a freshly loaded result page
    if (first != 1L) {
      remDr$navigate(url)
    }
    Sys.sleep(3)

    # set upper record number (limit is 1000 records)
    last <- first + 999L
    # sprintf("%d") keeps plain digits; as.character() can render round
    # values such as 100000 in scientific notation ("1e+05").
    first_chr <- sprintf("%d", first)
    last_chr <- sprintf("%d", last)

    # begin export
    click_xpath(export_menu_xpath)
    Sys.sleep(1)
    # select output format as RIS
    click_xpath(ris_button_xpath)
    Sys.sleep(1)
    # choose which records to export
    click_xpath(range_radio_xpath)
    Sys.sleep(1)
    # clear the default start/end records
    find_xpath(start_input_xpath)$clearElement()
    Sys.sleep(1)
    find_xpath(end_input_xpath)$clearElement()
    Sys.sleep(1)
    # set the starting and ending record numbers
    find_xpath(start_input_xpath)$sendKeysToElement(list(first_chr))
    Sys.sleep(1)
    find_xpath(end_input_xpath)$sendKeysToElement(list(last_chr))
    Sys.sleep(1)
    # select 'Full Record' for export
    click_xpath(content_select_xpath)
    Sys.sleep(1)
    click_xpath(full_record_xpath)
    Sys.sleep(1)
    click_xpath(export_button_xpath)
    # give the browser time to receive the file before the next batch
    Sys.sleep(10)
  }

  invisible(nrecs)
}
#################################################-
# set profile options to avoid download dialog --
#################################################-
# Firefox profile that saves downloads straight to disk without prompting.
# The download directory is expanded in R because the preference value is
# handed to Firefox as-is, and Firefox is not a shell: a literal "~" is not
# expected to be resolved to the home directory there.
# NOTE(review): the exports are requested as RIS while the only whitelisted
# MIME type is "application/json" -- confirm this matches what the server
# actually sends.
fprof <- RSelenium::makeFirefoxProfile(list(
  browser.download.dir = path.expand("~/Downloads"),
  browser.download.folderList = 2L,
  browser.download.manager.showWhenStarting = FALSE,
  browser.helperApps.neverAsk.saveToDisk = "application/json"
))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment