Skip to content

Instantly share code, notes, and snippets.

@timriffe
Created January 31, 2025 12:03
Show Gist options
  • Save timriffe/1edfc3b8fc381ebff5adcd9cf6f45d78 to your computer and use it in GitHub Desktop.
Save timriffe/1edfc3b8fc381ebff5adcd9cf6f45d78 to your computer and use it in GitHub Desktop.
Downloading all CDC vital statistics public-use microdata files
library(tidyverse)
library(rvest)
library(httr)
# Raise the global transfer timeout (in seconds). download.file() takes its
# time limit from this option.
options(timeout = 1e6)

# Page whose links are harvested.
cdc_url <- "https://www.cdc.gov/nchs/data_access/vitalstatsonline.htm"
pg <- read_html(cdc_url)

# Every href on the page. Anchors without an href come back as NA; grepl()
# reports FALSE for NA, so they drop out of the subsets below.
test <- html_attr(html_nodes(pg, "a"), "href")

# Match the extensions literally (fixed = TRUE). Used as a regular expression,
# the "." in ".pdf" / ".zip" matches any character, so unrelated links such as
# ".../winzip..." (via "nzip") were being picked up as well.
pdfs <- test[grepl(".pdf", test, fixed = TRUE)]
zips <- test[grepl(".zip", test, fixed = TRUE)]

# Drop links that mention "winzip" (presumably a pointer to the WinZip tool
# rather than a data archive -- confirm).
zips <- zips[!grepl("winzip", zips, fixed = TRUE)]
# Download the zip archives listed in `zips` into ./zips.
#
# The original run split the work across four manually opened R sessions using
# hard-coded index ranges (2:99, 100:199, 200:299, 300:354). The range is now
# derived from length(zips), so the script keeps working when the page gains
# or loses links. To parallelise by hand, run the loop in several sessions
# with disjoint subsets of `zip_idx`.
dir.create("zips", showWarnings = FALSE)

# NOTE(review): the original started at the second link (zips[2]); that is
# preserved here. Presumably zips[1] had already been fetched -- confirm, and
# use seq_along(zips) instead if it should be included.
zip_idx <- seq_along(zips)[-1]

for (k in zip_idx) {
  z <- zips[[k]]
  # The file name is the last path component of the URL.
  destfile <- file.path("zips", basename(z))
  # download.file() has no `timeout` argument (extra arguments are ignored);
  # the limit comes from options(timeout = ...). mode = "wb" forces a binary
  # transfer on every platform.
  tryCatch(
    download.file(url = z, destfile = destfile, mode = "wb"),
    error = function(e) {
      # Remove whatever was written and keep going with the remaining files.
      unlink(destfile)
      warning("Failed to download ", z, ": ", conditionMessage(e),
              call. = FALSE)
    }
  )
}
# Download every PDF listed in `pdfs` into ./pdfs.
dir.create("pdfs", showWarnings = FALSE)

for (i in seq_along(pdfs)) {
  p <- pdfs[[i]]
  # Prefix the position in `pdfs` to the last path component of the URL.
  # NOTE(review): presumably this keeps same-named PDFs from overwriting each
  # other -- confirm.
  filename <- paste0(i, basename(p))
  destfile <- file.path("pdfs", filename)
  # mode = "wb" is required for PDFs: on Windows download.file() only switches
  # to binary transfer automatically for a fixed set of extensions (.zip, .gz,
  # ...) that does not include .pdf, so the default text mode corrupts them.
  # download.file() has no `timeout` argument (extra arguments are ignored);
  # the limit comes from options(timeout = ...).
  tryCatch(
    download.file(url = p, destfile = destfile, mode = "wb"),
    error = function(e) {
      # Remove whatever was written and keep going with the remaining files.
      unlink(destfile)
      warning("Failed to download ", p, ": ", conditionMessage(e),
              call. = FALSE)
    }
  )
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment