Skip to content

Instantly share code, notes, and snippets.

@oousmane
Last active February 14, 2026 23:28
Show Gist options
  • Select an option

  • Save oousmane/a78eb6947215f7e3134de4d4dbc61376 to your computer and use it in GitHub Desktop.

Select an option

Save oousmane/a78eb6947215f7e3134de4d4dbc61376 to your computer and use it in GitHub Desktop.
List the files in an FTP (remote) directory/folder
#' List files on an FTP server
#'
#' Connects to a remote FTP directory and returns the list of file names,
#' optionally filtered by a regular expression pattern. This function is
#' the discovery companion to [ftp_download()]: use it to identify which
#' files exist before deciding what to download.
#'
#' @param url A single full FTP or FTPS URL pointing to a directory, e.g.
#' `"ftp://data.example.org/climate/daily/"`. Must start with `ftp://`
#' or `ftps://`.
#' @param pattern An optional regular expression passed to
#' [stringr::str_detect()] to filter file names. `NULL` (default) returns
#' all files.
#' @param use_epsv Logical. Whether to use Extended Passive mode (EPSV).
#' Default is `TRUE`. Set to `FALSE` if the server is behind a firewall
#' or NAT that does not support EPSV.
#'
#' @return A character vector of file names found in the remote directory.
#' Returns an empty character vector if the directory is empty or no
#' files match `pattern`.
#'
#' @seealso [ftp_download()] to download files once their URLs are known.
#'
#' @importFrom curl new_handle handle_setopt curl
#' @importFrom stringr str_detect
#'
#' @examples
#' \dontrun{
#' base_url <- "ftp://data.example.org/climate/daily/"
#'
#' # List all files
#' ftp_list_files(base_url)
#'
#' # List only CSV files
#' ftp_list_files(base_url, pattern = "\\.csv$")
#'
#' # Compose with ftp_download()
#' ftp_list_files(base_url, pattern = "2024") |>
#'   paste0(base_url, x = _) |>
#'   ftp_download(destdir = "data/raw")
#' }
#'
#' @export
ftp_list_files <- function(url, pattern = NULL, use_epsv = TRUE) {
  # Validate early: a single FTP/FTPS URL is required. The explicit
  # length-1 check gives a clear error instead of the cryptic
  # "condition has length > 1" failure a vector `url` would cause.
  if (!is.character(url) || length(url) != 1L || !grepl("^ftps?://", url)) {
    stop("`url` must be an FTP or FTPS URL (starting with ftp:// or ftps://).",
         call. = FALSE)
  }

  handle <- curl::new_handle()
  curl::handle_setopt(
    handle,
    ftp_use_epsv = use_epsv, # EPSV can fail behind some firewalls/NATs
    dirlistonly = TRUE       # return file names only, not full LIST output
  )

  # curl() with mode "r" opens the connection eagerly, so connection
  # failures surface here and get a friendlier message.
  con <- tryCatch(
    curl::curl(url = url, "r", handle = handle),
    error = function(e) {
      stop("Failed to connect to FTP server: ", conditionMessage(e),
           call. = FALSE)
    }
  )
  # Close the connection even if readLines() fails part-way through.
  on.exit(close(con), add = TRUE)

  # warn = FALSE: a listing without a trailing newline is not an error.
  files <- readLines(con, warn = FALSE)

  if (!is.null(pattern)) {
    files <- files[stringr::str_detect(files, pattern)]
  }
  files
}
#' Download files from an FTP server
#'
#' Downloads a vector of FTP URLs to a local directory. Each URL must point
#' to an individual file; directory URLs are not supported. Non-existent
#' remote files produce a warning and are skipped, leaving the rest of the
#' batch unaffected.
#'
#' @param urls A character vector of full FTP or FTPS file URLs. All elements
#' must start with `ftp://` or `ftps://`.
#' @param destdir Path to an existing local directory where files will be
#' saved. File names are derived from the last component of each URL via
#' [base::basename()].
#' @param overwrite Logical. If `FALSE` (default), files that already exist
#' in `destdir` are skipped. Set to `TRUE` to overwrite them.
#' @param use_epsv Logical. Whether to use Extended Passive mode (EPSV).
#' Default is `TRUE`. Set to `FALSE` if the server is behind a firewall
#' or NAT that does not support EPSV.
#' @param verbose Logical. If `TRUE`, prints a message for each file as it
#' is downloaded. Default is `FALSE`.
#'
#' @return Invisibly returns the character vector of intended local file
#' paths (including skipped files), in the same order as `urls`. Use
#' [base::file.exists()] on the result to verify which downloads
#' succeeded.
#'
#' @note This function does not perform directory listing. Use
#' [ftp_list_files()] first to discover available files, then pass the
#' constructed URLs to this function. This separation ensures you only
#' attempt to download files you know exist.
#'
#' @seealso [ftp_list_files()] to list available files before downloading.
#'
#' @importFrom curl new_handle handle_setopt curl_download
#'
#' @examples
#' \dontrun{
#' base_url <- "ftp://data.example.org/climate/daily/"
#'
#' # Download known files directly
#' ftp_download(
#'   urls = c("ftp://data.example.org/climate/daily/2024.csv",
#'            "ftp://data.example.org/climate/daily/2023.csv"),
#'   destdir = "data/raw"
#' )
#'
#' # Discover then download in one pipeline
#' ftp_list_files(base_url, pattern = "\\.csv$") |>
#'   paste0(base_url, x = _) |>
#'   ftp_download(destdir = "data/raw", verbose = TRUE)
#'
#' # Check what was actually downloaded
#' paths <- ftp_download(
#'   urls = paste0(base_url, c("2024.csv", "2023.csv")),
#'   destdir = "data/raw"
#' )
#' file.exists(paths)
#' }
#'
#' @export
ftp_download <- function(urls,
                         destdir,
                         overwrite = FALSE,
                         use_epsv = TRUE,
                         verbose = FALSE) {
  # Validate early with clear, call-free errors.
  if (!is.character(urls) || !all(grepl("^ftps?://", urls))) {
    stop("All `urls` must be FTP or FTPS URLs (starting with ftp:// or ftps://).",
         call. = FALSE)
  }
  if (!is.character(destdir) || length(destdir) != 1L || !dir.exists(destdir)) {
    stop("`destdir` does not exist: ", destdir, call. = FALSE)
  }

  # Full set of intended destination paths; this is what we return so the
  # caller can run file.exists() on every requested file, skipped or not.
  all_paths <- file.path(destdir, basename(urls))

  # Skip files already present locally unless the caller asked to overwrite.
  skip <- !overwrite & file.exists(all_paths)
  if (any(skip)) {
    message(sum(skip),
            " file(s) skipped (already exist). Use `overwrite = TRUE` to replace.")
  }
  todo_urls <- urls[!skip]
  todo_paths <- all_paths[!skip]

  if (length(todo_urls) == 0L) {
    return(invisible(all_paths))
  }

  handle <- curl::new_handle()
  curl::handle_setopt(handle, ftp_use_epsv = use_epsv)

  # Plain loop: downloads are pure side effects, so there is no result
  # list to collect (the original Map() discarded its return value).
  for (i in seq_along(todo_urls)) {
    src <- todo_urls[[i]]
    dst <- todo_paths[[i]]
    if (verbose) message("Downloading: ", basename(src))
    # A failed download warns and moves on; the rest of the batch proceeds.
    tryCatch(
      curl::curl_download(url = src, destfile = dst, handle = handle),
      error = function(e) {
        warning("Failed to download ", basename(src), ": ", conditionMessage(e),
                call. = FALSE)
      }
    )
  }

  invisible(all_paths)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment