Last active
February 14, 2026 23:28
-
-
Save oousmane/a78eb6947215f7e3134de4d4dbc61376 to your computer and use it in GitHub Desktop.
List the files in a FTP (remote) directory/folder
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#' List files on an FTP server
#'
#' Connects to a remote FTP directory and returns the list of file names,
#' optionally filtered by a regular expression pattern. This function is
#' the discovery companion to [ftp_download()]: use it to identify which
#' files exist before deciding what to download.
#'
#' The function signals an error when `url` is not a single FTP/FTPS URL,
#' and when the connection to the server cannot be opened (the underlying
#' curl message is appended to `"Failed to connect to FTP server: "`).
#'
#' @param url A single full FTP or FTPS URL pointing to a directory, e.g.
#'   `"ftp://data.example.org/climate/daily/"`. Must start with `ftp://`
#'   or `ftps://`.
#' @param pattern An optional regular expression passed to
#'   [stringr::str_detect()] to filter file names. `NULL` (default) returns
#'   all files.
#' @param use_epsv Logical. Whether to use Extended Passive mode (EPSV).
#'   Default is `TRUE`. Set to `FALSE` if the server is behind a firewall
#'   or NAT that does not support EPSV.
#'
#' @return A character vector of file names found in the remote directory.
#'   Returns an empty character vector if the directory is empty or no
#'   files match `pattern`.
#'
#' @seealso [ftp_download()] to download files once their URLs are known.
#'
#' @importFrom curl new_handle handle_setopt curl
#' @importFrom stringr str_detect
#'
#' @examples
#' \dontrun{
#' base_url <- "ftp://data.example.org/climate/daily/"
#'
#' # List all files
#' ftp_list_files(base_url)
#'
#' # List only CSV files
#' ftp_list_files(base_url, pattern = "\\.csv$")
#'
#' # Compose with ftp_download()
#' ftp_list_files(base_url, pattern = "2024") |>
#'   paste0(base_url, x = _) |>
#'   ftp_download(destdir = "data/raw")
#' }
#'
#' @export
ftp_list_files <- function(url, pattern = NULL, use_epsv = TRUE) {
  # Reject NULL / multi-element input explicitly. Without this guard the
  # `if ()` below fails with a generic base-R condition-length error that
  # never mentions `url`.
  if (length(url) != 1L) {
    stop(
      "`url` must be a single URL string, not a vector of length ",
      length(url), ".",
      call. = FALSE
    )
  }
  # grepl() yields FALSE for NA, so a missing URL is rejected here as well.
  if (!grepl("^ftps?://", url)) {
    stop(
      "`url` must be an FTP or FTPS URL (starting with ftp:// or ftps://).",
      call. = FALSE
    )
  }

  # `dirlistonly` requests a names-only listing, one entry per line.
  handle <- curl::new_handle()
  curl::handle_setopt(
    handle,
    ftp_use_epsv = use_epsv,
    dirlistonly = TRUE
  )

  # Re-signal connection failures with a message that names the operation;
  # call. = FALSE keeps the internal handler frame out of the error text.
  con <- tryCatch(
    curl::curl(url = url, "r", handle = handle),
    error = function(e) {
      stop(
        "Failed to connect to FTP server: ", conditionMessage(e),
        call. = FALSE
      )
    }
  )
  on.exit(close(con), add = TRUE)

  files <- readLines(con)
  if (!is.null(pattern)) {
    files <- files[stringr::str_detect(files, pattern)]
  }
  files
}
#' Download files from an FTP server
#'
#' Downloads a vector of FTP URLs to a local directory. Each URL must point
#' to an individual file; directory URLs are not supported. Non-existent
#' remote files produce a warning and are skipped, leaving the rest of the
#' batch unaffected.
#'
#' @param urls A character vector of full FTP or FTPS file URLs. All elements
#'   must start with `ftp://` or `ftps://`.
#' @param destdir Path to an existing local directory where files will be
#'   saved. File names are derived from the last component of each URL via
#'   [base::basename()].
#' @param overwrite Logical. If `FALSE` (default), files that already exist
#'   in `destdir` are skipped. Set to `TRUE` to overwrite them.
#' @param use_epsv Logical. Whether to use Extended Passive mode (EPSV).
#'   Default is `TRUE`. Set to `FALSE` if the server is behind a firewall
#'   or NAT that does not support EPSV.
#' @param verbose Logical. If `TRUE`, prints a message for each file as it
#'   is downloaded. Default is `FALSE`.
#'
#' @return Invisibly returns the character vector of intended local file
#'   paths, one per element of `urls` and in the same order. Files skipped
#'   because they already exist and files whose download failed are
#'   included. Use [base::file.exists()] on the result to verify which
#'   files are present locally.
#'
#' @note This function does not perform directory listing. Use
#'   [ftp_list_files()] first to discover available files, then pass the
#'   constructed URLs to this function. This separation ensures you only
#'   attempt to download files you know exist.
#'
#' @seealso [ftp_list_files()] to list available files before downloading.
#'
#' @importFrom curl new_handle handle_setopt curl_download
#'
#' @examples
#' \dontrun{
#' base_url <- "ftp://data.example.org/climate/daily/"
#'
#' # Download known files directly
#' ftp_download(
#'   urls = c("ftp://data.example.org/climate/daily/2024.csv",
#'            "ftp://data.example.org/climate/daily/2023.csv"),
#'   destdir = "data/raw"
#' )
#'
#' # Discover then download in one pipeline
#' ftp_list_files(base_url, pattern = "\\.csv$") |>
#'   paste0(base_url, x = _) |>
#'   ftp_download(destdir = "data/raw", verbose = TRUE)
#'
#' # Check what was actually downloaded
#' paths <- ftp_download(
#'   urls = paste0(base_url, c("2024.csv", "2023.csv")),
#'   destdir = "data/raw"
#' )
#' file.exists(paths)
#' }
#'
#' @export
ftp_download <- function(
  urls,
  destdir,
  overwrite = FALSE,
  use_epsv = TRUE,
  verbose = FALSE
) {
  if (!all(grepl("^ftps?://", urls))) {
    stop(
      "All `urls` must be FTP or FTPS URLs (starting with ftp:// or ftps://).",
      call. = FALSE
    )
  }
  if (!dir.exists(destdir)) {
    stop("`destdir` does not exist: ", destdir, call. = FALSE)
  }

  # One target path per URL. This full vector is what gets returned, so the
  # result stays aligned with `urls` even when some entries are skipped.
  local_paths <- file.path(destdir, basename(urls))

  skip <- !overwrite & file.exists(local_paths)
  if (any(skip)) {
    message(
      sum(skip),
      " file(s) skipped (already exist). Use `overwrite = TRUE` to replace."
    )
  }

  # Work on indices rather than subsetting `urls` / `local_paths` in place:
  # subsetting them is what previously dropped skipped files from the result.
  pending <- which(!skip)
  if (length(pending) == 0) {
    return(invisible(local_paths))
  }

  handle <- curl::new_handle()
  curl::handle_setopt(handle, ftp_use_epsv = use_epsv)

  for (i in pending) {
    src <- urls[[i]]
    dst <- local_paths[[i]]
    if (verbose) message("Downloading: ", basename(src))
    # A failed transfer is downgraded to a warning so the remaining files
    # in the batch are still attempted.
    tryCatch(
      curl::curl_download(url = src, destfile = dst, handle = handle),
      error = function(e) {
        warning(
          "Failed to download ", basename(src), ": ", conditionMessage(e),
          call. = FALSE
        )
      }
    )
  }

  invisible(local_paths)
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment