Skip to content

Instantly share code, notes, and snippets.

@dougvj
Last active January 10, 2025 02:37
Show Gist options
  • Save dougvj/b9fc4e31efd82136e513181a52405494 to your computer and use it in GitHub Desktop.
Save dougvj/b9fc4e31efd82136e513181a52405494 to your computer and use it in GitHub Desktop.
Fastest rm -rf in the west (recursive unlink with io_uring)
// Originally adapted from https://serverfault.com/a/328305
#include <err.h>
#include <libgen.h>
#include <sched.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <dirent.h>
#include <sys/stat.h>
#include <sys/types.h>
// Posix hashtable
#include <search.h>
#include <liburing.h>
// #include <linux/io_uring.h>
/* Try to keep the queue size to under two pages as internally its stored in
* the kernel as contiguously ordered pages. Basically the bigger you make it
* the higher order it becomes and the less likely you'll have the contiguous
* pages to support it, despite not hitting any user limits.
* This reduces an ENOMEM here by keeping the queue size as order 1
* Ring size internally is rougly 24 bytes per entry plus overheads I haven't
* accounted for.
*/
#define QUEUE_SIZE 256
/* Globals to manage the queue */
static volatile int pending = 0;
static volatile int total_files = 0;
static volatile int open_dirs = 0;
static volatile time_t last_stat_print = 0;
/* Probes kernel uring implementation and checks if action is
* supported inside the kernel */
static void probe_uring(struct io_uring* ring) {
struct io_uring_probe* pb = NULL;
pb = io_uring_get_probe_ring(ring);
/* Can we perform IO uring unlink in this kernel ? */
if (!io_uring_opcode_supported(pb, IORING_OP_UNLINKAT)) {
io_uring_free_probe(pb);
errno = ENOTSUP;
err(EXIT_FAILURE, "Unable to configure uring");
}
io_uring_free_probe(pb);
}
// Referenced count for the directory
// holding a reference to the parent directory as well
struct dir_ref {
DIR* dir;
char* dname;
#ifdef DEBUG_FULL_PATH
char* full_dname;
#endif
int count;
struct dir_ref* parent;
};
void dir_ref(struct dir_ref* dir) {
if (!dir)
return;
dir->count++;
}
static int submit_rm_request(struct dir_ref* dir,
const char* fname,
unsigned char type,
struct io_uring* ring);
void dir_unref(struct dir_ref* dir, struct io_uring* ring) {
if (!dir)
return;
dir->count--;
if (dir->count == 0) {
// Now we can submit the unlink call for the directory
// itself and then close the directory
if (dir->parent) {
while (!submit_rm_request(dir->parent, dir->dname, DT_DIR, ring)) {
io_uring_submit(ring);
sched_yield();
}
dir_unref(dir->parent, ring);
}
closedir(dir->dir);
free(dir->dname);
#ifdef DEBUG_FULL_PATH
free(dir->full_dname);
#endif
free(dir);
open_dirs--;
}
}
struct dir_ref* dir_ref_new(DIR* dir,
const char* dname,
struct dir_ref* parent) {
struct dir_ref* d = malloc(sizeof(struct dir_ref));
if (!d)
err(EXIT_FAILURE, "Cannot allocate memory for directory reference");
d->dir = dir;
d->count = 1;
dir_ref(parent);
d->parent = parent;
d->dname = strdup(dname);
#ifdef DEBUG_FULL_PATH
asprintf(&d->full_dname, "%s/%s", parent ? parent->full_dname : "", dname);
#endif
open_dirs++;
return d;
}
struct unlink_data {
char* fname;
#ifdef DEBUG_FULL_PATH
char* full_fname;
#endif
struct dir_ref* dir;
};
/* Place a unlink call for the specified file/directory on the ring */
static int submit_rm_request(struct dir_ref* dir,
const char* fname,
unsigned char type,
struct io_uring* ring) {
// We need to store the filename and a counted reference to the directory
struct unlink_data* ud = malloc(sizeof(struct unlink_data));
char* fname_cpy = strdup(fname);
int dfd = dirfd(dir->dir);
struct io_uring_sqe* sqe = NULL;
// printf("Unlinking: %s\n", fname);
/* Fetch a free submission entry off the ring */
sqe = io_uring_get_sqe(ring);
if (!sqe)
/* Submission queue full */
return 0;
pending++;
/* Format the unlink call for submission */
io_uring_prep_rw(IORING_OP_UNLINKAT, sqe, dfd, fname_cpy, 0, 0);
sqe->unlink_flags = type == DT_DIR ? AT_REMOVEDIR : 0;
#ifdef DEBUG_FULL_PATH
asprintf(&ud->full_fname, "%s/%s", dir->full_dname, fname);
if (sqe->unlink_flags > 0) {
printf("Unlinking directory: %s\n", ud->full_fname);
} else {
printf("Unlinking file: %s\n", ud->full_fname);
}
#endif
dir_ref(dir);
ud->fname = fname_cpy;
ud->dir = dir;
/* Set the data to our unlink_data structure */
io_uring_sqe_set_data(sqe, ud);
return 1;
}
static void consume_queue(struct io_uring* ring);
void submit_rm_and_process(struct dir_ref* dir,
const char* fname,
unsigned char type,
struct io_uring* ring) {
while (!submit_rm_request(dir, fname, type, ring)) {
consume_queue(ring);
sched_yield();
}
}
/* Submit the pending queue, then reap the queue
* clearing up room on the completion queue */
static void consume_queue(struct io_uring* ring) {
struct unlink_data* ud = NULL;
int i = 0, bad = 0;
int rc;
struct io_uring_cqe** cqes = NULL;
if (pending < 0)
abort();
cqes = calloc(pending, sizeof(struct io_uring_cqe*));
if (!cqes)
err(EXIT_FAILURE, "Cannot find memory for CQE pointers");
/* Notify about submitted entries from the queue (this is a async call) */
io_uring_submit(ring);
/* We can immediately take a peek to see if we've anything completed */
rc = io_uring_peek_batch_cqe(ring, cqes, pending);
/* Iterate the list of completed entries. Check nothing crazy happened */
for (i = 0; i < rc; i++) {
/* This returns the filename we set earlier */
ud = io_uring_cqe_get_data(cqes[i]);
char* fn = ud->fname;
/* Check the error code of the unlink calls */
if (cqes[i]->res < 0) {
errno = -cqes[i]->res;
#ifdef DEBUG_FULL_PATH
err(EXIT_FAILURE, "Unlinking entry %s failed", ud->full_fname);
#else
err(EXIT_FAILURE, "Unlinking entry %s failed", fn);
#endif
}
/* Clear up our CQE */
io_uring_cqe_seen(ring, cqes[i]);
dir_unref(ud->dir, ring);
free(fn);
#ifdef DEBUG_FULL_PATH
free(ud->full_fname);
#endif
free(ud);
}
pending -= rc;
total_files += rc - bad;
free(cqes);
}
void process_dir(const char* dir,
struct io_uring* ring,
struct dir_ref* parent_dir) {
DIR* target = NULL;
struct dirent* fn;
struct dir_ref* dir_ref;
/* Open the directory */
target = opendir(dir);
const char* dname = basename((char*)dir);
if (!target) {
// If there are too many open files, try to process the queue
// and then try again. Procesing the queue should clear out
// references to directories once they are unlinked.
while (errno == EMFILE) {
// printf("Too many open files: %d (%d pending): this pid: %d\n",
// open_dirs, pending, getpid());
consume_queue(ring);
sched_yield();
target = opendir(dir);
if (target) {
goto cont;
}
if (errno != EMFILE) {
break;
}
}
err(EXIT_FAILURE, "Opening the directory failed");
}
cont:
/* Create a reference to the directory so we can keep it open
while we're processing it. */
dir_ref = dir_ref_new(target, dname, parent_dir);
/* So as of writing this code, GETDENTS doesn't have URING support.
* but checking the kernel mailing list indicates its in progress.
* For now, we'll just do laymans readdir(). These days theres no
* actual difference between it and making the getdents() call ourselves.
*/
while ((fn = readdir(target))) {
if (fn->d_type == DT_DIR) {
char new_dir[4096];
if (strcmp(fn->d_name, ".") == 0 || strcmp(fn->d_name, "..") == 0) {
continue;
}
// Recurse into directories
snprintf(new_dir, sizeof(new_dir), "%s/%s", dir, fn->d_name);
process_dir(new_dir, ring, dir_ref);
continue;
} else {
// printf("Processing file: %s\n", fn->d_name);
submit_rm_and_process(dir_ref, fn->d_name, fn->d_type, ring);
}
time_t now = time(NULL);
// printf("now: %ld, last_stat_print: %ld\n", now, last_stat_print);
if (now - last_stat_print >= 1) {
printf("Currently Queued->Total Removed: %d->%d\t%s\33[2K\r", pending,
total_files, dir);
fflush(stdout);
last_stat_print = now;
}
}
// Now we are done with the directory, we can unref it here. When all
// references are gone, the directory will be closed and the
// directory queue for removal
dir_unref(dir_ref, ring);
}
/* Main start */
int main(const int argc, const char** argv) {
struct io_uring ring = {0};
struct stat st = {0};
int dfd;
last_stat_print = time(NULL);
/* Check initial arguments passed make sense */
if (argc < 2)
errx(EXIT_FAILURE, "Must pass a directory to remove files from.");
/* Check path validity */
if (lstat(argv[1], &st) < 0)
err(EXIT_FAILURE, "Cannot access target directory");
if (!S_ISDIR(st.st_mode))
errx(EXIT_FAILURE, "Path specified must be a directory");
/* Create the initial uring for handling the file removals */
if (io_uring_queue_init(QUEUE_SIZE, &ring, 0) < 0)
err(EXIT_FAILURE, "Cannot initialize URING");
/* Check the unlink action is supported */
probe_uring(&ring);
char* start_dir = realpath(argv[1], NULL);
if (!start_dir) {
err(EXIT_FAILURE, "Failed to resolve path");
}
if (strcmp(start_dir, "/") == 0) {
err(EXIT_FAILURE, "Refusing to remove root directory");
}
process_dir(start_dir, &ring, NULL);
/* Out of files in directory to list. Just clear the queue */
while (pending) {
consume_queue(&ring);
sched_yield();
}
printf("Queued/Removed: %d->%d\n", pending, total_files);
// We can unlink the specified directory now
if (unlink(argv[1]) < 0)
warn("Failed to remove directory %s", argv[1]);
printf("Total files: %d\n", total_files);
free(start_dir);
io_uring_queue_exit(&ring);
exit(0);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment