Last active
January 10, 2025 02:37
-
-
Save dougvj/b9fc4e31efd82136e513181a52405494 to your computer and use it in GitHub Desktop.
Fastest rm -rf in the west (recursive unlink with io_uring)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Originally adapted from https://serverfault.com/a/328305 | |
#include <err.h> | |
#include <libgen.h> | |
#include <sched.h> | |
#include <stdint.h> | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <string.h> | |
#include <unistd.h> | |
#include <dirent.h> | |
#include <sys/stat.h> | |
#include <sys/types.h> | |
// Posix hashtable | |
#include <search.h> | |
#include <liburing.h> | |
// #include <linux/io_uring.h> | |
/* Try to keep the queue size to under two pages as internally its stored in | |
* the kernel as contiguously ordered pages. Basically the bigger you make it | |
* the higher order it becomes and the less likely you'll have the contiguous | |
* pages to support it, despite not hitting any user limits. | |
* This reduces an ENOMEM here by keeping the queue size as order 1 | |
* Ring size internally is rougly 24 bytes per entry plus overheads I haven't | |
* accounted for. | |
*/ | |
#define QUEUE_SIZE 256 | |
/* Globals to manage the queue */ | |
static volatile int pending = 0; | |
static volatile int total_files = 0; | |
static volatile int open_dirs = 0; | |
static volatile time_t last_stat_print = 0; | |
/* Probes kernel uring implementation and checks if action is | |
* supported inside the kernel */ | |
static void probe_uring(struct io_uring* ring) { | |
struct io_uring_probe* pb = NULL; | |
pb = io_uring_get_probe_ring(ring); | |
/* Can we perform IO uring unlink in this kernel ? */ | |
if (!io_uring_opcode_supported(pb, IORING_OP_UNLINKAT)) { | |
io_uring_free_probe(pb); | |
errno = ENOTSUP; | |
err(EXIT_FAILURE, "Unable to configure uring"); | |
} | |
io_uring_free_probe(pb); | |
} | |
// Referenced count for the directory | |
// holding a reference to the parent directory as well | |
struct dir_ref { | |
DIR* dir; | |
char* dname; | |
#ifdef DEBUG_FULL_PATH | |
char* full_dname; | |
#endif | |
int count; | |
struct dir_ref* parent; | |
}; | |
void dir_ref(struct dir_ref* dir) { | |
if (!dir) | |
return; | |
dir->count++; | |
} | |
static int submit_rm_request(struct dir_ref* dir, | |
const char* fname, | |
unsigned char type, | |
struct io_uring* ring); | |
void dir_unref(struct dir_ref* dir, struct io_uring* ring) { | |
if (!dir) | |
return; | |
dir->count--; | |
if (dir->count == 0) { | |
// Now we can submit the unlink call for the directory | |
// itself and then close the directory | |
if (dir->parent) { | |
while (!submit_rm_request(dir->parent, dir->dname, DT_DIR, ring)) { | |
io_uring_submit(ring); | |
sched_yield(); | |
} | |
dir_unref(dir->parent, ring); | |
} | |
closedir(dir->dir); | |
free(dir->dname); | |
#ifdef DEBUG_FULL_PATH | |
free(dir->full_dname); | |
#endif | |
free(dir); | |
open_dirs--; | |
} | |
} | |
struct dir_ref* dir_ref_new(DIR* dir, | |
const char* dname, | |
struct dir_ref* parent) { | |
struct dir_ref* d = malloc(sizeof(struct dir_ref)); | |
if (!d) | |
err(EXIT_FAILURE, "Cannot allocate memory for directory reference"); | |
d->dir = dir; | |
d->count = 1; | |
dir_ref(parent); | |
d->parent = parent; | |
d->dname = strdup(dname); | |
#ifdef DEBUG_FULL_PATH | |
asprintf(&d->full_dname, "%s/%s", parent ? parent->full_dname : "", dname); | |
#endif | |
open_dirs++; | |
return d; | |
} | |
struct unlink_data { | |
char* fname; | |
#ifdef DEBUG_FULL_PATH | |
char* full_fname; | |
#endif | |
struct dir_ref* dir; | |
}; | |
/* Place a unlink call for the specified file/directory on the ring */ | |
static int submit_rm_request(struct dir_ref* dir, | |
const char* fname, | |
unsigned char type, | |
struct io_uring* ring) { | |
// We need to store the filename and a counted reference to the directory | |
struct unlink_data* ud = malloc(sizeof(struct unlink_data)); | |
char* fname_cpy = strdup(fname); | |
int dfd = dirfd(dir->dir); | |
struct io_uring_sqe* sqe = NULL; | |
// printf("Unlinking: %s\n", fname); | |
/* Fetch a free submission entry off the ring */ | |
sqe = io_uring_get_sqe(ring); | |
if (!sqe) | |
/* Submission queue full */ | |
return 0; | |
pending++; | |
/* Format the unlink call for submission */ | |
io_uring_prep_rw(IORING_OP_UNLINKAT, sqe, dfd, fname_cpy, 0, 0); | |
sqe->unlink_flags = type == DT_DIR ? AT_REMOVEDIR : 0; | |
#ifdef DEBUG_FULL_PATH | |
asprintf(&ud->full_fname, "%s/%s", dir->full_dname, fname); | |
if (sqe->unlink_flags > 0) { | |
printf("Unlinking directory: %s\n", ud->full_fname); | |
} else { | |
printf("Unlinking file: %s\n", ud->full_fname); | |
} | |
#endif | |
dir_ref(dir); | |
ud->fname = fname_cpy; | |
ud->dir = dir; | |
/* Set the data to our unlink_data structure */ | |
io_uring_sqe_set_data(sqe, ud); | |
return 1; | |
} | |
static void consume_queue(struct io_uring* ring); | |
void submit_rm_and_process(struct dir_ref* dir, | |
const char* fname, | |
unsigned char type, | |
struct io_uring* ring) { | |
while (!submit_rm_request(dir, fname, type, ring)) { | |
consume_queue(ring); | |
sched_yield(); | |
} | |
} | |
/* Submit the pending queue, then reap the queue | |
* clearing up room on the completion queue */ | |
static void consume_queue(struct io_uring* ring) { | |
struct unlink_data* ud = NULL; | |
int i = 0, bad = 0; | |
int rc; | |
struct io_uring_cqe** cqes = NULL; | |
if (pending < 0) | |
abort(); | |
cqes = calloc(pending, sizeof(struct io_uring_cqe*)); | |
if (!cqes) | |
err(EXIT_FAILURE, "Cannot find memory for CQE pointers"); | |
/* Notify about submitted entries from the queue (this is a async call) */ | |
io_uring_submit(ring); | |
/* We can immediately take a peek to see if we've anything completed */ | |
rc = io_uring_peek_batch_cqe(ring, cqes, pending); | |
/* Iterate the list of completed entries. Check nothing crazy happened */ | |
for (i = 0; i < rc; i++) { | |
/* This returns the filename we set earlier */ | |
ud = io_uring_cqe_get_data(cqes[i]); | |
char* fn = ud->fname; | |
/* Check the error code of the unlink calls */ | |
if (cqes[i]->res < 0) { | |
errno = -cqes[i]->res; | |
#ifdef DEBUG_FULL_PATH | |
err(EXIT_FAILURE, "Unlinking entry %s failed", ud->full_fname); | |
#else | |
err(EXIT_FAILURE, "Unlinking entry %s failed", fn); | |
#endif | |
} | |
/* Clear up our CQE */ | |
io_uring_cqe_seen(ring, cqes[i]); | |
dir_unref(ud->dir, ring); | |
free(fn); | |
#ifdef DEBUG_FULL_PATH | |
free(ud->full_fname); | |
#endif | |
free(ud); | |
} | |
pending -= rc; | |
total_files += rc - bad; | |
free(cqes); | |
} | |
void process_dir(const char* dir, | |
struct io_uring* ring, | |
struct dir_ref* parent_dir) { | |
DIR* target = NULL; | |
struct dirent* fn; | |
struct dir_ref* dir_ref; | |
/* Open the directory */ | |
target = opendir(dir); | |
const char* dname = basename((char*)dir); | |
if (!target) { | |
// If there are too many open files, try to process the queue | |
// and then try again. Procesing the queue should clear out | |
// references to directories once they are unlinked. | |
while (errno == EMFILE) { | |
// printf("Too many open files: %d (%d pending): this pid: %d\n", | |
// open_dirs, pending, getpid()); | |
consume_queue(ring); | |
sched_yield(); | |
target = opendir(dir); | |
if (target) { | |
goto cont; | |
} | |
if (errno != EMFILE) { | |
break; | |
} | |
} | |
err(EXIT_FAILURE, "Opening the directory failed"); | |
} | |
cont: | |
/* Create a reference to the directory so we can keep it open | |
while we're processing it. */ | |
dir_ref = dir_ref_new(target, dname, parent_dir); | |
/* So as of writing this code, GETDENTS doesn't have URING support. | |
* but checking the kernel mailing list indicates its in progress. | |
* For now, we'll just do laymans readdir(). These days theres no | |
* actual difference between it and making the getdents() call ourselves. | |
*/ | |
while ((fn = readdir(target))) { | |
if (fn->d_type == DT_DIR) { | |
char new_dir[4096]; | |
if (strcmp(fn->d_name, ".") == 0 || strcmp(fn->d_name, "..") == 0) { | |
continue; | |
} | |
// Recurse into directories | |
snprintf(new_dir, sizeof(new_dir), "%s/%s", dir, fn->d_name); | |
process_dir(new_dir, ring, dir_ref); | |
continue; | |
} else { | |
// printf("Processing file: %s\n", fn->d_name); | |
submit_rm_and_process(dir_ref, fn->d_name, fn->d_type, ring); | |
} | |
time_t now = time(NULL); | |
// printf("now: %ld, last_stat_print: %ld\n", now, last_stat_print); | |
if (now - last_stat_print >= 1) { | |
printf("Currently Queued->Total Removed: %d->%d\t%s\33[2K\r", pending, | |
total_files, dir); | |
fflush(stdout); | |
last_stat_print = now; | |
} | |
} | |
// Now we are done with the directory, we can unref it here. When all | |
// references are gone, the directory will be closed and the | |
// directory queue for removal | |
dir_unref(dir_ref, ring); | |
} | |
/* Main start */ | |
int main(const int argc, const char** argv) { | |
struct io_uring ring = {0}; | |
struct stat st = {0}; | |
int dfd; | |
last_stat_print = time(NULL); | |
/* Check initial arguments passed make sense */ | |
if (argc < 2) | |
errx(EXIT_FAILURE, "Must pass a directory to remove files from."); | |
/* Check path validity */ | |
if (lstat(argv[1], &st) < 0) | |
err(EXIT_FAILURE, "Cannot access target directory"); | |
if (!S_ISDIR(st.st_mode)) | |
errx(EXIT_FAILURE, "Path specified must be a directory"); | |
/* Create the initial uring for handling the file removals */ | |
if (io_uring_queue_init(QUEUE_SIZE, &ring, 0) < 0) | |
err(EXIT_FAILURE, "Cannot initialize URING"); | |
/* Check the unlink action is supported */ | |
probe_uring(&ring); | |
char* start_dir = realpath(argv[1], NULL); | |
if (!start_dir) { | |
err(EXIT_FAILURE, "Failed to resolve path"); | |
} | |
if (strcmp(start_dir, "/") == 0) { | |
err(EXIT_FAILURE, "Refusing to remove root directory"); | |
} | |
process_dir(start_dir, &ring, NULL); | |
/* Out of files in directory to list. Just clear the queue */ | |
while (pending) { | |
consume_queue(&ring); | |
sched_yield(); | |
} | |
printf("Queued/Removed: %d->%d\n", pending, total_files); | |
// We can unlink the specified directory now | |
if (unlink(argv[1]) < 0) | |
warn("Failed to remove directory %s", argv[1]); | |
printf("Total files: %d\n", total_files); | |
free(start_dir); | |
io_uring_queue_exit(&ring); | |
exit(0); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment