Skip to content

Instantly share code, notes, and snippets.

@alekrutkowski
Last active November 11, 2024 11:28
Show Gist options
  • Save alekrutkowski/5c2c56c0ee1134fa0aecd4301b3ef9c2 to your computer and use it in GitHub Desktop.
Save alekrutkowski/5c2c56c0ee1134fa0aecd4301b3ef9c2 to your computer and use it in GitHub Desktop.
Rust extendr/rextendr code to take R's data.frame as input, modify it (add columns), and return a modified data.frame
use extendr_api::prelude::*;
use rand::{distributions::Alphanumeric, Rng};
use std::collections::HashMap;
/// Takes an R data.frame, appends two columns and returns the result as a data.frame.
///
/// The input columns keep their original order. Two columns are appended at the end:
/// `row_number` (integers 1..=n) and `random_string` (random alphanumeric strings of
/// 1 to 10 characters). If the input already has a column with one of these names,
/// that column is dropped and replaced by the newly generated one, so names stay unique.
///
/// Panics (reported to R as an error) if the input has no `row.names` attribute, is not
/// a list, has more rows than fit in an R integer, or if an attribute cannot be set.
#[extendr]
fn add_columns(df: Dataframe<Robj>) -> List {
    const ROW_NUMBER: &str = "row_number";
    const RANDOM_STRING: &str = "random_string";

    // Fetch the row names once: their length is the row count, and the same
    // object is copied onto the result further down.
    let row_names = df
        .get_attrib("row.names")
        .expect("input data.frame has no `row.names` attribute");
    let n_rows = row_names.len();

    // R integers are 32-bit; fail loudly instead of silently truncating the count.
    let last_row = i32::try_from(n_rows).expect("row count does not fit in an R integer");
    let row_numbers: Vec<i32> = (1..=last_row).collect();

    // One RNG handle for the whole call instead of two lookups per row.
    let mut rng = rand::thread_rng();
    let random_strings: Vec<String> = (0..n_rows)
        .map(|_| {
            let len: usize = rng.gen_range(1..=10);
            (0..len)
                .map(|_| char::from(rng.sample(Alphanumeric)))
                .collect::<String>()
        })
        .collect();

    // Keep names and columns in two parallel, ordered vectors. A HashMap would
    // shuffle the columns because its iteration order is unspecified.
    let original = df.as_list().expect("input data.frame is not a list");
    let (mut names, mut columns): (Vec<&str>, Vec<Robj>) = original
        .iter()
        // Skip columns that would clash with the ones added below.
        .filter(|(name, _)| *name != ROW_NUMBER && *name != RANDOM_STRING)
        .unzip();

    names.push(ROW_NUMBER);
    columns.push(r!(row_numbers));
    names.push(RANDOM_STRING);
    columns.push(r!(random_strings));

    // Builds the list and sets its `names` attribute in the given order.
    let mut df_list = List::from_names_and_values(names, columns)
        .expect("names and columns have the same length");

    // Turn the plain list into a data.frame: class plus the original row names.
    df_list
        .set_attrib("class", ["data.frame"])
        .expect("failed to set the `class` attribute");
    df_list
        .set_attrib("row.names", row_names)
        .expect("failed to set the `row.names` attribute");

    df_list
}
// Macro to generate exports: declares the module `mymodule` and lists
// `add_columns` as a function to export from it.
// NOTE(review): in a package build the module name presumably has to match the
// name of the Rust library / R package — confirm against the extendr docs.
extendr_module! {
mod mymodule;
fn add_columns;
}
// Note: Make sure to add `rand` as a dependency in your `Cargo.toml`, like so:
//
// [dependencies]
// extendr-api = "0.7"
// rand = "0.8"
@alekrutkowski
Copy link
Author

> add_columns(data.frame(Aaa=101:110))

   random_string Aaa row_number
1       HjwLcacb 101          1
2          arKMI 102          2
3             sS 103          3
4              C 104          4
5     0lIM4zoHGC 105          5
6        HG7aSp2 106          6
7     7xLHYkLu0h 107          7
8     3l04gC5IIa 108          8
9      rqXFFIES2 109          9
10    my72MUjgWw 110         10

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment