Last active
November 11, 2024 11:28
-
-
Save alekrutkowski/5c2c56c0ee1134fa0aecd4301b3ef9c2 to your computer and use it in GitHub Desktop.
Rust extendr/rextendr code to take R's data.frame as input, modify it (add columns), and return a modified data.frame
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use extendr_api::prelude::*; | |
use rand::{distributions::Alphanumeric, Rng}; | |
use std::collections::HashMap; | |
// Define the function to be used in R | |
#[extendr] | |
fn add_columns(df: Dataframe<Robj>) -> List { | |
// Get the number of rows in the data frame | |
let n_rows = df.get_attrib("row.names").unwrap().len(); | |
// Create a vector with row numbers. | |
let row_numbers: Vec<i32> = (1..=n_rows as i32).collect(); | |
// Create a vector of random strings of random length (up to 10 characters) | |
let random_strings: Vec<String> = (0..n_rows) | |
.map( |_| rand::thread_rng() | |
.sample_iter(&Alphanumeric) | |
.take(rand::thread_rng().gen_range(1..=10) as usize) | |
.map(char::from).collect() ) | |
.collect(); | |
// Convert the original dataframe to a list and add new columns | |
let mut map: HashMap<&str, Robj> = HashMap::new(); | |
for (name, column) in df.as_list().unwrap().iter() { | |
map.insert(name, column.clone()); // Column order random / not guaranteed!!! | |
} | |
map.insert("row_number", r!(row_numbers)); // Column order random / not guaranteed!!! | |
map.insert("random_string", r!(random_strings)); // Column order random / not guaranteed!!! | |
// Create a new List from the HashMap | |
let mut df_list = List::from_hashmap(map).unwrap(); | |
// Convert StrIter to Vec<String> to use with set_attrib | |
let names_vec: Vec<String> = df_list.names().unwrap().map(|s| s.to_string()).collect(); | |
// Convert (effectively) the list to a dataframe | |
let _ = df_list.set_attrib("class", ["data.frame"]); | |
let _ = df_list.set_attrib("names", names_vec); | |
let _ = df_list.set_attrib("row.names", df.get_attrib("row.names").unwrap()); | |
// Return the modified data frame. | |
df_list // Column order random / not guaranteed!!! | |
} | |
// Macro to generate exports | |
extendr_module! { | |
mod mymodule; | |
fn add_columns; | |
} | |
// Note: Make sure to add `rand` as a dependency in your `Cargo.toml`, like so: | |
// | |
// [dependencies] | |
// extendr-api = "0.7" | |
// rand = "0.8" |
Author
alekrutkowski
commented
Nov 11, 2024
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment