Last active
November 19, 2024 09:23
-
-
Save alekrutkowski/5cd6bbe1b2f19673b8e2f4ebc730a771 to your computer and use it in GitHub Desktop.
Rust extendr/rextendr code to take R's data.frame as input, modify it as a polars DataFrame (add columns), and return a modified R data.frame.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use extendr_api::prelude::*; | |
use polars::prelude::*; | |
// Helper function to convert an R data.frame to a Polars DataFrame | |
fn r_to_polars_dataframe(r_df: List) -> Result<DataFrame> { | |
let mut columns = Vec::new(); | |
for (name, col) in r_df.iter() { | |
let col_name: PlSmallStr = name.into(); // Convert column name to PlSmallStr | |
let series = if let Some(slice) = col.as_real_slice() { | |
Series::new(col_name.clone(), slice) | |
} else if let Some(slice) = col.as_integer_slice() { | |
Series::new(col_name.clone(), slice) | |
} else if let Some(vec) = col.as_str_vector() { | |
Series::new(col_name.clone(), vec) | |
} else { | |
return Err(Error::Other(format!( | |
"Unsupported column type in '{}'", | |
name | |
))); | |
}; | |
columns.push(series.into()); | |
} | |
DataFrame::new(columns).map_err(|e| Error::Other(e.to_string())) | |
} | |
// Helper function to convert a Polars DataFrame to an R data.frame | |
fn polars_to_r_dataframe(df: DataFrame) -> Result<Robj> { | |
let mut names = Vec::new(); | |
let mut values = Vec::new(); | |
for col in df.iter() { | |
let r_col = match col.dtype() { | |
DataType::String => { | |
let utf8_values = col.str().unwrap(); | |
let values: Vec<_> = utf8_values | |
.into_iter() | |
.map(|opt| opt.unwrap_or("").to_string()) | |
.collect(); | |
Robj::from(values) | |
} | |
DataType::Float64 => { | |
let float_values = col.f64().unwrap(); | |
let values: Vec<_> = float_values | |
.into_iter() | |
.map(|opt| opt.unwrap_or(f64::NAN)) | |
.collect(); | |
Robj::from(values) | |
} | |
DataType::Int32 => { | |
let int_values = col.i32().unwrap(); | |
let values: Vec<_> = int_values | |
.into_iter() | |
.map(|opt| opt.unwrap_or_default()) | |
.collect(); | |
Robj::from(values) | |
} | |
_ => { | |
return Err(Error::Other(format!( | |
"Unsupported column type: {:?}", | |
col.dtype() | |
))) | |
} | |
}; | |
names.push(col.name().to_string()); | |
values.push(r_col); | |
} | |
let list = List::from_pairs(names.into_iter().zip(values)); | |
Ok(list.into_robj()) | |
} | |
// Main function to add two columns | |
#[extendr] | |
fn add_columns(r_df: List) -> Result<Robj> { | |
// Convert R data.frame to Polars DataFrame | |
let mut df = r_to_polars_dataframe(r_df)?; | |
// Get the length of the DataFrame | |
let df_len = df.height(); | |
// Create new columns with matching length | |
let string_column = Series::new( | |
"new_string_col".into(), | |
(0..df_len).map(|i| format!("str_{}", i)).collect::<Vec<_>>(), | |
); | |
let float_column = Series::new( | |
"new_float_col".into(), | |
(0..df_len).map(|i| i as f64 * 0.1).collect::<Vec<_>>(), | |
); | |
// Add the new columns | |
df.hstack_mut(&[string_column.into(), float_column.into()]) | |
.map_err(|e| Error::Other(e.to_string()))?; | |
// Convert Polars DataFrame back to R data.frame | |
let mut df_list = polars_to_r_dataframe(df).unwrap(); | |
// Convert (effectively) the list to a dataframe | |
let names_vec: Vec<String> = df_list.names().unwrap().map(|s| s.to_string()).collect(); | |
let _ = df_list.set_attrib("class", ["data.frame"]); | |
let _ = df_list.set_attrib("names", names_vec); | |
let _ = df_list.set_attrib("row.names", (0..df_len).map(|s| s.to_string()).collect::<Vec<_>>()); | |
Ok(df_list) | |
} | |
// Macro to export the function to R | |
extendr_module! { | |
mod mymodule; | |
fn add_columns; | |
} | |
// Note: make sure `extendr-api` and `polars` are listed as dependencies in
// your `Cargo.toml`, like so:
//
// [dependencies]
// extendr-api = "0.7"
// polars = "0.44"
Author
alekrutkowski
commented
Nov 15, 2024
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment