Skip to content

Instantly share code, notes, and snippets.

@alekrutkowski
Last active November 19, 2024 09:23
Show Gist options
  • Save alekrutkowski/5cd6bbe1b2f19673b8e2f4ebc730a771 to your computer and use it in GitHub Desktop.
Save alekrutkowski/5cd6bbe1b2f19673b8e2f4ebc730a771 to your computer and use it in GitHub Desktop.
Rust extendr/rextendr code to take R's data.frame as input, modify it as a polars DataFrame (add columns), and return a modified R data.frame.
use extendr_api::prelude::*;
use polars::prelude::*;
// Helper function to convert an R data.frame to a Polars DataFrame
fn r_to_polars_dataframe(r_df: List) -> Result<DataFrame> {
let mut columns = Vec::new();
for (name, col) in r_df.iter() {
let col_name: PlSmallStr = name.into(); // Convert column name to PlSmallStr
let series = if let Some(slice) = col.as_real_slice() {
Series::new(col_name.clone(), slice)
} else if let Some(slice) = col.as_integer_slice() {
Series::new(col_name.clone(), slice)
} else if let Some(vec) = col.as_str_vector() {
Series::new(col_name.clone(), vec)
} else {
return Err(Error::Other(format!(
"Unsupported column type in '{}'",
name
)));
};
columns.push(series.into());
}
DataFrame::new(columns).map_err(|e| Error::Other(e.to_string()))
}
// Helper function to convert a Polars DataFrame to an R data.frame
fn polars_to_r_dataframe(df: DataFrame) -> Result<Robj> {
let mut names = Vec::new();
let mut values = Vec::new();
for col in df.iter() {
let r_col = match col.dtype() {
DataType::String => {
let utf8_values = col.str().unwrap();
let values: Vec<_> = utf8_values
.into_iter()
.map(|opt| opt.unwrap_or("").to_string())
.collect();
Robj::from(values)
}
DataType::Float64 => {
let float_values = col.f64().unwrap();
let values: Vec<_> = float_values
.into_iter()
.map(|opt| opt.unwrap_or(f64::NAN))
.collect();
Robj::from(values)
}
DataType::Int32 => {
let int_values = col.i32().unwrap();
let values: Vec<_> = int_values
.into_iter()
.map(|opt| opt.unwrap_or_default())
.collect();
Robj::from(values)
}
_ => {
return Err(Error::Other(format!(
"Unsupported column type: {:?}",
col.dtype()
)))
}
};
names.push(col.name().to_string());
values.push(r_col);
}
let list = List::from_pairs(names.into_iter().zip(values));
Ok(list.into_robj())
}
// Main function to add two columns
//
// Takes an R data.frame (received as a list of columns), appends
// `new_string_col` ("str_0", "str_1", ...) and `new_float_col`
// (0.0, 0.1, 0.2, ...) and returns the result as an R data.frame.
//
// Every failure (unsupported column type, Polars error, attribute error) is
// returned as an `Err` rather than causing a panic or being ignored.
#[extendr]
fn add_columns(r_df: List) -> Result<Robj> {
    // Convert R data.frame to Polars DataFrame
    let mut df = r_to_polars_dataframe(r_df)?;

    // The new columns must have exactly as many rows as the input.
    let df_len = df.height();
    let string_column = Series::new(
        "new_string_col".into(),
        (0..df_len).map(|i| format!("str_{}", i)).collect::<Vec<_>>(),
    );
    let float_column = Series::new(
        "new_float_col".into(),
        (0..df_len).map(|i| i as f64 * 0.1).collect::<Vec<_>>(),
    );

    // Add the new columns
    df.hstack_mut(&[string_column.into(), float_column.into()])
        .map_err(|e| Error::Other(e.to_string()))?;

    // Convert Polars DataFrame back to a named R list; propagate conversion
    // errors to the caller instead of panicking.
    let mut df_list = polars_to_r_dataframe(df)?;

    // Snapshot the column names before mutating attributes. A missing `names`
    // attribute is tolerated rather than treated as a panic.
    let column_names: Option<Vec<String>> = df_list
        .names()
        .map(|names| names.map(|s| s.to_string()).collect());

    // Turn the list into a data.frame. Attribute failures are propagated so a
    // half-built object is never returned as if it were valid.
    df_list.set_attrib("class", ["data.frame"])?;
    if let Some(column_names) = column_names {
        df_list.set_attrib("names", column_names)?;
    }
    // NOTE: row names are the zero-based row indices rendered as strings
    // ("0", "1", ...), not R's usual 1-based row names.
    df_list.set_attrib(
        "row.names",
        (0..df_len).map(|i| i.to_string()).collect::<Vec<_>>(),
    )?;

    Ok(df_list)
}
// Macro to export the function to R
//
// Declares the extendr module `mymodule` and lists `add_columns` as the only
// function it exposes.
// NOTE(review): the module name presumably has to match the name of the
// package/library being built — confirm against the build setup.
extendr_module! {
mod mymodule;
fn add_columns;
}
// Note: Make sure to add `polars` as a dependency in your `Cargo.toml`, like so:
//
// [dependencies]
// extendr-api = "0.7"
// polars = "0.44"
@alekrutkowski
Copy link
Author

> add_columns(data.frame(a=101:112,bB=letters[1:12]))
     a bB new_string_col new_float_col
0  101  a          str_0           0.0
1  102  b          str_1           0.1
2  103  c          str_2           0.2
3  104  d          str_3           0.3
4  105  e          str_4           0.4
5  106  f          str_5           0.5
6  107  g          str_6           0.6
7  108  h          str_7           0.7
8  109  i          str_8           0.8
9  110  j          str_9           0.9
10 111  k         str_10           1.0
11 112  l         str_11           1.1

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment