Created
March 20, 2025 14:30
-
-
Save lucharo/0823237823582a083cab671899586dd0 to your computer and use it in GitHub Desktop.
Marimo Notebook to Pick CSV Columns: Heya, as part of my work I receive client data as CSVs that are meant to follow a strict blueprint (column names, data types, etc). Clients can be a bit unreliable in that regard and send over column names in different languages or with typos. Instead of manually checking each column and adjusting the names ma…
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# /// script | |
# [tool.marimo.runtime] | |
# auto_instantiate = false | |
# /// | |
import marimo | |
# marimo version string recorded in this notebook file.
__generated_with = "0.11.20-dev10"

# Notebook application object; each cell below registers itself on it through
# the @app.cell decorator.
app = marimo.App(width="medium")
@app.cell
def _():
    # Dependencies used by the other cells; they are handed over through this
    # cell's return value.
    from io import StringIO

    import marimo as mo
    import polars as pl
    return StringIO, mo, pl
@app.cell
def _(StringIO, pl):
    # Stand-in for a client upload whose headers do not match the blueprint.
    csv_data = """
Fecha de viaje,Destino,Duración
2023-01-01,Paris,5
2023-02-15,New York,7
2023-03-10,Tokyo,10
"""
    # Wrap the text in a file-like object and parse it into a DataFrame.
    _buffer = StringIO(csv_data)
    uploaded_df = pl.read_csv(_buffer)
    # Cell output: the parsed table.
    uploaded_df
    return csv_data, uploaded_df
@app.cell
def _(mo, uploaded_df):
    # Column names the downstream pipeline expects.
    expected_columns = ['Travel Date', 'Destination', 'Duration']

    # Exported alongside the other names of this cell; nothing in this cell
    # fills it in.
    column_mapping = {}

    # Headers of the uploaded file; these are the choices offered by each radio.
    _uploaded = uploaded_df.columns

    # One radio group per expected column, keyed by the expected column name.
    radio = {}
    for _position, expected_col in enumerate(expected_columns):
        # Preselect the uploaded column in the same position so the initial
        # state is a one-to-one mapping instead of every radio pointing at the
        # first column. If the upload has fewer columns than expected, fall
        # back to the first uploaded column.
        if _position < len(_uploaded):
            _default = _uploaded[_position]
        else:
            _default = _uploaded[0]
        radio[expected_col] = mo.ui.radio(
            options=_uploaded,
            value=_default,
            label=f"Map to **'{expected_col}'**:",
            inline=True,
        )

    # The rename cell only runs once this button has been pressed.
    map_button = mo.ui.run_button(label="Map columns!")

    # Cell output: the radio groups stacked vertically, button last.
    mo.vstack(list(radio.values()) + [map_button])
    return column_mapping, expected_col, expected_columns, map_button, radio
@app.cell
def _(map_button, mo, radio, uploaded_df):
    # Do nothing until the user presses the "Map columns!" button.
    mo.stop(not map_button.value)

    # Uploaded column currently selected for each expected column.
    _picked = [_element.value for _element in radio.values()]

    # An uploaded column selected more than once would collapse into a single
    # key of the rename dict below and silently drop a mapping, so halt with
    # an explanation instead. dict.fromkeys keeps first-seen order and removes
    # repeats from the report.
    _clashing = list(
        dict.fromkeys(_col for _col in _picked if _picked.count(_col) > 1)
    )
    _clash_list = ", ".join(f"`{_col}`" for _col in _clashing)
    mo.stop(
        bool(_clashing),
        mo.md(
            "**Each uploaded column can only be mapped once.** "
            f"Selected more than once: {_clash_list}"
        ),
    )

    # Rename dict: selected uploaded column -> expected column name.
    mapped_columns = {element.value: expected for expected, element in radio.items()}
    renamed_df = uploaded_df.rename(mapped_columns)

    # Cell output: the DataFrame with the expected column names applied.
    renamed_df
    return mapped_columns, renamed_df
# Run the notebook app when this file is executed as a script.
if __name__ == "__main__":
    app.run()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment