Skip to content

Instantly share code, notes, and snippets.

@lucharo
Created March 20, 2025 14:30
Show Gist options
  • Save lucharo/0823237823582a083cab671899586dd0 to your computer and use it in GitHub Desktop.
Save lucharo/0823237823582a083cab671899586dd0 to your computer and use it in GitHub Desktop.
Marimo Notebook to Pick CSV Columns: Heya, as part of my work I receive client data in CSVs that are meant to follow a strict blueprint (column names, data types, etc.). Clients can be a bit unreliable in that regard and send over column names in different languages or with typos. Instead of manually checking each column and adjusting the names ma…
# /// script
# [tool.marimo.runtime]
# auto_instantiate = false
# ///
import marimo
# NOTE(review): presumably the marimo version that generated this file — confirm.
__generated_with = "0.11.20-dev10"
# Notebook application object: each cell below registers on it via `@app.cell`,
# and the `__main__` guard at the bottom calls `app.run()`.
app = marimo.App(width="medium")
@app.cell
def _():
    # Dependency cell: imports the libraries used by the notebook and hands
    # `StringIO`, `mo` and `pl` to the other cells through the return tuple.
    from io import StringIO

    import marimo as mo
    import polars as pl

    return StringIO, mo, pl
@app.cell
def _(StringIO, pl):
    # Stand-in for a client upload: an in-memory CSV whose header row does not
    # use the column names the downstream pipeline expects.
    csv_data = """
Fecha de viaje,Destino,Duración
2023-01-01,Paris,5
2023-02-15,New York,7
2023-03-10,Tokyo,10
"""
    # Expose the text through a file-like buffer and parse it into a DataFrame.
    _csv_buffer = StringIO(csv_data)
    uploaded_df = pl.read_csv(_csv_buffer)
    # Bare expression kept as the final statement before the return
    # (presumably what marimo renders as this cell's output — confirm).
    uploaded_df
    return csv_data, uploaded_df
@app.cell
def _(mo, uploaded_df):
    # Build the column-mapping form: one radio selector per expected column,
    # each offering every uploaded column as a choice, plus a run button.
    #
    # Returns the (unused, empty) `column_mapping` dict, the last loop value
    # `expected_col`, the `expected_columns` list, the `map_button` and the
    # `radio` dict keyed by expected column name.

    # Define expected column names for the downstream pipeline
    expected_columns = ['Travel Date', 'Destination', 'Duration']
    # Not populated here; kept because it is one of this cell's returned names.
    column_mapping = {}

    # Underscore-prefixed names keep these helpers private to this cell.
    _uploaded_columns = uploaded_df.columns

    # Create a UI element for each expected column to map it to one of the
    # uploaded columns.
    radio = {}
    for _position, expected_col in enumerate(expected_columns):
        # Pre-select the uploaded column at the same position so the untouched
        # form is a one-to-one mapping instead of every selector pointing at
        # the first column. Fall back to the first column when the upload has
        # fewer columns than expected, and to no selection when it has none
        # (indexing `columns[0]` would raise IndexError in that case).
        if _position < len(_uploaded_columns):
            _default_choice = _uploaded_columns[_position]
        elif _uploaded_columns:
            _default_choice = _uploaded_columns[0]
        else:
            _default_choice = None
        radio[expected_col] = mo.ui.radio(
            options=_uploaded_columns,
            value=_default_choice,
            label=f"Map to **'{expected_col}'**:",
            inline=True,
        )

    map_button = mo.ui.run_button(label="Map columns!")
    # Display the UI elements in a vertical stack
    mo.vstack(list(radio.values()) + [map_button])
    return column_mapping, expected_col, expected_columns, map_button, radio
@app.cell
def _(map_button, mo, radio, uploaded_df):
    # Apply the user's column mapping once the run button has been pressed.
    #
    # Returns `mapped_columns` (uploaded name -> expected name) and
    # `renamed_df`, the uploaded DataFrame with those renames applied.

    # Do nothing until the button is clicked.
    mo.stop(not map_button.value)

    # Selected uploaded column for each expected column, in form order.
    _selected = [element.value for element in radio.values()]

    # The rename mapping is keyed by uploaded column, so two selectors pointing
    # at the same uploaded column would silently overwrite each other and leave
    # an expected column unmapped. Halt with an explanation instead.
    _duplicates = sorted({name for name in _selected if _selected.count(name) > 1})
    mo.stop(
        bool(_duplicates),
        mo.md(
            "**Each uploaded column can only be mapped once.** "
            f"Selected more than once: {', '.join(map(str, _duplicates))}"
        ),
    )

    # Create a new DataFrame with columns renamed according to the user's mapping
    mapped_columns = {element.value: expected for expected, element in radio.items()}
    renamed_df = uploaded_df.rename(mapped_columns)
    renamed_df
    return mapped_columns, renamed_df
# Run the notebook app only when this file is executed as a script.
if __name__ == "__main__":
    app.run()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment