Created
December 20, 2020 18:31
-
-
Save mtanco/43f80c8c219aea52d8a708f05be21bb1 to your computer and use it in GitHub Desktop.
Example of how to format a pandas dataframe for plotting with native Wave plot functions
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Plot / Dataframe
# Examples of how to format pandas data when plotting
# Use the `tolist()` function on `df.columns` and `df.values` along with Wave's `data` class
# ---
from h2o_wave import site, data, ui, main
import pandas as pd
import numpy as np

# Page to hold our charts
page = site['/demo']

# Build a small random dataset: two numeric columns plus a categorical
# column that labels roughly 80% of the rows 'Train' and 20% 'Test'.
# No random seed is set, so the values differ on every run; the sample
# outputs shown in the comments below are illustrative only.
n = 100
df = pd.DataFrame({
    'length': np.random.rand(n),
    'width': np.random.rand(n),
    'data_type': np.random.choice(a=['Train', 'Test'], size=n, p=[0.8, 0.2]),
})

print(df.head())  # logging: view data shape
#      length     width data_type
# 0  0.675802  0.022420     Train
# 1  0.025449  0.527442     Train
# 2  0.236596  0.497024     Train
# 3  0.971468  0.025671     Train
# 4  0.159988  0.564333     Train

# Plot two numeric columns by each other and color based on a third, categorical column.
# The column names become the data fields and each dataframe row becomes one data row;
# the '=name' strings in the mark refer to those fields.
# `v` holds the value returned by `page.add`; it is not used further in this script.
v = page.add('scatter', ui.plot_card(
    box='1 1 4 4',
    title='Scatter Plot from Dataframe',
    data=data(
        fields=df.columns.tolist(),
        rows=df.values.tolist(),
    ),
    plot=ui.plot(marks=[ui.mark(
        type='point',
        x='=length', x_title='Length (cm)',
        y='=width', y_title='Width (cm)',
        color='=data_type', shape='circle',
    )]),
))

# Aggregate the data in pandas and plot a bar chart of the average value of one column by some other column.
# reset_index() turns the group key back into a regular 'data_type' column so that
# it is included in `columns` / `values` below.
df_agg = df.groupby(['data_type']).mean().reset_index()

print(df_agg.head())  # logging: view data shape
#   data_type    length     width
# 0      Test  0.438477  0.484714
# 1     Train  0.574956  0.495716

v = page.add('bar', ui.plot_card(
    box='5 1 4 4',
    title='Bar Plot from Aggregated Dataframe',
    data=data(
        fields=df_agg.columns.tolist(),
        rows=df_agg.values.tolist(),
    ),
    plot=ui.plot(marks=[ui.mark(
        type='interval',
        x='=data_type', x_title='Modeling Data Type',
        y='=length', y_title='Length (cm)',
    )]),
))

# After aggregating use melt to format wide data as tall data for other types of plotting.
# Each (data_type, feature) pair becomes its own row with the columns
# 'data_type', 'variable' (the original column name) and 'value'.
df_agg_melt = pd.melt(df_agg, id_vars=['data_type'], value_vars=['length', 'width'])

print(df_agg_melt.head())
#   data_type variable     value
# 0      Test   length  0.581804
# 1     Train   length  0.526647
# 2      Test    width  0.406644
# 3     Train    width  0.477375

# One bar per (feature, data_type) pair: x is the feature name, color is the data type.
v = page.add('bar_group', ui.plot_card(
    box='1 5 4 4',
    title='Grouped Bar Plot from Data Frame',
    data=data(
        fields=df_agg_melt.columns.tolist(),
        rows=df_agg_melt.values.tolist(),
    ),
    plot=ui.plot(marks=[ui.mark(
        type='interval',
        x='=variable', x_title='Data Feature',
        y='=value', y_title='cm',
        color='=data_type', dodge='auto',
    )]),
))

# Nothing is sent until save() is called; this pushes all three cards added above.
page.save()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment