Last active
July 14, 2021 18:14
-
-
Save aflansburg/87b7a9583e0a89cebc4a26b223a81f99 to your computer and use it in GitHub Desktop.
Dual histogram + boxplot with KDE for univariate analysis + Mean & Median Lines
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# import libs | |
import pandas as pd | |
import numpy as np | |
import seaborn as sns | |
import matplotlib.pyplot as plt | |
# this function creates a combined histogram + boxplot using Seaborn's JointGrid
# we'll also add type annotations as hints for future users
def dual_plot(series: pd.Series, figsize: tuple = (16, 8),
              bins: int = None, return_plot: bool = False,
              color: str = "pastel", xlim: tuple = None, tick_m: int = None,
              add_lbl: str = None) -> "sns.JointGrid | None":
    '''
    Draw a histogram (with KDE) topped by a boxplot for a single feature,
    with vertical lines marking the mean (solid red) and median (dashed blue).

    Parameters:
        series: the feature to plot,
        figsize: the size of the plot; only the second element (height) is
            used, because it is passed as `height` to `sns.JointGrid`,
        bins: forwarded to `sns.histplot` as `bins` when given; when None,
            histplot's own default binning is used,
        return_plot: Return the plot if true, otherwise just show the plot using `matplotlib.pyplot.show()`,
        color: seaborn color palette to use for the plot,
        xlim: (left, right) limits of the x axis,
        tick_m: place x ticks at the multiples of `tick_m` nearest to the
            observed values - useful for integers,
        add_lbl: additional text to append to the x-label
    Returns:
        The `sns.JointGrid` if `return_plot` is true; otherwise the plot is
        shown with `matplotlib.pyplot.show()` and None is returned.
    '''
    # set the color palette (note: this changes seaborn's global palette)
    sns.set_palette(color)

    # create the JointGrid object; named `grid` so it does not shadow
    # this function's own name
    grid = sns.JointGrid(x=series, height=figsize[1])

    # add the seaborn histplot, forwarding `bins` only when the caller set it
    hist_kwargs = {"kde": True}
    if bins is not None:
        hist_kwargs["bins"] = bins
    grid.plot_joint(sns.histplot, **hist_kwargs)

    # get ax_joint for joint plot so we can 'do things' with it
    ax_joint = grid.ax_joint

    # set the seaborn plot title with some padding
    ax_joint.set_title(label=series.name, fontdict={'fontsize': 20}, pad=16)

    # add to x-label if necessary
    if add_lbl:
        ax_joint.set_xlabel(ax_joint.get_xlabel() + ' ' + add_lbl)

    # set limit of x-axis
    if xlim is not None:
        ax_joint.set_xlim(xlim)

    # show more ticks - useful for integers
    if tick_m:
        # snap every observed value to its nearest multiple of tick_m;
        # missing values are dropped because they cannot be rounded
        observed = series.dropna().to_numpy()
        x_ticks = np.unique(tick_m * np.round(observed / tick_m))
        ax_joint.set_xticks(x_ticks)

    # central tendency lines on both axes; the pandas reducers skip NaN,
    # so mean and median treat missing values the same way
    mean_value = series.mean()
    median_value = series.median()
    for axis in (grid.ax_joint, grid.ax_marg_x):
        axis.axvline(mean_value, color='r', linestyle='-')
        axis.axvline(median_value, color='b', linestyle='--')

    # add the boxplot
    grid.plot_marginals(sns.boxplot)

    if return_plot:
        return grid
    plt.show()
    return None
# example usage: plot the 'Income' column
# NOTE(review): `data` is not defined in this file - presumably a DataFrame
# loaded elsewhere (e.g. in the notebook); confirm before running
dual_plot(
    series=data['Income'],
    color='colorblind',
    add_lbl="(in thousands - USD)",
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment