Created
May 3, 2021 11:52
-
-
Save ahue/a27654c453bf5c89227ab72ce17c5753 to your computer and use it in GitHub Desktop.
Analyze cardinalities of a pandas DataFrame
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import seaborn as sns | |
def cardinalities(df: pd.DataFrame, plot: bool = False):
    """
    Analyse the pairwise cardinalities between the columns of a data frame.

    For every ordered pair of columns (i, j) the frame is grouped by column i
    and the number of distinct values of column j is counted per group. If any
    group holds more than one distinct value the relation i -> j is "n",
    otherwise it is "1". The diagonal (i -> i) is always "1".

    df      the data frame to be analysed
    plot    whether to additionally draw the matrix as a seaborn heatmap

    returns a 1-tuple ``(matrix,)`` or, when ``plot`` is true, a 2-tuple
            ``(matrix, axes)``. ``matrix`` is a DataFrame M(i, j) of the
            strings "1" / "n" whose rows and columns are both the sorted
            column labels of ``df``. It reads row-wise: for each value of
            attribute i there are 1 or n values in attribute j. In the heatmap
            "1" is drawn as 1.0 and "n" as 2.0.
    """
    ordered = df.columns.sort_values()

    # Start from an all-"1" matrix that already carries the final labels and
    # fill it in *by label*. This keeps every cell attached to the right column
    # regardless of the order in which intermediate results are produced.
    car = pd.DataFrame("1", index=ordered, columns=ordered, dtype=object)

    for group_col in df.columns:
        others = [col for col in df.columns if col != group_col]
        if not others:
            # A single-column frame has nothing to compare against.
            continue

        # Distinct values of every other column per value of group_col,
        # reduced to the worst case (maximum) over all groups.
        max_distinct = df.groupby(group_col)[others].nunique().max()

        # Align to the output column order; group_col itself is missing from
        # max_distinct and becomes NaN, which compares as "not greater than 1".
        is_many = max_distinct.reindex(ordered).gt(1)
        car.loc[group_col] = ["n" if flag else "1" for flag in is_many]

    if plot:
        # Map "1" -> 1.0 and "n" -> 2.0 for the heatmap.
        numeric = (car == "n").astype(float) + 1.0
        axes = sns.heatmap(numeric, cbar=False)
        return (car, axes)

    return (car, )
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment