Last active
November 26, 2024 19:57
-
-
Save jxu/ea586ed03af62b9f58e42e4a7d3a49dd to your computer and use it in GitHub Desktop.
Pandas Cheatsheet (Avoid indexes)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
### Data import ### | |
# https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_csv.html | |
df = pd.read_csv("file.csv") | |
### Data types ### | |
df.info() | |
### DataFrame operations ### | |
# Subset, rename, retype columns | |
df[["a", "b"]] | |
df.rename(columns={"y":"year"}) | |
df.astype({"a": int}) | |
df.sort_values("col", ascending=False) | |
# Unique and duplicated values | |
df["col"].unique # numpy array of unique values | |
df["col"].is_unique | |
df.drop_duplicates(subset=["col"]) | |
df.duplicated(subset="col") # Boolean Series | |
# Summary statistics | |
df["col"].value_counts(dropna=False) | |
df["col"].isna().sum() | |
# Table join | |
# Warning: NULLs are joined together, unlike SQL | |
# https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.merge.html | |
# Use on="key" if joining columns are named the same | |
df.merge(df2, how="left", left_on="lk", right_on="rk") | |
# SQL-style GROUP BY named aggregation functions | |
# Common functions: size, count (non-NAs), nunique (count distinct), first, max, min, mean | |
df.groupby("group", as_index=False).agg( | |
a_sum=('a', "sum"), | |
d_range=('d', lambda x: x.max() - x.min()) | |
) | |
### Data export | |
df.to_csv("file.csv", index=False) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment