Created
December 2, 2020 18:49
-
-
Save toby-p/f3e2e7e41525be9d6494b1d2b3ecac09 to your computer and use it in GitHub Desktop.
Apply scikit-learn's PolynomialFeatures class to a pandas DataFrame, keeping the original index and column labels.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
from sklearn.preprocessing import PolynomialFeatures | |
def apply_polynomials(df: pd.DataFrame, degree: int = 2,
                      interaction_only: bool = False,
                      include_bias: bool = False) -> pd.DataFrame:
    """Apply scikit-learn's PolynomialFeatures class to a pandas DataFrame.

    The result keeps the original index. The original column labels are
    reused for the untransformed input columns, and the columns are extended
    with all new polynomial features, labelled with the names reported by
    scikit-learn (their exact form depends on the installed scikit-learn
    version). Generally speaking this creates a lot of new features, and may
    cause a MemoryError if too many input features are in the DataFrame.
    See the scikit-learn documentation for more details:
    https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.PolynomialFeatures.html

    Args:
        df: original non-transformed data.
        degree: degree of the polynomial features.
        interaction_only: if True, only interaction features are produced.
        include_bias: if True, add a bias column of all 1s, labelled "bias".

    Returns:
        A new DataFrame holding the transformed values, indexed like ``df``.
    """
    pnf = PolynomialFeatures(degree=degree, interaction_only=interaction_only,
                             include_bias=include_bias)
    np_array = pnf.fit_transform(df)

    # get_feature_names() was deprecated in scikit-learn 1.0 and removed in
    # 1.2. Keep using it where it still exists so existing column labels do
    # not change, and fall back to get_feature_names_out() otherwise.
    legacy_names = getattr(pnf, "get_feature_names", None)
    if legacy_names is not None:
        generated = list(legacy_names())
    else:
        generated = list(pnf.get_feature_names_out())

    columns = ["bias"] if include_bias else []
    columns += list(df.columns)
    # The slice skips the generated names for the bias and the plain input
    # columns, which are already covered by the labels above. This relies on
    # scikit-learn emitting the bias first, then the inputs in their original
    # order, then the higher-order terms.
    columns += generated[len(columns):]
    return pd.DataFrame(np_array, index=df.index, columns=columns)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment