Created
March 7, 2018 14:14
-
-
Save kumar1202/0733c16ccdd4a36ab98cdba6393b3ac4 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# One-hot encode string categorical data, then decode one row back to its label.
#
# Pipeline: string labels -> integer codes (LabelEncoder) -> one-hot rows
# (OneHotEncoder) -> original label of the first row, recovered via argmax.
from numpy import array
from numpy import argmax
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder

# define example
data = ['cold', 'cold', 'warm', 'cold', 'hot', 'hot', 'warm', 'cold', 'warm', 'hot']
values = array(data)
print(values)

# integer encode: map each distinct string label to an integer code
label_encoder = LabelEncoder()
integer_encoded = label_encoder.fit_transform(values)
print(integer_encoded)

# one-hot encode
# The `sparse` keyword was renamed to `sparse_output` in scikit-learn 1.2 and
# removed in 1.4. Try the current name first and fall back to the old one so
# the script runs on both old and new releases.
try:
    onehot_encoder = OneHotEncoder(sparse_output=False)
except TypeError:
    onehot_encoder = OneHotEncoder(sparse=False)
# OneHotEncoder expects 2-D input, so turn the 1-D code vector into a single
# column of shape (n_samples, 1).
integer_encoded = integer_encoded.reshape(-1, 1)
onehot_encoded = onehot_encoder.fit_transform(integer_encoded)
print(onehot_encoded)

# invert first example: the position of the 1 in the first one-hot row is the
# integer code, which the label encoder maps back to the original string
inverted = label_encoder.inverse_transform([argmax(onehot_encoded[0, :])])
print(inverted)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment