Skip to content

Instantly share code, notes, and snippets.

@mizchi
Created February 7, 2019 18:53
Show Gist options
  • Save mizchi/40b058e21cdda20b10794c326926a814 to your computer and use it in GitHub Desktop.
Save mizchi/40b058e21cdda20b10794c326926a814 to your computer and use it in GitHub Desktop.
from keras.optimizers import SGD, Adam
from keras.layers import Dense, Activation, Dropout
from keras.models import Sequential
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
import numpy as np
import pandas as pd
def normalize_data(data):
    """Convert the raw passenger table into an all-numeric feature frame.

    Processing steps, in order:

    1. Drop the columns the model does not use.
    2. Drop every row that still contains a missing value.
    3. One-hot encode ``Pclass`` into ``Pclass_<value>`` columns.
    4. Encode ``Sex`` as an integer label.
    5. Standardize ``Age`` with a scaler fitted on the rows passed in.

    Args:
        data: DataFrame containing at least the columns ``PassengerId``,
            ``Name``, ``Ticket``, ``Fare``, ``Cabin``, ``Embarked``,
            ``Pclass``, ``Sex`` and ``Age``. ``Sex`` must only hold the
            strings ``"male"`` / ``"female"``.

    Returns:
        A new DataFrame; the input frame is not modified.
    """
    unused_columns = ["PassengerId", "Name",
                      "Ticket", "Fare", "Cabin", "Embarked"]
    data = data.drop(unused_columns, axis=1)
    data = data.dropna()

    # One-hot encode categorical columns. The dtype is pinned to uint8 (the
    # historical default) because newer pandas emits bool columns, and a frame
    # mixing bool with numeric columns yields an object array from `.values`,
    # which Keras cannot turn into a tensor.
    dummy_columns = ["Pclass"]
    for column in dummy_columns:
        dummies = pd.get_dummies(data[column], prefix=column, dtype=np.uint8)
        data = pd.concat([data.drop(column, axis=1), dummies], axis=1)

    # Encode the Sex label as an int. The encoder is fitted on the two known
    # labels so the mapping does not depend on which values happen to occur.
    sex_encoder = LabelEncoder()
    sex_encoder.fit(["male", "female"])
    data["Sex"] = sex_encoder.transform(data["Sex"])

    # Standardize Age. The scaler wants a 2-D (n, 1) input; flatten its
    # output back to 1-D before storing it as a single column.
    age_scaler = StandardScaler()
    scaled_age = age_scaler.fit_transform(data["Age"].values.reshape(-1, 1))
    data["Age"] = scaled_age.ravel()
    return data
def split_train_and_test(data, rate=0.8):
    """Split a frame positionally into training and test arrays.

    The first ``int(len(data) * rate)`` rows form the training set. Of the
    rows that remain, the first half (rounded down) is skipped and NOT
    returned; only the rows after that gap form the test set.

    Args:
        data: DataFrame with a ``Survived`` label column; every other
            column is treated as a feature.
        rate: Fraction of rows assigned to the training set.

    Returns:
        Tuple ``(train_x, train_y, test_x, test_y)`` of NumPy arrays. The
        ``*_y`` arrays are column vectors of shape ``(n, 1)``.
    """
    # Convert to NumPy once, then slice the arrays by position.
    labels = data["Survived"].values.reshape(-1, 1)
    features = data.drop(["Survived"], axis=1).values

    row_count = len(features)
    train_end = int(row_count * rate)
    # Skip half of the held-out rows before the test set begins.
    test_start = train_end + (row_count - train_end) // 2

    return (
        features[:train_end],
        labels[:train_end],
        features[test_start:],
        labels[test_start:],
    )
def build_model(input_dim):
    """Build and compile a small binary classifier.

    Architecture: ``Dense(20)`` -> ReLU -> ``Dense(1)`` -> sigmoid, compiled
    with SGD (learning rate 0.01), binary cross-entropy loss and an
    accuracy metric.

    Args:
        input_dim: Number of input features per sample.

    Returns:
        The compiled ``Sequential`` model.
    """
    model = Sequential()
    model.add(Dense(20, input_dim=input_dim))
    model.add(Activation('relu'))
    # No input size here: Sequential infers it from the previous layer.
    model.add(Dense(1))
    model.add(Activation('sigmoid'))
    # The learning rate is passed positionally because its keyword was
    # renamed from `lr` to `learning_rate` between Keras releases; the first
    # positional parameter is the learning rate in both.
    model.compile(optimizer=SGD(0.01),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    return model
# Load the training CSV and turn it into numeric train/test arrays.
train_data = pd.read_csv("data/train.csv")
normalized_data = normalize_data(train_data)
train_x, train_y, test_x, test_y = split_train_and_test(normalized_data, 0.8)

# The network's input width is the number of feature columns.
model = build_model(train_x.shape[1])

# Train. `epochs` replaces the Keras 1 keyword `nb_epoch`, which current
# Keras no longer accepts.
model.fit(train_x, train_y, epochs=120, batch_size=16)

# Evaluate on the held-out rows; with one metric compiled in, evaluate()
# returns the loss followed by the accuracy.
loss, accuracy = model.evaluate(test_x, test_y)
print("loss:{0} -- accuracy:{1}".format(loss, accuracy))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment