# %% [markdown]
# # IoT Intrusion Detection
#
# The N-BaIoT dataset contains traffic data for 9 IoT devices, comprising both benign traffic and a variety of malicious attacks. Here we train two deep neural networks to identify cyberattacks, using combined traffic captures from four of the devices.
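# %% [markdown]
# Before loading anything, it helps to confirm that the expected N-BaIoT CSVs are present. The check below assumes the dataset was extracted to a local `input/` directory, as the read paths in the next cell do; adjust the prefix if your copy lives elsewhere.
# %% [code] {"jupyter":{"outputs_hidden":false}}
import glob
# optional sanity check: list the device CSVs the loading cell expects to find
for path in sorted(glob.glob('input/*.csv')):
    print(path)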
# %% [code] {"jupyter":{"outputs_hidden":false}}
import datetime
import numpy as np
import pandas as pd
# %% [code] {"jupyter":{"outputs_hidden":false}}
# N-BaIoT file naming: <device>.<traffic>.csv. Devices 1, 2, and 8 have both
# gafgyt and mirai captures; device 7 has gafgyt captures only.
def load(traffic, devices):
    return pd.concat([pd.read_csv(f'input/{d}.{traffic}.csv') for d in devices],
                     ignore_index=True)

gafgyt_devices = ['8', '7', '1', '2']
mirai_devices = ['8', '2', '1']

benign = load('benign', gafgyt_devices)
g_c = load('gafgyt.combo', gafgyt_devices)
g_j = load('gafgyt.junk', gafgyt_devices)
g_s = load('gafgyt.scan', gafgyt_devices)
g_t = load('gafgyt.tcp', gafgyt_devices)
g_u = load('gafgyt.udp', gafgyt_devices)
ack = load('mirai.ack', mirai_devices)
sca = load('mirai.scan', mirai_devices)
syn = load('mirai.syn', mirai_devices)
udp = load('mirai.udp', mirai_devices)
pln = load('mirai.udpplain', mirai_devices)

# downsample each class; the large flood captures are cut harder than the rest
benign = benign.sample(frac=0.25, replace=False)
g_c = g_c.sample(frac=0.25, replace=False)
g_j = g_j.sample(frac=0.5, replace=False)
g_s = g_s.sample(frac=0.5, replace=False)
g_t = g_t.sample(frac=0.15, replace=False)
g_u = g_u.sample(frac=0.15, replace=False)
ack = ack.sample(frac=0.15, replace=False)
sca = sca.sample(frac=0.15, replace=False)
syn = syn.sample(frac=0.15, replace=False)
udp = udp.sample(frac=0.15, replace=False)
pln = pln.sample(frac=0.15, replace=False)

# label each class, then combine everything into one dataframe
benign['type'] = 'benign'
g_c['type'] = 'gafgyt_combo'
g_j['type'] = 'gafgyt_junk'
g_s['type'] = 'gafgyt_scan'
g_t['type'] = 'gafgyt_tcp'
g_u['type'] = 'gafgyt_udp'
ack['type'] = 'mirai_ack'
sca['type'] = 'mirai_scan'
syn['type'] = 'mirai_syn'
udp['type'] = 'mirai_udp'
pln['type'] = 'mirai_udpplain'

data = pd.concat([benign, g_c, g_j, g_s, g_t, g_u, ack, sca, syn, udp, pln],
                 axis=0, sort=False, ignore_index=True)
# %% [code] {"jupyter":{"outputs_hidden":false}}
#how many instances of each class
data.groupby('type')['type'].count()
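# %% [markdown]
# The same counts viewed as proportions make the remaining class imbalance easier to read; `normalize=True` is a small addition not in the original script.
# %% [code] {"jupyter":{"outputs_hidden":false}}
#class distribution as fractions of the combined dataset
data['type'].value_counts(normalize=True)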
# %% [code] {"jupyter":{"outputs_hidden":false}}
#shuffle rows of dataframe
sampler=np.random.permutation(len(data))
data=data.take(sampler)
data.head(20)
# %% [code] {"jupyter":{"outputs_hidden":false}}
#dummy encode labels, store separately
labels_full=pd.get_dummies(data['type'], prefix='type')
labels_full.head()
# %% [code] {"jupyter":{"outputs_hidden":false}}
#drop labels from training dataset
data=data.drop(columns='type')
data.head()
# %% [code] {"jupyter":{"outputs_hidden":false}}
#standardize numerical columns (zero mean, unit variance)
def standardize(df, col):
    df[col] = (df[col] - df[col].mean()) / df[col].std()

data_st = data.copy()
# 'type' has already been dropped, so every remaining column is numeric
for col in data_st.columns:
    standardize(data_st, col)
data_st.head()
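# %% [markdown]
# Note that the cell above computes the mean and standard deviation over the full dataset, so the test split created later contributes to the scaling statistics. A leakage-free variant fits the scaler on training rows only; the sketch below uses sklearn's `StandardScaler` with an illustrative split (the variable names and split here are for demonstration, not the split used later in this notebook).
# %% [code] {"jupyter":{"outputs_hidden":false}}
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# illustrative split; scaling statistics come from the training rows only
x_tr_demo, x_te_demo = train_test_split(data.values, test_size=0.25, random_state=42)
scaler = StandardScaler().fit(x_tr_demo)
x_tr_scaled = scaler.transform(x_tr_demo)
x_te_scaled = scaler.transform(x_te_demo)  # reuse training statistics on test rows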
# %% [code] {"jupyter":{"outputs_hidden":false}}
#training data for the neural net
train_data_st=data_st.values
train_data_st
# %% [code] {"jupyter":{"outputs_hidden":false}}
#labels for training
labels=labels_full.values
labels
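# %% [markdown]
# The softmax models below emit class probabilities in the column order of `labels_full`, and `np.argmax` converts them to column indices. Keeping that order around makes predictions readable; `class_names` is a helper introduced here, not part of the original script.
# %% [code] {"jupyter":{"outputs_hidden":false}}
#column order of the one-hot matrix, e.g. 'type_benign' -> 'benign'
class_names = [c.replace('type_', '') for c in labels_full.columns]
class_names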
# %% [markdown]
# ### Keras model
# %% [code] {"jupyter":{"outputs_hidden":false}}
#import libraries
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn import metrics
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation
from tensorflow.keras.callbacks import EarlyStopping
# test/train split, 25% test
x_train_st, x_test_st, y_train_st, y_test_st = train_test_split(
    train_data_st, labels, test_size=0.25, random_state=42)
log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)
# create and fit model (input_dim is only needed on the first layer)
model = Sequential()
model.add(Dense(10, input_dim=train_data_st.shape[1], activation='relu'))
model.add(Dense(40, activation='relu'))
model.add(Dense(10, activation='relu'))
model.add(Dense(labels.shape[1], activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam')
monitor = EarlyStopping(monitor='val_loss', min_delta=1e-3,
                        patience=5, verbose=1, mode='auto')
model.fit(x_train_st, y_train_st, validation_data=(x_test_st, y_test_st),
          callbacks=[monitor, tensorboard_callback], verbose=2, epochs=50)
# %% [code] {"jupyter":{"outputs_hidden":false}}
# metrics
pred_st = model.predict(x_test_st)
pred_st = np.argmax(pred_st,axis=1)
y_eval_st = np.argmax(y_test_st,axis=1)
score_st = metrics.accuracy_score(y_eval_st, pred_st)
print("accuracy: {}".format(score_st))
# %% [code] {"jupyter":{"outputs_hidden":false}}
#second, wider model for comparison
model2 = Sequential()
model2.add(Dense(32, input_dim=train_data_st.shape[1], activation='relu'))
model2.add(Dense(72, activation='relu'))
model2.add(Dense(32, activation='relu'))
model2.add(Dense(labels.shape[1], activation='softmax'))
model2.compile(loss='categorical_crossentropy', optimizer='adam')
monitor = EarlyStopping(monitor='val_loss', min_delta=1e-3,
                        patience=5, verbose=1, mode='auto')
model2.fit(x_train_st, y_train_st, validation_data=(x_test_st, y_test_st),
           callbacks=[monitor, tensorboard_callback], verbose=2, epochs=100)

# %% [code] {"jupyter":{"outputs_hidden":false}}
# metrics
pred_st1 = model2.predict(x_test_st)
pred_st1 = np.argmax(pred_st1, axis=1)
y_eval_st1 = np.argmax(y_test_st, axis=1)
score_st1 = metrics.accuracy_score(y_eval_st1, pred_st1)
print("accuracy: {}".format(score_st1))