lstm for opseq — a binary LSTM classifier over sequences of assembly opcode families
# do a lot of imports
from keras.models import Sequential
from keras.layers import LSTM, Dense
import numpy as np
from os import listdir
import os.path
import json
from sklearn.preprocessing import OneHotEncoder
# names of assembly instruction families
labels = ["cdt", "udt", "sdt", "adt", "cmpdt", "cvt", "bai", "iai",
          "dai", "fai", "fci", "sai", "li", "sri", "bii", "byi",
          "cj", "uj", "int", "si", "io", "flg", "seg", "misc", "sr",
          "rng", "arr", "pmi", "pci", "mmxt", "mmxc", "mmxa",
          "mmxcmp", "mmxl", "mmxsr", "mmxsm", "sset", "ssea",
          "ssecmp", "ssel", "ssesu", "ssecvt", "fdt", "ftrdt", "flc",
          "tse", "ssebi", "vmx", "other"]
# encode each label into a one-hot vector
labels_array = np.array(labels).reshape(-1, 1)
hot_encoder = OneHotEncoder(sparse=False)  # use sparse_output=False on scikit-learn >= 1.2
encoded_labels = hot_encoder.fit_transform(labels_array)
encode_dict = {}
for l, e in zip(labels, encoded_labels):
    encode_dict[l] = e
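# A quick sketch of how the dictionary is meant to be used: map a sequence of
# opcode families to a (sequence_length, 49) array that the LSTM can consume.
# "encode_sequence" and the sample sequence are illustrative additions, not
# part of the original gist.
def encode_sequence(op_families):
    return np.array([encode_dict[f] for f in op_families])

sample = encode_sequence(["cdt", "li", "cj", "other"])
print(sample.shape)  # (4, 49): one 49-dim one-hot vector per instruction family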
# configs for training
data_dim = 49     # dimension of a vector (one-hot over the 49 families)
num_classes = 2   # binary task: 2 classes
timesteps = 100   # length of each opcode sequence; assumed placeholder (not defined in the original gist) — set to your window size
batch_size = 256
lstm_units = 128  # number of units per layer
lstm_layers = 3
stateful = False
epochs = 100
# LSTM builder
def create_lstm():
    model = Sequential()
    # first layer returns full sequences so the next LSTM receives 3D input
    layer1 = LSTM(lstm_units, return_sequences=True, input_shape=(timesteps, data_dim))
    model.add(layer1)
    # middle layers (lstm_layers - 2 of them) also return sequences
    for _ in range(lstm_layers - 2):
        model.add(LSTM(lstm_units, return_sequences=True))
    # last LSTM returns only its final state, fed to a sigmoid for binary output
    model.add(LSTM(lstm_units))
    model.add(Dense(1, activation='sigmoid'))
    return model
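# Minimal sanity check (a sketch, assuming the config above): the stacked model
# maps a batch of (timesteps, data_dim) sequences to one probability each.
demo = create_lstm()
print(demo.output_shape)  # (None, 1)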
def model_compiler(model):
    # you can definitely change these parameters
    loss = 'binary_crossentropy'
    optimizer = 'rmsprop'
    metrics = ['binary_accuracy']
    model.compile(loss=loss, optimizer=optimizer, metrics=metrics)
    model.summary()
def model_trainer(model, data, labels):
    # use the epochs/batch_size configured above (the original hard-coded 50 and
    # 1536 here, contradicting the config block); the unreachable second return
    # is dropped
    history = model.fit(data, labels, validation_split=0.25,
                        epochs=epochs, batch_size=batch_size)
    return history, model
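# End-to-end sketch on synthetic data (illustrative only, not from the original
# gist): real inputs should be one-hot encoded opcode sequences of shape
# (n_samples, timesteps, data_dim) with binary labels.
x_fake = np.eye(data_dim)[np.random.randint(0, data_dim, size=(512, timesteps))]
y_fake = np.random.randint(0, 2, size=(512, 1))

model = create_lstm()
model_compiler(model)
history, model = model_trainer(model, x_fake, y_fake)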