import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable as V  # V(...) wraps a tensor in an autograd Variable
class RNNModel(nn.Module):
    """
    Neural network module with an embedding layer, a recurrent module and an output linear layer.

    Arguments:
        rnn_type (str) -- type of rnn module to use; options are ['LSTM', 'GRU', 'RNN_TANH', 'RNN_RELU']
        input_size (int) -- size of the dictionary of embeddings
        embz_size (int) -- the size of each embedding vector
        hidden_size (int) -- the number of features in the hidden state
        batch_size (int) -- the size of training batches
        output_size (int) -- the number of output classes to be predicted
        num_layers (int, optional) -- number of recurrent layers. Default=1
        dropout (float, optional) -- dropout probability. Default=0.5
        bidirectional (bool, optional) -- if True, becomes a bidirectional RNN. Default=True
        tie_weights (bool, optional) -- if True, ties the weights of the embedding and output layers. Default=False

    Inputs: input
        input of shape (seq_length, batch_size) -- tensor containing the features of the input sequence

    Returns: output
        output of shape (batch_size, output_size) -- tensor containing the sigmoid activation on the
            output features h_t from the last layer of the rnn, for the last time-step t.
    """
    def __init__(self, rnn_type, input_size, embz_size, hidden_size, batch_size, output_size,
                 num_layers=1, dropout=0.5, bidirectional=True, tie_weights=False):
        super().__init__()
        if bidirectional: self.num_directions = 2
        else: self.num_directions = 1
        self.hidden_size, self.output_size, self.embz_size = hidden_size, output_size, embz_size
        self.bidirectional, self.rnn_type, self.num_layers = bidirectional, rnn_type, num_layers
        self.drop = nn.Dropout(dropout)
        self.embedding_layer = nn.Embedding(input_size, embz_size)
        self.output_layer = nn.Linear(hidden_size * self.num_directions, output_size)
        self.init_hidden(batch_size)
        if rnn_type in ['LSTM', 'GRU']:
            self.rnn = getattr(nn, rnn_type)(embz_size, hidden_size, num_layers=num_layers,
                                             dropout=dropout, bidirectional=bidirectional)
        else:
            try:
                nonlinearity = {'RNN_TANH': 'tanh', 'RNN_RELU': 'relu'}[rnn_type]
            except KeyError:
                raise ValueError("""An invalid option for '--rnn_type' was supplied,
                                 options are ['LSTM', 'GRU', 'RNN_TANH', 'RNN_RELU']""")
            self.rnn = nn.RNN(embz_size, hidden_size, num_layers=num_layers,
                              dropout=dropout, bidirectional=bidirectional, nonlinearity=nonlinearity)
        if tie_weights:
            if hidden_size != embz_size:
                raise ValueError("When using the tied flag, hidden size must be equal to embedding size")
            elif bidirectional:
                raise ValueError("When using the tied flag, set bidirectional=False")
            self.output_layer.weight = self.embedding_layer.weight
    def init_emb_weights(self, vector_weight_matrix):
        # copy pretrained embedding vectors into the embedding layer
        self.embedding_layer.weight.data.copy_(vector_weight_matrix)

    def init_identity_weights(self):
        # initialise the RNN_RELU weights with identity matrices (IRNN-style initialisation)
        if self.rnn_type == 'RNN_RELU':
            self.rnn.weight_ih_l0.data.copy_(torch.eye(self.hidden_size, self.embz_size))
            self.rnn.weight_hh_l0.data.copy_(torch.eye(self.hidden_size, self.hidden_size))
            if self.bidirectional:
                self.rnn.weight_ih_l0_reverse.data.copy_(torch.eye(self.hidden_size, self.embz_size))
                self.rnn.weight_hh_l0_reverse.data.copy_(torch.eye(self.hidden_size, self.hidden_size))
    def init_hidden(self, batch_size):
        # hidden state shape: (num_layers * num_directions, batch_size, hidden_size)
        if self.rnn_type == 'LSTM':
            self.hidden = (V(torch.zeros(self.num_layers * self.num_directions, batch_size, self.hidden_size)),
                           V(torch.zeros(self.num_layers * self.num_directions, batch_size, self.hidden_size)))
        else:
            self.hidden = V(torch.zeros(self.num_layers * self.num_directions, batch_size, self.hidden_size))
    def forward(self, seq):
        # seq has shape (seq_length, batch_size); seq[0] is the first time-step
        batch_size = seq[0].size(0)
        # re-initialise the hidden state if the batch size changed (e.g. a smaller final batch);
        # for LSTM the hidden state is a tuple, otherwise a single tensor
        current_batch = self.hidden[0].size(1) if self.rnn_type == 'LSTM' else self.hidden.size(1)
        if current_batch != batch_size:
            self.init_hidden(batch_size)
        input_tensor = self.drop(self.embedding_layer(seq))
        output, hidden = self.rnn(input_tensor, self.hidden)
        output = self.drop(self.output_layer(output))
        # sigmoid over the output features at the last time-step
        return F.sigmoid(output[-1, :, :])
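A minimal usage sketch, assuming made-up hyperparameters and a fake batch of token ids (none of these values come from the gist): it instantiates the module for single-output classification and runs one forward pass.

# Hypothetical hyperparameters, for illustration only
model = RNNModel(rnn_type='LSTM', input_size=10000, embz_size=300, hidden_size=300,
                 batch_size=32, output_size=1, num_layers=1, dropout=0.5, bidirectional=True)

# Fake batch of integer-encoded tokens with shape (seq_length, batch_size)
seq = torch.randint(0, 10000, (50, 32), dtype=torch.long)

probs = model(seq)   # shape (32, 1): sigmoid activations for the last time-step
print(probs.shape)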
Does 'V' in V(torch.zeros(self.num_layers*self.num_directions, batch_size, self.hidden_size)) refer to Variable?