Last active
December 13, 2016 11:35
-
-
Save thmavri/49d3f86e0bd609cdc68836e3c8e8ffa0 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build one hand-labelled training example for the slot-filling RNN.
# words2idx maps each vocabulary word to an integer index and labels2idx
# maps each slot label to an integer index; both are built above (elided).

# create an index of all the words (words2idx)
...
# create an index of all the labels (labels2idx)
...

# query to classify
q = "hotel amsterdam wifi"
# labels to use: "prop", "dest", "fac"
# NOTE(review): the comment above says "prop" but the code below looks up
# labels2idx['proptype'] — confirm which key the label index actually uses.

# this will contain all our training data
matrix = []
# this array will contain the indexes of the words in the query
words = []
# this array will contain the indexes of the corresponding labels
labels = []
# this will contain the [word-indexes, label-indexes] pair for one sentence
sentence = []

words.append(words2idx["hotel"])
labels.append(labels2idx['proptype'])
words.append(words2idx["amsterdam"])
labels.append(labels2idx['dest'])
words.append(words2idx["wifi"])
labels.append(labels2idx['fac'])

sentence.append(words)
# BUG FIX: `labels` was appended twice, so every sentence carried a duplicate
# copy of its label array; each sentence must be exactly [words, labels].
sentence.append(labels)
matrix.append(sentence)
...
# Hyper-parameter settings for the RNN (Elman-tutorial naming scheme).
s = {
    'fold': 3,                    # which of the 5 folds (0,1,2,3,4) to use
    'lr': 0.0627142536696559,     # initial learning rate
    'verbose': 1,                 # print progress while training
    'decay': False,               # decay on the learning rate
    'win': 7,                     # number of words in the context window
    'bs': 9,                      # number of backprop-through-time steps
    'nhidden': 100,               # number of hidden units
    'seed': 345,                  # RNG seed for reproducibility
    'emb_dimension': 100,         # dimension of the word embeddings
    'nepochs': 50,                # training epochs
}

# Seed both RNGs so every run is reproducible.
numpy.random.seed(s['seed'])
random.seed(s['seed'])

# Instantiate the model: hidden size, number of classes, vocabulary size,
# embedding dimension, and context-window size.
rnn = model(nh=s['nhidden'],
            nc=nclasses,
            ne=vocsize,
            de=s['emb_dimension'],
            cs=s['win'])
# separate in train, test, validation
...
# train: one pass over every training sentence.
# NOTE(review): `e` (epoch counter), `tic` (epoch start time) and `s['clr']`
# (current learning rate, presumably initialised from s['lr'] and optionally
# decayed, as in the original Elman-RNN tutorial) must be defined by the
# surrounding epoch loop, which is not visible in this excerpt — confirm.
for i in xrange(nsentences):
    train_lex_list=train_lex[i].tolist()[0]
    # expand each word index into its context window of s['win'] words
    cwords = contextwin(train_lex_list, s['win'])
    #print cwords
    # cut the sentence into mini-batches of s['bs'] steps, cast to int32
    words = map(lambda x: numpy.asarray(x).astype('int32'),\
    minibatch(cwords, s['bs']))
    labels = train_y[i]
    cnt_w=0
    for word_batch , label_last_word in zip(words, labels):
        #print word_batch, label_last_word
        #cnt_w+=1
        #print cnt_w
        # one SGD step on this mini-batch at the current learning rate
        rnn.train(word_batch, label_last_word, s['clr'])
        # presumably re-normalises the embedding rows after the update
        # (see model.normalize) — TODO confirm against the model class
        rnn.normalize()
    if s['verbose']:
        # overwrite the same console line (trailing \r and comma) with progress
        print '[learning] epoch %i >> %2.2f%%'%(e,(i+1)*100./nsentences),'completed in %.2f (sec) <<\r'%(time.time()-tic),
        sys.stdout.flush()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment