Created
August 8, 2017 10:50
-
-
Save kjanjua26/b46388bbde9ded5cf1f077a9f0dedc4f to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#Handling the imports
import datetime
import itertools
import os

import cv2
import editdistance
import keras.callbacks
import matplotlib as plt
import numpy as np
import pandas
import seaborn as sb
import sklearn
from keras import backend as K
from keras.layers import Input, Dense, Activation
from keras.layers import Reshape, Lambda
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.layers.merge import add, concatenate
from keras.layers.recurrent import GRU
from keras.models import Model
from keras.optimizers import SGD
from keras.preprocessing import image
from keras.utils.data_utils import get_file
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
# Load the dataset index. The rest of the script reads its 'path' column
# (image file locations) and its 'gt' column (labels).
read_file = pandas.read_csv("/home/kamranjanjua/ownKeras/data.csv")
# Single-argument print(...) calls produce the same output on Python 2 and 3.
print("Info: ")
# DataFrame.info() writes its report itself and returns None, so this also
# prints "None" (unchanged from the original behaviour).
print(read_file.info())
# NOTE(review): the null mask computed here is discarded; kept only so the
# statement sequence matches the original script.
pandas.isnull(read_file)
# Disabled experiments, kept for reference (originally commented out or
# parked inside bare string literals):
#x = read_file.ix[:,0:10]
#y = read_file['gt']
#y = np.asarray(y)
#y = np.ravel(read_file.type)
#x_train, x_test, y_train, y_test = train_test_split(x,y,test_size=0.3,random_state=42)
#print "X Train: ", x_train.shape
#print "X Test: ", x_test.shape
#print "Y Train: ", y_train.shape
#print "Y Test: ", y_test.shape
#in_data = read_file['path']
#in_data = np.asarray(in_data)
#print in_data
#print "Shape: ", in_data.shape
print("\n")
x = []
y = []
print("AUNN")
print(read_file)
# Read every image listed in the CSV (flag 0 = grayscale) and collect the
# pixel arrays in x and the matching 'gt' labels in y.
# The globals h, w and size keep the values of the LAST image read; later code
# (getShape(w, h), train()) relies on them.
for path, label in zip(read_file['path'], read_file['gt']):
    path = path.strip('\n')
    img = cv2.imread(path, 0)
    if img is None:
        # cv2.imread signals an unreadable/missing file by returning None;
        # fail here with the offending path instead of an AttributeError below.
        raise IOError("could not read image: %r" % (path,))
    # Resizing to a common size is currently disabled:
    #height = 64, width = 128
    #res_img = cv2.resize(img, (128,64))
    h, w = img.shape
    size = img.size
    x.append(img)
    y.append(label)
    # NOTE(review): the source's indentation was lost; these prints are assumed
    # to run once per image -- confirm against the original script.
    print("%s %s" % ("H: ", h))
    print("%s %s" % ("W: ", w))
    print("%s %s" % ("S: ", size))
if not x:
    # Without at least one image, h, w and size would never be defined.
    raise ValueError("the CSV lists no images; h, w and size are undefined")
# Stack the images into one float32 array and split 70/30 into train/test.
# NOTE(review): assumes every image has the same shape (the resize step in the
# loading loop is disabled) -- confirm, otherwise the stacking is not a clean
# (N, H, W) array.
x = np.array(x).astype(np.float32)
y = np.array(y)
# train_test_split returns ndarrays for ndarray inputs, so the repeated
# np.array(...)/astype(...) re-wrapping of each split was redundant.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=42)
print("Printing the shapes. \n")
# "%s %s" with a single print argument reproduces the Python 2 output of
# `print "label: ", value` and behaves identically on Python 3.
print("%s %s" % ("X_train shape: ", x_train.shape))
print("%s %s" % ("Y_train shape: ", y_train.shape))
print("%s %s" % ("X_test shape: ", x_test.shape))
print("%s %s" % ("Y_test shape: ", y_test.shape))
print("\n")
#Input Shape for CNN
def getShape(w, h, c=1):
    """Return the CNN input shape ``[w, h, c]`` as a float32 array.

    Args:
        w: Image width.
        h: Image height.
        c: Number of channels. Defaults to 1, which is what the original
            two-argument form always used.

    Returns:
        A 1-D ``numpy.float32`` array holding ``w``, ``h`` and ``c``.
    """
    # One conversion replaces the original asarray -> array -> astype chain.
    return np.array([w, h, c], dtype=np.float32)
# Report the input shape built from the module-level w and h set by the
# image-loading loop (single-argument print works on Python 2 and 3).
print("%s %s" % ("Input_Shape: ", getShape(w, h)))
# Disabled experiment, kept for reference (originally parked inside a bare
# string literal). It relies on a `stacked_list` that is never created.
#Stacking the images
#for i in read_file['path']:
#    i = i.strip('\n')
#    img = cv2.imread(i)
#    h,w,c = img.shape
#    input_shape = getShape(w,h,c)
#    stacked_list.append(input_shape)
#stacked_list = np.array(input_shape).astype(np.float32)
#print stacked_list
#Neural Network Model | |
def next_train(self):
    """Generate training batches endlessly.

    Each step fetches ``minibatch_size`` items starting at
    ``cur_train_index`` via ``self.get_batch(..., train=True)`` and advances
    the index. Once the index reaches ``val_split`` it is wrapped with a
    modulo of 32 and ``X_text``/``Y_data``/``Y_len`` are reshuffled before the
    already-fetched batch is yielded.

    NOTE(review): ``shuffle_mats_or_lists`` is not defined in the visible
    source; it has to be provided elsewhere.
    """
    while True:
        batch = self.get_batch(self.cur_train_index, self.minibatch_size, train=True)
        self.cur_train_index += self.minibatch_size
        reached_validation_part = self.cur_train_index >= self.val_split
        if reached_validation_part:
            self.cur_train_index %= 32
            shuffled = shuffle_mats_or_lists(
                [self.X_text, self.Y_data, self.Y_len], self.val_split)
            (self.X_text, self.Y_data, self.Y_len) = shuffled
        yield batch
def next_val(self):
    """Generate validation batches endlessly.

    Each step fetches ``minibatch_size`` items starting at ``cur_val_index``
    via ``self.get_batch(..., train=False)`` and advances the index. Once the
    index reaches ``num_words`` it is reset to ``val_split`` plus the index
    modulo 32.
    """
    while True:
        batch = self.get_batch(self.cur_val_index, self.minibatch_size, train=False)
        self.cur_val_index += self.minibatch_size
        if self.cur_val_index >= self.num_words:
            # Same precedence as the original: val_split + (index % 32).
            remainder = self.cur_val_index % 32
            self.cur_val_index = self.val_split + remainder
        yield batch
def on_train_begin(self, logs=None):
    """Prepare the word list and the initial paint function.

    Args:
        logs: Unused. Defaults to None instead of a shared mutable ``{}``.

    Calls ``self.build_word_list(16000, 4, 1)`` and binds ``self.paint_func``
    to ``paint_text`` with rotation, ``ud`` and multi-font rendering all
    switched off.

    NOTE(review): ``paint_text`` is not defined in the visible source; it is
    only looked up when ``paint_func`` is actually called.
    """
    self.build_word_list(16000, 4, 1)
    self.paint_func = lambda text: paint_text(
        text, self.img_w, self.img_h,
        rotate=False, ud=False, multi_fonts=False)
def on_epoch_begin(self, epoch, logs=None):
    """Rebind the paint function per epoch to implement curriculum learning.

    Args:
        epoch: Index of the epoch that is about to start.
        logs: Unused. Defaults to None instead of a shared mutable ``{}``.

    Schedule (epochs below 3 leave ``self.paint_func`` untouched):
        3 <= epoch < 6 : ud=True
        6 <= epoch < 9 : ud=True, multi_fonts=True
        epoch >= 9     : rotate=True, ud=True, multi_fonts=True
    From epoch 21 on, while ``self.max_string_len`` is below 12, the word list
    is rebuilt with ``build_word_list(32000, 12, 0.5)``.

    NOTE(review): ``paint_text`` is not defined in the visible source; it is
    only looked up when ``paint_func`` is actually called.
    """
    # Pick the (rotate, ud, multi_fonts) flags for this epoch, hardest first.
    if epoch >= 9:
        flags = (True, True, True)
    elif epoch >= 6:
        flags = (False, True, True)
    elif epoch >= 3:
        flags = (False, True, False)
    else:
        flags = None
    if flags is not None:
        rotate, ud, multi_fonts = flags
        self.paint_func = lambda text: paint_text(
            text, self.img_w, self.img_h,
            rotate=rotate, ud=ud, multi_fonts=multi_fonts)
    if epoch >= 21 and self.max_string_len < 12:
        self.build_word_list(32000, 12, 0.5)
def decode_batch(test_func, word_batch):
    """Greedy-decode a batch of network outputs into strings.

    Args:
        test_func: Callable taking ``[word_batch]`` and returning a sequence
            whose first element is the per-time-step class scores, indexed
            as (sample, time step, class).
        word_batch: Batch handed to ``test_func`` unchanged.

    Returns:
        One decoded string per sample.
    """
    out = test_func([word_batch])[0]
    ret = []
    for j in range(out.shape[0]):
        # Best class per time step; the first two time steps are skipped.
        best_path = np.argmax(out[j, 2:], 1).tolist()
        # Collapse runs of the same class into a single symbol.
        collapsed = [k for k, _ in itertools.groupby(best_path)]
        # 26 is space, 27 is CTC blank char
        chars = []
        for c in collapsed:
            if 0 <= c < 26:
                chars.append(chr(c + ord('a')))
            elif c == 26:
                chars.append(' ')
            # Any other class (e.g. the blank) contributes nothing.
        ret.append(''.join(chars))
    return ret
def get_batch(self, index, size, train):
    """Assemble one batch of painted images plus CTC bookkeeping arrays.

    Args:
        index: Offset into ``self.X_text`` / ``self.Y_data`` / ``self.Y_len``
            of the first sample.
        size: Number of samples in the batch.
        train: When true, the samples with ``i > size - 4`` are replaced by
            blank inputs labelled with ``self.blank_label``.

    Returns:
        ``(inputs, outputs)``: ``inputs`` is a dict with keys ``the_input``,
        ``the_labels``, ``input_length``, ``label_length`` and ``source_str``;
        ``outputs`` is ``{'ctc': zeros(size)}``, dummy data for the dummy loss.
    """
    # width and height are backwards from typical Keras convention
    # because width is the time dimension when it gets fed into the RNN
    # The backend layout cannot change mid-batch, so query it once.
    channels_first = K.image_data_format() == 'channels_first'
    if channels_first:
        X_data = np.ones([size, 1, self.img_w, self.img_h])
    else:
        X_data = np.ones([size, self.img_w, self.img_h, 1])
    # Label width 8 matches the 'the_labels' Input(shape=[8]) built in train().
    labels = np.ones([size, 8])
    input_length = np.zeros([size, 1])
    label_length = np.zeros([size, 1])
    source_str = []
    for i in range(size):
        # Mix in some blank inputs. This seems to be important for
        # achieving translational invariance
        is_blank = train and i > size - 4
        text = '' if is_blank else self.X_text[index + i]
        painted = self.paint_func(text)[0, :, :].T
        if channels_first:
            X_data[i, 0, 0:self.img_w, :] = painted
        else:
            X_data[i, 0:self.img_w, :, 0] = painted
        if is_blank:
            labels[i, 0] = self.blank_label
            label_length[i] = 1
        else:
            labels[i, :] = self.Y_data[index + i]
            label_length[i] = self.Y_len[index + i]
        # The "- 2" mirrors the two leading time steps that ctc_lambda_func
        # drops from y_pred.
        input_length[i] = self.img_w // self.downsample_factor - 2
        source_str.append(text)
    inputs = {'the_input': X_data,
              'the_labels': labels,
              'input_length': input_length,
              'label_length': label_length,
              'source_str': source_str  # used for visualization only
              }
    outputs = {'ctc': np.zeros([size])}  # dummy data for dummy loss function
    return (inputs, outputs)
def ctc_lambda_func(args):
    """Compute the CTC batch cost inside a Lambda layer.

    Args:
        args: Four-item sequence
            ``(y_pred, labels, input_length, label_length)``.

    Returns:
        The result of ``K.ctc_batch_cost`` on the trimmed predictions.
    """
    predictions, labels, input_length, label_length = args
    # the 2 is critical here since the first couple outputs of the RNN
    # tend to be garbage:
    trimmed = predictions[:, 2:, :]
    return K.ctc_batch_cost(labels, trimmed, input_length, label_length)
def train(run_name, start_epoch, stop_epoch, img_w):
    """Build the CNN + bidirectional-GRU CTC model and fit it.

    Args:
        run_name: Sub-directory under ``OUTPUT_DIR`` holding the weight files;
            only read when resuming (``start_epoch > 0``).
        start_epoch: Epoch to start from. When greater than 0 the weights
            saved for epoch ``start_epoch - 1`` are loaded first.
        stop_epoch: Epoch number at which fitting stops.
        img_w: Image width; it is the time dimension fed to the RNN.

    The image height is taken from the module-level ``h`` set by the
    image-loading loop.
    """
    # Input Parameters
    img_h = h
    # These three values only feed the disabled TextImageGenerator /
    # fit_generator pipeline quoted below.
    words_per_epoch = 16000
    val_split = 0.2
    val_words = int(words_per_epoch * (val_split))
    # (The original also created numpy `input_length` / `label_length` arrays
    # here; they were never used before the Input layers of the same name
    # replaced them, so they are removed.)

    # Network parameters
    conv_filters = 16
    kernel_size = (3, 3)
    pool_size = 2
    time_dense_size = 32
    rnn_size = 512

    if K.image_data_format() == 'channels_first':
        input_shape = (1, img_w, img_h)
    else:
        input_shape = (img_w, img_h, 1)

    # Disabled reference pipeline (originally parked in a bare string literal):
    # fdir = os.path.dirname(get_file('wordlists.tgz',
    #     origin='http://www.mythic-ai.com/datasets/wordlists.tgz', untar=True))
    # img_gen = TextImageGenerator(monogram_file=os.path.join(fdir, 'wordlist_mono_clean.txt'),
    #                              bigram_file=os.path.join(fdir, 'wordlist_bi_clean.txt'),
    #                              minibatch_size=32,
    #                              img_w=img_w,
    #                              img_h=img_h,
    #                              downsample_factor=(pool_size ** 2),
    #                              val_split=words_per_epoch - val_words
    #                              )

    act = 'relu'
    print("INPUT TO CONV")
    print(input_shape)
    input_data = Input(name='the_input', shape=input_shape, dtype='float32')
    inner = Conv2D(conv_filters, kernel_size, padding='same',
                   activation=act, kernel_initializer='he_normal',
                   name='conv1')(input_data)
    inner = MaxPooling2D(pool_size=(pool_size, pool_size), name='max1')(inner)
    inner = Conv2D(conv_filters, kernel_size, padding='same',
                   activation=act, kernel_initializer='he_normal',
                   name='conv2')(inner)
    inner = MaxPooling2D(pool_size=(pool_size, pool_size), name='max2')(inner)

    # Two 2x2 poolings shrink both spatial dimensions by pool_size ** 2.
    conv_to_rnn_dims = (img_w // (pool_size ** 2), (img_h // (pool_size ** 2)) * conv_filters)
    inner = Reshape(target_shape=conv_to_rnn_dims, name='reshape')(inner)

    # cuts down input size going into RNN:
    inner = Dense(time_dense_size, activation=act, name='dense1')(inner)

    # Two layers of bidirectional GRUs
    # gru_1 had been commented out although add([gru_1, gru_1b]) below needs
    # it, which raised a NameError; it is restored here.
    gru_1 = GRU(rnn_size, return_sequences=True, kernel_initializer='he_normal', name='gru1')(inner)
    gru_1b = GRU(rnn_size, return_sequences=True, go_backwards=True, kernel_initializer='he_normal', name='gru1_b')(inner)
    gru1_merged = add([gru_1, gru_1b])
    gru_2 = GRU(rnn_size, return_sequences=True, kernel_initializer='he_normal', name='gru2')(gru1_merged)
    gru_2b = GRU(rnn_size, return_sequences=True, go_backwards=True, kernel_initializer='he_normal', name='gru2_b')(gru1_merged)

    # transforms RNN output to character activations:
    # NOTE(review): decode_batch treats class 26 as space and 27 as the CTC
    # blank, which would need 28 outputs rather than 26 -- confirm the
    # intended alphabet size.
    inner = Dense(26, kernel_initializer='he_normal',
                  name='dense2')(concatenate([gru_2, gru_2b]))
    y_pred = Activation('softmax', name='softmax')(inner)
    Model(inputs=input_data, outputs=y_pred).summary()

    #Give the maximum string length
    labels = Input(name='the_labels', shape=[8], dtype='float32')
    input_length = Input(name='input_length', shape=[1], dtype='int64')
    label_length = Input(name='label_length', shape=[1], dtype='int64')
    # Keras doesn't currently support loss funcs with extra parameters
    # so CTC loss is implemented in a lambda layer
    loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')([y_pred, labels, input_length, label_length])

    # clipnorm seems to speeds up convergence
    sgd = SGD(lr=0.02, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=5)

    model = Model(inputs=[input_data, labels, input_length, label_length], outputs=loss_out)

    # the loss calc occurs elsewhere, so use a dummy lambda func for the loss
    model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=sgd)
    if start_epoch > 0:
        # NOTE(review): OUTPUT_DIR is not defined in the visible source; it
        # must exist at module level for resuming to work.
        weight_file = os.path.join(OUTPUT_DIR, run_name, 'weights%02d.h5' % (start_epoch - 1))
        model.load_weights(weight_file)
    # captures output of softmax so we can decode the output during visualization
    test_func = K.function([input_data], [y_pred])

    #viz_cb = VizCallback(run_name, test_func, img_gen.next_val())

    # stop_epoch / start_epoch replace the hard-coded 20 / 0; the only caller
    # passes exactly those values, so its behaviour is unchanged.
    # NOTE(review): this call is still inconsistent with the model. The model
    # has four named inputs, while next_train(x_train) is a generator whose
    # `self` is a plain ndarray, passed together with a separate y_train.
    # The disabled reference call is:
    #   model.fit_generator(generator=img_gen.next_train(), steps_per_epoch=(words_per_epoch - val_words),
    #                       epochs=stop_epoch, validation_data=img_gen.next_val(), validation_steps=val_words,
    #                       callbacks=[viz_cb, img_gen], initial_epoch=start_epoch)
    model.fit(next_train(x_train), y_train, batch_size=7, epochs=stop_epoch, verbose=1,
              validation_split=0.1, shuffle=True, initial_epoch=start_epoch)
    #score, evalute line
if __name__ == '__main__':
    # Name the run after the current timestamp; train for epochs 0..20 using
    # the module-level image width w set by the image-loading loop.
    run_name = datetime.datetime.now().strftime('%Y:%m:%d:%H:%M:%S')
    train(run_name, 0, 20, w)
# The following text was fused onto the call line above in the source and made
# it a syntax error; it is preserved here as a comment:
# GRU seems to work as well, if not better than LSTM:
Is this working fine now, or is the error that you posted on Keras issues still present?
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I want to run your awesome code.
Do you have any plans to upload your CSV file?
Thank you.