# coding: utf-8
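# TFRecord input pipeline plus a small CNN for 99-class leaf-species classification.
# Trains on shards in tfr_train/, monitors accuracy on tfr_validation/, and writes
# softmax probabilities for tfr_test/ to a Kaggle-style submission.csv.
# Exported from a Jupyter notebook; the "# In[n]:" markers are cell boundaries.
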
# In[1]:
import os
import tensorflow as tf
import glob
import random
from numpy.random import randint
import time
import math
# In[2]:
train_files = []
validation_files = []
test_files = []

for file in os.listdir('./tfr_train'):
    path = os.path.join('./tfr_train', file)
    train_files.append(path)

for file in os.listdir('./tfr_validation'):
    path = os.path.join('./tfr_validation', file)
    validation_files.append(path)

for file in os.listdir('./tfr_test'):
    path = os.path.join('./tfr_test', file)
    test_files.append(path)

print(len(test_files))
print(len(validation_files))
print(len(train_files))
# In[3]:
IMAGE_SIZE = 138
INPUT_SIZE = 128

# In[4]:
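# inputs() builds a queue-based training/validation pipeline: it reads serialized
# examples from the TFRecord shards, decodes the label and raw image, optionally
# applies random crop / flip augmentation, and shuffle-batches the result. Batches
# are resized back to INPUT_SIZE x INPUT_SIZE before being returned.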
def inputs(files, distortion=True, batch_params={'size': 10, 'min_after_dequeue': 20}):
    fqueue = tf.train.string_input_producer(files, shuffle=True)
    reader = tf.TFRecordReader()
    key, value = reader.read(fqueue)
    features = tf.parse_single_example(value, features={
        'label': tf.FixedLenFeature([], tf.int64),
        'image_raw': tf.FixedLenFeature([], tf.string),
    })
    label = tf.cast(features['label'], tf.int32)
    image = tf.decode_raw(features['image_raw'], tf.int32)
    image = tf.reshape(image, [IMAGE_SIZE, IMAGE_SIZE, 3])
    image.set_shape([IMAGE_SIZE, IMAGE_SIZE, 3])
    image = tf.cast(image, tf.float32)

    if distortion:
        # Pick a crop size between INPUT_SIZE and INPUT_SIZE + 5 (integer division,
        # so random.randint receives int arguments), then crop and randomly flip.
        cropsize = random.randint(INPUT_SIZE, INPUT_SIZE + (IMAGE_SIZE - INPUT_SIZE) // 2)
        framesize = INPUT_SIZE + (cropsize - INPUT_SIZE) * 2
        image = tf.image.resize_image_with_crop_or_pad(image, framesize, framesize)
        image = tf.random_crop(image, [cropsize, cropsize, 3])
        image = tf.image.random_flip_left_right(image)
        image = tf.image.random_flip_up_down(image)

    one_hot_label = tf.one_hot(label, depth=99, dtype=tf.float32)
    capacity = batch_params['min_after_dequeue'] + 3 * batch_params['size']
    images, labels = tf.train.shuffle_batch(
        [image, one_hot_label],
        batch_size=batch_params['size'],
        capacity=capacity,
        min_after_dequeue=batch_params['min_after_dequeue']
    )
    images = tf.image.resize_images(images, [INPUT_SIZE, INPUT_SIZE])
    return images, labels

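# test_inputs() mirrors inputs() for the test shards, but reads an 'id' feature
# instead of a label, skips augmentation, and batches all test files at once so a
# single session run yields predictions for the whole test set.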
def test_inputs(files):
    fqueue = tf.train.string_input_producer(files)
    reader = tf.TFRecordReader()
    key, value = reader.read(fqueue)
    features = tf.parse_single_example(value, features={
        'id': tf.FixedLenFeature([], tf.int64),
        'image_raw': tf.FixedLenFeature([], tf.string),
    })
    id = tf.cast(features['id'], tf.int32)
    image = tf.decode_raw(features['image_raw'], tf.int32)
    image = tf.reshape(image, [IMAGE_SIZE, IMAGE_SIZE, 3])
    image.set_shape([IMAGE_SIZE, IMAGE_SIZE, 3])
    image = tf.cast(image, tf.float32)
    images, ids = tf.train.batch(
        [image, id],
        batch_size=len(files)
    )
    images = tf.image.resize_images(images, [INPUT_SIZE, INPUT_SIZE])
    return images, ids
# In[5]:
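# Model: images are converted to grayscale, passed through two 5x5 conv + 2x2
# max-pool blocks (32 and 64 filters), a 1024-unit fully connected layer with
# dropout, and a final 99-way linear layer producing the logits. Weights are drawn
# from a normal distribution with a fan-in based stddev (2/sqrt(fan_in) for the
# conv and fc1 layers, 1/sqrt(1024) for fc2).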
def conv2d(x, W):
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

def max_pool_2x2(x):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
                          strides=[1, 2, 2, 1], padding='SAME')

def inference(images, keep_prob):
    x = tf.image.rgb_to_grayscale(images)
    x_image = tf.reshape(x, [-1, INPUT_SIZE, INPUT_SIZE, 1])

    with tf.variable_scope("conv1") as scope:
        stddev = 2.0 / math.sqrt(5 * 5 * 1)
        W_conv1 = tf.get_variable('weights', [5, 5, 1, 32], initializer=tf.random_normal_initializer(stddev=stddev))
        b_conv1 = tf.get_variable("biases", [32], initializer=tf.constant_initializer(0.0))
        h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
        h_pool1 = max_pool_2x2(h_conv1)

    with tf.variable_scope("conv2") as scope:
        stddev = 2.0 / math.sqrt(5 * 5 * 32)
        W_conv2 = tf.get_variable('weights', [5, 5, 32, 64], initializer=tf.random_normal_initializer(stddev=stddev))
        b_conv2 = tf.get_variable("biases", [64], initializer=tf.constant_initializer(0.0))
        h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
        h_pool2 = max_pool_2x2(h_conv2)

    with tf.variable_scope("fc1") as scope:
        stddev = 2.0 / math.sqrt(32 * 32 * 64)
        W_fc1 = tf.get_variable('weights', [32 * 32 * 64, 1024], initializer=tf.random_normal_initializer(stddev=stddev))
        b_fc1 = tf.get_variable("biases", [1024], initializer=tf.constant_initializer(0.0))
        h_pool2_flat = tf.reshape(h_pool2, [-1, 32 * 32 * 64])
        h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
        h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

    with tf.variable_scope("fc2") as scope:
        stddev = 1.0 / math.sqrt(1024)
        W_fc2 = tf.get_variable('weights', [1024, 99], initializer=tf.random_normal_initializer(stddev=stddev))
        b_fc2 = tf.get_variable("biases", [99], initializer=tf.constant_initializer(0.0))
        logits = tf.matmul(h_fc1_drop, W_fc2) + b_fc2

    return logits
def loss(logits, labels, l2=True):
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=logits))
    if l2:
        # L2-regularize all trainable weights (biases excluded) with weight 0.001.
        variables = tf.trainable_variables()
        l2_loss = tf.add_n([tf.nn.l2_loss(v) for v in variables if 'bias' not in v.name]) * 0.001
        loss = loss + l2_loss
    return loss
# In[6]:
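# Build the graph: one inference() tower per split, all sharing weights through the
# 'inference' variable scope (scope.reuse_variables() / reuse=True).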
keep_prob = tf.placeholder(tf.float32)

images, labels = inputs(train_files, batch_params={'size': 64, 'min_after_dequeue': 1000})
validation_images, validation_labels = inputs(validation_files, batch_params={'size': 100, 'min_after_dequeue': 1000})

with tf.variable_scope('inference') as scope:
    logits = inference(images, keep_prob)
    scope.reuse_variables()
    validation_logits = inference(validation_images, keep_prob)

cross_entropy = loss(logits, labels)
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(labels, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

validation_cross_entropy = loss(validation_logits, validation_labels)
validation_correct_prediction = tf.equal(tf.argmax(validation_logits, 1), tf.argmax(validation_labels, 1))
validation_accuracy = tf.reduce_mean(tf.cast(validation_correct_prediction, tf.float32))

test_images, test_ids = test_inputs(test_files)
# The test tower reuses the trained weights from the 'inference' scope.
with tf.variable_scope('inference', reuse=True):
    test_logits = inference(test_images, keep_prob)
test_prediction = tf.nn.softmax(test_logits)
# In[7]:
variables = tf.trainable_variables()
for v in variables:
    print(v.name)
# In[ ]:
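# Training: start the input queue runners, then run Adam (lr 1e-4) with dropout
# keep_prob 0.5; every 100 steps report the loss plus training / validation batch
# accuracy (evaluated with keep_prob 1.0).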
sess = tf.Session()
sess.run(tf.global_variables_initializer())

coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess, coord=coord)

for i in range(2000):
    _, loss_value = sess.run([train_step, cross_entropy], feed_dict={keep_prob: 0.5})
    if i % 100 == 0:
        train_acc, validation_acc = sess.run([accuracy, validation_accuracy], feed_dict={keep_prob: 1.0})
        print("step %d, loss value %g, training accuracy %g, validation accuracy %g" % (i, loss_value, train_acc, validation_acc))
# In[ ]:
# Continue training for another 5000 steps.
for i in range(5000):
    _, loss_value = sess.run([train_step, cross_entropy], feed_dict={keep_prob: 0.5})
    if i % 100 == 0:
        train_acc, validation_acc = sess.run([accuracy, validation_accuracy], feed_dict={keep_prob: 1.0})
        print("step %d, loss value %g, training accuracy %g, validation accuracy %g" % (i, loss_value, train_acc, validation_acc))
# In[11]:
res_pred, res_ids = sess.run([test_prediction, test_ids], feed_dict={keep_prob: 1.0})
print(res_pred.shape)
print(res_ids.shape)
# In[12]:
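# Submission: recover the class names by fitting a LabelEncoder on the 'species'
# column of train.csv (this assumes the same encoding was used when the TFRecord
# labels were written), then write the softmax probabilities indexed by test id.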
import pandas as pd
import numpy as np
from sklearn import preprocessing

df_train = pd.read_csv('train.csv', index_col='id')
df_test = pd.read_csv('test.csv', index_col='id')

labels = df_train['species'].values
le = preprocessing.LabelEncoder()
le.fit(labels)
classes = le.classes_

df_sub = pd.DataFrame(res_pred, index=res_ids, columns=classes)
df_sub.sort_index(inplace=True)
print(df_sub.head())

df_sub.to_csv('submission.csv', index_label='id')
# In[ ]:
coord.request_stop()
coord.join(threads)
sess.close()