Progress!

Monday August 27, 2018 at 11:27 am CST

A great many changes were made to the neural network over the past week. With a good deal of help from Igor, we got to a point where the network was reproducing the borders of the images.

Again, we start by defining methods to fetch data:

import os

import cv2
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

features_path = "./training_set/features/"
labels_path = "./training_set/labels/"

def load_data(img_dir):
    img_total = []
    for img in sorted(os.listdir(img_dir)):
        if img.endswith(".jpeg"):
            image = cv2.imread(os.path.join(img_dir, img))
            image = cv2.resize(image, (28, 28))
            img_total.append(image)
    return np.array(img_total)

def get_next_batch(batch_size, iteration):
    # sample a random batch of indices (with replacement); iteration is currently unused
    indices = np.random.randint(0, len(label_imgs), batch_size)
    # take the first color channel (blue, since cv2.imread returns BGR) of the label
    # images, used as both input and target so the network learns to reproduce them
    return label_imgs[indices, :, :, 0], label_imgs[indices, :, :, 0]

This time around, pixel values are scaled to be between 0 and 1:

feature_imgs = load_data(features_path) # (7280, 28, 28, 3) after resizing
label_imgs = load_data(labels_path)     # (3640, 28, 28, 3) after resizing

# scale image values to [0, 1]
feature_imgs = feature_imgs / 255
label_imgs = label_imgs / 255
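
As a quick, purely illustrative sanity check, the shapes and value ranges can be confirmed after scaling:

# confirm shapes and value ranges after scaling (illustrative check)
print(feature_imgs.shape, label_imgs.shape)    # e.g. (7280, 28, 28, 3) (3640, 28, 28, 3)
print(feature_imgs.min(), feature_imgs.max())  # values should now lie in [0, 1]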

Below, we construct the network, train it, and periodically plot the actual and reconstructed images along with the loss:

tf.reset_default_graph()

batch_size = 64

X_in = tf.placeholder(dtype=tf.float32, shape=[None, 28, 28], name='X')
Y    = tf.placeholder(dtype=tf.float32, shape=[None, 28, 28], name='Y')
Y_flat = tf.reshape(Y, shape=[-1, 28 * 28])
keep_prob = tf.placeholder(dtype=tf.float32, shape=(), name='keep_prob')

reshaped_dim = [-1, 7, 7, 1]

def lrelu(x, alpha=0.3):
    return tf.maximum(x, tf.multiply(x, alpha))

def encoder(X_in, keep_prob):
    activation = lrelu
    with tf.variable_scope("encoder", reuse=None):
        X = tf.reshape(X_in, shape=[-1, 28, 28, 1])
        x = tf.layers.conv2d(X, filters=64, kernel_size=4, strides=2, padding='same', activation=activation)
        x = tf.nn.dropout(x, keep_prob)
        x = tf.layers.conv2d(x, filters=64, kernel_size=4, strides=2, padding='same', activation=activation)
        x = tf.nn.dropout(x, keep_prob)
        x = tf.layers.conv2d(x, filters=64, kernel_size=4, strides=1, padding='same', activation=activation)
        x = tf.nn.dropout(x, keep_prob)
        x = tf.contrib.layers.flatten(x)
        return tf.layers.dense(x, units=64)


def decoder(sampled_z, keep_prob):
    with tf.variable_scope("decoder", reuse=None):
        x = tf.layers.dense(sampled_z, units=7*7, activation=lrelu)
        x = tf.reshape(x, reshaped_dim)
        x = tf.layers.conv2d_transpose(x, filters=64, kernel_size=4, strides=2, padding='same', activation=tf.nn.relu)
        x = tf.nn.dropout(x, keep_prob)
        x = tf.layers.conv2d_transpose(x, filters=64, kernel_size=4, strides=1, padding='same', activation=tf.nn.relu)
        x = tf.nn.dropout(x, keep_prob)
        x = tf.layers.conv2d_transpose(x, filters=64, kernel_size=4, strides=1, padding='same', activation=tf.nn.relu)

        x = tf.contrib.layers.flatten(x)
        x = tf.layers.dense(x, units=28*28, activation=tf.nn.sigmoid)
        img = tf.reshape(x, shape=[-1, 28, 28])
        return img

conv = encoder(X_in, keep_prob)
dec = decoder(conv, keep_prob)

unreshaped = tf.reshape(dec, [-1, 28*28])
img_loss = tf.reduce_sum(tf.squared_difference(unreshaped, Y_flat), 1)
loss = tf.reduce_mean(img_loss)
optimizer = tf.train.AdamOptimizer(0.001).minimize(loss)
sess = tf.Session()
sess.run(tf.global_variables_initializer())

for i in range(30000):
    batch, _ = get_next_batch(batch_size, i)
    sess.run(optimizer, feed_dict = {X_in: batch, Y: batch, keep_prob: 0.8})

    if not i % 200:
        ls, d = sess.run([loss, dec], feed_dict = {X_in: batch, Y: batch, keep_prob: 1.0})

        plt.subplot(1,2,1)
        plt.imshow(np.reshape(batch[0], [28, 28]))
        plt.subplot(1,2,2)
        plt.imshow(d[0])
        plt.show()
        print(i, ls)
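
Once training has finished, the same session can be reused for a quick visual check on one of the feature images. The index below is arbitrary, and this sketch assumes feature_imgs has been loaded and scaled as above:

# reconstruct a feature image with the trained network (index chosen arbitrarily)
sample = feature_imgs[0:1, :, :, 0]  # first color channel, shape (1, 28, 28)
recon = sess.run(dec, feed_dict={X_in: sample, keep_prob: 1.0})

plt.subplot(1, 2, 1)
plt.imshow(sample[0])
plt.subplot(1, 2, 2)
plt.imshow(recon[0])
plt.show()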

I’m going to create a dataset wherein all of the circles have the same color to see if that improves anything.
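
A minimal sketch of how that single-color dataset might be generated, assuming the circles are drawn with OpenCV; the output directory, image size, circle parameters, and color below are placeholders rather than the actual dataset settings:

import os
import numpy as np
import cv2

out_dir = "./training_set_single_color/"  # hypothetical output directory
os.makedirs(out_dir, exist_ok=True)

for i in range(100):  # number of images is arbitrary here
    img = np.zeros((28, 28, 3), dtype=np.uint8)
    center = (int(np.random.randint(6, 22)), int(np.random.randint(6, 22)))
    radius = int(np.random.randint(3, 6))
    # every circle gets the same fixed color (BGR blue, so channel 0 is populated)
    cv2.circle(img, center, radius, color=(255, 0, 0), thickness=-1)
    cv2.imwrite(os.path.join(out_dir, "circle_{:04d}.jpeg".format(i)), img)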