Progress!
Monday August 27, 2018 at 11:27 am CST
A great many changes were made to the neural network over the past week. With a good deal of help from Igor, we got to a point where the network was reproducing the borders of the images.
Again, we start by defining functions to fetch the data:
# Directories holding the training images. Both are passed to load_data(),
# which reads every ".jpeg" file found directly inside them.
features_path = "./training_set/features/"
labels_path = "./training_set/labels/"
def load_data(img_dir, size=(28, 28)):
    """Load and resize every ``.jpeg`` image found directly in ``img_dir``.

    Files are processed in sorted filename order. Entries whose name does not
    end in ".jpeg" (case-sensitive) are ignored.

    Args:
        img_dir: Directory to scan for images.
        size: ``(width, height)`` handed to ``cv2.resize``. Defaults to
            ``(28, 28)``, the input size the network below is built for.

    Returns:
        A NumPy array stacking the resized images in sorted filename order.
        If no file matched, the result is an empty array of shape ``(0,)``.

    Raises:
        IOError: If a ".jpeg" file exists but cannot be decoded.
    """
    images = []
    for name in sorted(os.listdir(img_dir)):
        if not name.endswith(".jpeg"):
            continue
        path = os.path.join(img_dir, name)
        image = cv2.imread(path)
        if image is None:
            # cv2.imread reports failure by returning None instead of raising.
            # Fail loudly with the file name: silently dropping the image
            # would shift the index of every image that follows it.
            raise IOError("could not read image: %s" % path)
        images.append(cv2.resize(image, size))
    return np.array(images)
def get_next_batch(batch_size, iteration):
    """Draw a random batch of single-channel images from ``label_imgs``.

    Both returned arrays are taken from ``label_imgs`` with the same indices,
    so the pair is (input, target) for reconstructing the label images.

    Args:
        batch_size: Number of images to draw.
        iteration: Unused; kept so existing callers keep working.

    Returns:
        A tuple of two arrays, each ``label_imgs[indices, :, :, 0]``.

    Raises:
        ValueError: From ``np.random.randint`` if ``label_imgs`` is empty.
    """
    # Sample indices uniformly *with replacement* (this is not a shuffle).
    # The upper bound follows the loaded data instead of a hard-coded 3640,
    # so a smaller set cannot index out of range and a larger one is fully used.
    indices = np.random.randint(0, len(label_imgs), batch_size)
    # Keep only channel 0 of each image.
    # NOTE(review): label_imgs is loaded with cv2.imread, which returns BGR by
    # default, so channel 0 is blue; use index 2 if red was intended.
    return label_imgs[indices, :, :, 0], label_imgs[indices, :, :, 0]
This time around, pixel values are scaled to be between 0 and 1:
# Load both image sets. load_data resizes every image to 28x28 and cv2.imread
# yields 3 channels by default, so each array is (N, 28, 28, 3); the image
# counts below are the ones recorded by the author for this dataset.
feature_imgs = load_data(features_path)  # (7280, 28, 28, 3)
label_imgs = load_data(labels_path)  # (3640, 28, 28, 3)
# Scale image values to [0, 1]. The float divisor forces true division even
# where "/" on integer arrays would floor (Python 2), which would otherwise
# collapse almost every pixel to 0.
feature_imgs = feature_imgs / 255.0
label_imgs = label_imgs / 255.0
Below, we construct our network and, every 200 training steps, plot an actual image next to the network's prediction and print the current loss:
# Clear the default graph before building the network.
tf.reset_default_graph()

batch_size = 64
# Shape the decoder reshapes its first dense output (7*7 units) into.
reshaped_dim = [-1, 7, 7, 1]

# Network input and reconstruction target: batches of 28x28 images.
X_in = tf.placeholder(tf.float32, [None, 28, 28], name='X')
Y = tf.placeholder(tf.float32, [None, 28, 28], name='Y')
# Scalar keep probability handed to every tf.nn.dropout call.
keep_prob = tf.placeholder(tf.float32, (), name='keep_prob')

# Target flattened to one row per image, matching the flattened prediction
# used in the loss.
Y_flat = tf.reshape(Y, [-1, 28 * 28])
def lrelu(x, alpha=0.3):
    """Return the element-wise maximum of ``x`` and ``alpha * x``.

    Args:
        x: Input tensor.
        alpha: Multiplier applied to ``x`` for the second operand.
    """
    scaled = tf.multiply(x, alpha)
    return tf.maximum(x, scaled)
def encoder(X_in, keep_prob):
    """Map a batch of 28x28 images to a 64-unit code.

    The input is reshaped to ``[-1, 28, 28, 1]`` and passed through three
    64-filter, kernel-4, 'same'-padded convolutions with strides 2, 2 and 1.
    Each uses the ``lrelu`` activation and is followed by dropout. The result
    is flattened and projected by a dense layer with no activation.

    Args:
        X_in: Input tensor that can be reshaped to ``[-1, 28, 28, 1]``.
        keep_prob: Keep probability passed to ``tf.nn.dropout``.

    Returns:
        Output of the final 64-unit dense layer.
    """
    with tf.variable_scope("encoder", reuse=None):
        net = tf.reshape(X_in, shape=[-1, 28, 28, 1])
        # Same layer order as writing the three conv/dropout pairs out by hand.
        for stride in (2, 2, 1):
            net = tf.layers.conv2d(
                net,
                filters=64,
                kernel_size=4,
                strides=stride,
                padding='same',
                activation=lrelu,
            )
            net = tf.nn.dropout(net, keep_prob)
        flat = tf.contrib.layers.flatten(net)
        return tf.layers.dense(flat, units=64)
def decoder(sampled_z, keep_prob):
    """Reconstruct a batch of 28x28 images from the encoder's code.

    A 49-unit dense layer (``lrelu``) is reshaped to ``reshaped_dim`` and
    passed through three 64-filter, kernel-4, 'same'-padded transposed
    convolutions with ReLU and strides 2, 1 and 1. Dropout follows the first
    two only. The result is flattened, projected to 28*28 sigmoid units and
    reshaped to ``[-1, 28, 28]``.

    Args:
        sampled_z: Code tensor produced by the encoder.
        keep_prob: Keep probability passed to ``tf.nn.dropout``.

    Returns:
        Tensor of shape ``[-1, 28, 28]`` with values from a sigmoid.
    """
    with tf.variable_scope("decoder", reuse=None):
        net = tf.layers.dense(sampled_z, units=7 * 7, activation=lrelu)
        net = tf.reshape(net, reshaped_dim)
        # First two transposed convolutions are each followed by dropout.
        for stride in (2, 1):
            net = tf.layers.conv2d_transpose(
                net,
                filters=64,
                kernel_size=4,
                strides=stride,
                padding='same',
                activation=tf.nn.relu,
            )
            net = tf.nn.dropout(net, keep_prob)
        # The last transposed convolution has no dropout after it.
        net = tf.layers.conv2d_transpose(
            net,
            filters=64,
            kernel_size=4,
            strides=1,
            padding='same',
            activation=tf.nn.relu,
        )
        flat = tf.contrib.layers.flatten(net)
        pixels = tf.layers.dense(flat, units=28 * 28, activation=tf.nn.sigmoid)
        return tf.reshape(pixels, shape=[-1, 28, 28])
# Wire the encoder into the decoder.
conv = encoder(X_in, keep_prob)
dec = decoder(conv, keep_prob)

# Reconstruction loss: squared error summed over the 784 pixels of each
# image, then averaged over the batch.
unreshaped = tf.reshape(dec, shape=[-1, 28 * 28])
img_loss = tf.reduce_sum(tf.squared_difference(unreshaped, Y_flat), axis=1)
loss = tf.reduce_mean(img_loss)
optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)

# The initializer is created after the optimizer so that it also covers the
# variables the optimizer adds to the graph.
sess = tf.Session()
init_op = tf.global_variables_initializer()
sess.run(init_op)
# Train for 30000 steps. Every 200th step (including step 0), show one input
# image next to its reconstruction and print the step number and loss.
for step in range(30000):
    batch, _ = get_next_batch(batch_size, step)

    # Training step: the batch is both input and target; dropout keeps 80%.
    train_feed = {X_in: batch, Y: batch, keep_prob: 0.8}
    sess.run(optimizer, feed_dict=train_feed)

    if step % 200 != 0:
        continue

    # Evaluate on the same batch with dropout disabled (keep_prob 1.0).
    eval_feed = {X_in: batch, Y: batch, keep_prob: 1.0}
    batch_loss, reconstruction = sess.run([loss, dec], feed_dict=eval_feed)

    # Left: first image of the batch. Right: the network's output for it.
    plt.subplot(1, 2, 1)
    plt.imshow(np.reshape(batch[0], [28, 28]))
    plt.subplot(1, 2, 2)
    plt.imshow(reconstruction[0])
    plt.show()
    print(step, batch_loss)
I’m going to create a dataset wherein all of the circles have the same color to see if that improves anything.