convolutional_variational_autoencoder_fashion_mnist-checkpoint.ipynb (Source)
Preamble¶
In [1]:
%matplotlib notebook
In [2]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm
from keras import backend as K
from keras.layers import (Input, Lambda, Layer, Reshape, Flatten,
Add, Multiply)
from keras.layers import Dense, Conv2D, Conv2DTranspose
from keras.models import Model, Sequential
from keras.datasets import fashion_mnist
In [3]:
import pandas as pd
from matplotlib.ticker import FormatStrFormatter
from keras.utils.vis_utils import model_to_dot, plot_model
from IPython.display import SVG
Notebook Configuration¶
In [4]:
# Compact NumPy array printing: two decimals, fixed-point notation for small
# values, 80-character lines and three items shown at each end when an array
# is summarised.
np.set_printoptions(suppress=True, linewidth=80, edgeitems=3, precision=2)
In [5]:
# Show the version of the TensorFlow module exposed by the Keras backend.
'TensorFlow version: {}'.format(K.tf.__version__)
Out[5]:
Dataset (Fashion-MNIST)¶
In [6]:
# Fetch the Fashion-MNIST train/test split; y_test is used further down to
# colour the latent-space scatter plots.
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()

# Append a trailing channel axis and divide by 255 (presumably mapping 8-bit
# pixel intensities into [0, 1] -- confirm against the dataset's dtype).
x_train = x_train[..., np.newaxis] / 255.
x_test = x_test[..., np.newaxis] / 255.
Constant definitions¶
In [7]:
# Shape of a single input image: the training array's shape without its
# leading (sample) axis.
original_img_size = tuple(x_train.shape[1:])
img_rows, img_cols, img_chns = original_img_size

# Number of filters used by the hidden convolutional layers.
filters = 64
# Feature-map shape the decoder reshapes its dense output to: half the image
# resolution, `filters` channels.
upsample_shape = (img_rows // 2, img_cols // 2, filters)

# Latent space and dense-layer sizes.
latent_dim = 2
intermediate_dim = 128
# NOTE(review): epsilon_std is not referenced by any code visible in this
# notebook; the noise tensor is drawn with K.random_normal's defaults.
epsilon_std = 1.0

# Training schedule.
batch_size = 100
epochs = 30
Model specification¶
Encoder¶
Convolutional Hidden Layers¶
In [8]:
# Convolutional trunk of the encoder: four ReLU conv layers (the second one
# downsamples with stride 2), flattened and projected to `intermediate_dim`.
encoder_conv_hidden_layers = Sequential(
    name='conv_hidden_layers',
    layers=[
        Conv2D(filters=img_chns, kernel_size=2,
               padding='same', activation='relu',
               input_shape=original_img_size),
        Conv2D(filters=filters, kernel_size=2, strides=(2, 2),
               padding='same', activation='relu'),
        Conv2D(filters=filters, kernel_size=3, strides=1,
               padding='same', activation='relu'),
        Conv2D(filters=filters, kernel_size=3, strides=1,
               padding='same', activation='relu'),
        Flatten(),
        Dense(units=intermediate_dim, activation='relu'),
    ])
In [9]:
# Inline SVG of the convolutional trunk, showing tensor shapes but not the
# layer names.
SVG(
    model_to_dot(
        model=encoder_conv_hidden_layers,
        show_shapes=True,
        show_layer_names=False
    ).create(format='svg', prog='dot')
)
Out[9]:
In [10]:
# plot_model(model=encoder_conv_hidden_layers,
# show_layer_names=False,
# show_shapes=True,
# to_file='../../images/vae/encoder_conv_layers.svg')
Inference Network¶
In [11]:
class KLDivergenceLayer(Layer):
    """Pass-through layer that registers a KL divergence penalty.

    The layer receives ``[mu, log_var]`` and returns that same list
    unchanged. As a side effect of ``call`` it adds the batch mean of
    ``-0.5 * sum(1 + log_var - mu**2 - exp(log_var))`` (summed over the
    last axis) to the losses of the model it is part of.
    """

    def __init__(self, *args, **kwargs):
        # The flag is set before the base-class initialiser runs.
        self.is_placeholder = True
        super(KLDivergenceLayer, self).__init__(*args, **kwargs)

    def call(self, inputs):
        mean, log_variance = inputs

        # Per-sample divergence, reduced over the last axis.
        inner = 1 + log_variance - K.square(mean) - K.exp(log_variance)
        per_sample_kl = -.5 * K.sum(inner, axis=-1)

        # Register the batch-averaged penalty; the inputs flow through as-is.
        self.add_loss(K.mean(per_sample_kl), inputs=inputs)
        return inputs
In [12]:
# Image input and the shared convolutional features.
x = Input(name='x', shape=original_img_size)
h = encoder_conv_hidden_layers(x)

# Two linear heads produce the mean and the log-variance of the latent code.
z_mu = Dense(units=latent_dim, name='mu')(h)
z_log_var = Dense(units=latent_dim, name='log_var')(h)

# Identity layer: returns its inputs untouched and adds the KL term to the
# model loss.
z_mu, z_log_var = KLDivergenceLayer(name='kl')([z_mu, z_log_var])

# Standard deviation recovered from the log-variance: sigma = exp(log_var / 2).
z_sigma = Lambda(function=lambda log_var: K.exp(.5 * log_var),
                 name='sigma')(z_log_var)
Reparameterization trick¶
In [13]:
# Noise input backed by a random-normal tensor with one row per sample of x
# and latent_dim columns.
eps = Input(
    tensor=K.random_normal(shape=(K.shape(x)[0], latent_dim)),
    name='epsilon')

# Reparameterised sample: z = mu + sigma * eps.
z_eps = Multiply(name='z_eps')([z_sigma, eps])
z = Add(name='z')([z_mu, z_eps])
Finalizing the Encoder¶
In [14]:
# Stochastic encoder: maps an image (plus the noise input) to a sampled z.
encoder = Model(name='encoder', inputs=[x, eps], outputs=z)

# Inline SVG of the encoder graph with tensor shapes.
SVG(
    model_to_dot(model=encoder, show_shapes=True)
    .create(format='svg', prog='dot')
)
Out[14]:
In [15]:
# plot_model(model=encoder,
# show_layer_names=True,
# show_shapes=True,
# to_file='../../images/vae/encoder_conv.svg')
Decoder¶
In [16]:
# Generative network: two dense layers expand the latent code to a
# half-resolution feature map, transposed convolutions upsample it (the last
# one with stride 2), and a sigmoid convolution emits the image channels.
decoder = Sequential(
    name='decoder',
    layers=[
        Dense(units=intermediate_dim, activation='relu',
              input_dim=latent_dim),
        Dense(units=np.prod(upsample_shape), activation='relu'),
        Reshape(target_shape=upsample_shape),
        Conv2DTranspose(filters=filters, kernel_size=3, strides=1,
                        padding='same', activation='relu'),
        Conv2DTranspose(filters=filters, kernel_size=3, strides=1,
                        padding='same', activation='relu'),
        Conv2DTranspose(filters=filters, kernel_size=3, strides=2,
                        padding='valid', activation='relu'),
        Conv2D(filters=img_chns, kernel_size=2,
               padding='valid', activation='sigmoid'),
    ])
In [17]:
# Inline SVG of the decoder graph, showing tensor shapes but not layer names.
SVG(
    model_to_dot(
        model=decoder,
        show_shapes=True,
        show_layer_names=False
    ).create(format='svg', prog='dot')
)
Out[17]:
In [18]:
# plot_model(decoder,
# show_layer_names=False,
# show_shapes=True,
# to_file='../../images/vae/decoder_conv.svg')
In [19]:
# Reconstruction tensor: the decoder applied to the sampled latent code z.
x_pred = decoder(z)
Finalizing the VAE¶
In [20]:
def nll(y_true, y_pred):
    """Negative log likelihood, summed over axes 1, 2 and 3.

    The element-wise binary cross-entropy between ``y_true`` and ``y_pred``
    is summed over every non-leading axis, leaving one value per sample.
    """
    # keras.losses.binary_crossentropy would give the mean over the last
    # axis; the sum is required here, so the backend op is used directly.
    elementwise = K.binary_crossentropy(y_true, y_pred)
    return K.sum(elementwise, axis=(1, 2, 3))
In [21]:
# End-to-end model from image (and noise) to reconstruction. The compiled
# loss is the reconstruction term only; the KL term is added by the 'kl'
# layer through add_loss.
vae = Model(name='vae', inputs=[x, eps], outputs=x_pred)
vae.compile(loss=nll, optimizer='rmsprop')
In [22]:
# Inline SVG of the full VAE graph with layer names and tensor shapes.
SVG(
    model_to_dot(
        model=vae,
        show_shapes=True,
        show_layer_names=True
    ).create(format='svg', prog='dot')
)
Out[22]:
In [23]:
# plot_model(vae, show_layer_names=True, show_shapes=True,
# to_file='../../images/vae/vae_conv.svg')
Model fitting¶
In [24]:
# Train as an autoencoder: the images serve as both inputs and targets, and
# the test split is used for validation after every epoch.
hist = vae.fit(
    x=x_train,
    y=x_train,
    batch_size=batch_size,
    epochs=epochs,
    shuffle=True,
    validation_data=(x_test, x_test))
Model Evaluation¶
In [25]:
def golden_size(width):
    """Return a ``(width, height)`` figure size in the golden ratio.

    Parameters
    ----------
    width : number
        Desired figure width.

    Returns
    -------
    tuple
        ``(width, height)`` with ``height = 2 * width / (1 + sqrt(5))``,
        i.e. the width divided by the golden ratio.
    """
    return (width, 2. * width / (1 + np.sqrt(5)))
NELBO¶
In [26]:
# One column per metric recorded by fit(), one row per epoch.
hist_df = pd.DataFrame(hist.history)

fig, ax = plt.subplots(figsize=golden_size(6))
hist_df.plot(ax=ax)
ax.set_xlabel('# epochs')
ax.set_ylabel('NELBO')

# Save the figure as SVG before showing it.
plt.savefig('../../images/vae/nelbo_conv_fashion.svg', format='svg')
plt.show()
Observed space manifold¶
In [27]:
# Settings for the 2D manifold figure: an n-by-n mosaic of decoded images.
n = 15
digit_size = 28

# Range of prior quantiles covered by the grid.
quantile_min = 0.01
quantile_max = 0.99

# The latent prior is Gaussian, so evenly spaced probabilities are mapped
# through the Gaussian inverse CDF (ppf) to obtain latent coordinates.
# z1 runs from the low to the high quantile, z2 from high to low.
z1 = norm.ppf(np.linspace(start=quantile_min, stop=quantile_max, num=n))
z2 = norm.ppf(np.linspace(start=quantile_max, stop=quantile_min, num=n))

# Grid of shape (n, n, 2): entry [i, j] holds the pair (z1[j], z2[i]).
z_grid = np.stack(np.meshgrid(z1, z2), axis=-1)
In [28]:
# Decode every grid point in a single batch, then fold the predictions back
# into an (n, n) grid of (img_rows, img_cols) images.
x_pred_grid = np.reshape(
    decoder.predict(np.reshape(z_grid, (n * n, latent_dim))),
    (n, n, img_rows, img_cols))
In [29]:
fig, ax = plt.subplots(figsize=(6, 6))
# Tile the n x n decoded images into one (n*img_rows, n*img_cols) mosaic;
# mosaic row i corresponds to z2[i] and mosaic column j to z1[j].
ax.imshow(np.block(list(map(list, x_pred_grid))), cmap='gray')
# The horizontal axis spans image columns and the vertical axis image rows,
# so x ticks are spaced by img_cols and y ticks by img_rows. imshow puts
# pixel centres on integer coordinates, hence a tile's centre sits at
# (size - 1) / 2 from its first pixel.
ax.set_xticks(np.arange(n) * img_cols + .5 * (img_cols - 1))
ax.set_xticklabels(['{:.2f}'.format(v) for v in z1], rotation=90)
ax.set_yticks(np.arange(n) * img_rows + .5 * (img_rows - 1))
ax.set_yticklabels(['{:.2f}'.format(v) for v in z2])
ax.set_xlabel('$z_1$')
ax.set_ylabel('$z_2$')
plt.savefig('../../images/vae/result_manifold_conv_fashion.png')
plt.show()
In [30]:
# Deterministic test-time encoder: maps an image directly to the latent mean,
# without the noise input.
test_encoder = Model(inputs=x, outputs=z_mu)

# Latent means of the test images, used below for a 2D plot of the clothing
# classes in the latent space.
z_test = test_encoder.predict(x=x_test, batch_size=batch_size)
In [31]:
fig, ax = plt.subplots(figsize=(6, 5))

# One point per test image at its latent mean, coloured by class label.
cbar = ax.scatter(z_test[:, 0], z_test[:, 1],
                  c=y_test, cmap='viridis',
                  s=3**2, alpha=.4)
fig.colorbar(cbar, ax=ax)

# Axis limits: twice the range between the prior's min and max quantiles.
ax.set_xlim(2. * norm.ppf((quantile_min, quantile_max)))
ax.set_ylim(2. * norm.ppf((quantile_min, quantile_max)))
ax.set_xlabel('$z_1$')
ax.set_ylabel('$z_2$')

plt.savefig('../../images/vae/result_latent_space_conv_fashion.png')
plt.show()
In [32]:
fig, (ax1, ax2) = plt.subplots(ncols=2, figsize=(12, 4.5))

# Left panel: mosaic of decoded images over the latent grid. Mosaic row i
# corresponds to z2[i] and mosaic column j to z1[j].
ax1.imshow(np.block(list(map(list, x_pred_grid))), cmap='gray')
# The horizontal axis spans image columns and the vertical axis image rows;
# ticks are placed on the tile centres (pixel centres are at integer
# coordinates, so a centre sits at (size - 1) / 2 from the first pixel).
ax1.set_xticks(np.arange(n) * img_cols + .5 * (img_cols - 1))
ax1.set_xticklabels(['{:.2f}'.format(v) for v in z1], rotation=90)
ax1.set_yticks(np.arange(n) * img_rows + .5 * (img_rows - 1))
ax1.set_yticklabels(['{:.2f}'.format(v) for v in z2])
# Label the left panel itself (ax1), not an axes object left over from an
# earlier figure.
ax1.set_xlabel('$z_1$')
ax1.set_ylabel('$z_2$')

# Right panel: latent means of the test images, coloured by class label.
cbar = ax2.scatter(z_test[:, 0], z_test[:, 1], c=y_test,
                   alpha=.4, s=3**2, cmap='viridis')
fig.colorbar(cbar, ax=ax2)
# Limits equal the quantile range spanned by z1 / z2 in the left panel.
ax2.set_xlim(norm.ppf((quantile_min, quantile_max)))
ax2.set_ylim(norm.ppf((quantile_min, quantile_max)))
ax2.set_xlabel('$z_1$')
ax2.set_ylabel('$z_2$')

plt.savefig('../../images/vae/result_combined_conv_fashion.png')
plt.show()