convolutional_variational_autoencoder_mnist-checkpoint.ipynb (Source)

Preamble

In [1]:
%matplotlib notebook
In [2]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm

from keras import backend as K

from keras.layers import (Input, Lambda, Layer, Reshape, Flatten, 
                          Add, Multiply)
from keras.layers import Dense, Conv2D, Conv2DTranspose
from keras.models import Model, Sequential
from keras.datasets import mnist
Using TensorFlow backend.
In [3]:
import pandas as pd

from matplotlib.ticker import FormatStrFormatter
from keras.utils.vis_utils import model_to_dot, plot_model
from IPython.display import SVG

Notebook Configuration

In [4]:
# Compact numpy display: 2 decimal places, plain (non-scientific) notation,
# 80-column lines, and 3 edge items when arrays are truncated.
_print_opts = dict(suppress=True, linewidth=80, edgeitems=3, precision=2)
np.set_printoptions(**_print_opts)
In [5]:
'TensorFlow version: ' + K.tf.__version__
Out[5]:
'TensorFlow version: 1.4.0'

Dataset (MNIST)

In [6]:
# Load MNIST, append a trailing channel axis (28, 28) -> (28, 28, 1),
# and rescale pixel intensities from [0, 255] to [0, 1].
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = np.expand_dims(x_train, axis=-1) / 255.
x_test = np.expand_dims(x_test, axis=-1) / 255.
Constant definitions
In [7]:
# input image dimensions
img_rows, img_cols, img_chns = x_train.shape[1:]

# number of convolutional filters to use
filters = 64

# full input shape fed to the encoder: (28, 28, 1)
original_img_size = (img_rows, img_cols, img_chns)
# spatial shape the decoder reshapes to before its transposed convs
# (half resolution, since the encoder downsamples once with stride 2)
upsample_shape = (img_rows // 2, img_cols // 2, filters)

epsilon_std = 1.0        # nominal noise std (NOTE: not referenced below; sampling uses the default stddev)
latent_dim = 2           # 2-D latent space so it can be visualized directly
intermediate_dim = 128   # dense bottleneck width between conv stack and latents
batch_size = 100
epochs = 50

Model specification

Encoder

Convolutional Hidden Layers

In [8]:
# Encoder feature extractor: conv stack followed by a dense bottleneck.
# Shapes (per the model plot below):
#   (28, 28, 1) -> (28, 28, 1) -> (14, 14, 64) -> (14, 14, 64)
#   -> (14, 14, 64) -> flatten 12544 -> 128
encoder_conv_hidden_layers = Sequential([
    Conv2D(img_chns, input_shape=original_img_size,
           kernel_size=2, padding='same', 
           activation='relu'),
    # stride 2 halves the spatial resolution
    Conv2D(filters, kernel_size=2, padding='same', 
           activation='relu', strides=(2, 2)),
    Conv2D(filters,
           kernel_size=3, padding='same', 
           activation='relu', strides=1),
    Conv2D(filters, kernel_size=3, padding='same', 
           activation='relu', strides=1),
    Flatten(),
    Dense(intermediate_dim, activation='relu')
], name='conv_hidden_layers')
In [9]:
# Render the encoder conv stack architecture inline as SVG.
SVG(model_to_dot(encoder_conv_hidden_layers, 
                 show_layer_names=False, 
                 show_shapes=True).create(prog='dot', format='svg'))
Out[9]:
G 140633619472056 InputLayerinput:output:(None, 28, 28, 1)(None, 28, 28, 1)140633619469704 Conv2Dinput:output:(None, 28, 28, 1)(None, 28, 28, 1)140633619472056->140633619469704 140633619470208 Conv2Dinput:output:(None, 28, 28, 1)(None, 14, 14, 64)140633619469704->140633619470208 140633619470600 Conv2Dinput:output:(None, 14, 14, 64)(None, 14, 14, 64)140633619470208->140633619470600 140633619470992 Conv2Dinput:output:(None, 14, 14, 64)(None, 14, 14, 64)140633619470600->140633619470992 140633619471384 Flatteninput:output:(None, 14, 14, 64)(None, 12544)140633619470992->140633619471384 140633619471552 Denseinput:output:(None, 12544)(None, 128)140633619471384->140633619471552
In [10]:
# Save the same diagram to the images directory for the write-up.
plot_model(model=encoder_conv_hidden_layers, 
           show_layer_names=False, 
           show_shapes=True,
           to_file='../../images/vae/encoder_conv_layers.svg')

Inference Network

In [11]:
class KLDivergenceLayer(Layer):

    """ Identity layer that registers the KL divergence between the
    approximate posterior q(z|x) = N(mu, exp(log_var)) and the standard
    normal prior as an auxiliary loss on the enclosing model.

    The inputs pass through unchanged; `add_loss` is the only side
    effect.
    """

    def __init__(self, *args, **kwargs):
        # Flag this layer as a pass-through placeholder node.
        self.is_placeholder = True
        super(KLDivergenceLayer, self).__init__(*args, **kwargs)

    def call(self, inputs):
        mu, log_var = inputs

        # Closed-form KL( N(mu, sigma^2) || N(0, I) ), summed over the
        # latent dimensions for each sample in the batch.
        kl_per_sample = - .5 * K.sum(1 + log_var -
                                     K.square(mu) -
                                     K.exp(log_var), axis=-1)

        # Average over the batch and attach to the model's total loss.
        self.add_loss(K.mean(kl_per_sample), inputs=inputs)

        return inputs
In [12]:
# Inference network: map an image x to the parameters (mu, log sigma^2)
# of the approximate posterior q(z|x).
x = Input(shape=original_img_size, name='x')

h = encoder_conv_hidden_layers(x)

z_mu = Dense(latent_dim, name='mu')(h)
z_log_var = Dense(latent_dim, name='log_var')(h)
# Identity pass-through whose side effect adds the KL term to the loss.
z_mu, z_log_var = KLDivergenceLayer(name='kl')([z_mu, z_log_var])
# sigma = exp(log_var / 2)
z_sigma = Lambda(lambda t: K.exp(.5*t), name='sigma')(z_log_var)

Reparameterization trick

In [13]:
# Reparameterization trick: z = mu + sigma * eps with eps ~ N(0, I).
# The noise is wired in as an auxiliary Input tensor so sampling is
# external to the graph and gradients flow through mu and sigma.
eps = Input(name='epsilon', tensor=K.random_normal(shape=(K.shape(x)[0], latent_dim)))
z_eps = Multiply(name='z_eps')([z_sigma, eps])
z = Add(name='z')([z_mu, z_eps])

Finalizing the Encoder

In [14]:
# Full encoder model: (x, eps) -> sampled latent code z.
encoder = Model(inputs=[x, eps], outputs=z, name='encoder')
SVG(model_to_dot(encoder, show_shapes=True)
    .create(prog='dot', format='svg'))
Out[14]:
G 140633617042960 x: InputLayerinput:output:(None, 28, 28, 1)(None, 28, 28, 1)140633619471944 conv_hidden_layers: Sequentialinput:output:(None, 28, 28, 1)(None, 128)140633617042960->140633619471944 140633617080504 mu: Denseinput:output:(None, 128)(None, 2)140633619471944->140633617080504 140633616926704 log_var: Denseinput:output:(None, 128)(None, 2)140633619471944->140633616926704 140633616988704 kl: KLDivergenceLayerinput:output:[(None, 2), (None, 2)][(None, 2), (None, 2)]140633617080504->140633616988704 140633616926704->140633616988704 140633617100584 sigma: Lambdainput:output:(None, 2)(None, 2)140633616988704->140633617100584 140633617168256 z: Addinput:output:[(None, 2), (None, 2)](None, 2)140633616988704->140633617168256 140633616842312 z_eps: Multiplyinput:output:[(None, 2), (None, 2)](None, 2)140633617100584->140633616842312 140633616798384 epsilon: InputLayerinput:output:(None, 2)(None, 2)140633616798384->140633616842312 140633616842312->140633617168256
In [15]:
# Save the encoder diagram for the write-up.
plot_model(model=encoder, 
           show_layer_names=True, 
           show_shapes=True,
           to_file='../../images/vae/encoder_conv.svg')

Decoder

In [16]:
# Decoder: latent z -> reconstructed image, roughly mirroring the encoder.
# Shapes (per the model plot below):
#   2 -> 128 -> 12544 -> (14, 14, 64) -> (14, 14, 64) -> (14, 14, 64)
#   -> (29, 29, 64) -> (28, 28, 1)
decoder = Sequential([
    Dense(intermediate_dim, input_dim=latent_dim, activation='relu'),
    Dense(np.prod(upsample_shape), activation='relu'),
    Reshape(upsample_shape),
    Conv2DTranspose(filters, kernel_size=3, padding='same', strides=1,
                    activation='relu'),
    Conv2DTranspose(filters, kernel_size=3, padding='same', strides=1,
                    activation='relu'),
    # stride-2 'valid' transposed conv upsamples 14 -> 29
    Conv2DTranspose(filters, kernel_size=3, padding='valid', strides=2, 
                    activation='relu'),
    # final 'valid' 2x2 conv trims 29 -> 28 and maps to a single
    # sigmoid channel (Bernoulli pixel probabilities)
    Conv2D(img_chns, kernel_size=2, padding='valid', 
           activation='sigmoid')
], name='decoder')
In [17]:
# Render the decoder architecture inline as SVG.
SVG(model_to_dot(decoder, show_layer_names=False, show_shapes=True)
    .create(prog='dot', format='svg'))
Out[17]:
G 140633617386632 InputLayerinput:output:(None, 2)(None, 2)140633617386520 Denseinput:output:(None, 2)(None, 128)140633617386632->140633617386520 140633617383832 Denseinput:output:(None, 128)(None, 12544)140633617386520->140633617383832 140633617384168 Reshapeinput:output:(None, 12544)(None, 14, 14, 64)140633617383832->140633617384168 140633617384112 Conv2DTransposeinput:output:(None, 14, 14, 64)(None, 14, 14, 64)140633617384168->140633617384112 140633617384056 Conv2DTransposeinput:output:(None, 14, 14, 64)(None, 14, 14, 64)140633617384112->140633617384056 140633617385848 Conv2DTransposeinput:output:(None, 14, 14, 64)(None, 29, 29, 64)140633617384056->140633617385848 140633617386800 Conv2Dinput:output:(None, 29, 29, 64)(None, 28, 28, 1)140633617385848->140633617386800
In [18]:
# Save the decoder diagram for the write-up.
plot_model(decoder, 
           show_layer_names=False, 
           show_shapes=True,
           to_file='../../images/vae/decoder_conv.svg')
In [19]:
# Reconstruction of x from the sampled latent code.
x_pred = decoder(z)

Finalizing the VAE

In [20]:
def nll(y_true, y_pred):
    """ Negative log likelihood of a Bernoulli decoder.

    keras.losses.binary_crossentropy averages over the last axis only;
    the ELBO's reconstruction term needs the *sum* of the per-pixel
    cross-entropies, so we sum over all image axes instead.
    """
    pixelwise_bce = K.binary_crossentropy(y_true, y_pred)
    return K.sum(pixelwise_bce, axis=(1, 2, 3))
In [21]:
# End-to-end VAE; the optimized loss is nll (reconstruction) plus the
# KL term registered by KLDivergenceLayer via add_loss.
vae = Model(inputs=[x, eps], outputs=x_pred, name='vae')
vae.compile(optimizer='rmsprop', loss=nll)
In [22]:
# Render the full VAE graph inline as SVG.
SVG(model_to_dot(vae, show_layer_names=True, show_shapes=True)
    .create(prog='dot', format='svg'))
Out[22]:
G 140633617042960 x: InputLayerinput:output:(None, 28, 28, 1)(None, 28, 28, 1)140633619471944 conv_hidden_layers: Sequentialinput:output:(None, 28, 28, 1)(None, 128)140633617042960->140633619471944 140633617080504 mu: Denseinput:output:(None, 128)(None, 2)140633619471944->140633617080504 140633616926704 log_var: Denseinput:output:(None, 128)(None, 2)140633619471944->140633616926704 140633616988704 kl: KLDivergenceLayerinput:output:[(None, 2), (None, 2)][(None, 2), (None, 2)]140633617080504->140633616988704 140633616926704->140633616988704 140633617100584 sigma: Lambdainput:output:(None, 2)(None, 2)140633616988704->140633617100584 140633617168256 z: Addinput:output:[(None, 2), (None, 2)](None, 2)140633616988704->140633617168256 140633616842312 z_eps: Multiplyinput:output:[(None, 2), (None, 2)](None, 2)140633617100584->140633616842312 140633616798384 epsilon: InputLayerinput:output:(None, 2)(None, 2)140633616798384->140633616842312 140633616842312->140633617168256 140633617387136 decoder: Sequentialinput:output:(None, 2)(None, 28, 28, 1)140633617168256->140633617387136
In [23]:
# Save the full VAE diagram for the write-up.
plot_model(vae, show_layer_names=True, show_shapes=True,
           to_file='../../images/vae/vae_conv.svg')

Model fitting

In [24]:
# Fit as a plain autoencoder: inputs and targets are both x
# (the epsilon input is fed internally from its random tensor).
hist = vae.fit(
    x_train,
    x_train,
    shuffle=True,
    epochs=epochs,
    batch_size=batch_size,
    validation_data=(x_test, x_test)
)
Train on 60000 samples, validate on 10000 samples
Epoch 1/50
60000/60000 [==============================] - 20s 326us/step - loss: 186.1521 - val_loss: 161.2163
Epoch 2/50
60000/60000 [==============================] - 19s 315us/step - loss: 158.8958 - val_loss: 154.6279
Epoch 3/50
60000/60000 [==============================] - 19s 317us/step - loss: 153.3961 - val_loss: 151.0090
Epoch 4/50
60000/60000 [==============================] - 19s 319us/step - loss: 150.3251 - val_loss: 150.8477
Epoch 5/50
60000/60000 [==============================] - 19s 319us/step - loss: 148.1933 - val_loss: 147.4588
Epoch 6/50
60000/60000 [==============================] - 19s 319us/step - loss: 146.6999 - val_loss: 144.8877
Epoch 7/50
60000/60000 [==============================] - 19s 320us/step - loss: 145.5844 - val_loss: 145.2916
Epoch 8/50
60000/60000 [==============================] - 19s 320us/step - loss: 144.7897 - val_loss: 145.2968
Epoch 9/50
60000/60000 [==============================] - 19s 320us/step - loss: 143.9959 - val_loss: 144.3361
Epoch 10/50
60000/60000 [==============================] - 19s 321us/step - loss: 143.4341 - val_loss: 143.1575
Epoch 11/50
60000/60000 [==============================] - 19s 320us/step - loss: 142.8752 - val_loss: 144.5963
Epoch 12/50
60000/60000 [==============================] - 19s 322us/step - loss: 142.3936 - val_loss: 143.1496
Epoch 13/50
60000/60000 [==============================] - 19s 322us/step - loss: 142.0428 - val_loss: 142.9749
Epoch 14/50
60000/60000 [==============================] - 19s 321us/step - loss: 141.6667 - val_loss: 142.6248
Epoch 15/50
60000/60000 [==============================] - 19s 324us/step - loss: 141.3294 - val_loss: 144.1269
Epoch 16/50
60000/60000 [==============================] - 19s 322us/step - loss: 141.0190 - val_loss: 142.7994
Epoch 17/50
60000/60000 [==============================] - 19s 324us/step - loss: 140.7385 - val_loss: 142.4357
Epoch 18/50
60000/60000 [==============================] - 19s 320us/step - loss: 140.5070 - val_loss: 142.9322
Epoch 19/50
60000/60000 [==============================] - 19s 319us/step - loss: 140.2851 - val_loss: 141.4447
Epoch 20/50
60000/60000 [==============================] - 19s 321us/step - loss: 140.0469 - val_loss: 142.3035
Epoch 21/50
60000/60000 [==============================] - 19s 321us/step - loss: 139.8623 - val_loss: 141.8777
Epoch 22/50
60000/60000 [==============================] - 19s 315us/step - loss: 139.6149 - val_loss: 143.5447
Epoch 23/50
60000/60000 [==============================] - 19s 313us/step - loss: 139.4788 - val_loss: 141.6344
Epoch 24/50
60000/60000 [==============================] - 19s 318us/step - loss: 139.2456 - val_loss: 141.2650
Epoch 25/50
60000/60000 [==============================] - 19s 325us/step - loss: 139.0720 - val_loss: 142.3290
Epoch 26/50
60000/60000 [==============================] - 19s 324us/step - loss: 138.9251 - val_loss: 142.9038
Epoch 27/50
60000/60000 [==============================] - 19s 323us/step - loss: 138.7974 - val_loss: 141.5682
Epoch 28/50
60000/60000 [==============================] - 19s 324us/step - loss: 138.6049 - val_loss: 142.6869
Epoch 29/50
60000/60000 [==============================] - 19s 322us/step - loss: 138.5010 - val_loss: 141.6946
Epoch 30/50
60000/60000 [==============================] - 19s 323us/step - loss: 138.3845 - val_loss: 142.1279
Epoch 31/50
60000/60000 [==============================] - 19s 324us/step - loss: 138.1869 - val_loss: 141.3341
Epoch 32/50
60000/60000 [==============================] - 19s 322us/step - loss: 138.0990 - val_loss: 142.1770
Epoch 33/50
60000/60000 [==============================] - 19s 323us/step - loss: 137.9107 - val_loss: 142.6077
Epoch 34/50
60000/60000 [==============================] - 19s 323us/step - loss: 137.8544 - val_loss: 141.0046
Epoch 35/50
60000/60000 [==============================] - 19s 323us/step - loss: 137.7781 - val_loss: 141.2873
Epoch 36/50
60000/60000 [==============================] - 20s 326us/step - loss: 137.6061 - val_loss: 141.2535
Epoch 37/50
60000/60000 [==============================] - 19s 316us/step - loss: 137.5119 - val_loss: 142.1105
Epoch 38/50
60000/60000 [==============================] - 19s 312us/step - loss: 137.4402 - val_loss: 141.7549
Epoch 39/50
60000/60000 [==============================] - 19s 311us/step - loss: 137.2969 - val_loss: 142.1960
Epoch 40/50
60000/60000 [==============================] - 19s 310us/step - loss: 137.2415 - val_loss: 142.8475
Epoch 41/50
60000/60000 [==============================] - 19s 310us/step - loss: 137.0451 - val_loss: 142.7166
Epoch 42/50
60000/60000 [==============================] - 19s 311us/step - loss: 137.0122 - val_loss: 141.9489
Epoch 43/50
60000/60000 [==============================] - 19s 309us/step - loss: 136.8721 - val_loss: 141.7996
Epoch 44/50
60000/60000 [==============================] - 19s 311us/step - loss: 136.8495 - val_loss: 141.9924
Epoch 45/50
60000/60000 [==============================] - 19s 311us/step - loss: 136.8023 - val_loss: 142.5958
Epoch 46/50
60000/60000 [==============================] - 19s 311us/step - loss: 136.6451 - val_loss: 141.2919
Epoch 47/50
60000/60000 [==============================] - 18s 307us/step - loss: 136.6442 - val_loss: 141.6761
Epoch 48/50
60000/60000 [==============================] - 19s 310us/step - loss: 136.5052 - val_loss: 141.7434
Epoch 49/50
60000/60000 [==============================] - 19s 311us/step - loss: 136.4566 - val_loss: 142.9795
Epoch 50/50
60000/60000 [==============================] - 19s 313us/step - loss: 136.2598 - val_loss: 141.7574

Model Evaluation

In [25]:
def golden_size(width):
    """Return a (width, height) matplotlib figure size in the golden ratio.

    height = 2 * width / (1 + sqrt(5)) = width / phi, where phi is the
    golden ratio. Replaces the original lambda-assignment (PEP 8 E731)
    with an equivalent named function; call sites are unchanged.
    """
    return (width, 2. * width / (1 + np.sqrt(5)))

NELBO

In [26]:
# Plot training/validation NELBO (the compiled loss) per epoch.
fig, ax = plt.subplots(figsize=golden_size(6))

hist_df = pd.DataFrame(hist.history)
hist_df.plot(ax=ax)

ax.set_ylabel('NELBO')
ax.set_xlabel('# epochs')

# Exclude epoch 0 when choosing y-limits; its loss dwarfs the rest.
ax.set_ylim(.99*hist_df[1:].values.min(), 
            1.1*hist_df[1:].values.max())

plt.savefig('../../images/vae/nelbo_conv.svg', format='svg')
plt.show()

Observed space manifold

In [27]:
# display a 2D manifold of the images
n = 15  # figure with 15x15 images
digit_size = 28
quantile_min = 0.01
quantile_max = 0.99

# Linearly spaced quantiles on (0, 1) are pushed through the Gaussian
# inverse CDF (ppf) to obtain latent coordinates, since the latent
# prior is a standard normal. z2 runs high-to-low so the rendered grid
# reads top-down.
z1 = norm.ppf(np.linspace(quantile_min, quantile_max, n))
z2 = norm.ppf(np.linspace(quantile_max, quantile_min, n))
# (n, n, 2) grid of latent coordinates; z_grid[i, j] == (z1[j], z2[i]).
z_grid = np.stack(np.meshgrid(z1, z2), axis=-1)
In [28]:
# Decode every latent grid point and arrange the outputs as an
# (n, n) grid of (img_rows, img_cols) images.
x_pred_grid = decoder.predict(z_grid.reshape(n*n, latent_dim)) \
                     .reshape(n, n, img_rows, img_cols)
In [35]:
# Tile the decoded grid into one big image; ticks show the latent
# coordinate at the center of each tile.
fig, ax = plt.subplots(figsize=(5, 5))

ax.imshow(np.block(list(map(list, x_pred_grid))), cmap='gray')

ax.set_xticks(np.arange(0, n*img_rows, img_rows) + .5 * img_rows)
ax.set_xticklabels(map('{:.2f}'.format, z1), rotation=90)

ax.set_yticks(np.arange(0, n*img_cols, img_cols) + .5 * img_cols)
ax.set_yticklabels(map('{:.2f}'.format, z2))

ax.set_xlabel('$z_1$')
ax.set_ylabel('$z_2$')

plt.savefig('../../images/vae/result_manifold_conv.png')
plt.show()
In [30]:
# Deterministic test-time encoder: map x to the posterior mean mu
# instead of drawing a stochastic sample.
test_encoder = Model(x, z_mu)

# display a 2D plot of the digit classes in the latent space
z_test = test_encoder.predict(x_test, batch_size=batch_size)
In [36]:
# Scatter the encoded test set in latent space, colored by digit label.
fig, ax = plt.subplots(figsize=(6, 5))

cbar = ax.scatter(z_test[:, 0], z_test[:, 1], c=y_test,
                   alpha=.4, s=3**2, cmap='viridis')
fig.colorbar(cbar, ax=ax)

# Axis limits at twice the manifold-grid quantile range.
ax.set_xlim(2.*norm.ppf((quantile_min, quantile_max)))
ax.set_ylim(2.*norm.ppf((quantile_min, quantile_max)))

ax.set_xlabel('$z_1$')
ax.set_ylabel('$z_2$')

plt.savefig('../../images/vae/result_latent_space_conv.png')
plt.show()
In [37]:
# Side-by-side summary figure: decoded manifold (left) and latent
# embedding of the test set (right).
fig, (ax1, ax2) = plt.subplots(ncols=2, figsize=(12, 4.5))

ax1.imshow(np.block(list(map(list, x_pred_grid))), cmap='gray')

ax1.set_xticks(np.arange(0, n*img_rows, img_rows) + .5 * img_rows)
ax1.set_xticklabels(map('{:.2f}'.format, z1), rotation=90)

ax1.set_yticks(np.arange(0, n*img_cols, img_cols) + .5 * img_cols)
ax1.set_yticklabels(map('{:.2f}'.format, z2))

# Bug fix: these two calls previously targeted the stale `ax` from an
# earlier figure, leaving this panel unlabeled; they belong on ax1.
ax1.set_xlabel('$z_1$')
ax1.set_ylabel('$z_2$')

cbar = ax2.scatter(z_test[:, 0], z_test[:, 1], c=y_test,
                   alpha=.4, s=3**2, cmap='viridis')
fig.colorbar(cbar, ax=ax2)

ax2.set_xlim(norm.ppf((quantile_min, quantile_max)))
ax2.set_ylim(norm.ppf((quantile_min, quantile_max)))

ax2.set_xlabel('$z_1$')
ax2.set_ylabel('$z_2$')

plt.savefig('../../images/vae/result_combined_conv.png')
plt.show()