variational_autoencoder_bak.ipynb (Source)

Preamble

In [1]:
%matplotlib notebook
In [2]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm

from keras import backend as K

from keras.layers import (Input, InputLayer, Dense, Lambda, Layer, 
                          Add, Multiply)
from keras.models import Model, Sequential
from keras.datasets import mnist
Using TensorFlow backend.
In [3]:
import pandas as pd

from matplotlib.ticker import FormatStrFormatter
from keras.utils.vis_utils import model_to_dot, plot_model
from IPython.display import SVG

Notebook Configuration

In [4]:
np.set_printoptions(precision=2,
                    edgeitems=3,
                    linewidth=80,
                    suppress=True)
In [5]:
'TensorFlow version: ' + K.tf.__version__
Out[5]:
'TensorFlow version: 1.4.0'
Constant definitions
In [6]:
batch_size = 100
original_dim = 784
latent_dim = 2
intermediate_dim = 256
epochs = 50
epsilon_std = 1.0

Model specification

Encoder

Figure 1: Reparameterization using Keras Layers

In [7]:
z_mu = Input(shape=(latent_dim,), name='mu')
z_sigma = Input(shape=(latent_dim,), name='sigma')
eps = Input(shape=(latent_dim,), name='eps')
z_eps = Multiply(name='z_eps')([z_sigma, eps])
z = Add(name='z')([z_mu, z_eps])
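The three layers above implement the reparameterization trick: instead of sampling z directly from the approximate posterior, auxiliary noise eps is drawn from a standard Gaussian and combined deterministically with the distribution parameters,

$$ z = \mu + \sigma \odot \epsilon, \qquad \epsilon \sim \mathcal{N}(0, I), $$

so that z is a differentiable function of mu and sigma and gradients can propagate back through the encoder.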
In [8]:
m = Model(inputs=[eps, z_mu, z_sigma], outputs=z)
In [9]:
SVG(model_to_dot(m, show_shapes=False).create(prog='dot', format='svg'))
Out[9]:
[Model diagram — reparameterization: inputs mu, sigma, eps; z_eps = Multiply(sigma, eps); z = Add(mu, z_eps)]
In [10]:
plot_model(
    model=m, show_shapes=False,
    to_file='../../images/vae/reparameterization.svg'
)
Figure 2: Encoder architecture
In [12]:
x = Input(shape=(original_dim,), name='x')
h = Dense(intermediate_dim, activation='relu', name='encoder_hidden')(x)
z_mu = Dense(latent_dim, name='mu')(h)
z_log_var = Dense(latent_dim, name='log_var')(h)
z_sigma = Lambda(lambda t: K.exp(.5*t), name='sigma')(z_log_var)
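Note that the encoder's second head predicts the log-variance rather than sigma itself, so the Dense output is unconstrained in sign; the sigma Lambda layer then recovers the standard deviation as

$$ \sigma = \exp\!\left(\tfrac{1}{2} \log \sigma^2\right). $$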
In [13]:
eps = Input(shape=(latent_dim,), name='eps')
z_eps = Multiply(name='z_eps')([z_sigma, eps])
z = Add(name='z')([z_mu, z_eps])
In [14]:
encoder = Model(inputs=[x, eps], outputs=z)
In [15]:
SVG(model_to_dot(encoder, show_shapes=False)
    .create(prog='dot', format='svg'))
Out[15]:
[Model diagram — encoder: x → encoder_hidden (Dense) → mu, log_var (Dense); sigma = Lambda(log_var); z_eps = Multiply(sigma, eps); z = Add(mu, z_eps)]
In [16]:
plot_model(
    model=encoder, show_shapes=False,
    to_file='../../images/vae/encoder.svg'
)
In [17]:
plot_model(
    model=encoder, show_shapes=True,
    to_file='../../images/vae/encoder_shapes.svg'
)
Figure 3: Full Encoder architecture with auxiliary layers
In [18]:
class KLDivergenceLayer(Layer):

    """ Identity transform layer that adds KL divergence
    to the final model loss.
    """

    def __init__(self, *args, **kwargs):
        self.is_placeholder = True
        super(KLDivergenceLayer, self).__init__(*args, **kwargs)

    def call(self, inputs):

        mu, log_var = inputs

        kl_batch = - .5 * K.sum(1 + log_var -
                                K.square(mu) -
                                K.exp(log_var), axis=-1)

        self.add_loss(K.mean(kl_batch), inputs=inputs)

        return inputs
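The kl_batch expression is the closed-form KL divergence between the diagonal Gaussian posterior $q(z \mid x) = \mathcal{N}(\mu, \mathrm{diag}(\sigma^2))$ and the standard Gaussian prior $p(z) = \mathcal{N}(0, I)$, summed over the latent dimensions:

$$ \mathrm{KL}\big(q(z \mid x) \,\|\, p(z)\big) = -\frac{1}{2} \sum_{j=1}^{\mathrm{latent\_dim}} \left( 1 + \log \sigma_j^2 - \mu_j^2 - \sigma_j^2 \right). $$

Since the layer registers this term with add_loss, it gets added to whatever loss is passed to compile, while the layer itself acts as the identity on its inputs.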
In [19]:
z_mu, z_log_var = KLDivergenceLayer(name='kl')([z_mu, z_log_var])
z_sigma = Lambda(lambda t: K.exp(.5*t), name='sigma')(z_log_var)
In [20]:
eps = Input(shape=(latent_dim,), name='eps')
z_eps = Multiply(name='sigma_eps')([z_sigma, eps])
z = Add(name='z')([z_mu, z_eps])
In [21]:
encoder = Model(inputs=[x, eps], outputs=z)
In [22]:
SVG(model_to_dot(encoder, show_shapes=False)
    .create(prog='dot', format='svg'))
Out[22]:
[Model diagram — full encoder: x → encoder_hidden (Dense) → mu, log_var (Dense) → kl (KLDivergenceLayer); sigma = Lambda(log_var); sigma_eps = Multiply(sigma, eps); z = Add(mu, sigma_eps)]
In [23]:
plot_model(
    model=encoder, show_shapes=False,
    to_file='../../images/vae/encoder_full.svg'
)
In [24]:
plot_model(
    model=encoder, show_shapes=True,
    to_file='../../images/vae/encoder_full_shapes.svg'
)

Decoder

In [25]:
decoder = Sequential([
    Dense(intermediate_dim, input_dim=latent_dim, 
          activation='relu', name='decoder_hidden'),
    Dense(original_dim, activation='sigmoid', name='x_mean')
], name='decoder')
In [26]:
# equivalent to the above; writing the InputLayer explicitly
# to set the layer name in the architecture diagram
decoder = Sequential([
    InputLayer(input_shape=(latent_dim,), name='z'),
    Dense(intermediate_dim, input_shape=(latent_dim,),
          activation='relu', name='decoder_hidden'),
    Dense(original_dim, activation='sigmoid', name='x_mean')
], name='decoder')
In [27]:
SVG(model_to_dot(decoder, show_shapes=False)
    .create(prog='dot', format='svg'))
Out[27]:
[Model diagram — decoder: z (InputLayer) → decoder_hidden (Dense) → x_mean (Dense)]
In [28]:
plot_model(
    model=decoder, show_shapes=False,
    to_file='../../images/vae/decoder.svg'
)
In [29]:
plot_model(
    model=decoder, show_shapes=True,
    to_file='../../images/vae/decoder_shapes.svg'
)
In [30]:
x_decoded = decoder(z)
In [31]:
# again, equivalent to the above; written out in the functional
# API for the final end-to-end VAE architecture visualization.
# Otherwise, the Sequential decoder gets collapsed into a
# single layer in the diagram
h_decoded = Dense(intermediate_dim, 
                  activation='relu', 
                  name='decoder_hidden')(z)
x_decoded = Dense(original_dim, 
                  activation='sigmoid', 
                  name='x_mean')(h_decoded)
In [32]:
vae = Model(inputs=[x, eps], outputs=x_decoded)
In [33]:
SVG(model_to_dot(vae, show_shapes=True)
    .create(prog='dot', format='svg'))
Out[33]:
[Model diagram with shapes — full VAE: x (None, 784) → encoder_hidden (None, 256) → mu, log_var (None, 2) → kl (KLDivergenceLayer) → sigma (Lambda) → sigma_eps (Multiply, with eps (None, 2)) → z (Add, (None, 2)) → decoder_hidden (None, 256) → x_mean (None, 784)]
In [34]:
plot_model(
    model=vae, show_shapes=False,
    to_file='../../images/vae/vae_full.svg'
)
In [35]:
plot_model(
    model=vae, show_shapes=True,
    to_file='../../images/vae/vae_full_shapes.svg'
)

Putting it all together

In [36]:
x = Input(shape=(original_dim,))
h = Dense(intermediate_dim, activation='relu')(x)

z_mu = Dense(latent_dim)(h)
z_log_var = Dense(latent_dim)(h)

z_mu, z_log_var = KLDivergenceLayer()([z_mu, z_log_var])
z_sigma = Lambda(lambda t: K.exp(.5*t))(z_log_var)

# eps is passed as a tensor drawn from the prior inside the graph,
# so Keras feeds it automatically; only x is supplied at fit time
eps = Input(tensor=K.random_normal(stddev=epsilon_std,
                                   shape=(K.shape(x)[0], latent_dim)))
z_eps = Multiply()([z_sigma, eps])
z = Add()([z_mu, z_eps])

decoder = Sequential([
    Dense(intermediate_dim, input_dim=latent_dim, activation='relu'),
    Dense(original_dim, activation='sigmoid')
])

x_mean = decoder(z)
In [37]:
def nll(y_true, y_pred):
    """ Negative log likelihood. """

    # keras.losses.binary_crossentropy gives the mean
    # over the last axis, whereas we require the sum
    return K.sum(K.binary_crossentropy(y_true, y_pred), axis=-1)
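With a sigmoid output layer, the decoder parameterizes each pixel as an independent Bernoulli, so the per-example reconstruction term is the binary cross-entropy summed over all 784 pixels:

$$ -\log p(x \mid z) = -\sum_{d=1}^{784} \left[ x_d \log \hat{x}_d + (1 - x_d) \log(1 - \hat{x}_d) \right]. $$

Together with the KL term contributed by KLDivergenceLayer, minimizing this loss maximizes the ELBO.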
In [38]:
vae = Model(inputs=[x, eps], outputs=x_mean)
vae.compile(optimizer='rmsprop', loss=nll)
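For comparison, here is a minimal sketch of the "monolithic" loss one could write instead of relying on KLDivergenceLayer. It assumes the tensors z_mu and z_log_var from the cell above are still in scope and uses a Python closure over graph tensors, which works with Keras 2 on the TensorFlow backend but is less modular and leaves the KL term out of the architecture diagrams:

def vae_loss_monolithic(y_true, y_pred):
    """ Negative ELBO written as a single loss function (sketch). """
    # reconstruction term: summed binary cross-entropy, as in nll above
    rec = K.sum(K.binary_crossentropy(y_true, y_pred), axis=-1)
    # KL term, recomputed from the encoder tensors captured by closure
    kl = - .5 * K.sum(1 + z_log_var -
                      K.square(z_mu) -
                      K.exp(z_log_var), axis=-1)
    return rec + kl

# hypothetical usage (only if KLDivergenceLayer were removed from the
# graph; otherwise the KL term would be counted twice):
# vae.compile(optimizer='rmsprop', loss=vae_loss_monolithic)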

Model fitting

In [39]:
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train.reshape(-1, original_dim) / 255.
x_test = x_test.reshape(-1, original_dim) / 255.
In [40]:
hist = vae.fit(
    x_train,
    x_train,
    shuffle=True,
    epochs=epochs,
    batch_size=batch_size,
    validation_data=(x_test, x_test)
)
Train on 60000 samples, validate on 10000 samples
Epoch 1/50
60000/60000 [==============================] - 3s 51us/step - loss: 190.3018 - val_loss: 171.8964
Epoch 2/50
60000/60000 [==============================] - 2s 37us/step - loss: 169.7807 - val_loss: 168.2034
Epoch 3/50
60000/60000 [==============================] - 3s 43us/step - loss: 166.4752 - val_loss: 165.1833
Epoch 4/50
60000/60000 [==============================] - 2s 37us/step - loss: 164.3577 - val_loss: 164.0862
Epoch 5/50
60000/60000 [==============================] - 2s 37us/step - loss: 162.8561 - val_loss: 162.7869
Epoch 6/50
60000/60000 [==============================] - 2s 38us/step - loss: 161.6364 - val_loss: 161.6133
Epoch 7/50
60000/60000 [==============================] - 2s 34us/step - loss: 160.5345 - val_loss: 160.3500
Epoch 8/50
60000/60000 [==============================] - 2s 35us/step - loss: 159.4476 - val_loss: 159.1735
Epoch 9/50
60000/60000 [==============================] - 2s 37us/step - loss: 158.4652 - val_loss: 158.3976
Epoch 10/50
60000/60000 [==============================] - 2s 35us/step - loss: 157.6303 - val_loss: 157.7174
Epoch 11/50
60000/60000 [==============================] - 2s 35us/step - loss: 156.9311 - val_loss: 157.1340
Epoch 12/50
60000/60000 [==============================] - 2s 35us/step - loss: 156.3077 - val_loss: 156.5334
Epoch 13/50
60000/60000 [==============================] - 2s 35us/step - loss: 155.7424 - val_loss: 156.0899
Epoch 14/50
60000/60000 [==============================] - 2s 35us/step - loss: 155.2605 - val_loss: 155.8402
Epoch 15/50
60000/60000 [==============================] - 2s 36us/step - loss: 154.8583 - val_loss: 155.7435
Epoch 16/50
60000/60000 [==============================] - 2s 38us/step - loss: 154.4981 - val_loss: 155.0737
Epoch 17/50
60000/60000 [==============================] - 2s 35us/step - loss: 154.1406 - val_loss: 154.8999
Epoch 18/50
60000/60000 [==============================] - 2s 37us/step - loss: 153.8294 - val_loss: 154.4372
Epoch 19/50
60000/60000 [==============================] - 2s 36us/step - loss: 153.5345 - val_loss: 154.7647
Epoch 20/50
60000/60000 [==============================] - 2s 36us/step - loss: 153.2651 - val_loss: 154.3442
Epoch 21/50
60000/60000 [==============================] - 2s 36us/step - loss: 152.9985 - val_loss: 154.0450
Epoch 22/50
60000/60000 [==============================] - 2s 35us/step - loss: 152.7820 - val_loss: 153.8597
Epoch 23/50
60000/60000 [==============================] - 2s 35us/step - loss: 152.5669 - val_loss: 153.3388
Epoch 24/50
60000/60000 [==============================] - 2s 35us/step - loss: 152.3693 - val_loss: 153.3492
Epoch 25/50
60000/60000 [==============================] - 2s 35us/step - loss: 152.1779 - val_loss: 154.4324
Epoch 26/50
60000/60000 [==============================] - 2s 35us/step - loss: 152.0114 - val_loss: 153.2328
Epoch 27/50
60000/60000 [==============================] - 2s 35us/step - loss: 151.8423 - val_loss: 153.2307
Epoch 28/50
60000/60000 [==============================] - 2s 34us/step - loss: 151.6743 - val_loss: 152.8942
Epoch 29/50
60000/60000 [==============================] - 2s 34us/step - loss: 151.5279 - val_loss: 153.3617
Epoch 30/50
60000/60000 [==============================] - 2s 35us/step - loss: 151.3726 - val_loss: 152.9144
Epoch 31/50
60000/60000 [==============================] - 2s 34us/step - loss: 151.2452 - val_loss: 153.0004
Epoch 32/50
60000/60000 [==============================] - 2s 34us/step - loss: 151.0975 - val_loss: 152.7193
Epoch 33/50
60000/60000 [==============================] - 2s 34us/step - loss: 150.9517 - val_loss: 152.4762
Epoch 34/50
60000/60000 [==============================] - 2s 34us/step - loss: 150.8338 - val_loss: 152.5368
Epoch 35/50
60000/60000 [==============================] - 2s 35us/step - loss: 150.7152 - val_loss: 152.2429
Epoch 36/50
60000/60000 [==============================] - 2s 34us/step - loss: 150.5991 - val_loss: 152.3446
Epoch 37/50
60000/60000 [==============================] - 2s 34us/step - loss: 150.4907 - val_loss: 152.1743
Epoch 38/50
60000/60000 [==============================] - 2s 34us/step - loss: 150.3642 - val_loss: 151.9590
Epoch 39/50
60000/60000 [==============================] - 2s 35us/step - loss: 150.2468 - val_loss: 152.5479
Epoch 40/50
60000/60000 [==============================] - 2s 35us/step - loss: 150.1564 - val_loss: 151.9927
Epoch 41/50
60000/60000 [==============================] - 2s 36us/step - loss: 150.0573 - val_loss: 151.9203
Epoch 42/50
60000/60000 [==============================] - 2s 35us/step - loss: 149.9820 - val_loss: 151.9821
Epoch 43/50
60000/60000 [==============================] - 2s 35us/step - loss: 149.8897 - val_loss: 152.4142
Epoch 44/50
60000/60000 [==============================] - 2s 35us/step - loss: 149.7837 - val_loss: 151.7826
Epoch 45/50
60000/60000 [==============================] - 2s 35us/step - loss: 149.6695 - val_loss: 151.7681
Epoch 46/50
60000/60000 [==============================] - 2s 35us/step - loss: 149.5891 - val_loss: 151.4949
Epoch 47/50
60000/60000 [==============================] - 2s 36us/step - loss: 149.5253 - val_loss: 151.7220
Epoch 48/50
60000/60000 [==============================] - 2s 36us/step - loss: 149.4207 - val_loss: 152.3142
Epoch 49/50
60000/60000 [==============================] - 2s 36us/step - loss: 149.3375 - val_loss: 151.6437
Epoch 50/50
60000/60000 [==============================] - 2s 36us/step - loss: 149.2422 - val_loss: 151.3537
In [41]:
golden_size = lambda width: (width, 2. * width / (1 + np.sqrt(5)))
In [42]:
fig, ax = plt.subplots(figsize=golden_size(6))

pd.DataFrame(hist.history).plot(ax=ax)

ax.set_ylabel('NELBO')
ax.set_xlabel('# epochs')

plt.savefig('../../images/vae/nelbo.svg', format='svg')
plt.show()
In [43]:
# deterministic test time encoder
test_encoder = Model(x, z_mu)

# display a 2D plot of the digit classes in the latent space
z_test = test_encoder.predict(x_test, batch_size=batch_size)
In [44]:
# display a 2D manifold of the digits
n = 15  # figure with 15x15 digits
digit_size = 28

# linearly spaced coordinates on the unit square were transformed
# through the inverse CDF (ppf) of the Gaussian to produce values
# of the latent variables z, since the prior of the latent space
# is Gaussian
u_grid = np.dstack(np.meshgrid(np.linspace(0.05, 0.95, n),
                               np.linspace(0.05, 0.95, n)))
z_grid = norm.ppf(u_grid)
x_decoded = decoder.predict(z_grid.reshape(n*n, 2))
x_decoded = x_decoded.reshape(n, n, digit_size, digit_size)
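As an aside, one can also decode latent vectors drawn directly from the prior rather than from the deterministic inverse-CDF grid; a minimal sketch (the name random_digits is purely illustrative):

# hypothetical: decode a batch of z's sampled from the standard Gaussian prior
z_samples = np.random.standard_normal(size=(n * n, latent_dim))
random_digits = decoder.predict(z_samples)
random_digits = random_digits.reshape(n, n, digit_size, digit_size)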
In [45]:
fig, ax = plt.subplots(figsize=(6, 6))

ax.imshow(np.block(list(map(list, x_decoded))), cmap='gray')

ax.set_xticks(np.arange(0, n*digit_size, digit_size) + .5 * digit_size)
ax.set_xticklabels(map('{:.2f}'.format, norm.ppf(np.linspace(0.05, 0.95, n))),
                    rotation=90)

ax.set_yticks(np.arange(0, n*digit_size, digit_size) + .5 * digit_size)
ax.set_yticklabels(map('{:.2f}'.format, -norm.ppf(np.linspace(0.05, 0.95, n))))

ax.set_xlabel('$z_1$')
ax.set_ylabel('$z_2$')

plt.savefig('../../images/vae/result_manifold.png')
plt.show()
In [46]:
fig, ax = plt.subplots(figsize=(6, 5))

cbar = ax.scatter(z_test[:, 0], z_test[:, 1], c=y_test,
                   alpha=.4, s=3**2, cmap='viridis')
fig.colorbar(cbar, ax=ax)

ax.set_xlim(-4.5, 4.5)
ax.set_ylim(-4.5, 4.5)

ax.set_xlabel('$z_1$')
ax.set_ylabel('$z_2$')

plt.savefig('../../images/vae/result_latent_space.png')
plt.show()
In [47]:
fig, (ax1, ax2) = plt.subplots(ncols=2, figsize=(12, 4.5))

ax1.imshow(np.block(list(map(list, x_decoded))), cmap='gray')

ax1.set_xticks(np.arange(0, n*digit_size, digit_size) + .5 * digit_size)
ax1.set_xticklabels(map('{:.2f}'.format, norm.ppf(np.linspace(0.05, 0.95, n))),
                    rotation=90)

ax1.set_yticks(np.arange(0, n*digit_size, digit_size) + .5 * digit_size)
ax1.set_yticklabels(map('{:.2f}'.format, -norm.ppf(np.linspace(0.05, 0.95, n))))

ax1.set_xlabel('$z_1$')
ax1.set_ylabel('$z_2$')

cbar = ax2.scatter(z_test[:, 0], z_test[:, 1], c=y_test,
                   alpha=.4, s=3**2, cmap='viridis')
fig.colorbar(cbar, ax=ax2)

ax2.set_xlim(-4.5, 4.5)
ax2.set_ylim(-4.5, 4.5)

ax2.set_xlabel('$z_1$')
ax2.set_ylabel('$z_2$')

plt.savefig('../../images/vae/result_combined.png')
plt.show()