# Source code for stylish.vgg

# :coding: utf-8

"""Training model computation module from a :term:`Vgg19` model.

The :term:`Vgg19` model pre-trained for image classification is used as a loss
network in order to define perceptual loss functions that measure perceptual
differences in content and style between images.

The loss network remains fixed during the training process.

.. seealso::

    Johnson et al. (2016). Perceptual losses for real-time style transfer and
    super-resolution. `CoRR, abs/1603.08155
    <https://arxiv.org/abs/1603.08155>`_.

.. seealso::

    Simonyan et al. (2014). Very Deep Convolutional Networks for
    Large-Scale Image Recognition. `CoRR, abs/1409.1556
    <https://arxiv.org/abs/1409.1556>`_.

    And the corresponding `Vgg19 pre-trained model
    <http://www.robots.ox.ac.uk/~vgg/research/very_deep/>`_ in the
    :term:`MatConvNet` data format.

"""

import tensorflow as tf
import numpy as np
import scipy.io

import stylish.logging


#: Mean pixel value (one entry per channel) from the pre-trained Vgg19 model,
#: stored with shape (1, 1, 1, 3).
VGG19_MEAN = np.array([[[[123.68, 116.779, 103.939]]]])

#: List of layers used to extract style features: the ReLU output of the first
#: convolution of each of the five blocks ("conv1_1/Relu" to "conv5_1/Relu").
STYLE_LAYERS = list(map("conv{}_1/Relu".format, range(1, 6)))

#: Layer used to extract the content features.
CONTENT_LAYER = "/".join(("conv4_2", "Relu"))


def extract_mapping(path):
    """Return weights and biases mapping read from :term:`Vgg19` model *path*.

    The mapping is returned in the form of::

        {
            "conv1_1": {
                "weight": numpy.ndarray([...]),
                "bias": numpy.ndarray([...])
            },
            "conv1_2": {
                "weight": numpy.ndarray([...]),
                "bias": numpy.ndarray([...])
            },
            ...
        }

    *path* should be the path to the :term:`Vgg19` pre-trained model in the
    :term:`MatConvNet` data format.

    .. seealso:: http://www.vlfeat.org/matconvnet/pretrained/

    Raise :exc:`RuntimeError` if the file cannot be loaded, if it does not
    have the expected structure, or if a layer found at an expected index
    does not carry the expected name.

    """
    logger = stylish.logging.Logger(__name__ + ".extract_mapping")

    # Convolution layers to extract, paired with the index at which each one
    # is expected in the "layers" array of the loaded file.
    expected_layers = (
        ("conv1_1", 0), ("conv1_2", 2),
        ("conv2_1", 5), ("conv2_2", 7),
        ("conv3_1", 10), ("conv3_2", 12), ("conv3_3", 14), ("conv3_4", 16),
        ("conv4_1", 19), ("conv4_2", 21), ("conv4_3", 23), ("conv4_4", 25),
        ("conv5_1", 28), ("conv5_2", 30), ("conv5_3", 32), ("conv5_4", 34),
    )

    mapping = {}

    # Any failure while reading the file (I/O error, missing key, unexpected
    # nesting, name mismatch) is reported as a single RuntimeError.
    try:
        records = scipy.io.loadmat(path)["layers"][0]

        for name, index in expected_layers:
            record = records[index][0][0]
            stored_name = record[0][0]
            parameters = record[2]

            if stored_name != name:
                raise RuntimeError(
                    "Layer index '{}' should be called '{}'".format(index, name)
                )

            mapping[name] = {
                "weight": parameters[0][0],
                "bias": parameters[0][1],
            }

    except Exception as error:
        raise RuntimeError("The VGG19 model is incorrect [{}]".format(error))

    logger.info(
        "Extract weights and biases from Vgg19 pre-trained model: {}"
        .format(path)
    )

    return mapping
def network(vgg_mapping, input_node):
    """Compute and return network from *vgg_mapping* with an *input_node*.

    The graph chains five blocks. Each block is a run of convolution layers
    (see :func:`conv2d_layer`) named "conv<block>_<position>" followed by one
    max pooling layer (see :func:`pool_layer`) named "max_pool<block>". The
    node returned is the output of the last pooling layer ("max_pool5").

    *vgg_mapping* should gather all weight and bias matrices extracted from a
    pre-trained :term:`Vgg19` model (e.g. :func:`extract_mapping`).

    *input_node* should be a Tensor representing image data of undefined size
    with 3 channels (Red, Green and Blue). It will be the input of the graph
    model.

    .. note::

        The node is fed directly to :func:`tf.nn.conv2d`, which operates on a
        4-D input (a batch of images) rather than on a single 3-D image.

    """
    # Number of convolution layers in each of the five blocks, in order.
    convolutions_per_block = (2, 2, 4, 4, 4)

    node = input_node

    for block, count in enumerate(convolutions_per_block, 1):
        for position in range(1, count + 1):
            layer_name = "conv{}_{}".format(block, position)
            node = conv2d_layer(layer_name, vgg_mapping, node)

        node = pool_layer("max_pool{}".format(block), node)

    return node
def conv2d_layer(name, vgg_mapping, input_node):
    """Return 2D convolution layer named *name* applied to *input_node*.

    The layer returned contains, grouped under the name scope *name*:

    - A `2D convolution node
      <https://www.tensorflow.org/api_docs/python/tf/nn/conv2d>`_
      using the weights as a constant kernel, a stride of 1 and "SAME"
      padding.
    - The addition of the biases, flattened to one dimension.
    - A `ReLU activation node
      <https://www.tensorflow.org/api_docs/python/tf/nn/relu>`_ named "Relu".

    *name* should be the name of the convolution layer.

    *vgg_mapping* should gather all weight and bias matrices extracted from a
    pre-trained :term:`Vgg19` model (e.g. :func:`extract_mapping`).

    *input_node* should be a Tensor that will be set as the input of the
    convolution layer.

    Raise :exc:`KeyError` if the weight and bias matrices cannot be extracted
    from *vgg_mapping*.

    """
    logger = stylish.logging.Logger(__name__ + ".conv2d_layer")

    weight = vgg_mapping[name]["weight"]
    bias = vgg_mapping[name]["bias"]

    with tf.name_scope(name):
        # The kernel is given positionally on purpose: its keyword was renamed
        # from 'filter' to 'filters' between TensorFlow releases, whereas it
        # has always been the second positional argument.
        layer = tf.nn.conv2d(
            input_node,
            tf.constant(weight),
            strides=[1, 1, 1, 1],
            padding="SAME",
        )

        # Biases are flattened so that they broadcast over the last axis of
        # the convolution output.
        layer = layer + tf.constant(np.reshape(bias, bias.size))
        layer = tf.nn.relu(layer, name="Relu")

    logger.debug(
        "Conv-2D layer '{}' added with ReLU activation [shape: {}]"
        .format(name, layer.shape)
    )

    return layer
def pool_layer(name, input_node):
    """Return max pooling layer named *name*.

    The layer returned contains:

    - A `max pooling node
      <https://www.tensorflow.org/api_docs/python/tf/nn/max_pool>`_
      using a 2x2 window, a stride of 2 and "SAME" padding.

    *name* should be the name of the max pooling layer.

    *input_node* should be a Tensor that will be set as the input of the max
    pooling layer.

    """
    logger = stylish.logging.Logger(__name__ + ".pool_layer")

    # The same 2x2 window is used as the pooling size and as the stride.
    window = [1, 2, 2, 1]

    pooled = tf.nn.max_pool(
        input_node,
        ksize=window,
        strides=window,
        padding="SAME",
        name=name,
    )

    message = "Max Pool layer '{}' added [shape: {}]".format(name, pooled.shape)
    logger.debug(message)

    return pooled