"""Contains utilities that concern TensorFlow and neural networks."""
import numpy as np
import tensorflow as tf
[docs]def fully_connected_params(input_size, output_size, dtype, weights_initializer, bias_initializer):
"""Creates weights and bias variables for a fully connected layer. These can be used in :meth:`fully_connected`.
Args:
input_size (:obj:`int`):
The size of the input layer.
output_size (:obj:`int`):
The output size. Number of units.
dtype (:obj:`tf.DType`):
The data type of the variables.
weights_initializer (:obj:`tf.keras.initializers.Initializer`):
An initializer for the weights.
bias_initializer (:obj:`tf.keras.initializers.Initializer`):
An initializer for the bias.
Returns:
:obj:`tuple` of (:obj:`tf.Variable`, :obj:`tf.Variable`):
A tuple of (`weights`, `bias`).
"""
weights = tf.get_variable('weights', (input_size, output_size), dtype, weights_initializer)
bias = tf.get_variable('bias', (output_size,), dtype, bias_initializer)
return weights, bias
# noinspection PyShadowingBuiltins
[docs]def fully_connected(input, params):
"""Creates a fully connected layer with bias (without activation).
Args:
input (:obj:`tf.Tensor`):
The input values.
params (:obj:`tuple` of (:obj:`tf.Variable`, :obj:`tf.Variable`)):
A tuple of (`weights`, `bias`). Probably obtained by :meth:`fully_connected_params`.
Returns:
:obj:`tf.Tensor`:
The output values.
"""
weights, bias = params
return input @ weights + bias
[docs]def conv2d_params(num_input_channels, num_filters, filter_extent, dtype, weights_initializer, bias_initializer):
"""Creates weights and bias variables for a 2D convolutional layer. These can be used in :meth:`conv2d`.
Args:
num_input_channels (:obj:`int`):
The size of the input layer.
num_filters (:obj:`int`):
The output size. Number of filters to apply.
filter_extent (:obj:`int`):
The spatial extent of the filters. Determines the size of the weights.
dtype (:obj:`tf.DType`):
The data type of the variables.
weights_initializer (:obj:`tf.keras.initializers.Initializer`):
An initializer for the weights.
bias_initializer (:obj:`tf.keras.initializers.Initializer`):
An initializer for the bias.
Returns:
:obj:`tuple` of (:obj:`tf.Variable`, :obj:`tf.Variable`):
A tuple of (`weights`, `bias`).
"""
weights = tf.get_variable(
'weights', (filter_extent, filter_extent, num_input_channels, num_filters), dtype, weights_initializer)
bias = tf.get_variable('bias', (num_filters,), dtype, bias_initializer)
return weights, bias
# noinspection PyShadowingBuiltins
[docs]def conv2d(input, params, stride, padding):
"""Creates a 2D convolutional layer with bias (without activation).
Args:
input (:obj:`tf.Tensor`):
The input values.
params (:obj:`tuple` of (:obj:`tf.Variable`, :obj:`tf.Variable`)):
A tuple of (`weights`, `bias`). Probably obtained by :meth:`conv2d_params`.
stride (:obj:`int`):
The stride of the convolution.
padding (:obj:`string`):
The padding of the convolution. One of `'VALID'`, `'SAME'`.
Returns:
:obj:`tf.Tensor`:
The output values.
"""
strides = (1, stride, stride, 1)
weights, bias = params
return tf.nn.conv2d(input, weights, strides, padding, data_format='NHWC') + bias
# noinspection PyShadowingBuiltins
[docs]def flatten(input):
"""Flattens inputs but keeps the batch size.
Args:
input (:obj:`tf.Tensor`):
Input values of shape [`batch_size`, `d_1`, ..., `d_n`].
Returns:
:obj:`tf.Tensor`:
Flattened input values of shape [`batch_size`, `d1` * ... * `d_n`].
"""
flat_size = np.prod(input.get_shape().as_list()[1:])
return tf.reshape(input, [-1, flat_size])
[docs]def linear_decay(start_value, end_value, step, total_steps, name=None):
"""Applies linear decay from `start_value` to `end_value`. The value at a specific step is computed as::
value = (start_value - end_value) * (1 - step / total_steps) + end_value
Args:
start_value (:obj:`tf.Tensor` or :obj:`float`):
The start value.
end_value (:obj:`tf.Tensor` or :obj:`float`):
The end value.
step (:obj:`tf.Tensor`):
The current step (e.g. global_step).
total_step (:obj:`int` or :obj:`tf.Tensor`):
The total number of steps. Steps to reach end_value.
name (:obj:`string`, optional):
A name for the operation.
Returns:
:obj:`tf.Tensor`:
The linear decayed value.
"""
return tf.train.polynomial_decay(
learning_rate=start_value, global_step=step, decay_steps=total_steps, end_learning_rate=end_value,
power=1., cycle=False, name=name)
[docs]class ClipGlobalNormOptimizer(tf.train.Optimizer):
"""A :obj:`tf.train.Optimizer` that wraps around another optimizer and minimizes the loss by clipping gradients
using the global norm (:meth:`tf.clip_by_global_norm`).
See Also:
* https://www.tensorflow.org/versions/r1.2/api_docs/python/tf/clip_by_global_norm
* https://stackoverflow.com/questions/36498127/how-to-apply-gradient-clipping-in-tensorflow/43486487#43486487
"""
[docs] def __init__(self, optimizer, clip_norm, name=None):
"""
Args:
optimizer (:obj:`tf.train.Optimizer`):
An optimizer whose gradients will be clipped.
clip_norm (:obj:`tf.Tensor` or :obj:`float`):
Value for the global norm (passed to :meth:`tf.clip_by_global_norm`).
name (:obj:`string`, optional):
A name for this optimizer.
"""
super().__init__(use_locking=False, name='ClipGlobalNormOptimizer' if name is None else name)
self._optimizer = optimizer
self._clip_norm = clip_norm
[docs] def apply_gradients(self, grads_and_vars, global_step=None, name=None):
gradients, variables = zip(*grads_and_vars)
gradients, _ = tf.clip_by_global_norm(gradients, self._clip_norm)
optimize_op = self._optimizer.apply_gradients(zip(gradients, variables), global_step=global_step, name=name)
return optimize_op