# Source code for the ``actorcritic.nn`` module.

"""Contains utilities that concern TensorFlow and neural networks."""


import numpy as np
import tensorflow as tf


def fully_connected_params(input_size, output_size, dtype, weights_initializer, bias_initializer):
    """Creates the weight and bias variables of a fully connected layer.

    The resulting tuple can be passed directly to :meth:`fully_connected`.

    Args:
        input_size (:obj:`int`): Size of the layer's input.
        output_size (:obj:`int`): Number of units in the layer.
        dtype (:obj:`tf.DType`): Data type of the created variables.
        weights_initializer (:obj:`tf.keras.initializers.Initializer`): Initializer for the weights.
        bias_initializer (:obj:`tf.keras.initializers.Initializer`): Initializer for the bias.

    Returns:
        :obj:`tuple` of (:obj:`tf.Variable`, :obj:`tf.Variable`): A tuple of (`weights`, `bias`).
    """
    # Variables are created via `tf.get_variable` so they participate in
    # variable scoping and reuse.
    weights = tf.get_variable(
        'weights', shape=(input_size, output_size), dtype=dtype, initializer=weights_initializer)
    bias = tf.get_variable(
        'bias', shape=(output_size,), dtype=dtype, initializer=bias_initializer)
    return weights, bias
# noinspection PyShadowingBuiltins
def fully_connected(input, params):
    """Creates a fully connected layer with bias (without activation).

    Args:
        input (:obj:`tf.Tensor`): The input values.
        params (:obj:`tuple` of (:obj:`tf.Variable`, :obj:`tf.Variable`)): A tuple of
            (`weights`, `bias`). Probably obtained by :meth:`fully_connected_params`.

    Returns:
        :obj:`tf.Tensor`: The output values.
    """
    weights, bias = params
    # `tf.matmul` is exactly what the `@` operator dispatches to for tensors.
    return tf.matmul(input, weights) + bias
def conv2d_params(num_input_channels, num_filters, filter_extent, dtype, weights_initializer,
                  bias_initializer):
    """Creates the weight and bias variables of a 2D convolutional layer.

    The resulting tuple can be passed directly to :meth:`conv2d`.

    Args:
        num_input_channels (:obj:`int`): Number of channels of the layer's input.
        num_filters (:obj:`int`): Number of filters to apply (output channels).
        filter_extent (:obj:`int`): Spatial extent of each (square) filter; determines the
            shape of the weights.
        dtype (:obj:`tf.DType`): Data type of the created variables.
        weights_initializer (:obj:`tf.keras.initializers.Initializer`): Initializer for the weights.
        bias_initializer (:obj:`tf.keras.initializers.Initializer`): Initializer for the bias.

    Returns:
        :obj:`tuple` of (:obj:`tf.Variable`, :obj:`tf.Variable`): A tuple of (`weights`, `bias`).
    """
    # Weight layout follows `tf.nn.conv2d`: [height, width, in_channels, out_channels].
    filter_shape = (filter_extent, filter_extent, num_input_channels, num_filters)
    weights = tf.get_variable(
        'weights', shape=filter_shape, dtype=dtype, initializer=weights_initializer)
    bias = tf.get_variable(
        'bias', shape=(num_filters,), dtype=dtype, initializer=bias_initializer)
    return weights, bias
# noinspection PyShadowingBuiltins
def conv2d(input, params, stride, padding):
    """Creates a 2D convolutional layer with bias (without activation).

    Args:
        input (:obj:`tf.Tensor`): The input values.
        params (:obj:`tuple` of (:obj:`tf.Variable`, :obj:`tf.Variable`)): A tuple of
            (`weights`, `bias`). Probably obtained by :meth:`conv2d_params`.
        stride (:obj:`int`): The stride of the convolution, applied in both spatial
            dimensions.
        padding (:obj:`string`): The padding of the convolution. One of `'VALID'`, `'SAME'`.

    Returns:
        :obj:`tf.Tensor`: The output values.
    """
    weights, bias = params
    # Batch and channel dimensions are never strided, hence the leading/trailing 1.
    conv = tf.nn.conv2d(input, weights, (1, stride, stride, 1), padding, data_format='NHWC')
    return conv + bias
# noinspection PyShadowingBuiltins
def flatten(input):
    """Flattens inputs but keeps the batch size.

    Args:
        input (:obj:`tf.Tensor`): Input values of shape [`batch_size`, `d_1`, ..., `d_n`].

    Returns:
        :obj:`tf.Tensor`: Flattened input values of shape [`batch_size`, `d1` * ... * `d_n`].
    """
    # Product of all non-batch dimensions; the batch dimension is inferred via -1.
    static_shape = input.get_shape().as_list()
    flat_size = np.prod(static_shape[1:])
    return tf.reshape(input, (-1, flat_size))
def linear_decay(start_value, end_value, step, total_steps, name=None):
    """Applies linear decay from `start_value` to `end_value`.

    The value at a specific step is computed as::

        value = (start_value - end_value) * (1 - step / total_steps) + end_value

    Args:
        start_value (:obj:`tf.Tensor` or :obj:`float`): The start value.
        end_value (:obj:`tf.Tensor` or :obj:`float`): The end value.
        step (:obj:`tf.Tensor`): The current step (e.g. `global_step`).
        total_steps (:obj:`int` or :obj:`tf.Tensor`): The total number of steps.
            Steps needed to reach `end_value`.
        name (:obj:`string`, optional): A name for the operation.

    Returns:
        :obj:`tf.Tensor`: The linearly decayed value.
    """
    # Linear decay is polynomial decay with power 1 and no cycling.
    return tf.train.polynomial_decay(
        learning_rate=start_value, global_step=step, decay_steps=total_steps,
        end_learning_rate=end_value, power=1., cycle=False, name=name)
class ClipGlobalNormOptimizer(tf.train.Optimizer):
    """A :obj:`tf.train.Optimizer` that wraps another optimizer and minimizes the loss
    by clipping gradients using the global norm (:meth:`tf.clip_by_global_norm`).

    See Also:
        * https://www.tensorflow.org/versions/r1.2/api_docs/python/tf/clip_by_global_norm
        * https://stackoverflow.com/questions/36498127/how-to-apply-gradient-clipping-in-tensorflow/43486487#43486487
    """

    def __init__(self, optimizer, clip_norm, name=None):
        """
        Args:
            optimizer (:obj:`tf.train.Optimizer`): An optimizer whose gradients will be
                clipped.
            clip_norm (:obj:`tf.Tensor` or :obj:`float`): Value for the global norm
                (passed to :meth:`tf.clip_by_global_norm`).
            name (:obj:`string`, optional): A name for this optimizer.
        """
        if name is None:
            name = 'ClipGlobalNormOptimizer'
        super().__init__(use_locking=False, name=name)
        self._optimizer = optimizer
        self._clip_norm = clip_norm

    def apply_gradients(self, grads_and_vars, global_step=None, name=None):
        # Clip all gradients jointly by their global norm, then hand the clipped
        # (gradient, variable) pairs to the wrapped optimizer.
        gradients, variables = zip(*grads_and_vars)
        clipped_gradients, _ = tf.clip_by_global_norm(gradients, self._clip_norm)
        return self._optimizer.apply_gradients(
            list(zip(clipped_gradients, variables)), global_step=global_step, name=name)