[mlpack-git] master: Refactor dropout layer for new network API. (fd33623)

gitdub at big.cc.gt.atl.ga.us gitdub at big.cc.gt.atl.ga.us
Sat Aug 29 08:23:46 EDT 2015


Repository : https://github.com/mlpack/mlpack

On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/ea45ace1ff744390a4c35183528eda881eda5c61...fd336238de224ed72fc23b84e1e2f02ae3c879d6

>---------------------------------------------------------------

commit fd336238de224ed72fc23b84e1e2f02ae3c879d6
Author: Marcus Edel <marcus.edel at fu-berlin.de>
Date:   Fri Aug 28 15:48:47 2015 +0200

    Refactor dropout layer for new network API.

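For reference, a minimal sketch of how the refactored layer is exercised
under the new API. This is based only on the interface visible in the diff
below; the surrounding network plumbing is omitted, and the Deterministic()
modifier is assumed to follow the usual mlpack get/modify accessor pattern:

    // Hypothetical standalone usage; not part of the commit itself.
    DropoutLayer<> dropout(0.3, true); // ratio = 0.3, rescale = true.

    arma::mat input = arma::randu<arma::mat>(10, 5);
    arma::mat output, g;

    dropout.Deterministic() = false;  // Training mode: sample a new mask.
    dropout.Forward(input, output);   // output = input % mask * scale.

    // Backward propagates the error through the same mask; the first
    // argument is unused by the dropout layer.
    arma::mat gy = arma::ones<arma::mat>(10, 5);
    dropout.Backward(input, gy, g);   // g = gy % mask * scale.

    dropout.Deterministic() = true;   // Test mode: no mask; the output is
    dropout.Forward(input, output);   // rescaled by scale if rescale is set.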

>---------------------------------------------------------------

fd336238de224ed72fc23b84e1e2f02ae3c879d6
 src/mlpack/methods/ann/layer/bias_layer.hpp    |   4 +-
 src/mlpack/methods/ann/layer/conv_layer.hpp    |   2 +-
 src/mlpack/methods/ann/layer/dropout_layer.hpp | 255 ++++++-------------------
 3 files changed, 60 insertions(+), 201 deletions(-)

diff --git a/src/mlpack/methods/ann/layer/bias_layer.hpp b/src/mlpack/methods/ann/layer/bias_layer.hpp
index db91120..f693fea 100644
--- a/src/mlpack/methods/ann/layer/bias_layer.hpp
+++ b/src/mlpack/methods/ann/layer/bias_layer.hpp
@@ -126,7 +126,7 @@ class BiasLayer
    * @param g The calculated gradient.
    */
   template<typename eT, typename GradientDataType>
-  void Gradient(arma::Cube<eT>& d, GradientDataType& g)
+  void Gradient(const arma::Cube<eT>& d, GradientDataType& g)
   {
     g = arma::Mat<eT>(weights.n_rows, weights.n_cols);
     for (size_t s = 0; s < d.n_slices; s++)
@@ -142,7 +142,7 @@ class BiasLayer
    * @param g The calculated gradient.
    */
   template<typename eT, typename GradientDataType>
-  void Gradient(arma::Mat<eT>& d, GradientDataType& g)
+  void Gradient(const arma::Mat<eT>& d, GradientDataType& g)
   {
     g = d * bias;
   }
diff --git a/src/mlpack/methods/ann/layer/conv_layer.hpp b/src/mlpack/methods/ann/layer/conv_layer.hpp
index 7fac606..a6498c4 100644
--- a/src/mlpack/methods/ann/layer/conv_layer.hpp
+++ b/src/mlpack/methods/ann/layer/conv_layer.hpp
@@ -170,7 +170,7 @@ class ConvLayer
    * @param g The calculated gradient.
    */
   template<typename eT>
-  void Gradient(arma::Cube<eT>& d, arma::Cube<eT>& g)
+  void Gradient(const arma::Cube<eT>& d, arma::Cube<eT>& g)
   {
     g = arma::zeros<arma::Cube<eT> >(weights.n_rows, weights.n_cols,
         weights.n_slices);
diff --git a/src/mlpack/methods/ann/layer/dropout_layer.hpp b/src/mlpack/methods/ann/layer/dropout_layer.hpp
index a3be6b3..5b16436 100644
--- a/src/mlpack/methods/ann/layer/dropout_layer.hpp
+++ b/src/mlpack/methods/ann/layer/dropout_layer.hpp
@@ -3,7 +3,7 @@
  * @author Marcus Edel
  *
  * Definition of the DropoutLayer class, which implements a regularizer that
- * randomly sets units to zero. This prevents units from co-adapting too much.
+ * randomly sets units to zero. This prevents units from co-adapting.
  */
 #ifndef __MLPACK_METHODS_ANN_LAYER_DROPOUT_LAYER_HPP
 #define __MLPACK_METHODS_ANN_LAYER_DROPOUT_LAYER_HPP
@@ -37,87 +37,26 @@ namespace ann /** Artificial Neural Network. */ {
  * }
  * @endcode
  *
- * @tparam DataType Type of data (arma::colvec, arma::mat arma::sp_mat or
- *    arma::cube).
+ * @tparam InputDataType Type of the input data (arma::colvec, arma::mat,
+ *         arma::sp_mat or arma::cube).
+ * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat,
+ *         arma::sp_mat or arma::cube).
  */
 template <
-    typename DataType = arma::colvec
+    typename InputDataType = arma::mat,
+    typename OutputDataType = arma::mat
 >
 class DropoutLayer
 {
  public:
-  /**
-   * Create the DropoutLayer object using the specified parameter.
-   *
-   * @param layerSize The number of neurons.
-   * @param ratio The probability of setting a value to zero.
-   * @param rescale If true the input is rescaled when deterministic is False.
-   */
-  DropoutLayer(const size_t layerSize,
-               const double ratio = 0.5,
-               const bool rescale = true) :
-      inputActivations(arma::zeros<DataType>(layerSize)),
-      delta(arma::zeros<DataType>(layerSize)),
-      layerRows(layerSize),
-      layerCols(1),
-      layerSlices(1),
-      outputMaps(1),
-      ratio(ratio),
-      rescale(rescale)
-  {
-    // Nothing to do here.
-  }
 
   /**
-   * Create 2-dimensional DropoutLayer object using the specified rows and
-   * columns. In this case, DataType must be arma::mat or arma::sp_mat.
+   * Create the DropoutLayer object using the specified parameters.
    *
-   * @param layerRows The number of rows of neurons.
-   * @param layerCols The number of columns of neurons.
-   * @param ratio The probability of setting a value to zero.
-   * @param rescale If true the input is rescaled when deterministic is False.
+   * @param ratio The probability of setting a value to zero.
+   * @param rescale If true the input is rescaled when deterministic is False.
    */
-  DropoutLayer(const size_t layerRows,
-               const size_t layerCols,
-               const double ratio = 0.5,
+  DropoutLayer(const double ratio = 0.5,
                const bool rescale = true) :
-      inputActivations(arma::zeros<DataType>(layerRows, layerCols)),
-      delta(arma::zeros<DataType>(layerRows, layerCols)),
-      layerRows(layerRows),
-      layerCols(layerCols),
-      layerSlices(1),
-      outputMaps(1),
-      ratio(ratio),
-      rescale(rescale)
-  {
-    // Nothing to do here.
-  }
-
-  /**
-   * Create n-dimensional DropoutLayer object using the specified rows and
-   * columns and number of slices. In this case, DataType must be arma::cube.
-   *
-   * @param layerRows The number of rows of neurons.
-   * @param layerCols The number of columns of neurons.
-   * @param layerCols The number of slices of neurons.
-   * @param layerCols The number of output maps.
-   * @param ratio The probability of setting a value to zero.
-   * @param rescale If true the input is rescaled when deterministic is False.
-   */
-  DropoutLayer(const size_t layerRows,
-               const size_t layerCols,
-               const size_t layerSlices,
-               const size_t outputMaps = 1,
-               const double ratio = 0.5,
-               const bool rescale = true) :
-      inputActivations(arma::zeros<DataType>(layerRows, layerCols,
-          layerSlices * outputMaps)),
-      delta(arma::zeros<DataType>(layerRows, layerCols,
-          layerSlices * outputMaps)),
-      layerRows(layerRows),
-      layerCols(layerCols),
-      layerSlices(layerSlices),
-      outputMaps(outputMaps),
       ratio(ratio),
       rescale(rescale)
   {
@@ -127,147 +66,61 @@ class DropoutLayer
   /**
    * Ordinary feed forward pass of the dropout layer.
    *
-   * @param inputActivation Input data used for evaluating the dropout layer.
-   * @param outputActivation Data to store the resulting output activation.
+   * @param input Input data used for evaluating the specified function.
+   * @param output Resulting output activation.
    */
   template<typename eT>
-  void FeedForward(const arma::Mat<eT>& inputActivation,
-                   arma::Mat<eT>& outputActivation)
+  void Forward(const arma::Mat<eT>& input, arma::Mat<eT>& output)
   {
     // The dropout mask will not be multiplied in the deterministic mode
     // (during testing).
     if (deterministic)
     {
-      outputActivation = inputActivation;
+      output = input;
 
       if (rescale)
-        outputActivation *= scale;
+        output *= scale;
     }
     else
     {
       // Scale with input / (1 - ratio) and set values to zero with probability
       // ratio.
       scale = 1.0 / (1.0 - ratio);
-      mask = arma::randu<arma::Mat<eT> >(layerRows, layerCols);
+      mask = arma::randu<arma::Mat<eT> >(input.n_rows, input.n_cols);
       mask.transform( [&](double val) { return val > ratio; } );
-      outputActivation = inputActivation % mask * scale;
-    }
-  }
-
-  /**
-   * Ordinary feed forward pass of the dropout layer.
-   *
-   * @param inputActivation Input data used for evaluating the dropout layer.
-   * @param outputActivation Data to store the resulting output activation.
-   */
-  template<typename eT>
-  void FeedForward(const arma::Cube<eT>& inputActivation,
-                   arma::Cube<eT>& outputActivation)
-  {
-    // The dropout mask will not be multiplied in the deterministic mode
-    // (during testing).
-    if (deterministic)
-    {
-      outputActivation = inputActivation;
-
-      if (rescale)
-        outputActivation *= scale;
-    }
-    else
-    {
-      // Scale with input / (1 - ratio) and set values to zero with probability
-      // ratio.
-      scale = 1.0 / (1.0 - ratio);
-      mask = arma::randu<arma::Cube<eT> >(layerRows, layerCols,
-          layerSlices * outputMaps);
-      mask.transform( [&](double val) { return (val > ratio); } );
-      outputActivation = inputActivation % mask * scale;
+      output = input % mask * scale;
     }
   }
 
   /**
    * Ordinary feed backward pass of the dropout layer.
    *
-   * @param error The backpropagated error.
-   * @param delta The calculating delta using the delta from the previous layer.
-   */
-  void FeedBackward(const DataType& /* unused */,
-                    const DataType& error,
-                    DataType& delta)
-  {
-    delta = error % mask * scale;
-  }
-
-  /**
-   * Ordinary feed backward pass of the dropout layer.
-   *
-   * @param inputActivation Input data used to map the error from the previous
-   *    layer.
-   * @param error The backpropagated error.
-   * @param delta The calculating delta using the delta from the previous layer.
+   * @param input The propagated input activation.
+   * @param gy The backpropagated error.
+   * @param g The calculated gradient.
    */
   template<typename eT>
-  void FeedBackward(const arma::Cube<eT>& inputActivation,
-                    const arma::Mat<eT>& error,
-                    arma::Cube<eT>& delta)
+  void Backward(const arma::Mat<eT>& /* unused */,
+                const arma::Mat<eT>& gy,
+                arma::Mat<eT>& g)
   {
-    delta = delta % mask * scale;
-
-    // Generate a cube from the error matrix.
-    arma::Cube<eT> mappedError = arma::zeros<arma::cube>(inputActivation.n_rows,
-        inputActivation.n_cols, inputActivation.n_slices);
-
-    for (size_t s = 0, j = 0; s < mappedError.n_slices; s+= error.n_cols, j++)
-    {
-      for (size_t i = 0; i < error.n_cols; i++)
-      {
-        arma::Col<eT> temp = error.col(i).subvec(
-            j * inputActivation.n_rows * inputActivation.n_cols,
-            (j + 1) * inputActivation.n_rows * inputActivation.n_cols - 1);
-
-        mappedError.slice(s + i) = arma::Mat<eT>(temp.memptr(),
-            inputActivation.n_rows, inputActivation.n_cols);
-      }
-    }
-
-    delta = mappedError;
+    g = gy % mask * scale;
   }
 
-  //! Get the input activations.
-  DataType& InputActivation() const { return inputActivations; }
-  //! Modify the input activations.
-  DataType& InputActivation() { return inputActivations; }
+  //! Get the input parameter.
+  const InputDataType& InputParameter() const { return inputParameter; }
+  //! Modify the input parameter.
+  InputDataType& InputParameter() { return inputParameter; }
 
-  //! Get the detla.
-  DataType& Delta() const { return delta; }
-  //! Modify the delta.
-  DataType& Delta() { return delta; }
+  //! Get the output parameter.
+  const OutputDataType& OutputParameter() const { return outputParameter; }
+  //! Modify the output parameter.
+  OutputDataType& OutputParameter() { return outputParameter; }
 
-  //! Get input size.
-  size_t InputSize() const { return layerRows; }
+  //! Get the delta.
+  const OutputDataType& Delta() const { return delta; }
   //! Modify the delta.
-  size_t& InputSize() { return layerRows; }
-
-  //! Get output size.
-  size_t OutputSize() const { return layerRows; }
-  //! Modify the output size.
-  size_t& OutputSize() { return layerRows; }
-
-  //! Get the number of layer rows.
-  size_t LayerRows() const { return layerRows; }
-  //! Modify the number of layer rows.
-  size_t& LayerRows() { return layerRows; }
-
-  //! Get the number of layer columns.
-  size_t LayerCols() const { return layerCols; }
-  //! Modify the number of layer columns.
-  size_t& LayerCols() { return layerCols; }
-
-  //! Get the number of layer slices.
-  size_t LayerSlices() const { return layerSlices; }
-
-  //! Get the number of output maps.
-  size_t OutputMaps() const { return outputMaps; }
+  OutputDataType& Delta() { return delta; }
 
   //! The value of the deterministic parameter.
   bool Deterministic() const {return deterministic; }
@@ -285,26 +138,17 @@ class DropoutLayer
   bool& Rescale() {return rescale; }
 
  private:
-  //! Locally-stored input activation object.
-  DataType inputActivations;
-
   //! Locally-stored delta object.
-  DataType delta;
+  OutputDataType delta;
 
-  //! Locally-stored mast object.
-  DataType mask;
-
-  //! Locally-stored number of layer rows.
-  size_t layerRows;
+  //! Locally-stored input parameter object.
+  InputDataType inputParameter;
 
-  //! Locally-stored number of layer cols.
-  size_t layerCols;
+  //! Locally-stored output parameter object.
+  OutputDataType outputParameter;
 
-  //! Locally-stored number of layer slices.
-  size_t layerSlices;
-
-  //! Locally-stored number of output maps.
-  size_t outputMaps;
+  //! Locally-stored mask object.
+  OutputDataType mask;
 
   //! The probability of setting a value to zero.
   double ratio;
@@ -319,6 +163,21 @@ class DropoutLayer
   bool rescale;
 }; // class DropoutLayer
 
+//! Layer traits for the dropout layer.
+template<
+  typename InputDataType,
+  typename OutputDataType
+>
+class LayerTraits<DropoutLayer<InputDataType, OutputDataType> >
+{
+ public:
+  static const bool IsBinary = false;
+  static const bool IsOutputLayer = false;
+  static const bool IsBiasLayer = false;
+  static const bool IsLSTMLayer = false;
+  static const bool IsConnection = true;
+};
+
 }; // namespace ann
 }; // namespace mlpack
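
A quick sanity check on the inverted-dropout scaling used in Forward()
above: each unit survives with probability (1 - ratio), and survivors are
multiplied by scale = 1 / (1 - ratio), so the expected activation is
unchanged:

    E[output] = (1 - ratio) * scale * input
              = (1 - ratio) * input / (1 - ratio)
              = input

With ratio = 0.5, roughly half of the units are zeroed and the surviving
half is doubled.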
 
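The new LayerTraits specialization can also be verified at compile time. A
hypothetical check, assuming dropout_layer.hpp pulls in the LayerTraits
primary template:

    #include <mlpack/methods/ann/layer/dropout_layer.hpp>

    using namespace mlpack::ann;

    // DropoutLayer acts as a plain connection, not a bias/LSTM/output layer.
    static_assert(LayerTraits<DropoutLayer<> >::IsConnection,
        "DropoutLayer should be treated as a connection.");
    static_assert(!LayerTraits<DropoutLayer<> >::IsBiasLayer,
        "DropoutLayer is not a bias layer.");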



More information about the mlpack-git mailing list