[mlpack-git] master: Use the new optimizer interface. (e03cc28)
gitdub at big.cc.gt.atl.ga.us
Tue Jun 16 14:50:46 EDT 2015
Repository : https://github.com/mlpack/mlpack
On branch : master
Link : https://github.com/mlpack/mlpack/compare/9264f7544f7c4d93ff735f00f35b0f5287abf59d...7df836c2f5a2287cda82801ca20f4b4b410cf4e1
>---------------------------------------------------------------
commit e03cc2862d684a29086e9d60aebd3cbe3fa571aa
Author: Marcus Edel <marcus.edel at fu-berlin.de>
Date: Tue Jun 16 14:11:31 2015 +0200
Use the new optimizer interface.
>---------------------------------------------------------------
e03cc2862d684a29086e9d60aebd3cbe3fa571aa
src/mlpack/methods/ann/cnn.hpp | 118 +---------------------------------------
src/mlpack/methods/ann/ffnn.hpp | 71 +-----------------------
src/mlpack/methods/ann/rnn.hpp | 23 +-------
3 files changed, 9 insertions(+), 203 deletions(-)
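The hunks below replace the networks' manual gradient bookkeeping (a
per-connection gradient store indexed by gradientNum) with calls into each
connection's optimizer. The new call sites imply a three-method interface:
Update() accumulates the current gradient, Optimize() applies a step, and
Reset() clears the accumulated state. Here is a minimal sketch inferred from
those call sites only; the class name, constructor, and plain gradient-descent
step are illustrative assumptions, not mlpack's actual optimizer
implementation (note the accessor really is spelled Optimzer() in this
revision):

    // Hypothetical sketch; only the three method names come from the diff.
    template<typename ConnectionType, typename DataType>
    class OptimizerSketch
    {
     public:
      OptimizerSketch(ConnectionType& connection, const double stepSize = 0.01) :
          connection(connection), stepSize(stepSize)
      {
        gradient.zeros(connection.Weights().n_rows, connection.Weights().n_cols);
      }

      // Accumulate the connection's current gradient (replaces the removed
      // gradients[gradientNum++] += gradient bookkeeping).
      void Update()
      {
        DataType currentGradient;
        connection.Gradient(currentGradient);
        gradient += currentGradient;
      }

      // Take a plain gradient-descent step (replaces the removed
      // UpdateWeights(weights, gradient, trainError) calls).
      void Optimize() { connection.Weights() -= stepSize * gradient; }

      // Clear the accumulated gradient (replaces gradients[...].zeros()).
      void Reset() { gradient.zeros(); }

     private:
      ConnectionType& connection;
      const double stepSize;
      DataType gradient;
    };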
diff --git a/src/mlpack/methods/ann/cnn.hpp b/src/mlpack/methods/ann/cnn.hpp
index e80ae2f..03dea65 100644
--- a/src/mlpack/methods/ann/cnn.hpp
+++ b/src/mlpack/methods/ann/cnn.hpp
@@ -78,11 +78,6 @@ class CNN
template <typename ErrorType>
void FeedBackward(const ErrorType& error)
{
- // Initialize the gradient storage only once.
- if (!gradients.size())
- InitLayer(network);
-
- gradientNum = 0;
LayerBackward(network, error);
UpdateGradients(network);
}
@@ -93,7 +88,6 @@ class CNN
*/
void ApplyGradients()
{
- gradientNum = 0;
ApplyGradients(network);
// Reset the overall error.
@@ -373,10 +367,7 @@ class CNN
if (!ConnectionTraits<typename std::remove_reference<decltype(
std::get<I>(t))>::type>::IsPoolingConnection)
{
- DataType gradient;
- std::get<I>(t).Gradient(gradient);
-
- gradients[gradientNum++] += gradient;
+ std::get<I>(t).Optimzer().Update();
}
Gradients<I + 1, Tp...>(t);
@@ -404,48 +395,6 @@ class CNN
}
/**
- * Helper function to update the weights using the gradients from the
- * gradient store.
- *
- * enable_if (SFINAE) is used to select between two template overloads of
- * the get function - one for when I is equal to the size of the tuple of
- * connections, and one for the general case which peels off the first type
- * and recurses, as usual with variadic function templates.
- */
- template<size_t I = 0, typename eT, typename... Tp>
- void UpdateWeights(arma::Mat<eT>& weights, std::tuple<Tp...>& t)
- {
- std::get<I>(t).Optimzer().UpdateWeights(weights,
- gradients[gradientNum].slice(0), trainError);
- }
-
- template<size_t I = 0, typename eT, typename... Tp>
- void UpdateWeights(arma::Cube<eT>& weights, std::tuple<Tp...>& t)
- {
- if (gradientNum == std::get<I>(t).InputLayer().OutputMaps() != 1)
- {
- for (size_t i = 0, g = 0;
- i < std::get<I>(t).OutputLayer().OutputMaps(); i++)
- {
- for (size_t j = i; j < weights.n_slices;
- j+= std::get<I>(t).OutputLayer().OutputMaps(), g++)
- {
- std::get<I>(t).Optimzer().UpdateWeights(weights.slice(j),
- gradients[gradientNum].slice(g), trainError);
- }
- }
- }
- else
- {
- for (size_t i = 0; i < weights.n_slices; i++)
- {
- std::get<I>(t).Optimzer().UpdateWeights(weights.slice(i),
- gradients[gradientNum].slice(i), trainError);
- }
- }
- }
-
- /**
* Update the weights using the gradients from the gradient store.
*
* enable_if (SFINAE) is used to iterate through the network connections.
@@ -460,17 +409,11 @@ class CNN
typename std::enable_if<I < sizeof...(Tp), void>::type
Apply(std::tuple<Tp...>& t)
{
- // Take a mean gradient step over the number of inputs.
- if (seqNum > 1)
- gradients[gradientNum] /= seqNum;
-
if (!ConnectionTraits<typename std::remove_reference<decltype(
std::get<I>(t))>::type>::IsPoolingConnection)
{
- UpdateWeights<I>(std::get<I>(t).Weights(), t);
-
- // Reset the gradient storage.
- gradients[gradientNum++].zeros();
+ std::get<I>(t).Optimzer().Optimize();
+ std::get<I>(t).Optimzer().Reset();
}
Apply<I + 1, Tp...>(t);
@@ -497,55 +440,6 @@ class CNN
InitLayer<I + 1, Tp...>(t);
}
- /**
- * Iterate through all connections and build the gradient storage.
- *
- * enable_if (SFINAE) is used to select between two template overloads of
- * the get function - one for when I is equal to the size of the tuple of
- * connections, and one for the general case which peels off the first type
- * and recurses, as usual with variadic function templates.
- */
- template<size_t I = 0, typename... Tp>
- typename std::enable_if<I == sizeof...(Tp), void>::type
- Layer(std::tuple<Tp...>& /* unused */) { }
-
- template<size_t I = 0, typename... Tp>
- typename std::enable_if<I < sizeof...(Tp), void>::type
- Layer(std::tuple<Tp...>& t)
- {
- if (!ConnectionTraits<typename std::remove_reference<decltype(
- std::get<I>(t))>::type>::IsPoolingConnection)
- {
- gradients.push_back(new DataType(std::get<I>(t).Weights().n_rows,
- std::get<I>(t).Weights().n_cols,
- ElementCount(std::get<I>(t).Weights()), arma::fill::zeros));
- }
-
- Layer<I + 1, Tp...>(t);
- }
-
- /*
- * Get the number of elements.
- *
- * @param data The reference data.
- */
- template<typename eT>
- size_t ElementCount(const arma::Mat<eT>& /* unused */) const
- {
- return 1;
- }
-
- /*
- * Get the number of elements.
- *
- * @param data The reference data.
- */
- template<typename eT>
- size_t ElementCount(const arma::Cube<eT>& data) const
- {
- return data.n_slices;
- }
-
//! The connection modules used to build the network.
ConnectionTypes network;
@@ -555,12 +449,6 @@ class CNN
//! The current training error of the network.
double trainError;
- //! The gradient storage we are using to perform the feed backward pass.
- boost::ptr_vector<DataType> gradients;
-
- //! The index of the currently active gradient.
- size_t gradientNum;
-
//! The number of the current input sequence.
size_t seqNum;
}; // class CNN
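The removed comments above repeatedly describe the compile-time iteration
idiom these classes are built on. As a self-contained illustration of that
enable_if (SFINAE) pattern, with a trivial payload standing in for the
network connections:

    #include <cstddef>
    #include <iostream>
    #include <tuple>
    #include <type_traits>

    // Base case: I has reached the tuple size, so the recursion stops.
    template<std::size_t I = 0, typename... Tp>
    typename std::enable_if<I == sizeof...(Tp), void>::type
    PrintAll(std::tuple<Tp...>& /* unused */) { }

    // General case: handle element I, then peel it off and recurse on I + 1.
    template<std::size_t I = 0, typename... Tp>
    typename std::enable_if<I < sizeof...(Tp), void>::type
    PrintAll(std::tuple<Tp...>& t)
    {
      std::cout << std::get<I>(t) << '\n';
      PrintAll<I + 1, Tp...>(t);
    }

    int main()
    {
      std::tuple<int, double, const char*> t(1, 2.5, "three");
      PrintAll(t);  // prints 1, 2.5, three; one element per overload call
    }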
diff --git a/src/mlpack/methods/ann/ffnn.hpp b/src/mlpack/methods/ann/ffnn.hpp
index 9711078..20a7dea 100644
--- a/src/mlpack/methods/ann/ffnn.hpp
+++ b/src/mlpack/methods/ann/ffnn.hpp
@@ -78,11 +78,6 @@ class FFNN
template <typename VecType>
void FeedBackward(const VecType& error)
{
- // Initialize the gradient storage only once.
- if (!gradients.size())
- InitLayer(network);
-
- gradientNum = 0;
LayerBackward(network, error);
UpdateGradients(network);
}
@@ -93,7 +88,6 @@ class FFNN
*/
void ApplyGradients()
{
- gradientNum = 0;
ApplyGradients(network);
// Reset the overall error.
@@ -366,10 +360,7 @@ class FFNN
typename std::enable_if<I < sizeof...(Tp), void>::type
Gradients(std::tuple<Tp...>& t)
{
- MatType gradient;
- std::get<I>(t).Gradient(gradient);
- gradients[gradientNum++] += gradient;
-
+ std::get<I>(t).Optimzer().Update();
Gradients<I + 1, Tp...>(t);
}
@@ -409,62 +400,12 @@ class FFNN
typename std::enable_if<I < sizeof...(Tp), void>::type
Apply(std::tuple<Tp...>& t)
{
- // Take a mean gradient step over the number of inputs.
- if (seqNum > 1)
- gradients[gradientNum] /= seqNum;
-
- std::get<I>(t).Optimzer().UpdateWeights(std::get<I>(t).Weights(),
- gradients[gradientNum], trainError);
-
- // Reset the gradient storage.
- gradients[gradientNum++].zeros();
+ std::get<I>(t).Optimzer().Optimize();
+ std::get<I>(t).Optimzer().Reset();
Apply<I + 1, Tp...>(t);
}
- /**
- * Helper function to iterate through all connection modules and to build
- * gradient storage.
- *
- * enable_if (SFINAE) is used to select between two template overloads of
- * the get function - one for when I is equal to the size of the tuple of
- * connections, and one for the general case which peels off the first type
- * and recurses, as usual with variadic function templates.
- */
- template<size_t I = 0, typename... Tp>
- typename std::enable_if<I == sizeof...(Tp), void>::type
- InitLayer(std::tuple<Tp...>& /* unused */) { }
-
- template<size_t I = 0, typename... Tp>
- typename std::enable_if<I < sizeof...(Tp), void>::type
- InitLayer(std::tuple<Tp...>& t)
- {
- Layer(std::get<I>(t));
- InitLayer<I + 1, Tp...>(t);
- }
-
- /**
- * Iterate through all connections and build the gradient storage.
- *
- * enable_if (SFINAE) is used to select between two template overloads of
- * the get function - one for when I is equal to the size of the tuple of
- * connections, and one for the general case which peels off the first type
- * and recurses, as usual with variadic function templates.
- */
- template<size_t I = 0, typename... Tp>
- typename std::enable_if<I == sizeof...(Tp), void>::type
- Layer(std::tuple<Tp...>& /* unused */) { }
-
- template<size_t I = 0, typename... Tp>
- typename std::enable_if<I < sizeof...(Tp), void>::type
- Layer(std::tuple<Tp...>& t)
- {
- gradients.push_back(new MatType(std::get<I>(t).Weights().n_rows,
- std::get<I>(t).Weights().n_cols, arma::fill::zeros));
-
- Layer<I + 1, Tp...>(t);
- }
-
//! The connection modules used to build the network.
ConnectionTypes network;
@@ -474,12 +415,6 @@ class FFNN
//! The current training error of the network.
double trainError;
- //! The gradient storage we are using to perform the feed backward pass.
- boost::ptr_vector<MatType> gradients;
-
- //! The index of the currently active gradient.
- size_t gradientNum;
-
//! The number of the current input sequence.
size_t seqNum;
}; // class FFNN
diff --git a/src/mlpack/methods/ann/rnn.hpp b/src/mlpack/methods/ann/rnn.hpp
index 102fcc5..6240f43 100644
--- a/src/mlpack/methods/ann/rnn.hpp
+++ b/src/mlpack/methods/ann/rnn.hpp
@@ -149,7 +149,6 @@ class RNN
// Iterate through the input sequence and perform the feed backward pass.
for (seqNum = seqLen - 1; seqNum >= 0; seqNum--)
{
- gradientNum = 0;
deltaNum = 0;
// Perform the backward pass and update the gradient storage.
@@ -173,7 +172,6 @@ class RNN
*/
void ApplyGradients()
{
- gradientNum = 0;
ApplyGradients(network);
// Reset the overall error.
@@ -548,10 +546,7 @@ class RNN
typename std::enable_if<I < sizeof...(Tp), void>::type
Gradients(std::tuple<Tp...>& t)
{
- MatType gradient;
- std::get<I>(t).Gradient(gradient);
- gradients[gradientNum++] += gradient;
-
+ std::get<I>(t).Optimzer().Update();
Gradients<I + 1, Tp...>(t);
}
@@ -593,11 +588,8 @@ class RNN
typename std::enable_if<I < sizeof...(Tp), void>::type
Apply(std::tuple<Tp...>& t)
{
- std::get<I>(t).Optimzer().UpdateWeights(std::get<I>(t).Weights(),
- gradients[gradientNum], trainError);
-
- // Reset the gradient storage.
- gradients[gradientNum++].zeros();
+ std::get<I>(t).Optimzer().Optimize();
+ std::get<I>(t).Optimzer().Reset();
Apply<I + 1, Tp...>(t);
}
@@ -678,9 +670,6 @@ class RNN
activations.push_back(new MatType(
std::get<I>(t).InputLayer().OutputSize(), input.n_elem));
- gradients.push_back(new MatType(std::get<I>(t).Weights().n_rows,
- std::get<I>(t).Weights().n_cols, arma::fill::zeros));
-
Layer<I + 1, VecType, Tp...>(t, input, layer);
}
@@ -788,18 +777,12 @@ class RNN
//! The activation storage we are using to perform the feed backward pass.
boost::ptr_vector<MatType> activations;
- //! The gradient storage we are using to perform the feed backward pass.
- boost::ptr_vector<MatType> gradients;
-
//! The index of the current sequence number.
size_t seqNum;
//! The index of the currently active layer.
size_t layerNum;
- //! The index of the currently active gradient.
- size_t gradientNum;
-
//! The index of the currently active delta.
size_t deltaNum;
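Taken together, the change is the same in all three headers: each
connection's optimizer now owns its gradient state, so the networks no
longer carry a gradients store. As a hypothetical end-to-end usage example
built on the OptimizerSketch above (DummyConnection is invented for
illustration and is not one of mlpack's connection classes):

    #include <armadillo>
    #include <iostream>

    // Invented connection exposing just enough surface for the sketch.
    struct DummyConnection
    {
      arma::mat& Weights() { return weights; }

      // Pretend every backward pass produced a gradient of all ones.
      void Gradient(arma::mat& gradient)
      {
        gradient.ones(weights.n_rows, weights.n_cols);
      }

      arma::mat weights = arma::ones<arma::mat>(2, 2);
    };

    int main()
    {
      DummyConnection connection;
      OptimizerSketch<DummyConnection, arma::mat> optimizer(connection);

      optimizer.Update();    // accumulate this pass's gradient
      optimizer.Optimize();  // weights -= stepSize * accumulated gradient
      optimizer.Reset();     // clear state before the next sequence

      // Every weight moved from 1.0 to 0.99 with the default step size.
      std::cout << connection.Weights() << std::endl;
    }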