[mlpack-git] master: Store the gradients to allow batch learning. (92d8ee1)

gitdub at big.cc.gt.atl.ga.us gitdub at big.cc.gt.atl.ga.us
Thu Jan 8 16:14:45 EST 2015


Repository : https://github.com/mlpack/mlpack

On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/373a41e102ebe036b7c0d0b9e541c7b0ee85095a...92d8ee1d3f8c0b573133553be93556be8b1de810

>---------------------------------------------------------------

commit 92d8ee1d3f8c0b573133553be93556be8b1de810
Author: Marcus Edel <marcus.edel at fu-berlin.de>
Date:   Thu Jan 8 22:14:39 2015 +0100

    Store the gradients to allow batch learning.


>---------------------------------------------------------------

92d8ee1d3f8c0b573133553be93556be8b1de810
 src/mlpack/methods/ann/ffnn.hpp | 154 ++++++++++++++++++++++++++++++++++------
 1 file changed, 133 insertions(+), 21 deletions(-)

diff --git a/src/mlpack/methods/ann/ffnn.hpp b/src/mlpack/methods/ann/ffnn.hpp
index 55f770c..6bb30c3 100644
--- a/src/mlpack/methods/ann/ffnn.hpp
+++ b/src/mlpack/methods/ann/ffnn.hpp
@@ -9,6 +9,8 @@
 
 #include <mlpack/core.hpp>
 
+#include <boost/ptr_container/ptr_vector.hpp>
+
 #include <mlpack/methods/ann/network_traits.hpp>
 #include <mlpack/methods/ann/performance_functions/cee_function.hpp>
 #include <mlpack/methods/ann/layer/layer_traits.hpp>
@@ -23,11 +25,13 @@ namespace ann /** Artificial Neural Network. */ {
  * be used to construct the network.
  * @tparam OutputLayerType The output layer type used to evaluate the network.
  * @tparam PerformanceFunction Performance strategy used to calculate the error.
+ * @tparam MatType Type of the gradients (arma::mat or arma::sp_mat).
  */
 template <
   typename ConnectionTypes,
   typename OutputLayerType,
-  class PerformanceFunction = CrossEntropyErrorFunction<>
+  class PerformanceFunction = CrossEntropyErrorFunction<>,
+  typename MatType = arma::mat
 >
 class FFNN
 {
@@ -40,7 +44,7 @@ class FFNN
      * @param outputLayer The output layer used to evaluate the network.
      */
     FFNN(const ConnectionTypes& network, OutputLayerType& outputLayer)
-        : network(network), outputLayer(outputLayer)
+        : network(network), outputLayer(outputLayer), err(0)
     {
       // Nothing to do here.
     }
@@ -76,7 +80,13 @@ class FFNN
     template <typename VecType>
     void FeedBackward(const VecType& error)
     {
+      // Initialize the gradient storage only once.
+      if (!gradients.size())
+        InitLayer(network);
+
+      gradientNum = 0;
       FeedBackward(network, error);
+      UpdateGradients(network);
     }
 
     /**
@@ -86,11 +96,18 @@ class FFNN
      * @tparam VecType Type of data (arma::colvec, arma::mat or arma::sp_mat).
      */
     template <typename VecType>
-    void ApplyGradients(const VecType& input)
+    void ApplyGradients(const VecType& /* unused */)
     {
-      ApplyGradients(network, input);
+      gradientNum = 0;
+      ApplyGradients(network);
+
+      // Reset the overall error.
+      err = 0;
     }
 
+    //! Get the training error of the network.
+    double Error() const { return trainError; }
+
   private:
     /**
      * Helper function to reset the network by zeroing the layer activations.
@@ -154,8 +171,11 @@ class FFNN
 
       // Measures the network's performance with the specified performance
       // function.
-      err = PerformanceFunction::error(std::get<0>(
+      err += PerformanceFunction::error(std::get<0>(
           std::get<I - 1>(t)).OutputLayer().InputActivation(), target);
+
+      // Update the final training error.
+      trainError = err;
     }
 
     template<size_t I = 0, typename VecType, typename... Tp>
@@ -262,30 +282,50 @@ class FFNN
     }
 
     /**
-     * Helper function to update the weights using the specified optimizer and
-     * the given input.
+     * Helper function to iterate through all connection modules and to update
+     * the gradient storage.
      *
      * enable_if (SFINAE) is used to select between two template overloads of
     * the get function - one for when I is equal to the size of the tuple of
      * connections, and one for the general case which peels off the first type
      * and recurses, as usual with variadic function templates.
      */
-    template<size_t I = 0, typename VecType, typename... Tp>
+    template<size_t I = 0, typename... Tp>
     typename std::enable_if<I == sizeof...(Tp), void>::type
-    ApplyGradients(std::tuple<Tp...>& /* unused */,
-                   const VecType& /* unused */) { }
+    UpdateGradients(std::tuple<Tp...>& /* unused */) { }
 
-    template<size_t I = 0, typename VecType, typename... Tp>
+    template<size_t I = 0, typename... Tp>
     typename std::enable_if<I < sizeof...(Tp), void>::type
-    ApplyGradients(std::tuple<Tp...>& t, const VecType& input)
+    UpdateGradients(std::tuple<Tp...>& t)
     {
       Gradients(std::get<I>(t));
-      ApplyGradients<I + 1, VecType, Tp...>(t, input);
+      UpdateGradients<I + 1, Tp...>(t);
+    }
+
+    /**
+     * Sum up all gradients and store the results in the gradient storage.
+     *
+     * enable_if (SFINAE) is used to iterate through the network connections.
+     * The general case peels off the first type and recurses, as usual with
+     * variadic function templates.
+     */
+    template<size_t I = 0, typename... Tp>
+    typename std::enable_if<I == sizeof...(Tp), void>::type
+    Gradients(std::tuple<Tp...>& /* unused */) { }
+
+    template<size_t I = 0, typename... Tp>
+    typename std::enable_if<I < sizeof...(Tp), void>::type
+    Gradients(std::tuple<Tp...>& t)
+    {
+      gradients[gradientNum++] += std::get<I>(t).OutputLayer().Delta() *
+          std::get<I>(t).InputLayer().InputActivation().t();
+
+      Gradients<I + 1, Tp...>(t);
     }
 
     /**
-     * Update the weights using the specified optimizer,the given input and the
-     * calculated delta.
+     * Helper function to update the weights using the specified optimizer and
+     * the given input.
      *
      * enable_if (SFINAE) is used to select between two template overloads of
     * the get function - one for when I is equal to the size of the tuple of
@@ -294,17 +334,81 @@ class FFNN
      */
     template<size_t I = 0, typename... Tp>
     typename std::enable_if<I == sizeof...(Tp), void>::type
-    Gradients(std::tuple<Tp...>& /* unused */) { }
+    ApplyGradients(std::tuple<Tp...>& /* unused */) { }
 
     template<size_t I = 0, typename... Tp>
     typename std::enable_if<I < sizeof...(Tp), void>::type
-    Gradients(std::tuple<Tp...>& t)
+    ApplyGradients(std::tuple<Tp...>& t)
+    {
+      Apply(std::get<I>(t));
+      ApplyGradients<I + 1, Tp...>(t);
+    }
+
+    /**
+     * Update the weights using the gradients from the gradient storage.
+     *
+     * enable_if (SFINAE) is used to iterate through the network connections.
+     * The general case peels off the first type and recurses, as usual with
+     * variadic function templates.
+     */
+    template<size_t I = 0, typename... Tp>
+    typename std::enable_if<I == sizeof...(Tp), void>::type
+    Apply(std::tuple<Tp...>& /* unused */) { }
+
+    template<size_t I = 0, typename... Tp>
+    typename std::enable_if<I < sizeof...(Tp), void>::type
+    Apply(std::tuple<Tp...>& t)
     {
       std::get<I>(t).Optimzer().UpdateWeights(std::get<I>(t).Weights(),
-          std::get<I>(t).OutputLayer().Delta() *
-          std::get<I>(t).InputLayer().InputActivation().t(), err);
+          gradients[gradientNum], err);
 
-      Gradients<I + 1, Tp...>(t);
+      // Reset the gradient storage.
+      gradients[gradientNum++].zeros();
+
+      Apply<I + 1, Tp...>(t);
+    }
+
+    /**
+     * Helper function to iterate through all connection modules and to build
+     * gradient storage.
+     *
+     * enable_if (SFINAE) is used to select between two template overloads of
+     * the get function - one for when I is equal to the size of the tuple of
+     * connections, and one for the general case which peels off the first type
+     * and recurses, as usual with variadic function templates.
+     */
+    template<size_t I = 0, typename... Tp>
+    typename std::enable_if<I == sizeof...(Tp), void>::type
+    InitLayer(std::tuple<Tp...>& /* unused */) { }
+
+    template<size_t I = 0, typename... Tp>
+    typename std::enable_if<I < sizeof...(Tp), void>::type
+    InitLayer(std::tuple<Tp...>& t)
+    {
+      Layer(std::get<I>(t));
+      InitLayer<I + 1, Tp...>(t);
+    }
+
+    /**
+     * Iterate through all connections and build the gradient storage.
+     *
+     * enable_if (SFINAE) is used to select between two template overloads of
+     * the get function - one for when I is equal to the size of the tuple of
+     * connections, and one for the general case which peels off the first type
+     * and recurses, as usual with variadic function templates.
+     */
+    template<size_t I = 0, typename... Tp>
+    typename std::enable_if<I == sizeof...(Tp), void>::type
+    Layer(std::tuple<Tp...>& /* unused */) { }
+
+    template<size_t I = 0, typename... Tp>
+    typename std::enable_if<I < sizeof...(Tp), void>::type
+    Layer(std::tuple<Tp...>& t)
+    {
+      gradients.push_back(new MatType(std::get<I>(t).Weights().n_rows,
+          std::get<I>(t).Weights().n_cols));
+
+      Layer<I + 1, Tp...>(t);
     }
 
     //! The connection modules used to build the network.
@@ -315,6 +419,15 @@ class FFNN
 
     //! The current error of the network.
     double err;
+
+    //! The current training error of the network.
+    double trainError;
+
+    //! The gradient storage we are using to perform the feed backward pass.
+    boost::ptr_vector<MatType> gradients;
+
+    //! The index of the currently active gradient.
+    size_t gradientNum;
 }; // class FFNN
 
 
@@ -336,4 +449,3 @@ class NetworkTraits<
 }; // namespace mlpack
 
 #endif
-
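
The UpdateGradients()/Gradients(), ApplyGradients()/Apply(), and
InitLayer()/Layer() helpers above all use the same C++11 idiom: a pair of
function templates whose enable_if conditions are mutually exclusive, so
recursion over a std::tuple of connection modules terminates at compile
time. A minimal standalone sketch of that idiom (toy code, not mlpack's
API) looks like this:

    #include <cstddef>
    #include <iostream>
    #include <tuple>
    #include <type_traits>

    // Base case: selected when I has reached the tuple size; ends the
    // recursion with an empty body.
    template<size_t I = 0, typename... Tp>
    typename std::enable_if<I == sizeof...(Tp), void>::type
    PrintAll(std::tuple<Tp...>& /* unused */) { }

    // General case: selected while I < sizeof...(Tp); handles element I,
    // then recurses on I + 1. Exactly one overload is viable for each I.
    template<size_t I = 0, typename... Tp>
    typename std::enable_if<I < sizeof...(Tp), void>::type
    PrintAll(std::tuple<Tp...>& t)
    {
      std::cout << std::get<I>(t) << std::endl;
      PrintAll<I + 1, Tp...>(t);
    }

    int main()
    {
      std::tuple<int, double, char> t(1, 2.5, 'x');
      PrintAll(t); // Prints 1, 2.5 and x, one per line.
      return 0;
    }

The commit applies the pattern three times: to build the gradient storage
(InitLayer/Layer), to accumulate gradients after each backward pass
(UpdateGradients/Gradients), and to apply and reset them
(ApplyGradients/Apply).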

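The batch-learning flow itself: FeedBackward() now accumulates
delta * activation^T into the per-connection gradient storage instead of
updating the weights immediately, and ApplyGradients() later hands the
summed gradient to the optimizer once per batch and zeros the storage. A
toy self-contained sketch of that accumulate-then-apply scheme (plain
vectors and a fixed SGD step standing in for Armadillo matrices and the
connection's Optimzer()):

    #include <cstddef>
    #include <vector>

    // Stand-in for one connection: weights plus a matching gradient
    // accumulator (the "gradient storage" of the commit).
    struct Connection
    {
      std::vector<double> weights;
      std::vector<double> gradient; // Summed over the current batch.

      explicit Connection(const size_t n) :
          weights(n, 0.0), gradient(n, 0.0) { }
    };

    // Backward pass for one sample: add the sample's gradient contribution
    // into the accumulator; the weights are left untouched.
    void Accumulate(Connection& c, const std::vector<double>& sampleGradient)
    {
      for (size_t i = 0; i < c.gradient.size(); ++i)
        c.gradient[i] += sampleGradient[i];
    }

    // Once per batch: take one step along the summed gradient, then reset
    // the accumulator (mirroring gradients[gradientNum].zeros() above).
    void ApplyGradients(Connection& c, const double stepSize)
    {
      for (size_t i = 0; i < c.weights.size(); ++i)
      {
        c.weights[i] -= stepSize * c.gradient[i];
        c.gradient[i] = 0.0;
      }
    }

    int main()
    {
      Connection c(2);

      // Two samples' gradients are accumulated, then applied in one update.
      Accumulate(c, {0.1, 0.2});
      Accumulate(c, {0.3, 0.4});
      ApplyGradients(c, 0.01);

      return 0;
    }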

