[mlpack-git] master: Take a mean gradient step over the batchsize. (a2db22d)

gitdub at big.cc.gt.atl.ga.us
Tue Jan 20 04:18:25 EST 2015


Repository : https://github.com/mlpack/mlpack

On branch  : master
Link       : https://github.com/mlpack/mlpack/compare/6404effcf41eb976fdf9d45d0903b765c0e04dd0...a2db22d03d29cd2362d4e3a50640b840cb9c59a1

>---------------------------------------------------------------

commit a2db22d03d29cd2362d4e3a50640b840cb9c59a1
Author: Marcus Edel <marcus.edel at fu-berlin.de>
Date:   Tue Jan 20 10:18:17 2015 +0100

    Take a mean gradient step over the batchsize.


>---------------------------------------------------------------

a2db22d03d29cd2362d4e3a50640b840cb9c59a1
 src/mlpack/methods/ann/ffnn.hpp | 98 +++++++++++++++++++++++++++++------------
 1 file changed, 71 insertions(+), 27 deletions(-)

diff --git a/src/mlpack/methods/ann/ffnn.hpp b/src/mlpack/methods/ann/ffnn.hpp
index 56c35df..4b5e951 100644
--- a/src/mlpack/methods/ann/ffnn.hpp
+++ b/src/mlpack/methods/ann/ffnn.hpp
@@ -65,10 +65,12 @@ class FFNN
                      VecType& error)
     {
       ResetActivations(network);
-      std::get<0>(
-            std::get<0>(network)).InputLayer().InputActivation() = input;
+      seqNum++;
 
-      FeedForward(network, target, error);
+      std::get<0>(std::get<0>(network)).InputLayer().InputActivation() = input;
+
+      FeedForward(network);
+      OutputError(network, target, error);
     }
 
     /**
@@ -101,6 +103,26 @@ class FFNN
 
       // Reset the overall error.
       err = 0;
+      seqNum = 0;
+    }
+
+    /**
+     * Evaluate the network using the given input. The output activation is
+     * stored in the output parameter.
+     *
+     * @param input Input data used to evaluate the network.
+     * @param output Output data used to store the output activation.
+     * @tparam VecType Type of data (arma::colvec, arma::mat or arma::sp_mat).
+     */
+    template <typename VecType>
+    void Predict(const VecType& input, VecType& output)
+    {
+      ResetActivations(network);
+
+      std::get<0>(std::get<0>(network)).InputLayer().InputActivation() = input;
+
+      FeedForward(network);
+      OutputPrediction(network, output);
     }
 
     //! Get the error of the network.
@@ -155,32 +177,13 @@ class FFNN
      * connections, and one for the general case which peels off the first type
      * and recurses, as usual with variadic function templates.
      */
-    template<size_t I = 0,  typename VecType, typename... Tp>
+    template<size_t I = 0, typename... Tp>
     typename std::enable_if<I == sizeof...(Tp), void>::type
-    FeedForward(std::tuple<Tp...>& t,
-                const VecType& target,
-                VecType& error)
-
-    {
-      // Calculate and store the output error.
-      outputLayer.calculateError(std::get<0>(
-          std::get<I - 1>(t)).OutputLayer().InputActivation(), target,
-          error);
-
-      // Measure the network's performance with the specified performance
-      // function.
-      err += PerformanceFunction::error(std::get<0>(
-          std::get<I - 1>(t)).OutputLayer().InputActivation(), target);
+    FeedForward(std::tuple<Tp...>& /* unused */) { }
 
-      // Update the final training error.
-      trainError = err;
-    }
-
-    template<size_t I = 0, typename VecType, typename... Tp>
+    template<size_t I = 0, typename... Tp>
     typename std::enable_if<I < sizeof...(Tp), void>::type
-    FeedForward(std::tuple<Tp...>& t,
-                const VecType& target,
-                VecType& error)
+    FeedForward(std::tuple<Tp...>& t)
     {
       Forward(std::get<I>(t));
 
@@ -189,7 +192,7 @@ class FFNN
           std::get<0>(std::get<I>(t)).OutputLayer().InputActivation(),
           std::get<0>(std::get<I>(t)).OutputLayer().InputActivation());
 
-      FeedForward<I + 1, VecType, Tp...>(t, target, error);
+      FeedForward<I + 1, Tp...>(t);
     }
 
     /**
@@ -211,6 +214,41 @@ class FFNN
       Forward<I + 1, Tp...>(t);
     }
 
+    /*
+     * Calculate the output error and update the overall error.
+     */
+    template<typename VecType, typename... Tp>
+    void OutputError(std::tuple<Tp...>& t,
+                     const VecType& target,
+                     VecType& error)
+    {
+      // Calculate and store the output error.
+      outputLayer.calculateError(std::get<0>(
+          std::get<sizeof...(Tp) - 1>(t)).OutputLayer().InputActivation(),
+          target, error);
+
+      // Measure the network's performance with the specified performance
+      // function.
+      err += PerformanceFunction::error(std::get<0>(
+          std::get<sizeof...(Tp) - 1>(t)).OutputLayer().InputActivation(),
+          target);
+
+      // Update the final training error.
+      trainError = err;
+    }
+
+    /*
+     * Calculate and store the output activation.
+     */
+    template<typename VecType, typename... Tp>
+    void OutputPrediction(std::tuple<Tp...>& t, VecType& output)
+    {
+      // Calculate and store the output prediction.
+      outputLayer.outputClass(std::get<0>(
+          std::get<sizeof...(Tp) - 1>(t)).OutputLayer().InputActivation(),
+          output);
+    }
+
     /**
      * Run a single iteration of the feed backward algorithm, using the given
      * error of the output layer. Note that we iterate backward through the
@@ -357,6 +395,9 @@ class FFNN
     typename std::enable_if<I < sizeof...(Tp), void>::type
     Apply(std::tuple<Tp...>& t)
     {
+      if (seqNum > 1)
+        gradients[gradientNum] /= seqNum;
+
       std::get<I>(t).Optimzer().UpdateWeights(std::get<I>(t).Weights(),
           gradients[gradientNum], err);
 
@@ -426,6 +467,9 @@ class FFNN
 
     //! The index of the currently active gradient.
     size_t gradientNum;
+
+    //! The number of input sequences processed since the last reset.
+    size_t seqNum;
 }; // class FFNN
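
The refactoring above splits the forward pass out of the error computation, so
that training (FeedForward followed by OutputError) and the new Predict method
(FeedForward followed by OutputPrediction) share the same forward code. Below
is a minimal standalone sketch of that pattern, using plain Armadillo rather
than mlpack's layer tuples; the names Forward, TrainError, and Predict are
illustrative assumptions, not mlpack's API:

#include <armadillo>

// One sigmoid layer stands in for the variadic layer tuple in ffnn.hpp.
static arma::vec Forward(const arma::mat& weights, const arma::vec& input)
{
  arma::vec z = weights * input;
  return 1.0 / (1.0 + arma::exp(-z));
}

// Training path: forward pass, then the output error (target - activation),
// analogous to FeedForward() followed by OutputError() in the diff.
static arma::vec TrainError(const arma::mat& weights,
                            const arma::vec& input,
                            const arma::vec& target)
{
  return target - Forward(weights, input);
}

// Prediction path: the same forward pass with no target required, analogous
// to the new Predict() calling FeedForward() and then OutputPrediction().
static arma::vec Predict(const arma::mat& weights, const arma::vec& input)
{
  return Forward(weights, input);
}

int main()
{
  arma::mat weights(2, 3, arma::fill::randu);
  arma::vec input(3, arma::fill::randu);
  arma::vec target = {1.0, 0.0};

  TrainError(weights, input, target).print("output error:");
  Predict(weights, input).print("prediction:");
  return 0;
}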
 
 
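The change in Apply() is what the commit title describes: gradients are summed
over seqNum inputs, then divided by seqNum, so the optimizer takes a mean
gradient step over the batch rather than a summed one. A minimal sketch of
that idea, assuming a plain gradient-descent update; accumulatedGradient,
stepSize, and batchSize are illustrative names, not mlpack identifiers:

#include <armadillo>

int main()
{
  arma::mat weights(3, 3, arma::fill::randu);
  arma::mat accumulatedGradient(3, 3, arma::fill::zeros);

  const size_t batchSize = 4;   // plays the role of seqNum in the diff
  const double stepSize = 0.1;

  // Pretend each backward pass produced a gradient; sum them over the batch.
  for (size_t i = 0; i < batchSize; ++i)
    accumulatedGradient += arma::mat(3, 3, arma::fill::randu);

  // The new step in Apply(): average the summed gradient over the batch
  // size before handing it to the optimizer.
  if (batchSize > 1)
    accumulatedGradient /= batchSize;

  // A plain gradient-descent update stands in for
  // Optimzer().UpdateWeights(...).
  weights -= stepSize * accumulatedGradient;

  weights.print("updated weights:");
  return 0;
}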


